Rewrite history fetching (without CSV)
Very simple for now: it does not handle pagination (moving on to the next page) or transaction categorization (as the CSV export does).
This commit is contained in:
parent
99f8a03895
commit
3199c0dd3c
2 changed files with 99 additions and 6 deletions
|
|
@ -19,7 +19,7 @@
|
||||||
|
|
||||||
|
|
||||||
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
|
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
|
||||||
from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage
|
from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage, HistoryPage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['Paypal']
|
__all__ = ['Paypal']
|
||||||
|
|
@ -36,6 +36,8 @@ class Paypal(BaseBrowser):
|
||||||
'/cgi-bin/webscr\?cmd=_login-processing.+$': UselessPage,
|
'/cgi-bin/webscr\?cmd=_login-processing.+$': UselessPage,
|
||||||
'/cgi-bin/webscr\?cmd=_account&nav=0.0$': AccountPage,
|
'/cgi-bin/webscr\?cmd=_account&nav=0.0$': AccountPage,
|
||||||
'/cgi-bin/webscr\?cmd=_history-download&nav=0.3.1$': DownloadHistoryPage,
|
'/cgi-bin/webscr\?cmd=_history-download&nav=0.3.1$': DownloadHistoryPage,
|
||||||
|
'/cgi-bin/webscr\?cmd=_history&nav=0.3.0$': HistoryPage,
|
||||||
|
'/cgi-bin/webscr\?cmd=_history&dispatch=[a-z0-9]+$': HistoryPage,
|
||||||
'/cgi-bin/webscr\?dispatch=[a-z0-9]+$': (SubmitPage, HistoryParser()),
|
'/cgi-bin/webscr\?dispatch=[a-z0-9]+$': (SubmitPage, HistoryParser()),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -73,11 +75,21 @@ class Paypal(BaseBrowser):
|
||||||
return self.page.get_account(_id)
|
return self.page.get_account(_id)
|
||||||
|
|
||||||
def get_history(self, account):
|
def get_history(self, account):
|
||||||
self.download_history()
|
self.history()
|
||||||
for transaction in self.page.iter_transactions(account):
|
for transaction in self.page.iter_transactions(account):
|
||||||
yield transaction
|
yield transaction
|
||||||
|
|
||||||
|
def history(self):
    """
    Open the HTML history page and apply its date filter.

    After this call, self.page is expected to be the filtered HistoryPage.

    Raises AssertionError if the browser is not on a HistoryPage, either
    right after loading the URL or after the filter form was submitted.
    """
    self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0')
    # Check where we landed *before* using the page (same order as
    # download_history()): filter() is a HistoryPage method, so any other
    # page would otherwise fail with an unhelpful AttributeError.
    assert self.is_on_page(HistoryPage)
    self.page.filter()
    # filter() submits the history form; make sure the result is still
    # a history page.
    assert self.is_on_page(HistoryPage)
|
||||||
|
|
||||||
def download_history(self):
|
def download_history(self):
|
||||||
|
"""
|
||||||
|
Download CSV history.
|
||||||
|
However, it is not normalized, and sometimes the download is refused
|
||||||
|
and sent later by mail.
|
||||||
|
"""
|
||||||
self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
|
self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
|
||||||
assert self.is_on_page(DownloadHistoryPage)
|
assert self.is_on_page(DownloadHistoryPage)
|
||||||
self.page.download()
|
self.page.download()
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,8 @@ from decimal import Decimal
|
||||||
import re
|
import re
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
|
import dateutil.parser
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage, BrokenPageError
|
from weboob.tools.browser import BasePage, BrokenPageError
|
||||||
from weboob.tools.parsers.csvparser import CsvParser
|
from weboob.tools.parsers.csvparser import CsvParser
|
||||||
from weboob.tools.misc import to_unicode
|
from weboob.tools.misc import to_unicode
|
||||||
|
|
@ -121,14 +123,14 @@ class AccountPage(BasePage):
|
||||||
class DownloadHistoryPage(BasePage):
|
class DownloadHistoryPage(BasePage):
|
||||||
def download(self):
|
def download(self):
|
||||||
today = datetime.date.today()
|
today = datetime.date.today()
|
||||||
|
start = today - datetime.timedelta(days=90)
|
||||||
self.browser.select_form(name='form1')
|
self.browser.select_form(name='form1')
|
||||||
# download an entire year
|
|
||||||
self.browser['to_c'] = str(today.year)
|
self.browser['to_c'] = str(today.year)
|
||||||
self.browser['to_a'] = str(today.month)
|
self.browser['to_a'] = str(today.month)
|
||||||
self.browser['to_b'] = str(today.day)
|
self.browser['to_b'] = str(today.day)
|
||||||
self.browser['from_c'] = str(today.year - 1)
|
self.browser['from_c'] = str(start.year)
|
||||||
self.browser['from_a'] = str(today.month)
|
self.browser['from_a'] = str(start.month)
|
||||||
self.browser['from_b'] = str(today.day)
|
self.browser['from_b'] = str(start.day)
|
||||||
|
|
||||||
self.browser['custom_file_type'] = ['comma_balaffecting']
|
self.browser['custom_file_type'] = ['comma_balaffecting']
|
||||||
self.browser['latest_completed_file_type'] = ['']
|
self.browser['latest_completed_file_type'] = ['']
|
||||||
|
|
@ -211,3 +213,82 @@ class HistoryParser(CsvParser):
|
||||||
|
|
||||||
class UselessPage(BasePage):
|
class UselessPage(BasePage):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class HistoryPage(BasePage):
    """
    HTML transaction history page (used instead of the CSV download).

    The page declares its calendar locale (field order, date delimiter and
    month names) in inline JavaScript; guess_format() reads those settings
    so that filter() can fill the date range form and parse() can read the
    transaction dates.
    """

    # Calendar locale declarations looked up in the inline <script> tags.
    # Compiled once: guess_format() is called by both filter() and parse().
    # NOTE(review): these are applied with match(), i.e. anchored at the
    # very start of each script line -- confirm the page never indents them.
    _POSITION_RE = re.compile(r'PAYPAL\.widget\.CalendarLocales\.MDY_([A-Z]+)_POSITION\s*=\s*(\d)')
    _DELIMITER_RE = re.compile(r'PAYPAL\.widget\.CalendarLocales\.DATE_DELIMITER\s*=\s*"(.)"')
    _MONTHS_RE = re.compile(r'PAYPAL\.widget\.CalendarLocales\.MONTH_NAMES\s*=\s*\[(.+)\]')

    # strftime directive for each field name used in MDY_<FIELD>_POSITION.
    _STRFTIME_FIELDS = {'DAY': '%d', 'MONTH': '%m', 'YEAR': '%Y'}

    # English month names, in calendar order, substituted for the localized
    # ones before handing the date text to dateutil.
    _ENGLISH_MONTHS = ('January', 'February', 'March', 'April',
                       'May', 'June', 'July', 'August',
                       'September', 'October', 'November', 'December')

    def guess_format(self):
        """
        Guess the date/time formats of the page from its inline scripts.

        Returns a (date_format, time_format, months) tuple:
          - date_format: strftime format built from the declared field
            order and delimiter ('/' when no delimiter is declared);
          - time_format: 12-hour format when date_format is "%m/%d/%Y",
            24-hour format otherwise;
          - months: list of the declared month names, lowercased and cut
            to their first 3 characters (empty list if none is declared).

        Raises BrokenPageError if the field order cannot be found.
        """
        positions = {}
        delim = '/'
        months = []
        for script in self.document.xpath('//script'):
            for line in script.text_content().splitlines():
                m = self._POSITION_RE.match(line)
                if m:
                    field, index = m.groups()
                    # Ignore field names we have no strftime directive for.
                    if field in self._STRFTIME_FIELDS:
                        positions[int(index)] = self._STRFTIME_FIELDS[field]
                    continue

                m = self._DELIMITER_RE.match(line)
                if m:
                    delim = m.group(1)
                    continue

                m = self._MONTHS_RE.match(line)
                if m:
                    # Strip whitespace *before* the quotes: entries after a
                    # ", " separator start with a space, which would
                    # otherwise shield their opening quote from strip().
                    months = [name.strip().strip('\'"').strip().lower()[0:3]
                              for name in m.group(1).split(',')]

        try:
            ordered = [positions[i] for i in range(3)]
        except KeyError:
            raise BrokenPageError('Unable to find the date fields order in the page scripts')

        date_format = delim.join(ordered)
        if date_format == "%m/%d/%Y":
            time_format = "%I:%M:%S %p"
        else:
            time_format = "%H:%M:%S"
        return date_format, time_format, months

    def filter(self):
        """
        Submit the history form restricted to the last 90 days.

        The dates are written with the format guessed from the page.
        """
        date_format = self.guess_format()[0]
        today = datetime.date.today()
        start = today - datetime.timedelta(days=90)
        self.browser.select_form(name='history')
        self.browser['dateoption'] = ['dateselect']
        self.browser['from_date'] = start.strftime(date_format)
        self.browser['to_date'] = today.strftime(date_format)
        self.browser.submit(name='show')

    def parse(self):
        """
        Iterate over the transactions listed in the transaction table.

        Rows without a "gross" cell, or whose gross amount contains no
        digit, are skipped. Each yielded Transaction carries a private
        _currency attribute, which iter_transactions() filters on.
        """
        months = self.guess_format()[2]
        for row in self.document.xpath('//table[@id="transactionTable"]/tbody/tr'):
            gross_cells = row.xpath('.//td[@headers="gross"]')
            if not gross_cells:
                # Not a transaction row.
                continue
            amount = gross_cells[-1].text_content().strip()
            if not re.search(r'\d', amount):
                continue
            currency = Account.get_currency(amount)
            amount = clean_amount(amount)

            # The identifier is the last word of the accessibility label.
            idtext = row.xpath('.//td[@class="detailsNoPrint"]//span[@class="accessAid"]')[0] \
                .text_content().replace(u'\xa0', u' ').strip().rpartition(' ')[-1]
            trans = Transaction(idtext)
            trans.amount = amount
            trans._currency = currency

            datetext = row.xpath('.//td[@class="dateInfo"]')[0].text_content().strip()
            # NOTE(review): the replacement is a case-sensitive match on
            # lowercased 3-letter prefixes, exactly as before -- confirm it
            # does what is intended for non-English locales.
            for local, english in zip(months, self._ENGLISH_MONTHS):
                # An empty entry would make replace() insert the English
                # name between every character.
                if local:
                    datetext = datetext.replace(local, english)
            date = dateutil.parser.parse(datetext)
            trans.date = date
            trans.rdate = date

            trans.label = to_unicode(row.xpath('.//td[@class="emailInfo"]')[0].text_content().strip())
            trans.raw = to_unicode(row.xpath('.//td[@class="paymentTypeInfo"]')[0].text_content().strip()) \
                + u' ' + trans.label

            yield trans

    def iter_transactions(self, account):
        """
        Iterate over the parsed transactions whose currency is the one of
        the given account.
        """
        for trans in self.parse():
            if trans._currency == account.currency:
                yield trans
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue