Rewrite history fetching (without CSV)

Very simple for now: it does not handle going to the next page, or
categorizations (as the CSV export does).
This commit is contained in:
Laurent Bachelier 2013-02-11 18:46:15 +01:00
commit 3199c0dd3c
2 changed files with 99 additions and 6 deletions

View file

@ -19,7 +19,7 @@
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage, HistoryPage
__all__ = ['Paypal'] __all__ = ['Paypal']
@ -36,6 +36,8 @@ class Paypal(BaseBrowser):
'/cgi-bin/webscr\?cmd=_login-processing.+$': UselessPage, '/cgi-bin/webscr\?cmd=_login-processing.+$': UselessPage,
'/cgi-bin/webscr\?cmd=_account&nav=0.0$': AccountPage, '/cgi-bin/webscr\?cmd=_account&nav=0.0$': AccountPage,
'/cgi-bin/webscr\?cmd=_history-download&nav=0.3.1$': DownloadHistoryPage, '/cgi-bin/webscr\?cmd=_history-download&nav=0.3.1$': DownloadHistoryPage,
'/cgi-bin/webscr\?cmd=_history&nav=0.3.0$': HistoryPage,
'/cgi-bin/webscr\?cmd=_history&dispatch=[a-z0-9]+$': HistoryPage,
'/cgi-bin/webscr\?dispatch=[a-z0-9]+$': (SubmitPage, HistoryParser()), '/cgi-bin/webscr\?dispatch=[a-z0-9]+$': (SubmitPage, HistoryParser()),
} }
@ -73,11 +75,21 @@ class Paypal(BaseBrowser):
return self.page.get_account(_id) return self.page.get_account(_id)
def get_history(self, account):
    """
    Iterate over the transactions of the given account.

    The history page is opened (and filtered) through history(), then
    every transaction the current page yields for this account is
    passed along unchanged.
    """
    self.history()
    transactions = self.page.iter_transactions(account)
    for entry in transactions:
        yield entry
def history(self):
    """
    Open the HTML history page and apply its date filter.

    The page type is asserted both before and after the filter form is
    submitted.
    """
    self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0')
    # Check the page type before using it (as download_history does):
    # if we landed elsewhere, self.page has no filter() method and the
    # failure would be a confusing AttributeError.
    assert self.is_on_page(HistoryPage)
    self.page.filter()
    # Submitting the filter form loads a new page; make sure it is still
    # a history page before the caller iterates over it.
    assert self.is_on_page(HistoryPage)
def download_history(self): def download_history(self):
"""
Download CSV history.
However, it is not normalized, and sometimes the download is refused
and sent later by mail.
"""
self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1') self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
assert self.is_on_page(DownloadHistoryPage) assert self.is_on_page(DownloadHistoryPage)
self.page.download() self.page.download()

View file

@ -21,6 +21,8 @@ from decimal import Decimal
import re import re
import datetime import datetime
import dateutil.parser
from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.parsers.csvparser import CsvParser from weboob.tools.parsers.csvparser import CsvParser
from weboob.tools.misc import to_unicode from weboob.tools.misc import to_unicode
@ -121,14 +123,14 @@ class AccountPage(BasePage):
class DownloadHistoryPage(BasePage): class DownloadHistoryPage(BasePage):
def download(self): def download(self):
today = datetime.date.today() today = datetime.date.today()
start = today - datetime.timedelta(days=90)
self.browser.select_form(name='form1') self.browser.select_form(name='form1')
# download an entire year
self.browser['to_c'] = str(today.year) self.browser['to_c'] = str(today.year)
self.browser['to_a'] = str(today.month) self.browser['to_a'] = str(today.month)
self.browser['to_b'] = str(today.day) self.browser['to_b'] = str(today.day)
self.browser['from_c'] = str(today.year - 1) self.browser['from_c'] = str(start.year)
self.browser['from_a'] = str(today.month) self.browser['from_a'] = str(start.month)
self.browser['from_b'] = str(today.day) self.browser['from_b'] = str(start.day)
self.browser['custom_file_type'] = ['comma_balaffecting'] self.browser['custom_file_type'] = ['comma_balaffecting']
self.browser['latest_completed_file_type'] = [''] self.browser['latest_completed_file_type'] = ['']
@ -211,3 +213,82 @@ class HistoryParser(CsvParser):
class UselessPage(BasePage):
    """
    Page with no behaviour of its own: it adds nothing to BasePage.
    """
class HistoryPage(BasePage):
    """
    HTML transaction history page (history fetched without the CSV export).

    The date format and month names are read from the
    PAYPAL.widget.CalendarLocales settings found in the page's inline
    scripts; they are used to fill the date filter form and to read the
    dates of the transaction table.
    """

    # Settings looked for at the start of each inline script line.
    _POSITION_RE = re.compile(
        r'PAYPAL\.widget\.CalendarLocales\.MDY_([A-Z]+)_POSITION\s*=\s*(\d)')
    _DELIMITER_RE = re.compile(
        r'PAYPAL\.widget\.CalendarLocales\.DATE_DELIMITER\s*=\s*"(.)"')
    _MONTHS_RE = re.compile(
        r'PAYPAL\.widget\.CalendarLocales\.MONTH_NAMES\s*=\s*\[(.+)\]')

    # strftime directive for each field name used in MDY_<FIELD>_POSITION.
    _STRFTIME_FIELDS = {'DAY': '%d', 'MONTH': '%m', 'YEAR': '%Y'}

    # English month names, in calendar order, understood by dateutil.
    _ENGLISH_MONTHS = ('January', 'February', 'March', 'April',
                       'May', 'June', 'July', 'August',
                       'September', 'October', 'November', 'December')

    def guess_format(self):
        """
        Guess the date/time formats from the page's inline scripts.

        Returns a (date_format, time_format, months) tuple:
        date_format and time_format are strftime patterns, months is the
        list of lowercased, 3-character month abbreviations declared by
        the page (empty when the page does not declare any).

        Raises BrokenPageError if the day/month/year positions cannot be
        found.
        """
        pos = {}
        delim = '/'
        months = []
        for script in self.document.xpath('//script'):
            for line in script.text_content().splitlines():
                # Script lines may be indented; match() anchors at the
                # start of the string, so drop leading whitespace first.
                line = line.lstrip()
                m = self._POSITION_RE.match(line)
                if m:
                    pos[int(m.group(2))] = self._STRFTIME_FIELDS[m.group(1)]
                    continue
                m = self._DELIMITER_RE.match(line)
                if m:
                    delim = m.group(1)
                    continue
                m = self._MONTHS_RE.match(line)
                if m:
                    # Strip whitespace first, then the quotes: otherwise
                    # an entry preceded by a space keeps its opening quote.
                    months = [month.strip().strip('\'"').lower()[0:3]
                              for month in m.group(1).split(',')]
        try:
            date_format = delim.join((pos[0], pos[1], pos[2]))
        except KeyError:
            raise BrokenPageError('Unable to guess the date format of the history page')
        if date_format == "%m/%d/%Y":
            time_format = "%I:%M:%S %p"
        else:
            time_format = "%H:%M:%S"
        return date_format, time_format, months

    def filter(self):
        """
        Submit the history form restricted to the last 90 days.
        """
        date_format = self.guess_format()[0]
        today = datetime.date.today()
        start = today - datetime.timedelta(days=90)
        self.browser.select_form(name='history')
        self.browser['dateoption'] = ['dateselect']
        self.browser['from_date'] = start.strftime(date_format)
        self.browser['to_date'] = today.strftime(date_format)
        self.browser.submit(name='show')

    def parse(self):
        """
        Yield a Transaction for each row of the transaction table that
        has a gross amount containing at least one digit.
        """
        months = self.guess_format()[2]
        for row in self.document.xpath('//table[@id="transactionTable"]/tbody/tr'):
            gross = row.xpath('.//td[@headers="gross"]')
            if not gross:
                # Row without a gross amount cell: nothing to parse.
                continue
            amount = gross[-1].text_content().strip()
            if not re.search(r'\d', amount):
                continue
            currency = Account.get_currency(amount)
            amount = clean_amount(amount)

            idtext = row.xpath('.//td[@class="detailsNoPrint"]//span[@class="accessAid"]')[0] \
                .text_content().replace(u'\xa0', u' ').strip().rpartition(' ')[-1]
            trans = Transaction(idtext)
            trans.amount = amount
            trans._currency = currency

            datetext = row.xpath('.//td[@class="dateInfo"]')[0].text_content().strip()
            # Replace the page's month abbreviations by English month names
            # before handing the text to dateutil.
            # NOTE(review): abbreviations are lowercased by guess_format(),
            # so only lowercase month text in the cell is replaced.
            for local, english in zip(months, self._ENGLISH_MONTHS):
                if local:
                    datetext = datetext.replace(local, english)
            date = dateutil.parser.parse(datetext)
            trans.date = date
            trans.rdate = date

            trans.label = to_unicode(row.xpath('.//td[@class="emailInfo"]')[0].text_content().strip())
            trans.raw = to_unicode(row.xpath('.//td[@class="paymentTypeInfo"]')[0].text_content().strip()) \
                + u' ' + trans.label

            yield trans

    def iter_transactions(self, account):
        """
        Yield the parsed transactions whose currency is the account's.
        """
        for trans in self.parse():
            if trans._currency == account.currency:
                yield trans