Rewrite history fetching (without CSV)

Very simple for now, does not handle going on the next page, or categorizations (like CSV does).
2013-02-11 18:46:15 +01:00 · 2013-02-11 18:46:15 +01:00 · 3199c0dd3c
commit 3199c0dd3c
parent 99f8a03895
2 changed files with 99 additions and 6 deletions
--- a/modules/paypal/browser.py
+++ b/modules/paypal/browser.py
@ -19,7 +19,7 @@


 from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
-from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage
+from .pages import LoginPage, AccountPage, DownloadHistoryPage, SubmitPage, HistoryParser, UselessPage, HistoryPage


 __all__ = ['Paypal']
@ -36,6 +36,8 @@ class Paypal(BaseBrowser):
        '/cgi-bin/webscr\?cmd=_login-processing.+$':    UselessPage,
        '/cgi-bin/webscr\?cmd=_account&nav=0.0$':  AccountPage,
        '/cgi-bin/webscr\?cmd=_history-download&nav=0.3.1$':  DownloadHistoryPage,
+        '/cgi-bin/webscr\?cmd=_history&nav=0.3.0$':  HistoryPage,
+        '/cgi-bin/webscr\?cmd=_history&dispatch=[a-z0-9]+$':  HistoryPage,
        '/cgi-bin/webscr\?dispatch=[a-z0-9]+$': (SubmitPage, HistoryParser()),
    }

@ -73,11 +75,21 @@ class Paypal(BaseBrowser):
        return self.page.get_account(_id)

    def get_history(self, account):
-        self.download_history()
+        self.history()
        for transaction in self.page.iter_transactions(account):
            yield transaction

+    def history(self):
+        self.location('/en/cgi-bin/webscr?cmd=_history&nav=0.3.0')
+        self.page.filter()
+        assert self.is_on_page(HistoryPage)
+
    def download_history(self):
+        """
+        Download CSV history.
+        However, it is not normalized, and sometimes the download is refused
+        and sent later by mail.
+        """
        self.location('/en/cgi-bin/webscr?cmd=_history-download&nav=0.3.1')
        assert self.is_on_page(DownloadHistoryPage)
        self.page.download()
--- a/modules/paypal/pages.py
+++ b/modules/paypal/pages.py
@ -21,6 +21,8 @@ from decimal import Decimal
 import re
 import datetime

+import dateutil.parser
+
 from weboob.tools.browser import BasePage, BrokenPageError
 from weboob.tools.parsers.csvparser import CsvParser
 from weboob.tools.misc import to_unicode
@ -121,14 +123,14 @@ class AccountPage(BasePage):
 class DownloadHistoryPage(BasePage):
    def download(self):
        today = datetime.date.today()
+        start = today - datetime.timedelta(days=90)
        self.browser.select_form(name='form1')
-        # download an entire year
        self.browser['to_c'] = str(today.year)
        self.browser['to_a'] = str(today.month)
        self.browser['to_b'] = str(today.day)
-        self.browser['from_c'] = str(today.year - 1)
-        self.browser['from_a'] = str(today.month)
-        self.browser['from_b'] = str(today.day)
+        self.browser['from_c'] = str(start.year)
+        self.browser['from_a'] = str(start.month)
+        self.browser['from_b'] = str(start.day)

        self.browser['custom_file_type'] = ['comma_balaffecting']
        self.browser['latest_completed_file_type'] = ['']
@ -211,3 +213,82 @@ class HistoryParser(CsvParser):

 class UselessPage(BasePage):
    pass
+
+
+class HistoryPage(BasePage):
+    def guess_format(self):
+        rp = re.compile('PAYPAL\.widget\.CalendarLocales\.MDY_([A-Z]+)_POSITION\s*=\s*(\d)')
+        rd = re.compile('PAYPAL\.widget\.CalendarLocales\.DATE_DELIMITER\s*=\s*"(.)"')
+        rm = re.compile('PAYPAL\.widget\.CalendarLocales\.MONTH_NAMES\s*=\s*\[(.+)\]')
+        translate = {'DAY': '%d', 'MONTH': '%m', 'YEAR': '%Y'}
+        pos = {}
+        delim = '/'
+        months = {}
+        for script in self.document.xpath('//script'):
+            for line in script.text_content().splitlines():
+                m = rp.match(line)
+                if m and m.groups():
+                    pos[int(m.groups()[1])] = translate[m.groups()[0]]
+                else:
+                    m = rd.match(line)
+                    if m:
+                        delim = m.groups()[0]
+                    else:
+                        m = rm.match(line)
+                        if m:
+                            months = [month.strip("'").strip().lower()[0:3]
+                                      for month
+                                      in m.groups()[0].split(',')]
+        date_format = delim.join((pos[0], pos[1], pos[2]))
+        if date_format == "%m/%d/%Y":
+            time_format = "%I:%M:%S %p"
+        else:
+            time_format = "%H:%M:%S"
+        return date_format, time_format, months
+
+    def filter(self):
+        date_format = self.guess_format()[0]
+        today = datetime.date.today()
+        start = today - datetime.timedelta(days=90)
+        self.browser.select_form(name='history')
+        self.browser['dateoption'] = ['dateselect']
+        self.browser['from_date'] = start.strftime(date_format)
+        self.browser['to_date'] = today.strftime(date_format)
+        self.browser.submit(name='show')
+
+    def parse(self):
+        emonths = ['January', 'February', 'March', 'April',
+                   'May', 'June', 'July', 'August',
+                   'September', 'October', 'November', 'December']
+        date_format, time_format, months = self.guess_format()
+        for row in self.document.xpath('//table[@id="transactionTable"]/tbody/tr'):
+            amount = row.xpath('.//td[@headers="gross"]')[-1].text_content().strip()
+            if re.search('\d', amount):
+                currency = Account.get_currency(amount)
+                amount = clean_amount(amount)
+            else:
+                continue
+
+            idtext = row.xpath('.//td[@class="detailsNoPrint"]//span[@class="accessAid"]')[0] \
+                .text_content().replace(u'\xa0', u' ').strip().rpartition(' ')[-1]
+            trans = Transaction(idtext)
+            trans.amount = amount
+            trans._currency = currency
+
+            datetext = row.xpath('.//td[@class="dateInfo"]')[0].text_content().strip()
+            for i in range(0, 12):
+                datetext = datetext.replace(months[i], emonths[i])
+            date = dateutil.parser.parse(datetext)
+            trans.date = date
+            trans.rdate = date
+
+            trans.label = to_unicode(row.xpath('.//td[@class="emailInfo"]')[0].text_content().strip())
+            trans.raw = to_unicode(row.xpath('.//td[@class="paymentTypeInfo"]')[0].text_content().strip()) \
+                + u' ' + trans.label
+
+            yield trans
+
+    def iter_transactions(self, account):
+        for trans in self.parse():
+            if trans._currency == account.currency:
+                yield trans