hsbc to browser2!

2014-03-20 18:26:45 +01:00 · 2014-03-20 18:26:45 +01:00 · 2766983a33
commit 2766983a33
parent 4ac38b89cf
6 changed files with 212 additions and 339 deletions
--- a/modules/hsbc/backend.py
+++ b/modules/hsbc/backend.py
@ -59,14 +59,5 @@ class HSBCBackend(BaseBackend, ICapBank):
            raise AccountNotFound()
    def iter_history(self, account):
-        with self.browser:
+        for tr in self.browser.get_history(account):
-            for tr in self.browser.get_history(account):
+            yield tr
                # If there are deferred cards, strip CB invoices.
                if not tr._coming and (not tr.raw.startswith('FACTURES CB') or len(account._card_links) == 0):
                    yield tr
    def iter_coming(self, account):
        """Yield the transactions of ``account`` whose ``_coming`` flag is truthy.

        The history is fetched through ``self.browser.get_history(account)``
        while the browser's context manager is held.
        """
        with self.browser:
            for operation in self.browser.get_history(account):
                if not operation._coming:
                    continue
                yield operation
--- a/modules/hsbc/browser.py
+++ b/modules/hsbc/browser.py
@ -19,111 +19,60 @@
 from datetime import timedelta
 import urllib
 import re
 from weboob.tools.date import LinearDateGuesser
-from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BasePage, BrokenPageError
+from weboob.tools.browser import  BrowserIncorrectPassword
-from weboob.tools.decorators import retry
+from weboob.tools.browser2 import LoginBrowser, URL, need_login
-from .pages.accounts import AccountsListPage, CPTHistoryPage, CardHistoryPage
+from .pages import AccountsPage, CBOperationPage, CPTOperationPage, LoginPage
 from .pages.login import LoginPage
 __all__ = ['HSBC']
-class NotLoggedPage(BasePage):
+class HSBC(LoginBrowser):
-    pass
+    VERIFY = False
-
+    BASEURL = 'https://client.hsbc.fr'
 class HSBC(BaseBrowser):
    DOMAIN = 'client.hsbc.fr'
    PROTOCOL = 'https'
    CERTHASH = '48d84a782728eeeb622e9ff721688365e24f555ae1aec49b3be33831c7fe24e6'
-    ENCODING = None # refer to the HTML encoding
+    connection =        URL('https://www.hsbc.fr/1/2/hsbc-france/particuliers/connexion', LoginPage)
-    PAGES = {'https://client.hsbc.fr/session_absente.html':                     NotLoggedPage,
+    login =             URL('https://www.hsbc.fr/1/*', LoginPage)
-             'https://client.hsbc.fr/cgi-bin/emcgi.*\?.*debr=COMPTES_PAN':      AccountsListPage,
+    cptPage =            URL('/cgi-bin/emcgi.*\&CPT_IdPrestation.*',
-             'https://client.hsbc.fr/cgi-bin/emcgi.*\?.*CPT_IdPrestation=.*':   CPTHistoryPage,
+                            '/cgi-bin/emcgi.*\&Ass_IdPrestation.*',
-             'https://client.hsbc.fr/cgi-bin/emcgi.*\?.*CB_IdPrestation=.*':    CardHistoryPage,
+                            CPTOperationPage)
-             'https://www.hsbc.fr/.*':                                          LoginPage,
+    cbPage =          URL('/cgi-bin/emcgi.*\&CB_IdPrestation.*',
-             'https://client.hsbc.fr/cgi-bin/emcgi':                            LoginPage,
+                             CBOperationPage)
-            }
+    accounts =          URL('/cgi-bin/emcgi', AccountsPage)
    _session = None
    def __init__(self, username, password, secret, *args, **kwargs):
        self.secret = secret
-        BaseBrowser.__init__(self, username, password, *args, **kwargs)
+        LoginBrowser.__init__(self, username, password, *args, **kwargs)
    def home(self):
-        self.login()
+        return self.login.go()
-    def is_logged(self):
+    def do_login(self):
-        return self._session is not None and not self.is_on_page((NotLoggedPage,LoginPage))
+        self.connection.stay_or_go()
        self.page.login(self.username)
-    @retry(BrokenPageError, tries=2)
+        no_secure_key_link = self.page.get_no_secure_key()
-    def login(self):
+        if not no_secure_key_link:
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)
        self._ua_handlers['_cookies'].cookiejar.clear()
        if len(self.username) == 11 and self.username.isdigit():
            self.login_france()
        else:
            self.login_world()
    def login_france(self):
        data = {'Ident': self.username}
        r = self.readurl('https://client.hsbc.fr/cgi-bin/emcgi?Appl=WEBACC', urllib.urlencode(data), if_fail='raise')
        m = re.search('sessionid=([^ "]+)', r, flags=re.MULTILINE)
        if not m:
            raise BrowserIncorrectPassword()
        self.location(no_secure_key_link)
-        self._session = m.group(1)
+        self.page.login_w_secure(self.login, self.password, self.secret)
        self.page.useless_form()
-        data = {'Secret': self.password}
+        home_url = self.page.get_frame()
-        r = self.readurl('https://client.hsbc.fr/cgi-bin/emcgi?sessionid=%s' % self._session, urllib.urlencode(data), if_fail='raise')
+        if not home_url:
        if r.find('Erreur Identification') >= 0:
            raise BrowserIncorrectPassword()
        self.location(home_url)
-        m = re.search('url = "/cgi-bin/emcgi\?sessionid=([^& "]+)&debr="', r, flags=re.MULTILINE)
+    @need_login
        if not m:
            raise BrokenPageError('Unable to find session token')
        self._session = m.group(1)
    def login_world(self):
        """Authenticate through the www.hsbc.fr form and store the session.

        Posts the identification fields, lets the resulting page submit the
        credentials, then either raises BrowserIncorrectPassword with the
        error reported by the page or saves the session in ``self._session``.
        """
        username = self.username
        form_fields = {
            'Appl': 'WEBACC',
            'CODE_ABONNE': username,
            'Ident': username,
            'ifr': 0,
            'nextPage': 'localsso.hbfr.Redirect',
            'secret': '',
            'userid': username,
        }
        payload = urllib.urlencode(form_fields)
        self.location('https://www.hsbc.fr/1/2/?idv_cmd=idv.Authentication', payload, no_login=True)
        self.page.login(self.username, self.secret, self.password)
        failure = self.page.get_error()
        if failure is None:
            self._session = self.page.get_session()
            return
        raise BrowserIncorrectPassword(failure)
    def get_accounts_list(self):
-        self.location(self.buildurl('/cgi-bin/emcgi', sessionid=self._session, debr='COMPTES_PAN'))
+        return self.accounts.stay_or_go().iter_accounts()
        return self.page.get_list()
    def get_account(self, id):
        assert isinstance(id, basestring)
-
+        l = self.get_accounts_list()
        if not self.is_on_page(AccountsListPage):
            l = self.get_accounts_list()
        else:
            l = self.page.get_list()
        for a in l:
            if a.id == id:
@ -134,25 +83,17 @@ class HSBC(BaseBrowser):
    def get_history(self, account):
        if account._link_id is None:
            return
        self.location(account._link_id)
-        for tr in self._get_history(account._link_id):
+        if self.page is None:
            return
        if self.cbPage.is_here():
            guesser = LinearDateGuesser(date_max_bump=timedelta(45))
            return self.pagination(lambda: self.page.get_history(date_guesser=guesser))
        else:
            return self._get_history()
    def _get_history(self):
        for tr in self.page.get_history():
            yield tr
        for card in account._card_links:
            for tr in self._get_history(card):
                yield tr
    def _get_history(self, link):
        """Yield the operations of every page reachable from ``link``.

        Each page is opened with ``self.location``; its operations are
        yielded, then the page's ``get_next_link()`` gives the following
        link. Iteration stops when there is no next link or when no page
        was loaded.
        """
        date_guesser = LinearDateGuesser(date_max_bump=timedelta(45))
        page_index = 0
        next_link = link
        while True:
            if next_link is None:
                break
            self.location(next_link)
            if self.page is None:
                return
            for operation in self.page.get_operations(page_index, date_guesser):
                yield operation
            next_link = self.page.get_next_link()
            page_index = page_index + 1
--- a/modules/hsbc/pages.py
+++ b/modules/hsbc/pages.py
@ -0,0 +1,169 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2010-2012 Julien Veyssier
 #
 # This file is part of weboob.
 #
 # weboob is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # weboob is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 from urlparse import urlparse, parse_qs
 import re
 from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, LoggedPage
 from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, Field, DateGuesser, TableCell
 from weboob.capabilities import NotAvailable
 from weboob.capabilities.bank import Account
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction
 class Transaction(FrenchTransaction):
    """HSBC transaction with the label patterns used to classify operations."""
    # Ordered (compiled regex, transaction type) pairs matched against the
    # raw label. Raw string literals keep the regex escapes (\d, \s, \*, \.)
    # out of Python's own string-escape processing; the pattern text is
    # unchanged.
    PATTERNS = [(re.compile(r'^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                (re.compile(r'^PRLV (?P<text>.*)'),        FrenchTransaction.TYPE_ORDER),
                (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB\s+(?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
                                                          FrenchTransaction.TYPE_CARD),
                (re.compile(r'^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE [\*\d]+'),
                                                          FrenchTransaction.TYPE_WITHDRAWAL),
                (re.compile(r'^CHEQUE( (?P<text>.*))?$'),  FrenchTransaction.TYPE_CHECK),
                (re.compile(r'^(F )?COTIS\.? (?P<text>.*)'),FrenchTransaction.TYPE_BANK),
                (re.compile(r'^(REMISE|REM CHQ) (?P<text>.*)'),FrenchTransaction.TYPE_DEPOSIT),
               ]
    # NOTE(review): CPTOperationPage.get_history sets ``_coming`` on the
    # transactions it builds, not ``_is_coming`` -- confirm which name is
    # the intended one.
    _is_coming = False
 class AccountsPage(LoggedPage, HTMLPage):
    """Page holding the accounts table (and, possibly, the main frameset)."""
    def get_frame(self):
        """Return the ``src`` of the frame named ``FrameWork``, or None if absent."""
        # The predicate has to test the @name attribute. Written as
        # ["@name=FrameWork"] it is a non-empty string literal, which XPath
        # treats as always true, so any <frame> would have matched.
        frames = self.doc.xpath(u'//frame[@name="FrameWork"]')
        if not frames:
            return None
        return frames[0].attrib['src']
    @method
    class iter_accounts(ListElement):
        """Build one Account per table row that links to a known prestation."""
        item_xpath = '//tr'
        flush_at_end = True
        class item(ItemElement):
            klass = Account
            def condition(self):
                # Only rows with more than two cells can describe an account.
                return len(self.el.xpath('./td')) > 2
            class Label(Filter):
                def filter(self, text):
                    # Drop the leading digits/spaces, then title-case the rest.
                    return text.lstrip(' 0123456789').title()
            class Type(Filter):
                def filter(self, label):
                    # The label is not inspected: every account is TYPE_UNKNOWN.
                    return Account.TYPE_UNKNOWN
            obj_id = Env('id')
            obj_label = Label(CleanText('./td[1]/a'))
            obj_coming = Env('coming')
            obj_balance = Env('balance')
            obj_currency = FrenchTransaction.Currency('./td[2] | ./td[3]')
            obj__link_id = Link('./td[1]/a')
            obj_type = Type(Field('label'))
            def parse(self, el):
                """Fill ``self.env`` with the id, balance and coming of the row.

                Raises SkipItem when the row has no link in its first cell or
                when the link carries none of the known ``*_IdPrestation``
                query parameters.
                """
                anchors = el.xpath('./td[1]/a')
                if not anchors:
                    # A row without a link cannot be identified; skip it
                    # rather than fail with an IndexError.
                    raise SkipItem()
                query = parse_qs(urlparse(anchors[0].get('href', '')).query)
                # Same lookup order as before: CPT, then Ass, then CB.
                for key in ('CPT_IdPrestation', 'Ass_IdPrestation', 'CB_IdPrestation'):
                    if key in query:
                        account_id = query[key][0]
                        break
                else:
                    raise SkipItem()
                balance = CleanDecimal('./td[3]')(self)
                self.env['id'] = account_id
                self.env['balance'] = balance
                self.env['coming'] = NotAvailable
 class Pagination(object):
    """Mixin computing the URL of the page that follows the current one."""
    def next_page(self):
        """Return the next page URL with ``&CB_IdPrestation`` appended, or None.

        The ``<a class="fleche">`` links of ``self.page.doc`` are scanned in
        document order. A link whose href contains ``#op`` marks the current
        page; the first later link without ``#op`` is the next page. Anchors
        that have no ``href`` attribute are ignored. None is returned when no
        such link exists.
        """
        current_page_found = False
        for link in self.page.doc.xpath('//a[@class="fleche"]'):
            href = link.attrib.get('href')
            if href is None:
                # Without an href the anchor can neither mark the current
                # page nor be followed; testing '"#op" in None' would raise.
                continue
            if "#op" in href:
                current_page_found = True
            elif current_page_found:
                # Adding CB_IdPrestation so browser2 uses CBOperationPage.
                return href + "&CB_IdPrestation"
        return None
 class CBOperationPage(LoggedPage, HTMLPage):
    """Page listing the operations of a card (``CB_IdPrestation``) account."""
    @method
    class get_history(Pagination, Transaction.TransactionsElement):
        # XPath expressions for the table header cells and the table rows.
        head_xpath = '//table//tr/th'
        item_xpath = '//table//tr'
        class item(Transaction.TransactionElement):
            def condition(self):
                # Keep only the rows holding at least four <td> cells.
                cells = self.el.xpath('./td')
                return len(cells) >= 4
            # Both dates are read from the "date" column and resolved with
            # the ``date_guesser`` found in the environment.
            obj_date = DateGuesser(CleanText(TableCell("date")), Env("date_guesser"))
            obj_vdate = DateGuesser(CleanText(TableCell("date")), Env("date_guesser"))
 class CPTOperationPage(LoggedPage, HTMLPage):
    """Page whose operations are embedded as ``CL(...)`` calls in inline scripts."""
    def get_history(self):
        """Yield one Transaction per ``CL(...)`` call found in the page scripts.

        Scripts without text, or whose text does not contain a line starting
        with ``CL(0``, are ignored.
        """
        # NOTE(review): inside the two amount character classes, ' -\.' is
        # a range from space to '.', so it also accepts characters such as
        # '+' and "'" -- confirm this is intended.
        call_re = re.compile(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)',('([\d -\.,]+)',)?'\d+','\d+','[\w\s]+'\);", flags=re.MULTILINE)
        for script in self.doc.xpath('//script'):
            source = script.text
            if source is None or '\nCL(0' not in source:
                continue
            for call in call_re.finditer(source):
                op_id, second_field, op_date, label, amount = call.group(1, 2, 3, 4, 5)
                op = Transaction(op_id)
                # Newlines and runs of spaces in the label collapse to one space.
                op.parse(date=op_date, raw=re.sub(u'[ ]+', u' ', label.replace(u'\n', u' ')))
                op.set_amount(amount)
                # The operation is flagged as coming when the second field
                # does not start with a d/d/d date.
                op._coming = (re.match(r'\d+/\d+/\d+', second_field) is None)
                yield op
 class LoginPage(HTMLPage):
    """Pages of the www.hsbc.fr login sequence."""
    def login(self, login):
        """Put ``login`` in the ``userid`` field of form number 2 and submit it."""
        user_form = self.get_form(nr=2)
        user_form['userid'] = login
        user_form.submit()
    def get_no_secure_key(self):
        """Return the href of the "Without HSBC Secure Key" link, or None if missing."""
        matches = self.doc.xpath(u'//a[contains(text(), "Without HSBC Secure Key")]')
        if not matches:
            return None
        return matches[0].attrib['href']
    def login_w_secure(self, login, password, secret):
        """Submit the memorable answer and the requested password characters.

        ``login`` is accepted but not used here. The characters are picked
        from ``password`` by the position of each selected input among the
        ``keyrcc_password_first*`` inputs.
        """
        auth_form = self.get_form(nr=0)
        auth_form['memorableAnswer'] = secret
        fields = self.doc.xpath(u'//input[starts-with(@id, "keyrcc_password_first")]')
        # The good fields are 1, 2, 3 and the bad ones are 11, 12, 21, 23,
        # 24, 31 and so on: only ids whose numeric suffix is below 10 count.
        picked = [password[pos] for pos, field in enumerate(fields)
                  if int(field.attrib['id'].split('first')[1]) < 10]
        auth_form['password'] = u''.join(picked)
        auth_form.submit()
    def useless_form(self):
        """Submit the first form of the page without filling anything."""
        self.get_form(nr=0).submit()
--- a/modules/hsbc/pages/init.py
+++ b/modules/hsbc/pages/init.py
--- a/modules/hsbc/pages/accounts.py
+++ b/modules/hsbc/pages/accounts.py
@ -1,152 +0,0 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2012-2013  Romain Bignon
 #
 # This file is part of weboob.
 #
 # weboob is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # weboob is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 from decimal import Decimal
 import re
 from weboob.tools.browser import BasePage
 from weboob.capabilities.bank import Account
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction
 __all__ = ['AccountsListPage', 'CPTHistoryPage', 'CardHistoryPage']
 class AccountsListPage(BasePage):
    """Accounts summary page; ``get_list`` turns its table rows into Account objects."""
    def get_list(self):
        """Return an iterator over the Account objects parsed from the ``<tr>`` rows.

        Card (``CB``) rows are not returned as accounts: their link and
        balance are merged into the first account collected so far.
        """
        accounts = []
        for tr in self.document.getiterator('tr'):
            tds = tr.findall('td')
            # An account row has exactly three cells and an <a class="flecheM">
            # as the first link of its first cell.
            if len(tds) != 3 or tds[0].find('a') is None or tds[0].find('a').attrib.get('class', '') != 'flecheM':
                continue
            account = Account()
            account.id = tds[1].text.strip()
            # The last link of the first cell carries the label and the URL.
            a = tds[0].findall('a')[-1]
            account.label = unicode(a.text.strip())
            account._link_id = a.attrib['href']
            # The balance is the concatenation of every text node of the third cell.
            balance = u''.join([txt.strip() for txt in tds[2].itertext()])
            account.balance = Decimal(FrenchTransaction.clean_amount(balance))
            # check account type
            m = re.search('(\w+)_IdPrestation', account._link_id)
            account_type = None
            if m:
                account_type = m.group(1)
                # Every type other than CPT gets its type appended to the id.
                if account_type != 'CPT':
                    account.id += '.%s' % account_type
            if account_type == 'CB':
                # Card rows are attached to accounts[0]: the link is recorded
                # and the card balance is added to that account's coming.
                # NOTE(review): raises IndexError if a CB row comes before
                # any account has been appended.
                accounts[0]._card_links.append(account._link_id)
                if not accounts[0].coming:
                    accounts[0].coming = Decimal('0.0')
                accounts[0].coming += account.balance
                continue
            if account_type != 'CPT':
                # Don't support other kind of account histories.
                account._link_id = None
            account.currency = account.get_currency(tds[1].text)
            account._card_links = []
            accounts.append(account)
        return iter(accounts)
 class Transaction(FrenchTransaction):
    """HSBC transaction with the label patterns used to classify operations."""
    # Ordered (compiled regex, transaction type) pairs matched against the
    # raw label. Raw string literals keep the regex escapes (\w, \d, \s, \.)
    # out of Python's own string-escape processing; the pattern text is
    # unchanged.
    PATTERNS = [(re.compile(r'^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                (re.compile(r'^PRLV (?P<text>.*)'),        FrenchTransaction.TYPE_ORDER),
                (re.compile(r'^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'),
                                                          FrenchTransaction.TYPE_CARD),
                (re.compile(r'^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) ((?P<HH>\d{2})H(?P<MM>\d{2}) )?(?P<text>.*?)( CB N°.*)?$'),
                                                          FrenchTransaction.TYPE_WITHDRAWAL),
                (re.compile(r'^CHEQUE$'),                  FrenchTransaction.TYPE_CHECK),
                (re.compile(r'^COTIS\.? (?P<text>.*)'),    FrenchTransaction.TYPE_BANK),
                (re.compile(r'^REMISE (?P<text>.*)'),      FrenchTransaction.TYPE_DEPOSIT),
               ]
 class HistoryPage(BasePage):
    """Common base of the history pages."""
    def get_next_link(self):
        """Return None: the base page has no following page."""
        return
    def get_operations(self, num_page, date_guesser):
        """Not implemented on the base page; always raises NotImplementedError."""
        raise NotImplementedError
 class CPTHistoryPage(HistoryPage):
    """History page whose operations are embedded as ``CL(...)`` script calls."""
    def get_operations(self, num_page, date_guesser):
        """Yield one Transaction per ``CL(...)`` call found in the page scripts.

        ``num_page`` and ``date_guesser`` are accepted but not used here.
        Scripts without text, or whose text does not contain a line starting
        with ``CL(0``, are ignored.
        """
        # NOTE(review): inside the two amount character classes, ' -\.' is
        # a range from space to '.', so it also accepts characters such as
        # '+' and "'" -- confirm this is intended.
        call_re = re.compile(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)',('([\d -\.,]+)',)?'\d+','\d+','[\w\s]+'\);", flags=re.MULTILINE)
        for script in self.document.getiterator('script'):
            source = script.text
            if source is None or '\nCL(0' not in source:
                continue
            for call in call_re.finditer(source):
                op_id, second_field, op_date, label, amount = call.group(1, 2, 3, 4, 5)
                op = Transaction(op_id)
                # Newlines and runs of spaces in the label collapse to one space.
                op.parse(date=op_date, raw=re.sub(u'[ ]+', u' ', label.replace(u'\n', u' ')))
                op.set_amount(amount)
                # The operation is flagged as coming when the second field
                # does not start with a d/d/d date.
                op._coming = (re.match(r'\d+/\d+/\d+', second_field) is None)
                yield op
 class CardHistoryPage(HistoryPage):
    """History page of a card: operations are read from the ``tabs-1`` table."""
    def get_next_link(self):
        """Return the link following the current-page marker, or None.

        Among the ``a.fleche`` links of the ``FORM_LIB_CARTE`` form, a link
        whose href starts with ``#`` arms the search; the first later link
        that does not start with ``#`` is returned with ``&CB_IdPrestation=``
        appended. None is returned implicitly when there is no such link.
        """
        ok = False
        for link in self.document.xpath('//form[@name="FORM_LIB_CARTE"]/a[@class="fleche"]'):
            if link.attrib['href'].startswith('#'):
                ok = True
            elif ok:
                # add CB_IdPrestation to handle the correct page on browser.
                return link.attrib['href'] + '&CB_IdPrestation='
    def parse_date(self, guesser, string, store=False):
        """Split a ``day/month`` string and pass both integers to ``guesser.guess_date``."""
        day, month = map(int, string.split('/'))
        return guesser.guess_date(day, month, store)
    def get_operations(self, num_page, date_guesser):
        """Yield the card operations of the page as Transaction objects.

        Single-cell rows holding a ``d/d`` date set the debit date of the
        rows after them; rows with at least four cells are operations.
        Operations are flagged as coming while ``num_page`` is 0.
        """
        debit_date = None
        for tr in self.document.xpath('//div[@id="tabs-1"]/table//tr'):
            cols = tr.findall('td')
            if len(cols) == 1:
                # Single-cell row: may carry the debit date of the next rows.
                text = self.parser.tocleanstring(cols[0])
                m = re.search('(\d+/\d+)', text)
                if m:
                    # if there are several months on the same page, the second
                    # one's operations are already debited.
                    if debit_date is not None:
                        num_page += 1
                    debit_date = self.parse_date(date_guesser, m.group(1), True)
                continue
            if len(cols) < 4:
                continue
            op = Transaction('')
            op.parse(date=debit_date,
                     raw=self.parser.tocleanstring(cols[1]))
            # The first cell holds the date of the operation itself.
            op.rdate = self.parse_date(date_guesser, self.parser.tocleanstring(cols[0]))
            op.type = op.TYPE_CARD
            op._coming = (num_page == 0)
            # The amount is built from the last two cells of the row.
            op.set_amount(self.parser.tocleanstring(cols[-1]),
                          self.parser.tocleanstring(cols[-2]))
            yield op
--- a/modules/hsbc/pages/login.py
+++ b/modules/hsbc/pages/login.py
@ -1,76 +0,0 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2012-2013  Romain Bignon
 #
 # This file is part of weboob.
 #
 # weboob is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # weboob is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 import re
 import urllib
 from mechanize import FormNotFoundError
 from weboob.tools.browser import BasePage, BrokenPageError
 __all__ = ['LoginPage']
 class LoginPage(BasePage):
    """Login page of www.hsbc.fr (mechanize-based browser)."""
    def on_loaded(self):
        """Submit the form named ``form1`` when the page has one; otherwise do nothing."""
        try:
            self.browser.select_form(name="form1")
        except FormNotFoundError:
            pass
        else:
            self.browser.submit(nologin=True)
    def login(self, username, secret, password):
        """Post the credentials to the action URL of the ``auth04`` form.

        Only the password characters requested by the enabled
        ``keyrcc_password_first*`` inputs are sent.
        """
        form_world = self.document.xpath('//form[@name="auth04"]')
        url = form_world[0].attrib['action']
        # Fixed value sent as the ``__data`` field.
        # NOTE(review): looks like a recorded browser fingerprint -- confirm.
        datastr = "TF1;015;;;;;;;;;;;;;;;;;;;;;;Mozilla;Netscape;5.0%20%28X11%29;20100101;undefined;true;Linux%20x86_64;true;Linux%20x86_64;undefined;Mozilla/5.0%20%28X11%3B%20Linux%20x86_64%3B%20rv%3A19.0%29%20Gecko/20100101%20Firefox/19.0%20Iceweasel/19.0.2;en-US;undefined;www.hsbc.fr;undefined;undefined;undefined;undefined;true;true;1365177015380;1;Tue%2007%20Jun%202005%2009%3A33%3A44%20PM%20CEST;1280;1024;;11.2;;;;;123;-60;-120;Fri%2005%20Apr%202013%2005%3A50%3A15%20PM%20CEST;24;1280;1024;0;0;;;;;;Shockwave%20Flash%7CShockwave%20Flash%2011.2%20r202;;;;;;;;;;;;;17;"
        data = {'FMNUserId': username,
                'memorableAnswer': secret,
                'password': '',
                '__data': datastr,
                '__custtype': 'GLOBAL',
               }
        # The index of each password input is used as the position of the
        # character in ``password``; inputs with a ``disabled`` attribute are skipped.
        for i, field in enumerate(form_world[0].xpath('.//div[@class="csLabel"]/nobr/input[@type="password"]')):
            if field.attrib['name'].startswith('keyrcc_password_first') and not 'disabled' in field.attrib:
                data[field.attrib['name']] = password[i]
                data['password'] += password[i]
        # A path-only action is made absolute on www.hsbc.fr.
        if url.startswith('/'):
            url = 'https://www.hsbc.fr%s' % url
        self.browser.location(url, urllib.urlencode(data), no_login=True)
    def get_error(self):
        """Return the text of the first ``<font color="red">`` element, or None if there is none."""
        try:
            return self.parser.tocleanstring(self.document.xpath('//font[@color="red"]')[0])
        except IndexError:
            return None
    def get_session(self):
        """Return the ``sessionid`` found in the ``src`` of the ``FrameWork`` frame.

        Raises BrokenPageError when the frame or the session id is missing.
        """
        try:
            frame = self.document.xpath('//frame[@name="FrameWork"]')[0]
        except IndexError:
            raise BrokenPageError('Unable to find session token')
        m = re.search('sessionid=([^& "]+)', frame.attrib['src'])
        if not m:
            raise BrokenPageError('Unable to find session token')
        return m.group(1)