support new Crédit Agricole website

2013-03-18 15:52:36 +01:00 · 2013-03-18 15:52:36 +01:00 · 240efb85f4
commit 240efb85f4
parent 832119fc3a
11 changed files with 334 additions and 5 deletions
--- a/modules/cragr/backend.py
+++ b/modules/cragr/backend.py
@ -23,7 +23,8 @@ from weboob.tools.backend import BaseBackend, BackendConfig
 from weboob.tools.ordereddict import OrderedDict
 from weboob.tools.value import ValueBackendPassword, Value

-from .browser import Cragr
+from .web.browser import Cragr
+from .mobile.browser import CragrMobile


 __all__ = ['CragrBackend']
@ -79,9 +80,16 @@ class CragrBackend(BaseBackend, ICapBank):
    BROWSER = Cragr

    def create_default_browser(self):
-        return self.create_browser(self.config['website'].get(),
-                                   self.config['login'].get(),
-                                   self.config['password'].get())
+        # First try the browser currently set in BROWSER (Cragr, the regular
+        # website, by default) with the configured website/login/password.
+        try:
+            return self.create_browser(self.config['website'].get(),
+                                       self.config['login'].get(),
+                                       self.config['password'].get())
+        except Cragr.WebsiteNotSupported:
+            # Cragr.login() raises WebsiteNotSupported when the home page
+            # exposes no authentication URL: retry with the mobile browser,
+            # using exactly the same settings.
+            # NOTE(review): BROWSER is overridden on this instance only;
+            # presumably create_browser() reads self.BROWSER -- confirm.
+            self.logger.debug('falling-back on mobile version')
+            self.BROWSER = CragrMobile
+            return self.create_browser(self.config['website'].get(),
+                                       self.config['login'].get(),
+                                       self.config['password'].get())

    def iter_accounts(self):
        return self.browser.get_accounts_list()
--- a/modules/cragr/mobile/init.py
+++ b/modules/cragr/mobile/init.py
--- a/modules/cragr/mobile/browser.py
+++ b/modules/cragr/mobile/browser.py
@ -27,7 +27,10 @@ from datetime import datetime
 import re


-class Cragr(BaseBrowser):
+__all__ = ['CragrMobile']
+
+
+class CragrMobile(BaseBrowser):
    PROTOCOL = 'https'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
--- a/modules/cragr/mobile/pages/init.py
+++ b/modules/cragr/mobile/pages/init.py
--- a/modules/cragr/mobile/pages/accounts_list.py
+++ b/modules/cragr/mobile/pages/accounts_list.py
--- a/modules/cragr/mobile/pages/base.py
+++ b/modules/cragr/mobile/pages/base.py
--- a/modules/cragr/mobile/pages/login.py
+++ b/modules/cragr/mobile/pages/login.py
--- a/modules/cragr/mobile/pages/tokenextractor.py
+++ b/modules/cragr/mobile/pages/tokenextractor.py
--- a/modules/cragr/web/init.py
+++ b/modules/cragr/web/init.py
--- a/modules/cragr/web/browser.py
+++ b/modules/cragr/web/browser.py
@ -0,0 +1,145 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import urllib
+import re
+
+from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
+from weboob.tools.date import LinearDateGuesser
+
+from .pages import HomePage, LoginPage, LoginErrorPage, AccountsPage, TransactionsPage
+
+
+__all__ = ['Cragr']
+
+
+class Cragr(BaseBrowser):
+    """
+    Browser for the regular (non-mobile) Credit Agricole website.
+
+    It logs in through the authentication URL advertised on the home page,
+    then exposes the accounts list and the paginated history of an account.
+    """
+    PROTOCOL = 'https'
+    ENCODING = 'ISO-8859-1'
+
+    # URL regular expression -> page class handling the matching documents.
+    PAGES = {'https?://[^/]+/':                                     HomePage,
+             'https?://[^/]+/stb/entreeBam':                        LoginPage,
+             'https?://[^/]+/stb/entreeBam\?.*act=Synthcomptes':    AccountsPage,
+             'https?://[^/]+/stb/collecteNI\?.*act=Releves.*':      TransactionsPage,
+             'https?://[^/]+/stb/collecteNI\?.*sessionAPP=Releves.*': TransactionsPage,
+             'https?://[^/]+/stb/.*/erreur/.*':                     LoginErrorPage,
+            }
+
+    class WebsiteNotSupported(Exception):
+        """Raised by login() when the home page gives no authentication URL."""
+        pass
+
+    def __init__(self, website, *args, **kwargs):
+        """
+        Set up the browser for the given regional website.
+
+        :param website: host name of the website; a leading 'm.' is
+                        replaced by 'www.' to build DOMAIN.
+
+        Remaining arguments are forwarded untouched to BaseBrowser.
+        """
+        self.DOMAIN = re.sub('^m\.', 'www.', website)
+        # Filled by login() once the accounts page has been reached.
+        self.accounts_url = None
+        BaseBrowser.__init__(self, *args, **kwargs)
+
+    def home(self):
+        """Go to the home of the site, which here simply means logging in."""
+        self.login()
+
+    def is_logged(self):
+        """Return True when a page is loaded and it is not the HomePage."""
+        return self.page is not None and not self.is_on_page(HomePage)
+
+    def login(self):
+        """
+        Attempt to log in.
+        Note: this method does nothing if we are already logged in.
+
+        The sequence is: load the home page, read the authentication URL
+        from it, post the account number to get the password prompt, submit
+        the password, then follow the URL returned by that submission.
+        On success, the accounts page URL is stored in accounts_url.
+
+        Raises WebsiteNotSupported when the home page gives no
+        authentication URL, and BrowserIncorrectPassword when the final page
+        is an error page or we are still not logged in.
+        """
+        assert isinstance(self.username, basestring)
+        assert isinstance(self.password, basestring)
+
+        # Do we really need to login?
+        if self.is_logged():
+            self.logger.debug('already logged in')
+            return
+
+        if not self.is_on_page(HomePage):
+            self.location(self.absurl('/'), no_login=True)
+
+        # On the homepage, we get the URL of the auth service.
+        url = self.page.get_post_url()
+        if url is None:
+            raise self.WebsiteNotSupported()
+
+        # First, post account number to get the password prompt.
+        # Only CCPTE (the account number, encoded with ENCODING) and
+        # urlOrigine (the current page URL) are dynamic; the other fields are
+        # fixed values.
+        data = {'CCPTE':                self.username.encode(self.ENCODING),
+                'canal':                'WEB',
+                'hauteur_ecran':        768,
+                'largeur_ecran':        1024,
+                'liberror':             '',
+                'matrice':              'true',
+                'origine':              'vitrine',
+                'situationTravail':     'BANCAIRE',
+                'typeAuthentification': 'CLIC_ALLER',
+                'urlOrigine':           self.page.url,
+                'vitrine':              0,
+               }
+
+        self.location(url, urllib.urlencode(data))
+
+        assert self.is_on_page(LoginPage)
+
+        # Then, post the password.
+        self.page.login(self.password)
+
+        # The result of POST is the destination URL.
+        url = self.page.get_result_url()
+
+        self.location(url)
+
+        if self.is_on_page(LoginErrorPage) or not self.is_logged():
+            raise BrowserIncorrectPassword()
+
+        assert self.is_on_page(AccountsPage)
+
+        # Store the current url to go back when requesting accounts list.
+        self.accounts_url = self.page.url
+
+    def get_accounts_list(self):
+        """
+        Return the accounts found on the accounts page.
+
+        If the current page is not the accounts page, the URL stored by
+        login() is loaded first.
+        """
+        # NOTE(review): accounts_url is None until login() has succeeded;
+        # presumably the base browser logs in before this is reached -- confirm.
+        if not self.is_on_page(AccountsPage):
+            self.location(self.accounts_url)
+        return self.page.get_list()
+
+    def get_account(self, id):
+        """
+        Return the account whose id equals the given string, or None when
+        no account of the list matches.
+        """
+        assert isinstance(id, basestring)
+
+        l = self.get_accounts_list()
+        for a in l:
+            if a.id == ('%s' % id):
+                return a
+
+        return None
+
+    def get_history(self, account):
+        """
+        Iterate over the transactions of an account, following the
+        "next page" link of each history page until there is none left.
+
+        Nothing is yielded for an account without a history link.
+        A single LinearDateGuesser is shared by all pages of the history.
+        """
+        # some accounts may exist without a link to any history page
+        if account._link is None:
+            return
+
+        # NOTE(review): the first loop iteration below requests this same URL
+        # again, so this call looks redundant -- confirm whether the site
+        # needs a first visit before the history page can be parsed.
+        self.location(account._link)
+        url = account._link
+        date_guesser = LinearDateGuesser()
+
+        while url:
+            self.location(url)
+            assert self.is_on_page(TransactionsPage)
+
+            for tr in self.page.get_history(date_guesser):
+                yield tr
+
+            # None when the current page is the last one, which ends the loop.
+            url = self.page.get_next_url()
--- a/modules/cragr/web/pages.py
+++ b/modules/cragr/web/pages.py
@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+from decimal import Decimal
+
+from weboob.capabilities.bank import Account
+from weboob.tools.browser import BasePage
+from weboob.tools.capabilities.bank.transactions import FrenchTransaction as Transaction
+
+
+__all__ = ['HomePage', 'LoginPage', 'LoginErrorPage', 'AccountsPage', 'TransactionsPage']
+
+
+class HomePage(BasePage):
+    """Home page of the website, visited before authentication."""
+
+    def get_post_url(self):
+        """
+        Return the value assigned to the javascript variable ``chemin`` by
+        the first inline script declaring it, or None when no script does.
+        """
+        chemin_re = re.compile(r'var chemin = "([^"]+)"', re.MULTILINE)
+
+        for node in self.document.xpath('//script'):
+            content = node.text
+            # Scripts without inline text (e.g. external ones) cannot match.
+            found = chemin_re.search(content) if content is not None else None
+            if found is not None:
+                return found.group(1)
+
+        return None
+
+class LoginPage(BasePage):
+    """Password prompt, where the code is entered through a table of links."""
+
+    def login(self, password):
+        """
+        Fill and submit the password form.
+
+        Each digit of the password is replaced by a two-digit position taken
+        from the ``tabindex`` attribute (minus one) of the link showing that
+        digit in the ``pave-saisie-code`` table.
+
+        :param password: the secret code, which must be exactly six digits.
+
+        Raises AssertionError when the password is not six digits, and
+        KeyError when one of its digits is not offered by the page.
+        """
+        # NOTE: these checks disappear when python runs with -O.
+        assert password.isdigit()
+        assert len(password) == 6
+
+        imgmap = {}
+        for td in self.document.xpath('//table[@id="pave-saisie-code"]/tr/td'):
+            a = td.find('a')
+            # A cell without a link, or with a link holding no text, cannot
+            # stand for a digit: skip it rather than fail on None.
+            if a is None or a.text is None:
+                continue
+
+            num = a.text.strip()
+            if num.isdigit():
+                imgmap[num] = int(a.attrib['tabindex']) - 1
+
+        self.browser.select_form(name='formulaire')
+        # The code fields are read-only in the page; unlock them to fill them.
+        self.browser.set_all_readonly(False)
+        self.browser['CCCRYC'] = ','.join(['%02d' % imgmap[c] for c in password])
+        self.browser['CCCRYC2'] = '0' * len(password)
+        self.browser.submit(nologin=True)
+
+    def get_result_url(self):
+        """Return the cleaned text content of the whole document."""
+        return self.parser.tocleanstring(self.document.getroot())
+
+class LoginErrorPage(BasePage):
+    """Error page; it carries no logic and is only used to detect a failure."""
+
+class AccountsPage(BasePage):
+    """Accounts summary page: one table row per account."""
+
+    # Index of each piece of information among the <td> cells of a row.
+    COL_LABEL    = 0
+    COL_ID       = 2
+    COL_VALUE    = 4
+    COL_CURRENCY = 5
+
+    def get_list(self):
+        """
+        Iterate over the accounts listed in the ``ca-table`` table.
+
+        Only rows whose class starts with ``colcelligne`` are considered.
+        The ``_link`` attribute of an account is the href of the link found
+        in its first cell (spaces escaped as %20), or None without a link.
+        """
+        to_text = self.parser.tocleanstring
+
+        for row in self.document.xpath('//table[@class="ca-table"]/tr'):
+            row_class = row.attrib.get('class', '')
+            if not row_class.startswith('colcelligne'):
+                continue
+
+            cells = row.findall('td')
+
+            account = Account()
+            account.id = to_text(cells[self.COL_ID])
+            account.label = to_text(cells[self.COL_LABEL])
+            amount = Transaction.clean_amount(to_text(cells[self.COL_VALUE]))
+            account.balance = Decimal(amount)
+            account.currency = account.get_currency(to_text(cells[self.COL_CURRENCY]))
+
+            anchor = cells[0].find('a')
+            if anchor is None:
+                account._link = None
+            else:
+                account._link = anchor.attrib['href'].replace(' ', '%20')
+
+            yield account
+
+
+class TransactionsPage(BasePage):
+    """History page of an account: a table of operations and a pager."""
+
+    # Index of each piece of information among the <td> cells of a row.
+    COL_DATE  = 0
+    COL_TEXT  = 1
+    COL_VALUE = -1
+
+    # Category text found in the page -> transaction type.
+    TYPES = {'Paiement Par Carte':          Transaction.TYPE_CARD,
+             'Retrait Au Distributeur':     Transaction.TYPE_WITHDRAWAL,
+             'Frais':                       Transaction.TYPE_BANK,
+             'Cotisation':                  Transaction.TYPE_BANK,
+             'Virement Emis':               Transaction.TYPE_TRANSFER,
+             'Virement':                    Transaction.TYPE_TRANSFER,
+             'Cheque Emis':                 Transaction.TYPE_CHECK,
+             'Remise De Cheque':            Transaction.TYPE_DEPOSIT,
+             'Prelevement':                 Transaction.TYPE_ORDER,
+            }
+
+    def get_next_url(self):
+        """
+        Return the href of the "next page" link, or None on the last page.
+
+        Only the last link of the pager is examined; it is the next page
+        when it holds an image whose alt text is 'Page suivante'.
+        """
+        links = self.document.xpath('//span[@class="pager"]/a[@class="liennavigationcorpspage"]')
+        if not links:
+            return None
+
+        last = links[-1]
+        img = last.find('img')
+        # A link without any image is not the "next page" arrow.
+        if img is not None and img.attrib.get('alt', '') == 'Page suivante':
+            return last.attrib['href']
+
+        return None
+
+    def get_history(self, date_guesser):
+        """
+        Iterate over the transactions listed in the ``ca-table`` table.
+
+        :param date_guesser: object whose ``guess_date(day, month)`` method
+                             turns the day/month found in the page into a
+                             full date.
+
+        Rows are kept when their class starts with ``ligne-`` and they hold
+        no <th>. Each transaction is built with a running index starting at 0.
+        """
+        i = 0
+        for tr in self.document.xpath('//table[@class="ca-table"]/tr'):
+            if not tr.attrib.get('class', '').startswith('ligne-'):
+                continue
+
+            # On loan accounts, there is a ca-table with a summary. Skip it.
+            if tr.find('th') is not None:
+                continue
+
+            t = Transaction(i)
+
+            cols = tr.findall('td')
+            date = self.parser.tocleanstring(cols[self.COL_DATE])
+            raw = self.parser.tocleanstring(cols[self.COL_TEXT])
+            value = self.parser.tocleanstring(cols[self.COL_VALUE])
+
+            day, month = map(int, date.split('/', 1))
+            t.date = date_guesser.guess_date(day, month)
+            t.rdate = t.date
+            t.raw = raw
+
+            # On some accounts' history page, there is a <font> tag in columns.
+            col_text = cols[self.COL_TEXT]
+            if col_text.find('font') is not None:
+                col_text = col_text.find('font')
+
+            t.category = unicode(col_text.text.strip())
+
+            # The label is the text following the <br>. A cell holding a single
+            # line has no <br> at all, which must be handled like an empty
+            # second line instead of failing on None.
+            br = col_text.find('br')
+            t.label = br.tail if br is not None else None
+            if t.label is not None:
+                t.label = t.label.strip()
+            else:
+                # If there is only one line, try to separate category from label.
+                t.label = re.sub(r'(.*)  (.*)', r'\2', t.category).strip()
+            # Sometimes, the category contains the label, even if there is another line with it again.
+            t.category = re.sub(r'(.*)  .*', r'\1', t.category).strip()
+
+            # Parse operation date in label (for card transactions for example)
+            m = re.match(r'(.*) (\d{2})/(\d{2})$', t.label)
+            if m:
+                t.rdate = date_guesser.guess_date(int(m.group(2)), int(m.group(3)), change_current_date=False)
+                t.label = m.group(1).strip()
+
+            # Strip city or other useless information from label.
+            t.label = re.sub(r'(.*)  .*', r'\1', t.label).strip()
+
+            # Categories missing from TYPES are left as unknown.
+            t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)
+
+            t.set_amount(value)
+            yield t
+
+            i += 1