support new Crédit Agricole website

2013-03-18 15:52:36 +01:00 · 2013-03-18 15:52:36 +01:00 · 240efb85f4
commit 240efb85f4
parent 832119fc3a
11 changed files with 334 additions and 5 deletions
--- a/modules/cragr/backend.py
+++ b/modules/cragr/backend.py
@ -23,7 +23,8 @@ from weboob.tools.backend import BaseBackend, BackendConfig
 from weboob.tools.ordereddict import OrderedDict
 from weboob.tools.value import ValueBackendPassword, Value
-from .browser import Cragr
+from .web.browser import Cragr
 from .mobile.browser import CragrMobile
 __all__ = ['CragrBackend']
@ -79,9 +80,16 @@ class CragrBackend(BaseBackend, ICapBank):
    BROWSER = Cragr
    def create_default_browser(self):
-        return self.create_browser(self.config['website'].get(),
+        try:
-                                   self.config['login'].get(),
+            return self.create_browser(self.config['website'].get(),
-                                   self.config['password'].get())
+                                       self.config['login'].get(),
                                       self.config['password'].get())
        except Cragr.WebsiteNotSupported:
            self.logger.debug('falling-back on mobile version')
            self.BROWSER = CragrMobile
            return self.create_browser(self.config['website'].get(),
                                       self.config['login'].get(),
                                       self.config['password'].get())
    def iter_accounts(self):
        """Return the accounts list provided by the underlying browser."""
        accounts = self.browser.get_accounts_list()
        return accounts
--- a/modules/cragr/mobile/init.py
+++ b/modules/cragr/mobile/init.py
--- a/modules/cragr/mobile/browser.py
+++ b/modules/cragr/mobile/browser.py
@ -27,7 +27,10 @@ from datetime import datetime
 import re
-class Cragr(BaseBrowser):
+__all__ = ['CragrMobile']
 class CragrMobile(BaseBrowser):
    PROTOCOL = 'https'
    ENCODING = 'utf-8'
    USER_AGENT = BaseBrowser.USER_AGENTS['wget']
--- a/modules/cragr/mobile/pages/init.py
+++ b/modules/cragr/mobile/pages/init.py
--- a/modules/cragr/mobile/pages/accounts_list.py
+++ b/modules/cragr/mobile/pages/accounts_list.py
--- a/modules/cragr/mobile/pages/base.py
+++ b/modules/cragr/mobile/pages/base.py
--- a/modules/cragr/mobile/pages/login.py
+++ b/modules/cragr/mobile/pages/login.py
--- a/modules/cragr/mobile/pages/tokenextractor.py
+++ b/modules/cragr/mobile/pages/tokenextractor.py
--- a/modules/cragr/web/init.py
+++ b/modules/cragr/web/init.py
--- a/modules/cragr/web/browser.py
+++ b/modules/cragr/web/browser.py
@ -0,0 +1,145 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2013  Romain Bignon
 #
 # This file is part of weboob.
 #
 # weboob is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # weboob is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 import urllib
 import re
 from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
 from weboob.tools.date import LinearDateGuesser
 from .pages import HomePage, LoginPage, LoginErrorPage, AccountsPage, TransactionsPage
 __all__ = ['Cragr']
 class Cragr(BaseBrowser):
    """
    Browser for the regular (non-mobile) Credit Agricole website.

    Logging in is a two-step exchange: the account number is first posted to
    the authentication URL found on the home page, then the password is
    submitted through the page returned by that request.
    """
    PROTOCOL = 'https'
    ENCODING = 'ISO-8859-1'

    # URL pattern -> class of the page served at that URL.
    # Raw strings keep the regex escapes out of Python's own string escape
    # processing; the resulting pattern values are unchanged.
    PAGES = {r'https?://[^/]+/':                                      HomePage,
             r'https?://[^/]+/stb/entreeBam':                         LoginPage,
             r'https?://[^/]+/stb/entreeBam\?.*act=Synthcomptes':     AccountsPage,
             r'https?://[^/]+/stb/collecteNI\?.*act=Releves.*':       TransactionsPage,
             r'https?://[^/]+/stb/collecteNI\?.*sessionAPP=Releves.*': TransactionsPage,
             r'https?://[^/]+/stb/.*/erreur/.*':                      LoginErrorPage,
            }

    class WebsiteNotSupported(Exception):
        """Raised by login() when the home page exposes no authentication URL."""

    def __init__(self, website, *args, **kwargs):
        """
        :param website: domain of the website; a leading ``m.`` is replaced
                        by ``www.`` before it is stored in ``DOMAIN``.

        Remaining arguments are forwarded to ``BaseBrowser.__init__``.
        """
        self.DOMAIN = re.sub(r'^m\.', 'www.', website)
        # URL of the accounts page, recorded by login() once it succeeds.
        self.accounts_url = None
        BaseBrowser.__init__(self, *args, **kwargs)

    def home(self):
        """Go to the home of the website, which here means logging in."""
        self.login()

    def is_logged(self):
        """Tell whether a page is loaded and it is not the HomePage."""
        return self.page is not None and not self.is_on_page(HomePage)

    def login(self):
        """
        Attempt to log in.

        Note: this method does nothing if we are already logged in.

        :raises WebsiteNotSupported: the home page gives no authentication URL.
        :raises BrowserIncorrectPassword: the site answered with an error page
                                          or the session is still not logged.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        # Do we really need to login?
        if self.is_logged():
            self.logger.debug('already logged in')
            return

        if not self.is_on_page(HomePage):
            self.location(self.absurl('/'), no_login=True)

        # On the homepage, we get the URL of the auth service.
        url = self.page.get_post_url()
        if url is None:
            raise self.WebsiteNotSupported()

        # First, post account number to get the password prompt.
        data = {'CCPTE':                self.username.encode(self.ENCODING),
                'canal':                'WEB',
                'hauteur_ecran':        768,
                'largeur_ecran':        1024,
                'liberror':             '',
                'matrice':              'true',
                'origine':              'vitrine',
                'situationTravail':     'BANCAIRE',
                'typeAuthentification': 'CLIC_ALLER',
                'urlOrigine':           self.page.url,
                'vitrine':              0,
               }
        self.location(url, urllib.urlencode(data))
        assert self.is_on_page(LoginPage)

        # Then, post the password.
        self.page.login(self.password)

        # The result of POST is the destination URL.
        url = self.page.get_result_url()
        self.location(url)

        if self.is_on_page(LoginErrorPage) or not self.is_logged():
            raise BrowserIncorrectPassword()
        assert self.is_on_page(AccountsPage)

        # Store the current url to go back when requesting accounts list.
        self.accounts_url = self.page.url

    def get_accounts_list(self):
        """Return the accounts found on the AccountsPage, loading it if needed."""
        if not self.is_on_page(AccountsPage):
            self.location(self.accounts_url)
        return self.page.get_list()

    def get_account(self, id):
        """
        Return the account whose ``id`` attribute equals *id*, or None when
        no account of the list matches.
        """
        assert isinstance(id, basestring)
        wanted = '%s' % id
        for account in self.get_accounts_list():
            if account.id == wanted:
                return account
        return None

    def get_history(self, account):
        """
        Yield the transactions of *account*, following the "next page" links
        until the page reports none.

        Nothing is yielded when ``account._link`` is None.
        """
        # some accounts may exist without a link to any history page
        if account._link is None:
            return

        url = account._link
        date_guesser = LinearDateGuesser()
        # Every page, including the first one, is requested exactly once here.
        while url:
            self.location(url)
            assert self.is_on_page(TransactionsPage)
            for tr in self.page.get_history(date_guesser):
                yield tr
            url = self.page.get_next_url()
--- a/modules/cragr/web/pages.py
+++ b/modules/cragr/web/pages.py
@ -0,0 +1,173 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2013  Romain Bignon
 #
 # This file is part of weboob.
 #
 # weboob is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.
 #
 # weboob is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU Affero General Public License for more details.
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 import re
 from decimal import Decimal
 from weboob.capabilities.bank import Account
 from weboob.tools.browser import BasePage
 from weboob.tools.capabilities.bank.transactions import FrenchTransaction as Transaction
 __all__ = ['HomePage', 'LoginPage', 'LoginErrorPage', 'AccountsPage', 'TransactionsPage']
 class HomePage(BasePage):
    """Home page of the website, from which the authentication URL is read."""

    def get_post_url(self):
        """
        Search the inline <script> elements for a ``var chemin = "..."``
        assignment and return the quoted value of the first one found.

        Returns None when no script contains such an assignment.
        """
        pattern = re.compile(r'var chemin = "([^"]+)"', re.MULTILINE)
        for node in self.document.xpath('//script'):
            source = node.text
            if source is not None:
                found = pattern.search(source)
                if found is not None:
                    return found.group(1)
        return None
 class LoginPage(BasePage):
    """
    Password prompt. The page holds a table (id ``pave-saisie-code``) of
    links, some of which are labelled with a digit.
    """

    def login(self, password):
        """
        Fill and submit the form named ``formulaire``.

        The password is not sent as typed: each of its digits is replaced by
        the two-digit value ``tabindex - 1`` of the link labelled with that
        digit, and the values are joined with commas into ``CCCRYC``.
        ``CCCRYC2`` receives one ``0`` per password character.

        :param password: string made of exactly six digits.
        """
        assert password.isdigit()
        assert len(password) == 6

        imgmap = {}
        for td in self.document.xpath('//table[@id="pave-saisie-code"]/tr/td'):
            a = td.find('a')
            # A cell without a link, or with a link that has no text, cannot
            # stand for a digit: skip it instead of failing on None.
            if a is None or a.text is None:
                continue
            num = a.text.strip()
            if num.isdigit():
                imgmap[num] = int(a.attrib['tabindex']) - 1

        self.browser.select_form(name='formulaire')
        # The fields are assigned directly below, so lift the read-only flag.
        self.browser.set_all_readonly(False)
        self.browser['CCCRYC'] = ','.join(['%02d' % imgmap[c] for c in password])
        self.browser['CCCRYC2'] = '0' * len(password)
        self.browser.submit(nologin=True)

    def get_result_url(self):
        """Return the cleaned text of the document root element."""
        return self.parser.tocleanstring(self.document.getroot())
 class LoginErrorPage(BasePage):
    """Marker page for error URLs; it carries no parsing logic of its own."""
 class AccountsPage(BasePage):
    """Accounts summary page: accounts are rows of the ``ca-table`` table."""

    # Indexes of the <td> cells read from an account row.
    COL_LABEL    = 0
    COL_ID       = 2
    COL_VALUE    = 4
    COL_CURRENCY = 5

    def get_list(self):
        """
        Yield one Account per table row whose ``class`` attribute starts with
        ``colcelligne``.

        ``_link`` is the href of the link found in the first cell, with
        spaces replaced by ``%20``, or None when that cell has no link.
        """
        for row in self.document.xpath('//table[@class="ca-table"]/tr'):
            if row.attrib.get('class', '').startswith('colcelligne'):
                cells = row.findall('td')

                entry = Account()
                entry.id = self.parser.tocleanstring(cells[self.COL_ID])
                entry.label = self.parser.tocleanstring(cells[self.COL_LABEL])
                amount_text = self.parser.tocleanstring(cells[self.COL_VALUE])
                entry.balance = Decimal(Transaction.clean_amount(amount_text))
                currency_text = self.parser.tocleanstring(cells[self.COL_CURRENCY])
                entry.currency = entry.get_currency(currency_text)

                entry._link = None
                anchor = cells[0].find('a')
                if anchor is not None:
                    entry._link = anchor.attrib['href'].replace(' ', '%20')

                yield entry
 class TransactionsPage(BasePage):
    """History page of one account; the operations are rows of ``ca-table``."""

    # Indexes of the <td> cells read from a transaction row.
    COL_DATE  = 0
    COL_TEXT  = 1
    COL_VALUE = -1  # the amount is read from the last cell of the row

    # Category text -> transaction type; unlisted categories map to TYPE_UNKNOWN.
    TYPES = {'Paiement Par Carte':          Transaction.TYPE_CARD,
             'Retrait Au Distributeur':     Transaction.TYPE_WITHDRAWAL,
             'Frais':                       Transaction.TYPE_BANK,
             'Cotisation':                  Transaction.TYPE_BANK,
             'Virement Emis':               Transaction.TYPE_TRANSFER,
             'Virement':                    Transaction.TYPE_TRANSFER,
             'Cheque Emis':                 Transaction.TYPE_CHECK,
             'Remise De Cheque':            Transaction.TYPE_DEPOSIT,
             'Prelevement':                 Transaction.TYPE_ORDER,
            }

    def get_next_url(self):
        """
        Return the href of the last pager link when it wraps an image whose
        ``alt`` is ``Page suivante``; return None otherwise.
        """
        links = self.document.xpath('//span[@class="pager"]/a[@class="liennavigationcorpspage"]')
        if len(links) < 1:
            return None

        last = links[-1]
        img = last.find('img')
        # A last link without any <img> cannot be the "next page" link.
        if img is not None and img.attrib.get('alt', '') == 'Page suivante':
            return last.attrib['href']
        return None

    def get_history(self, date_guesser):
        """
        Yield a Transaction for each table row whose ``class`` attribute
        starts with ``ligne-`` and which holds no <th> cell.

        :param date_guesser: object whose ``guess_date(day, month)`` turns the
                             day/month shown on the page into a date.

        Transaction ids are a counter starting at 0 for this page.
        """
        i = 0
        for tr in self.document.xpath('//table[@class="ca-table"]/tr'):
            if not tr.attrib.get('class', '').startswith('ligne-'):
                continue

            # On loan accounts, there is a ca-table with a summary. Skip it.
            if tr.find('th') is not None:
                continue

            t = Transaction(i)

            cols = tr.findall('td')
            date = self.parser.tocleanstring(cols[self.COL_DATE])
            raw = self.parser.tocleanstring(cols[self.COL_TEXT])
            value = self.parser.tocleanstring(cols[self.COL_VALUE])

            day, month = map(int, date.split('/', 1))
            t.date = date_guesser.guess_date(day, month)
            t.rdate = t.date
            t.raw = raw

            # On some accounts' history page, there is a <font> tag in columns.
            col_text = cols[self.COL_TEXT]
            font = col_text.find('font')
            if font is not None:
                col_text = font

            # The cell may have no leading text at all; treat that as empty.
            t.category = unicode((col_text.text or '').strip())

            # A single-line cell has no <br>; in that case there is no
            # second line to read the label from.
            br = col_text.find('br')
            t.label = br.tail if br is not None else None
            if t.label is not None:
                t.label = t.label.strip()
            else:
                # If there is only one line, try to separate category from label.
                t.label = re.sub(r'(.*)  (.*)', r'\2', t.category).strip()
            # Sometimes, the category contains the label, even if there is another line with it again.
            t.category = re.sub(r'(.*)  .*', r'\1', t.category).strip()

            # Parse operation date in label (for card transactions for example)
            m = re.match(r'(.*) (\d{2})/(\d{2})$', t.label)
            if m:
                t.rdate = date_guesser.guess_date(int(m.group(2)), int(m.group(3)), change_current_date=False)
                t.label = m.group(1).strip()

            # Strip city or other useless information from label.
            t.label = re.sub(r'(.*)  .*', r'\1', t.label).strip()
            t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)
            t.set_amount(value)

            yield t

            i += 1