diff --git a/modules/cragr/backend.py b/modules/cragr/backend.py
index 8cbe22de..48cc5b0e 100644
--- a/modules/cragr/backend.py
+++ b/modules/cragr/backend.py
@@ -23,7 +23,8 @@ from weboob.tools.backend import BaseBackend, BackendConfig
 from weboob.tools.ordereddict import OrderedDict
 from weboob.tools.value import ValueBackendPassword, Value
 
-from .browser import Cragr
+from .web.browser import Cragr
+from .mobile.browser import CragrMobile
 
 
 __all__ = ['CragrBackend']
@@ -79,9 +80,21 @@ class CragrBackend(BaseBackend, ICapBank):
     BROWSER = Cragr
 
     def create_default_browser(self):
-        return self.create_browser(self.config['website'].get(),
-                                   self.config['login'].get(),
-                                   self.config['password'].get())
+        """
+        Build the browser from the configured website, login and password.
+
+        The regular website browser is tried first; when it raises
+        Cragr.WebsiteNotSupported, the mobile browser is used instead.
+        """
+        args = (self.config['website'].get(),
+                self.config['login'].get(),
+                self.config['password'].get())
+        try:
+            return self.create_browser(*args)
+        except Cragr.WebsiteNotSupported:
+            self.logger.debug('falling-back on mobile version')
+            self.BROWSER = CragrMobile
+            return self.create_browser(*args)
 
     def iter_accounts(self):
         return self.browser.get_accounts_list()
diff --git a/modules/cragr/mobile/__init__.py b/modules/cragr/mobile/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/cragr/browser.py b/modules/cragr/mobile/browser.py
similarity index 99%
rename from modules/cragr/browser.py
rename to modules/cragr/mobile/browser.py
index db23d0f3..97784210 100644
--- a/modules/cragr/browser.py
+++ b/modules/cragr/mobile/browser.py
@@ -27,7 +27,10 @@ from datetime import datetime
 import re
 
 
-class Cragr(BaseBrowser):
+__all__ = ['CragrMobile']
+
+
+class CragrMobile(BaseBrowser):
     PROTOCOL = 'https'
     ENCODING = 'utf-8'
     USER_AGENT = BaseBrowser.USER_AGENTS['wget']
diff --git a/modules/cragr/pages/__init__.py b/modules/cragr/mobile/pages/__init__.py
similarity index 100%
rename from modules/cragr/pages/__init__.py
rename to modules/cragr/mobile/pages/__init__.py
diff --git a/modules/cragr/pages/accounts_list.py b/modules/cragr/mobile/pages/accounts_list.py
similarity index 100%
rename from modules/cragr/pages/accounts_list.py
rename to modules/cragr/mobile/pages/accounts_list.py
diff --git a/modules/cragr/pages/base.py b/modules/cragr/mobile/pages/base.py
similarity index 100%
rename from modules/cragr/pages/base.py
rename to modules/cragr/mobile/pages/base.py
diff --git a/modules/cragr/pages/login.py b/modules/cragr/mobile/pages/login.py
similarity index 100%
rename from modules/cragr/pages/login.py
rename to modules/cragr/mobile/pages/login.py
diff --git a/modules/cragr/pages/tokenextractor.py b/modules/cragr/mobile/pages/tokenextractor.py
similarity index 100%
rename from modules/cragr/pages/tokenextractor.py
rename to modules/cragr/mobile/pages/tokenextractor.py
diff --git a/modules/cragr/web/__init__.py b/modules/cragr/web/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/cragr/web/browser.py b/modules/cragr/web/browser.py
new file mode 100644
index 00000000..620afe84
--- /dev/null
+++ b/modules/cragr/web/browser.py
@@ -0,0 +1,167 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import urllib
+import re
+
+from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
+from weboob.tools.date import LinearDateGuesser
+
+from .pages import HomePage, LoginPage, LoginErrorPage, AccountsPage, TransactionsPage
+
+
+__all__ = ['Cragr']
+
+
+class Cragr(BaseBrowser):
+    """
+    Browser for the regular (non-mobile) version of the website.
+
+    login() raises WebsiteNotSupported when the home page does not give the
+    URL of the authentication service.
+    """
+    PROTOCOL = 'https'
+    ENCODING = 'ISO-8859-1'
+
+    # Keys are regular expressions matched against URLs, hence raw strings.
+    PAGES = {r'https?://[^/]+/': HomePage,
+             r'https?://[^/]+/stb/entreeBam': LoginPage,
+             r'https?://[^/]+/stb/entreeBam\?.*act=Synthcomptes': AccountsPage,
+             r'https?://[^/]+/stb/collecteNI\?.*act=Releves.*': TransactionsPage,
+             r'https?://[^/]+/stb/collecteNI\?.*sessionAPP=Releves.*': TransactionsPage,
+             r'https?://[^/]+/stb/.*/erreur/.*': LoginErrorPage,
+            }
+
+    class WebsiteNotSupported(Exception):
+        """Raised by login() when the home page gives no authentication URL."""
+
+    def __init__(self, website, *args, **kwargs):
+        """
+        :param website: host name of the website; a leading "m." is replaced
+                        by "www." to build the domain.
+        """
+        self.DOMAIN = re.sub(r'^m\.', 'www.', website)
+        self.accounts_url = None
+        BaseBrowser.__init__(self, *args, **kwargs)
+
+    def home(self):
+        """Log in (see login())."""
+        self.login()
+
+    def is_logged(self):
+        """Return True when a page is loaded and it is not the home page."""
+        return self.page is not None and not self.is_on_page(HomePage)
+
+    def login(self):
+        """
+        Attempt to log in.
+        Note: this method does nothing if we are already logged in.
+
+        Raises WebsiteNotSupported when the home page gives no authentication
+        URL, and BrowserIncorrectPassword when the login is rejected.
+        """
+        assert isinstance(self.username, basestring)
+        assert isinstance(self.password, basestring)
+
+        # Do we really need to login?
+        if self.is_logged():
+            self.logger.debug('already logged in')
+            return
+
+        if not self.is_on_page(HomePage):
+            self.location(self.absurl('/'), no_login=True)
+
+        # On the homepage, we get the URL of the auth service.
+        url = self.page.get_post_url()
+        if url is None:
+            raise self.WebsiteNotSupported()
+
+        # First, post account number to get the password prompt.
+        data = {'CCPTE': self.username.encode(self.ENCODING),
+                'canal': 'WEB',
+                'hauteur_ecran': 768,
+                'largeur_ecran': 1024,
+                'liberror': '',
+                'matrice': 'true',
+                'origine': 'vitrine',
+                'situationTravail': 'BANCAIRE',
+                'typeAuthentification': 'CLIC_ALLER',
+                'urlOrigine': self.page.url,
+                'vitrine': 0,
+               }
+
+        self.location(url, urllib.urlencode(data))
+
+        assert self.is_on_page(LoginPage)
+
+        # Then, post the password.
+        self.page.login(self.password)
+
+        # The result of POST is the destination URL.
+        url = self.page.get_result_url()
+
+        self.location(url)
+
+        if self.is_on_page(LoginErrorPage) or not self.is_logged():
+            raise BrowserIncorrectPassword()
+
+        assert self.is_on_page(AccountsPage)
+
+        # Store the current url to go back when requesting accounts list.
+        self.accounts_url = self.page.url
+
+    def get_accounts_list(self):
+        """Return the accounts given by the accounts page, loading it if needed."""
+        if not self.is_on_page(AccountsPage):
+            self.location(self.accounts_url)
+        return self.page.get_list()
+
+    def get_account(self, id):
+        """Return the account whose id is *id*, or None when there is none."""
+        assert isinstance(id, basestring)
+
+        for account in self.get_accounts_list():
+            if account.id == ('%s' % id):
+                return account
+
+        return None
+
+    def get_history(self, account):
+        """
+        Iterate over the transactions of an account, page after page.
+
+        Nothing is yielded when the account has no link to a history page.
+        """
+        # some accounts may exist without a link to any history page
+        if account._link is None:
+            return
+
+        # The loop below performs the first request itself.
+        url = account._link
+        date_guesser = LinearDateGuesser()
+
+        while url:
+            self.location(url)
+            assert self.is_on_page(TransactionsPage)
+
+            for tr in self.page.get_history(date_guesser):
+                yield tr
+
+            url = self.page.get_next_url()
diff --git a/modules/cragr/web/pages.py b/modules/cragr/web/pages.py
new file mode 100644
index 00000000..09b15e8c
--- /dev/null
+++ b/modules/cragr/web/pages.py
@@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+from decimal import Decimal
+
+from weboob.capabilities.bank import Account
+from weboob.tools.browser import BasePage
+from weboob.tools.capabilities.bank.transactions import FrenchTransaction as Transaction
+
+
+__all__ = ['HomePage', 'LoginPage', 'LoginErrorPage', 'AccountsPage', 'TransactionsPage']
+
+
+class HomePage(BasePage):
+    """Home page, whose scripts may hold the URL to post the login to."""
+
+    def get_post_url(self):
+        """
+        Return the value of the "chemin" javascript variable found in the
+        scripts of the page, or None when no script defines it.
+        """
+        for script in self.document.xpath('//script'):
+            text = script.text
+            if text is None:
+                continue
+
+            m = re.search(r'var chemin = "([^"]+)"', text, re.MULTILINE)
+            if m:
+                return m.group(1)
+
+        return None
+
+
+class LoginPage(BasePage):
+    """Page asking for the password through a table of digits."""
+
+    def login(self, password):
+        """
+        Fill and submit the password form.
+
+        Each digit of the password is sent as the tabindex, minus one, of the
+        link showing that digit in the table.
+
+        :param password: string made of exactly six digits
+        """
+        assert password.isdigit()
+        assert len(password) == 6
+
+        imgmap = {}
+        for td in self.document.xpath('//table[@id="pave-saisie-code"]/tr/td'):
+            a = td.find('a')
+            # Some cells may hold no link or no text: there is no digit there.
+            if a is None or a.text is None:
+                continue
+
+            num = a.text.strip()
+            if num.isdigit():
+                imgmap[num] = int(a.attrib['tabindex']) - 1
+
+        self.browser.select_form(name='formulaire')
+        self.browser.set_all_readonly(False)
+        self.browser['CCCRYC'] = ','.join(['%02d' % imgmap[c] for c in password])
+        self.browser['CCCRYC2'] = '0' * len(password)
+        self.browser.submit(nologin=True)
+
+    def get_result_url(self):
+        """Return the cleaned text of the whole document."""
+        return self.parser.tocleanstring(self.document.getroot())
+
+
+class LoginErrorPage(BasePage):
+    """Page whose only role is to be recognized from its URL."""
+
+
+class AccountsPage(BasePage):
+    """Page listing the accounts in a table."""
+
+    # Indexes of the cells read in each account row.
+    COL_LABEL = 0
+    COL_ID = 2
+    COL_VALUE = 4
+    COL_CURRENCY = 5
+
+    def get_list(self):
+        """
+        Iterate over the accounts found in the table.
+
+        The _link attribute of an account is the URL of its history, or None
+        when the label cell holds no link.
+        """
+        for tr in self.document.xpath('//table[@class="ca-table"]/tr'):
+            if not tr.attrib.get('class', '').startswith('colcelligne'):
+                continue
+
+            cols = tr.findall('td')
+
+            account = Account()
+            account.id = self.parser.tocleanstring(cols[self.COL_ID])
+            account.label = self.parser.tocleanstring(cols[self.COL_LABEL])
+            account.balance = Decimal(Transaction.clean_amount(self.parser.tocleanstring(cols[self.COL_VALUE])))
+            account.currency = account.get_currency(self.parser.tocleanstring(cols[self.COL_CURRENCY]))
+            account._link = None
+
+            a = cols[self.COL_LABEL].find('a')
+            if a is not None:
+                account._link = a.attrib['href'].replace(' ', '%20')
+
+            yield account
+
+
+class TransactionsPage(BasePage):
+    """Page listing the transactions of an account."""
+
+    def get_next_url(self):
+        """
+        Return the URL of the next page of transactions, or None when the
+        last link of the pager is not the "Page suivante" one.
+        """
+        links = self.document.xpath('//span[@class="pager"]/a[@class="liennavigationcorpspage"]')
+        if len(links) < 1:
+            return None
+
+        img = links[-1].find('img')
+        # A link without image cannot be identified as the "next page" one.
+        if img is not None and img.attrib.get('alt', '') == 'Page suivante':
+            return links[-1].attrib['href']
+
+        return None
+
+    COL_DATE = 0
+    COL_TEXT = 1
+    COL_VALUE = -1
+
+    TYPES = {'Paiement Par Carte': Transaction.TYPE_CARD,
+             'Retrait Au Distributeur': Transaction.TYPE_WITHDRAWAL,
+             'Frais': Transaction.TYPE_BANK,
+             'Cotisation': Transaction.TYPE_BANK,
+             'Virement Emis': Transaction.TYPE_TRANSFER,
+             'Virement': Transaction.TYPE_TRANSFER,
+             'Cheque Emis': Transaction.TYPE_CHECK,
+             'Remise De Cheque': Transaction.TYPE_DEPOSIT,
+             'Prelevement': Transaction.TYPE_ORDER,
+            }
+
+    # Categories contain single spaces (see TYPES), so the gap between a
+    # category and what follows it has to be wider than one space.
+    # NOTE(review): a run of two or more spaces is assumed -- confirm against
+    # real pages.
+    BEFORE_GAP_REGEXP = re.compile(r'(.*) {2,}.*')
+    AFTER_GAP_REGEXP = re.compile(r'.* {2,}(.*)')
+    # Label ending with a "DD/MM" date.
+    RDATE_REGEXP = re.compile(r'(.*) (\d{2})/(\d{2})$')
+
+    def get_history(self, date_guesser):
+        """
+        Iterate over the transactions listed on this page.
+
+        :param date_guesser: object whose guess_date(day, month) method gives
+                             the full date of a transaction
+        """
+        i = 0
+        for tr in self.document.xpath('//table[@class="ca-table"]/tr'):
+            if not tr.attrib.get('class', '').startswith('ligne-'):
+                continue
+
+            # On loan accounts, there is a ca-table with a summary. Skip it.
+            if tr.find('th') is not None:
+                continue
+
+            t = Transaction(i)
+
+            cols = tr.findall('td')
+            date = self.parser.tocleanstring(cols[self.COL_DATE])
+            raw = self.parser.tocleanstring(cols[self.COL_TEXT])
+            value = self.parser.tocleanstring(cols[self.COL_VALUE])
+
+            day, month = map(int, date.split('/', 1))
+            t.date = date_guesser.guess_date(day, month)
+            t.rdate = t.date
+            t.raw = raw
+
+            # On some accounts' history page, there is a <font> tag in columns.
+            col_text = cols[self.COL_TEXT]
+            font = col_text.find('font')
+            if font is not None:
+                col_text = font
+
+            t.category = unicode((col_text.text or '').strip())
+
+            # The label is the text following the <br>, when there is one.
+            br = col_text.find('br')
+            t.label = br.tail if br is not None else None
+            if t.label is not None:
+                t.label = t.label.strip()
+            else:
+                # If there is only one line, try to separate category from label.
+                t.label = self.AFTER_GAP_REGEXP.sub(r'\1', t.category).strip()
+            # Sometimes, the category contains the label, even if there is another line with it again.
+            t.category = self.BEFORE_GAP_REGEXP.sub(r'\1', t.category).strip()
+
+            # Parse operation date in label (for card transactions for example)
+            m = self.RDATE_REGEXP.match(t.label)
+            if m:
+                t.rdate = date_guesser.guess_date(int(m.group(2)), int(m.group(3)), change_current_date=False)
+                t.label = m.group(1).strip()
+
+            # Strip city or other useless information from label.
+            t.label = self.BEFORE_GAP_REGEXP.sub(r'\1', t.label).strip()
+
+            t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)
+
+            t.set_amount(value)
+            yield t
+
+            i += 1