support new Crédit Agricole website

This commit is contained in:
Romain Bignon 2013-03-18 15:52:36 +01:00
commit 240efb85f4
11 changed files with 334 additions and 5 deletions

View file

@ -23,7 +23,8 @@ from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.value import ValueBackendPassword, Value
from .browser import Cragr
from .web.browser import Cragr
from .mobile.browser import CragrMobile
__all__ = ['CragrBackend']
@ -79,9 +80,16 @@ class CragrBackend(BaseBackend, ICapBank):
BROWSER = Cragr
def create_default_browser(self):
    """
    Create the browser from the configured website, login and password.

    The browser class currently set in BROWSER is tried first. If it raises
    Cragr.WebsiteNotSupported, BROWSER is switched to CragrMobile and the
    creation is attempted a second time with the same arguments.
    """
    # Read the configuration once; both attempts use the same arguments.
    args = (self.config['website'].get(),
            self.config['login'].get(),
            self.config['password'].get())

    try:
        return self.create_browser(*args)
    except Cragr.WebsiteNotSupported:
        self.logger.debug('falling-back on mobile version')
        self.BROWSER = CragrMobile
        return self.create_browser(*args)
def iter_accounts(self):
return self.browser.get_accounts_list()

View file

View file

@ -27,7 +27,10 @@ from datetime import datetime
import re
class Cragr(BaseBrowser):
__all__ = ['CragrMobile']
class CragrMobile(BaseBrowser):
PROTOCOL = 'https'
ENCODING = 'utf-8'
USER_AGENT = BaseBrowser.USER_AGENTS['wget']

View file

View file

@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
import re
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from weboob.tools.date import LinearDateGuesser
from .pages import HomePage, LoginPage, LoginErrorPage, AccountsPage, TransactionsPage
__all__ = ['Cragr']
class Cragr(BaseBrowser):
    """
    Browser for the regular (non-mobile) Credit Agricole website.

    login() raises WebsiteNotSupported when the home page does not provide
    the URL of the authentication service.
    """
    PROTOCOL = 'https'
    ENCODING = 'ISO-8859-1'

    # URL patterns are raw strings so that regex escapes such as ``\?`` are
    # not subject to Python string-escape processing.
    PAGES = {r'https?://[^/]+/': HomePage,
             r'https?://[^/]+/stb/entreeBam': LoginPage,
             r'https?://[^/]+/stb/entreeBam\?.*act=Synthcomptes': AccountsPage,
             r'https?://[^/]+/stb/collecteNI\?.*act=Releves.*': TransactionsPage,
             r'https?://[^/]+/stb/collecteNI\?.*sessionAPP=Releves.*': TransactionsPage,
             r'https?://[^/]+/stb/.*/erreur/.*': LoginErrorPage,
            }

    class WebsiteNotSupported(Exception):
        """Raised by login() when the home page gives no authentication URL."""
        pass

    def __init__(self, website, *args, **kwargs):
        """
        :param website: host name of the bank website; a leading ``m.`` is
                        replaced by ``www.``.

        Remaining arguments are passed to BaseBrowser.__init__ unchanged.
        """
        # DOMAIN and accounts_url are set before the base constructor runs.
        self.DOMAIN = re.sub(r'^m\.', 'www.', website)
        self.accounts_url = None
        BaseBrowser.__init__(self, *args, **kwargs)

    def home(self):
        """Going home is the same as logging in."""
        self.login()

    def is_logged(self):
        """Return True when a page is loaded and it is not the home page."""
        return self.page is not None and not self.is_on_page(HomePage)

    def login(self):
        """
        Attempt to log in.

        Note: this method does nothing if we are already logged in.

        Raises WebsiteNotSupported if the home page does not give the URL of
        the authentication service, and BrowserIncorrectPassword if the login
        error page is reached or the browser is still not logged afterwards.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        # Do we really need to login?
        if self.is_logged():
            self.logger.debug('already logged in')
            return

        if not self.is_on_page(HomePage):
            self.location(self.absurl('/'), no_login=True)

        # On the homepage, we get the URL of the auth service.
        url = self.page.get_post_url()
        if url is None:
            raise self.WebsiteNotSupported()

        # First, post account number to get the password prompt.
        data = {'CCPTE': self.username.encode(self.ENCODING),
                'canal': 'WEB',
                'hauteur_ecran': 768,
                'largeur_ecran': 1024,
                'liberror': '',
                'matrice': 'true',
                'origine': 'vitrine',
                'situationTravail': 'BANCAIRE',
                'typeAuthentification': 'CLIC_ALLER',
                'urlOrigine': self.page.url,
                'vitrine': 0,
               }

        self.location(url, urllib.urlencode(data))

        assert self.is_on_page(LoginPage)

        # Then, post the password.
        self.page.login(self.password)

        # The result of POST is the destination URL.
        url = self.page.get_result_url()
        self.location(url)

        if self.is_on_page(LoginErrorPage) or not self.is_logged():
            raise BrowserIncorrectPassword()

        assert self.is_on_page(AccountsPage)

        # Store the current url to go back when requesting accounts list.
        self.accounts_url = self.page.url

    def get_accounts_list(self):
        """Return the accounts listed on the accounts page, loading it if needed."""
        if not self.is_on_page(AccountsPage):
            self.location(self.accounts_url)
        return self.page.get_list()

    def get_account(self, id):
        """Return the account whose id equals ``id``, or None if there is none."""
        assert isinstance(id, basestring)

        for a in self.get_accounts_list():
            if a.id == ('%s' % id):
                return a

        return None

    def get_history(self, account):
        """
        Iterate over the transactions of ``account``, following the
        "next page" links until there is none left.

        Yields nothing for an account without history link.
        """
        # some accounts may exist without a link to any history page
        if account._link is None:
            return

        date_guesser = LinearDateGuesser()

        # Each page is requested exactly once, inside the loop: the first
        # page is not fetched separately beforehand.
        url = account._link
        while url:
            self.location(url)
            assert self.is_on_page(TransactionsPage)

            for tr in self.page.get_history(date_guesser):
                yield tr

            url = self.page.get_next_url()

173
modules/cragr/web/pages.py Normal file
View file

@ -0,0 +1,173 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
from decimal import Decimal
from weboob.capabilities.bank import Account
from weboob.tools.browser import BasePage
from weboob.tools.capabilities.bank.transactions import FrenchTransaction as Transaction
__all__ = ['HomePage', 'LoginPage', 'LoginErrorPage', 'AccountsPage', 'TransactionsPage']
class HomePage(BasePage):
    """Home page of the website, from which the login URL is extracted."""

    def get_post_url(self):
        """
        Return the value assigned to the ``chemin`` variable in the first
        <script> element that defines it, or None if no script does.
        """
        for node in self.document.xpath('//script'):
            content = node.text
            # Script elements without inline text cannot hold the variable.
            if content is not None:
                found = re.search(r'var chemin = "([^"]+)"', content, re.MULTILINE)
                if found:
                    return found.group(1)

        return None
class LoginPage(BasePage):
    """Page showing the keypad used to enter the password."""

    def login(self, password):
        """
        Fill and submit the ``formulaire`` form with the given password.

        The password must be made of exactly six digits. Each digit is
        replaced by a two-digit number derived from the ``tabindex`` of the
        keypad link showing that digit.
        """
        assert password.isdigit()
        assert len(password) == 6

        # Map each digit shown on the keypad to (tabindex - 1) of its link.
        positions = {}
        for cell in self.document.xpath('//table[@id="pave-saisie-code"]/tr/td'):
            link = cell.find('a')
            digit = link.text.strip()
            if not digit.isdigit():
                continue
            positions[digit] = int(link.attrib['tabindex']) - 1

        self.browser.select_form(name='formulaire')
        self.browser.set_all_readonly(False)

        codes = []
        for char in password:
            codes.append('%02d' % positions[char])
        self.browser['CCCRYC'] = ','.join(codes)
        self.browser['CCCRYC2'] = '0' * len(password)
        self.browser.submit(nologin=True)

    def get_result_url(self):
        """Return the cleaned text of the whole document."""
        root = self.document.getroot()
        return self.parser.tocleanstring(root)
class LoginErrorPage(BasePage):
    """Marker page without any method of its own."""
class AccountsPage(BasePage):
    """Page listing the accounts in a ``ca-table`` table."""

    # Indexes of the <td> cells read on each account row.
    COL_LABEL = 0
    COL_ID = 2
    COL_VALUE = 4
    COL_CURRENCY = 5

    def get_list(self):
        """
        Yield an Account for every row whose class starts with
        ``colcelligne``.

        ``_link`` is the href of the link found in the first cell (spaces
        replaced by ``%20``), or None when that cell has no link.
        """
        clean = self.parser.tocleanstring

        for row in self.document.xpath('//table[@class="ca-table"]/tr'):
            row_class = row.attrib.get('class', '')
            if row_class.startswith('colcelligne'):
                cells = row.findall('td')

                account = Account()
                account.id = clean(cells[self.COL_ID])
                account.label = clean(cells[self.COL_LABEL])

                amount = Transaction.clean_amount(clean(cells[self.COL_VALUE]))
                account.balance = Decimal(amount)

                currency = clean(cells[self.COL_CURRENCY])
                account.currency = account.get_currency(currency)

                account._link = None
                link = cells[0].find('a')
                if link is not None:
                    account._link = link.attrib['href'].replace(' ', '%20')

                yield account
class TransactionsPage(BasePage):
    """Page listing the transactions of an account in a ``ca-table`` table."""

    # Indexes of the <td> cells read on each transaction row.
    COL_DATE = 0
    COL_TEXT = 1
    COL_VALUE = -1

    # Transaction type associated to each known category text.
    TYPES = {'Paiement Par Carte': Transaction.TYPE_CARD,
             'Retrait Au Distributeur': Transaction.TYPE_WITHDRAWAL,
             'Frais': Transaction.TYPE_BANK,
             'Cotisation': Transaction.TYPE_BANK,
             'Virement Emis': Transaction.TYPE_TRANSFER,
             'Virement': Transaction.TYPE_TRANSFER,
             'Cheque Emis': Transaction.TYPE_CHECK,
             'Remise De Cheque': Transaction.TYPE_DEPOSIT,
             'Prelevement': Transaction.TYPE_ORDER,
            }

    def get_next_url(self):
        """
        Return the href of the last pager link when its image is labelled
        'Page suivante', None otherwise.
        """
        links = self.document.xpath('//span[@class="pager"]/a[@class="liennavigationcorpspage"]')
        if len(links) < 1:
            return None

        last = links[-1]
        img = last.find('img')
        # find() returns None when the link holds no <img>; such a link
        # cannot be the "next page" one.
        if img is not None and img.attrib.get('alt', '') == 'Page suivante':
            return last.attrib['href']

        return None

    def get_history(self, date_guesser):
        """
        Yield a Transaction for every row whose class starts with ``ligne-``
        and which has no <th> cell.

        :param date_guesser: object used to build full dates from the
                             day/month pairs displayed on the page.
        """
        i = 0
        for tr in self.document.xpath('//table[@class="ca-table"]/tr'):
            if not tr.attrib.get('class', '').startswith('ligne-'):
                continue

            # On loan accounts, there is a ca-table with a summary. Skip it.
            if tr.find('th') is not None:
                continue

            t = Transaction(i)

            cols = tr.findall('td')

            date = self.parser.tocleanstring(cols[self.COL_DATE])
            raw = self.parser.tocleanstring(cols[self.COL_TEXT])
            value = self.parser.tocleanstring(cols[self.COL_VALUE])

            day, month = map(int, date.split('/', 1))
            t.date = date_guesser.guess_date(day, month)
            t.rdate = t.date
            t.raw = raw

            # On some accounts' history page, there is a <font> tag in columns.
            col_text = cols[self.COL_TEXT]
            font = col_text.find('font')
            if font is not None:
                col_text = font

            t.category = unicode(col_text.text.strip())

            # A cell made of a single line may have no <br> at all; handle it
            # like a <br> without text after it.
            br = col_text.find('br')
            t.label = br.tail if br is not None else None
            if t.label is not None:
                t.label = t.label.strip()
            else:
                # If there is only one line, try to separate category from label.
                t.label = re.sub('(.*) (.*)', r'\2', t.category).strip()
            # Sometimes, the category contains the label, even if there is another line with it again.
            t.category = re.sub('(.*) .*', r'\1', t.category).strip()

            # Parse operation date in label (for card transactions for example)
            m = re.match(r'(.*) (\d{2})/(\d{2})$', t.label)
            if m:
                t.rdate = date_guesser.guess_date(int(m.group(2)), int(m.group(3)), change_current_date=False)
                t.label = m.group(1).strip()

            # Strip city or other useless information from label.
            t.label = re.sub('(.*) .*', r'\1', t.label).strip()
            t.type = self.TYPES.get(t.category, t.TYPE_UNKNOWN)
            t.set_amount(value)

            yield t

            # The counter only advances for rows that produced a transaction.
            i += 1