upgrade to browser2

This commit is contained in:
Romain Bignon 2014-03-09 15:44:18 +01:00
commit 1b2d3cfe48
3 changed files with 233 additions and 275 deletions

View file

@ -59,16 +59,14 @@ class CreditMutuelBackend(BaseBackend, ICapBank):
raise AccountNotFound()
def iter_coming(self, account):
with self.browser:
for tr in self.browser.get_history(account):
if tr._is_coming:
yield tr
for tr in self.browser.get_history(account):
if tr._is_coming:
yield tr
def iter_history(self, account):
with self.browser:
for tr in self.browser.get_history(account):
if not tr._is_coming:
yield tr
for tr in self.browser.get_history(account):
if not tr._is_coming:
yield tr
def iter_transfer_recipients(self, ignored):
for account in self.browser.get_accounts_list():

View file

@ -21,11 +21,12 @@
from urlparse import urlsplit, parse_qsl, urlparse
from datetime import datetime, timedelta
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from weboob.tools.browser2 import LoginBrowser, URL, Wget, need_login
from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities.bank import Transfer, TransferError
from .pages import LoginPage, LoginErrorPage, AccountsPage, UserSpacePage, EmptyPage, \
OperationsPage, CardPage, ComingPage, NoOperationsPage, InfoPage, \
from .pages import LoginPage, LoginErrorPage, AccountsPage, \
OperationsPage, CardPage, ComingPage, NoOperationsPage, \
TransfertPage, ChangePasswordPage, VerifCodePage
@ -33,88 +34,68 @@ __all__ = ['CreditMutuelBrowser']
# Browser
class CreditMutuelBrowser(BaseBrowser):
PROTOCOL = 'https'
DOMAIN = 'www.creditmutuel.fr'
CERTHASH = '57beeba81e7a65d5fe15853219bcfcc2b2da27e0e618a78e6d97a689908ea57b'
ENCODING = 'iso-8859-1'
USER_AGENT = BaseBrowser.USER_AGENTS['wget']
PAGES = {'https://www.creditmutuel.fr/groupe/fr/index.html': LoginPage,
'https://www.creditmutuel.fr/.*/fr/identification/default.cgi': LoginErrorPage,
'https://www.creditmutuel.fr/.*/fr/banque/situation_financiere.cgi': AccountsPage,
'https://www.creditmutuel.fr/.*/fr/banque/espace_personnel.aspx': UserSpacePage,
'https://www.creditmutuel.fr/.*/fr/banque/mouvements.cgi.*': OperationsPage,
'https://www.creditmutuel.fr/.*/fr/banque/mvts_instance.cgi.*': ComingPage,
'https://www.creditmutuel.fr/.*/fr/banque/nr/nr_devbooster.aspx.*': OperationsPage,
'https://www.creditmutuel.fr/.*/fr/banque/operations_carte\.cgi.*': CardPage,
'https://www.creditmutuel.fr/.*/fr/banque/CR/arrivee\.asp.*': NoOperationsPage,
'https://www.creditmutuel.fr/.*/fr/banque/BAD.*': InfoPage,
'https://www.creditmutuel.fr/.*/fr/banque/.*Vir.*': TransfertPage,
'https://www.creditmutuel.fr/.*/fr/validation/change_password.cgi': ChangePasswordPage,
'https://www.creditmutuel.fr/.*/fr/validation/verif_code.cgi.*': VerifCodePage,
'https://www.creditmutuel.fr/.*/fr/': EmptyPage,
'https://www.creditmutuel.fr/.*/fr/banques/index.html': EmptyPage,
'https://www.creditmutuel.fr/.*/fr/banque/paci_beware_of_phishing.html.*': EmptyPage,
'https://www.creditmutuel.fr/.*/fr/validation/(?!change_password|verif_code).*': EmptyPage,
}
class CreditMutuelBrowser(LoginBrowser):
PROFILE = Wget()
BASEURL = 'https://www.creditmutuel.fr'
login = URL('/groupe/fr/index.html', LoginPage)
login_error = URL('/(?P<subbank>.*)/fr/identification/default.cgi', LoginErrorPage)
accounts = URL('/(?P<subbank>.*)/fr/banque/situation_financiere.cgi', AccountsPage)
user_space = URL('/(?P<subbank>.*)/fr/banque/espace_personnel.aspx')
operations = URL('/(?P<subbank>.*)/fr/banque/mouvements.cgi.*',
'/(?P<subbank>.*)/fr/banque/nr/nr_devbooster.aspx.*',
OperationsPage)
coming = URL('/(?P<subbank>.*)/fr/banque/mvts_instance.cgi.*', ComingPage)
card = URL('/(?P<subbank>.*)/fr/banque/operations_carte.cgi.*', CardPage)
noop = URL('/(?P<subbank>.*)/fr/banque/CR/arrivee.asp.*', NoOperationsPage)
info = URL('/(?P<subbank>.*)/fr/banque/BAD.*')
transfert = URL('/(?P<subbank>.*)/fr/banque/WI_VPLV_VirUniSaiCpt.asp\?(?P<params>.*)', TransfertPage)
change_pass = URL('/(?P<subbank>.*)/fr/validation/change_password.cgi', ChangePasswordPage)
verify_pass = URL('/(?P<subbank>.*)/fr/validation/verif_code.cgi.*', VerifCodePage)
empty = URL('/(?P<subbank>.*)/fr/',
'/(?P<subbank>.*)/fr/banques/index.html',
'/(?P<subbank>.*)/fr/banque/paci_beware_of_phishing.html.*',
'/(?P<subbank>.*)/fr/validation/(?!change_password|verif_code).*',
)
currentSubBank = None
def is_logged(self):
return not self.is_on_page(LoginPage) and not self.is_on_page(LoginErrorPage)
def home(self):
return self.location('https://www.creditmutuel.fr/groupe/fr/index.html')
return self.login.go()
def login(self):
assert isinstance(self.username, basestring)
assert isinstance(self.password, basestring)
if not self.is_on_page(LoginPage):
self.location('https://www.creditmutuel.fr/', no_login=True)
def do_login(self):
self.login.stay_or_go()
self.page.login(self.username, self.password)
if not self.is_logged() or self.is_on_page(LoginErrorPage):
if not self.page.logged or self.login_error.is_here():
raise BrowserIncorrectPassword()
self.getCurrentSubBank()
@need_login
def get_accounts_list(self):
if not self.is_on_page(AccountsPage):
self.location('https://www.creditmutuel.fr/%s/fr/banque/situation_financiere.cgi' % self.currentSubBank)
return self.page.get_list()
return self.accounts.stay_or_go(subbank=self.currentSubBank).iter_accounts()
def get_account(self, id):
assert isinstance(id, basestring)
l = self.get_accounts_list()
for a in l:
for a in self.get_accounts_list():
if a.id == id:
return a
return None
def getCurrentSubBank(self):
# the account list and history urls depend on the sub bank of the user
url = urlparse(self.geturl())
url = urlparse(self.url)
self.currentSubBank = url.path.lstrip('/').split('/')[0]
def list_operations(self, page_url):
if page_url.startswith('/'):
self.location(page_url)
else:
self.location('https://%s/%s/fr/banque/%s' % (self.DOMAIN, self.currentSubBank, page_url))
self.location('%s/%s/fr/banque/%s' % (self.BASEURL, self.currentSubBank, page_url))
go_next = True
while go_next:
if not self.is_on_page(OperationsPage):
return
for op in self.page.get_history():
yield op
go_next = self.page.go_next()
return self.pagination(lambda: self.page.get_history())
def get_history(self, account):
transactions = []
@ -127,7 +108,7 @@ class CreditMutuelBrowser(BaseBrowser):
elif last_debit is None:
last_debit = (tr.date - timedelta(days=10)).month
coming_link = self.page.get_coming_link() if self.is_on_page(OperationsPage) else None
coming_link = self.page.get_coming_link() if self.operations.is_here() else None
if coming_link is not None:
for tr in self.list_operations(coming_link):
transactions.append(tr)
@ -152,44 +133,44 @@ class CreditMutuelBrowser(BaseBrowser):
def transfer(self, account, to, amount, reason=None):
# access the transfer page
transfert_url = 'WI_VPLV_VirUniSaiCpt.asp?RAZ=ALL&Cat=6&PERM=N&CHX=A'
self.location('https://%s/%s/fr/banque/%s' % (self.DOMAIN, self.currentSubBank, transfert_url))
params = 'RAZ=ALL&Cat=6&PERM=N&CHX=A'
page = self.transfert.go(subbank=self.currentSubBank, params=params)
# fill the form
self.select_form(name='FormVirUniSaiCpt')
self['IDB'] = [account[-1]]
self['ICR'] = [to[-1]]
self['MTTVIR'] = '%s' % str(amount).replace('.', ',')
form = self.page.get_form(name='FormVirUniSaiCpt')
form['IDB'] = account[-1]
form['ICR'] = to[-1]
form['MTTVIR'] = '%s' % str(amount).replace('.', ',')
if reason is not None:
self['LIBDBT'] = reason
self['LIBCRT'] = reason
self.submit()
form['LIBDBT'] = reason
form['LIBCRT'] = reason
page = form.submit()
# look for known errors
content = unicode(self.response().get_data(), self.ENCODING)
content = page.response.text
insufficient_amount_message = u'Montant insuffisant.'
maximum_allowed_balance_message = u'Solde maximum autorisé dépassé.'
if content.find(insufficient_amount_message) != -1:
if insufficient_amount_message in content:
raise TransferError('The amount you tried to transfer is too low.')
if content.find(maximum_allowed_balance_message) != -1:
if maximum_allowed_balance_message in content:
raise TransferError('The maximum allowed balance for the target account has been / would be reached.')
# look for the known "all right" message
ready_for_transfer_message = u'Confirmez un virement entre vos comptes'
# Raise only when the message is ABSENT from the page.  A membership test
# is used instead of str.find(): find() returns -1 (which is truthy) on a
# miss, so `not content.find(...)` would stay False when the message is
# missing and would only fire when the message starts at offset 0.
if ready_for_transfer_message not in content:
    raise TransferError('The expected message "%s" was not found.' % ready_for_transfer_message)
# submit the confirmation form
self.select_form(name='FormVirUniCnf')
form = page.get_form(name='FormVirUniCnf')
submit_date = datetime.now()
self.submit()
page = form.submit()
# look for the known "everything went well" message
# `page` is the page returned by submitting the confirmation form.
content = page.response.text
transfer_ok_message = u'Votre virement a été exécuté ce jour'
# Membership test rather than str.find(): find() returns -1 (truthy) when
# the text is missing, so `not content.find(...)` cannot detect a miss.
if transfer_ok_message not in content:
    raise TransferError('The expected message "%s" was not found.' % transfer_ok_message)
# We now have to return a Transfer object

View file

@ -18,56 +18,50 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from urlparse import urlparse, parse_qs
from decimal import Decimal
import re
from dateutil.relativedelta import relativedelta
from weboob.tools.browser import BasePage, BrowserIncorrectPassword, BrokenPageError
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, FormNotFound, TableElement
from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, TableCell
from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities import NotAvailable
from weboob.capabilities.bank import Account
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.date import parse_french_date
class LoginPage(BasePage):
class LoggedPage(object):
    """Mixin that gives a page class a ``logged`` attribute set to True.

    Page classes in this module list it as a base class, and the browser's
    login routine reads ``self.page.logged`` after submitting credentials.
    """

    # Class-level flag; instances inherit it without any per-instance state.
    logged = True
class LoginPage(HTMLPage):
def login(self, login, passwd):
self.browser.select_form(nr=0)
self.browser['_cm_user'] = login.encode(self.browser.ENCODING)
self.browser['_cm_pwd'] = passwd.encode(self.browser.ENCODING)
self.browser.submit(nologin=True)
form = self.get_form(nr=0)
form['_cm_user'] = login
form['_cm_pwd'] = passwd
form.submit()
class LoginErrorPage(BasePage):
class LoginErrorPage(HTMLPage):
pass
class ChangePasswordPage(BasePage):
def on_loaded(self):
class ChangePasswordPage(LoggedPage, HTMLPage):
def on_load(self):
raise BrowserIncorrectPassword('Please change your password')
class VerifCodePage(BasePage):
def on_loaded(self):
class VerifCodePage(LoggedPage, HTMLPage):
def on_load(self):
raise BrowserIncorrectPassword('Unable to login: website asks a code from a card')
class InfoPage(BasePage):
class TransfertPage(LoggedPage, HTMLPage):
pass
class EmptyPage(BasePage):
pass
class TransfertPage(BasePage):
pass
class UserSpacePage(BasePage):
pass
class AccountsPage(BasePage):
class AccountsPage(LoggedPage, HTMLPage):
TYPES = {'C/C': Account.TYPE_CHECKING,
'Livret': Account.TYPE_SAVINGS,
'Pret': Account.TYPE_LOAN,
@ -76,55 +70,68 @@ class AccountsPage(BasePage):
'Compte Epargne': Account.TYPE_SAVINGS,
}
def get_list(self):
accounts = OrderedDict()
@method
class iter_accounts(ListElement):
item_xpath = '//tr'
flush_at_end = True
for tr in self.document.getiterator('tr'):
first_td = tr.getchildren()[0]
if (first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g') \
and first_td.find('a') is not None:
class item(ItemElement):
klass = Account
a = first_td.find('a')
link = a.get('href', '')
def __filter__(self, el):
if len(el.xpath('./td')) < 2:
return False
first_td = el.xpath('./td')[0]
return ((first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g')
and first_td.find('a') is not None)
class Label(Filter):
def filter(self, text):
return text.lstrip(' 0123456789').title()
# Field declarations for each Account built from a table row.
# 'id', 'balance' and 'coming' are read from the environment that parse()
# fills in (self.env[...]) before the object is built.
obj_id = Env('id')
obj_label = Label(CleanText('./td[1]/a'))
obj_coming = Env('coming')
# Single definition of obj_balance: an earlier
# `obj_balance = CleanDecimal('./td[2] | ./td[3]')` in this same class body
# was immediately overwritten by this assignment and therefore never used.
# parse() already evaluates that CleanDecimal expression itself when it
# computes the balance it stores in the environment.
obj_balance = Env('balance')
obj_currency = FrenchTransaction.Currency('./td[2] | ./td[3]')
obj__link_id = Link('./td[1]/a')
# NOTE(review): this list literal is created once, in the class body, and
# parse() later calls account._card_links.append(link).  If ItemElement
# assigns this same list object to every Account, card links would be
# shared between accounts -- confirm the framework copies the value.
obj__card_links = []
def obj_type(self):
for pattern, actype in AccountsPage.TYPES.iteritems():
if self.obj.label.startswith(pattern):
return actype
def parse(self, el):
link = el.xpath('./td[1]/a')[0].get('href', '')
if link.startswith('POR_SyntheseLst'):
continue
raise SkipItem()
url = urlparse(link)
p = parse_qs(url.query)
if not 'rib' in p:
continue
for i in (2,1):
balance = FrenchTransaction.clean_amount(tr.getchildren()[i].text)
currency = Account.get_currency(tr.getchildren()[i].text)
if len(balance) > 0:
break
balance = Decimal(balance)
raise SkipItem()
balance = CleanDecimal('./td[2] | ./td[3]')(self)
id = p['rib'][0]
if id in accounts:
account = accounts[id]
# Handle cards
if id in self.parent.objects:
account = self.parent.objects[id]
if not account.coming:
account.coming = Decimal('0.0')
account.coming += balance
account._card_links.append(link)
continue
raise SkipItem()
account = Account()
account.id = id
account.label = unicode(a.text).strip().lstrip(' 0123456789').title()
self.env['id'] = id
for pattern, actype in self.TYPES.iteritems():
if account.label.startswith(pattern):
account.type = actype
account._link_id = link
account._card_links = []
# Find accounting amount
page = self.browser.get_document(self.browser.openurl(link))
coming = self.find_amount(page, u"Opérations à venir")
accounting = self.find_amount(page, u"Solde comptable")
# Handle real balances
page = self.page.browser.open(link)
coming = page.find_amount(u"Opérations à venir")
accounting = page.find_amount(u"Solde comptable")
if accounting is not None and accounting + (coming or Decimal('0')) != balance:
self.logger.warning('%s + %s != %s' % (accounting, coming, balance))
@ -132,22 +139,8 @@ class AccountsPage(BasePage):
if accounting is not None:
balance = accounting
if coming is not None:
account.coming = coming
account.balance = balance
account.currency = currency
accounts[account.id] = account
return accounts.itervalues()
def find_amount(self, page, title):
try:
td = page.xpath(u'//th[contains(text(), "%s")]/../td' % title)[0]
except IndexError:
return None
else:
return Decimal(FrenchTransaction.clean_amount(td.text))
self.env['balance'] = balance
self.env['coming'] = coming or NotAvailable
class Transaction(FrenchTransaction):
@ -165,144 +158,130 @@ class Transaction(FrenchTransaction):
_is_coming = False
class OperationsPage(BasePage):
def get_history(self):
index = 0
for tr in self.document.getiterator('tr'):
# columns can be:
# - date | value | operation | debit | credit | contre-valeur
# - date | value | operation | debit | credit
# - date | operation | debit | credit
# That's why we skip any extra columns, and take operation, debit
# and credit from last instead of first indexes.
tds = tr.getchildren()[:5]
if len(tds) < 4:
continue
class Pagination(object):
def next_page(self):
try:
form = self.page.get_form('//form[@id="paginationForm"]')
except FormNotFound:
return
if tds[0].attrib.get('class', '') == 'i g' or \
tds[0].attrib.get('class', '') == 'p g' or \
tds[0].attrib.get('class', '').endswith('_c1 c _c1'):
operation = Transaction(index)
index += 1
parts = [txt.strip() for txt in tds[-3].itertext() if len(txt.strip()) > 0]
# To simplify categorization of CB, reverse order of parts to separate
# location and institution.
if parts[0].startswith('PAIEMENT CB'):
parts.reverse()
date = tds[0].text
vdate = tds[1].text if len(tds) >= 5 else None
raw = u' '.join(parts)
operation.parse(date=date, vdate=vdate, raw=raw)
credit = self.parser.tocleanstring(tds[-1])
debit = self.parser.tocleanstring(tds[-2])
operation.set_amount(credit, debit)
yield operation
def go_next(self):
form = self.document.xpath('//form[@id="paginationForm"]')
if len(form) == 0:
return False
form = form[0]
text = self.parser.tocleanstring(form)
text = CleanText.clean(form.el)
m = re.search(u'(\d+) / (\d+)', text or '', flags=re.MULTILINE)
if not m:
return False
return
cur = int(m.group(1))
last = int(m.group(2))
if cur == last:
return False
return
inputs = {}
for elm in form.xpath('.//input[@type="input"]'):
key = elm.attrib['name']
value = elm.attrib['value']
inputs[key] = value
form['page'] = str(cur + 1)
return form.request
inputs['page'] = str(cur + 1)
self.browser.location(form.attrib['action'], urllib.urlencode(inputs))
class OperationsPage(LoggedPage, HTMLPage):
@method
class get_history(Pagination, TableElement):
head_xpath = '//table[@class="liste"]//thead//tr/th'
item_xpath = '//table[@class="liste"]//tbody/tr'
return True
columns = {'date': u'Date',
'vdate': u'Valeur',
'raw': u'Opération',
'debit': u'Débit',
'credit': u'Crédit',
}
class item(ItemElement):
klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 4 and len(el.xpath('./td[@class="i g" or @class="p g" or contains(@class, "_c1 c _c1")]')) > 0
class OwnRaw(Filter):
def __call__(self, item):
parts = [txt.strip() for txt in item.el.xpath('./td[last()-2]')[0].itertext() if len(txt.strip()) > 0]
# To simplify categorization of CB, reverse order of parts to separate
# location and institution.
if parts[0].startswith('PAIEMENT CB'):
parts.reverse()
return u' '.join(parts)
obj_raw = Transaction.Raw(OwnRaw())
obj_date = Transaction.Date(TableCell('date'))
obj_vdate = Transaction.Date(TableCell('vdate', 'date'))
obj_amount = Transaction.Amount(TableCell('credit'), TableCell('debit'))
def find_amount(self, title):
    """Return the amount displayed next to the header matching *title*.

    Selects the ``<td>`` elements located under the parent of any ``<th>``
    whose text contains *title*, takes the first one, and converts its
    text to a ``Decimal`` through ``FrenchTransaction.clean_amount``.

    Returns ``None`` when the query matches no cell.
    """
    matches = self.doc.xpath(u'//th[contains(text(), "%s")]/../td' % title)
    try:
        cell = matches[0]
    except IndexError:
        # No header with that title on this page.
        return None
    return Decimal(FrenchTransaction.clean_amount(cell.text))
def get_coming_link(self):
try:
a = self.parser.select(self.document, u'//a[contains(text(), "Opérations à venir")]', 1, 'xpath')
except BrokenPageError:
a = self.doc.xpath(u'//a[contains(text(), "Opérations à venir")]')[0]
except IndexError:
return None
else:
return a.attrib['href']
class ComingPage(OperationsPage):
def get_history(self):
index = 0
for tr in self.document.xpath('//table[@class="liste"]/tbody/tr'):
tds = tr.findall('td')
if len(tds) < 3:
continue
class ComingPage(OperationsPage, LoggedPage):
@method
class get_history(Pagination, ListElement):
item_xpath = '//table[@class="liste"]/tbody/tr'
tr = Transaction(index)
class item(ItemElement):
klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 3
date = self.parser.tocleanstring(tds[0])
raw = self.parser.tocleanstring(tds[1])
amount = self.parser.tocleanstring(tds[-1])
tr.parse(date=date, raw=raw)
tr.set_amount(amount)
tr._is_coming = True
yield tr
obj_date = Transaction.Date('./td[1]')
obj_raw = Transaction.Raw('./td[2]')
obj_amount = Transaction.Amount('./td[last()]')
obj__is_coming = True
class CardPage(OperationsPage):
def get_history(self):
index = 0
class CardPage(OperationsPage, LoggedPage):
@method
class get_history(Pagination, ListElement):
class list_cards(ListElement):
item_xpath = '//table[@class="liste"]/tbody/tr/td/a'
# Check if this is a multi-cards page
pages = []
for a in self.document.xpath('//table[@class="liste"]/tbody/tr/td/a'):
card_link = a.get('href')
history_url = 'https://%s/%s/fr/banque/%s' % (self.browser.DOMAIN, self.browser.currentSubBank, card_link)
page = self.browser.get_document(self.browser.openurl(history_url))
pages.append(page)
class item(ItemElement):
def __iter__(self):
card_link = self.el.get('href')
history_url = '%s/%s/fr/banque/%s' % (self.browser.BASEURL, self.browser.currentSubBank, card_link)
page = self.browser.location(history_url)
if len(pages) == 0:
# If not, add this page as transactions list
pages.append(self.document)
for op in page.get_history():
yield op
for page in pages:
label = self.parser.tocleanstring(self.parser.select(page.getroot(), 'div.lister p.c', 1))
label = re.findall('(\d+ [^ ]+ \d+)', label)[-1]
# use the trick of relativedelta to get the last day of month.
debit_date = parse_french_date(label) + relativedelta(day=31)
class list_history(ListElement):
item_xpath = '//table[@class="liste"]/tbody/tr'
for tr in page.xpath('//table[@class="liste"]/tbody/tr'):
tds = tr.findall('td')[:4]
if len(tds) < 4:
continue
def parse(self, el):
label = CleanText('//div[contains(@class, "lister")]//p[@class="c"]')(el)
label = re.findall('(\d+ [^ ]+ \d+)', label)[-1]
# use the trick of relativedelta to get the last day of month.
self.env['debit_date'] = parse_french_date(label) + relativedelta(day=31)
tr = Transaction(index)
class item(ItemElement):
klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 4
parts = [txt.strip() for txt in list(tds[-3].itertext()) + list(tds[-2].itertext()) if len(txt.strip()) > 0]
obj_raw = Transaction.Raw('./td[last()-2] | ./td[last()-1]')
obj_type = Transaction.TYPE_CARD
obj_date = Env('debit_date')
obj_rdate = Transaction.Date('./td[1]')
obj_vdate = Transaction.Date('./td[1]')
obj_amount = Transaction.Amount('./td[last()]')
tr.parse(date=tds[0].text.strip(' \xa0'),
raw=u' '.join(parts))
tr.date = debit_date
tr.type = tr.TYPE_CARD
# Don't take all of the content (with tocleanstring for example),
# because there is a span.aide.
tr.set_amount(tds[-1].text)
yield tr
class NoOperationsPage(OperationsPage):
class NoOperationsPage(OperationsPage, LoggedPage):
def get_history(self):
return iter([])