diff --git a/modules/lcl/browser.py b/modules/lcl/browser.py index 9d9139d5..84f0fdfd 100644 --- a/modules/lcl/browser.py +++ b/modules/lcl/browser.py @@ -18,12 +18,13 @@ # along with weboob. If not, see . +import urllib from urlparse import urlsplit, parse_qsl -from mechanize import Cookie -from weboob.deprecated.browser import Browser, BrowserIncorrectPassword +from weboob.exceptions import BrowserIncorrectPassword +from weboob.browser import LoginBrowser, URL, need_login -from .pages import SkipPage, LoginPage, AccountsPage, AccountHistoryPage, \ +from .pages import LoginPage, AccountsPage, AccountHistoryPage, \ CBListPage, CBHistoryPage, ContractsPage @@ -31,67 +32,49 @@ __all__ = ['LCLBrowser','LCLProBrowser'] # Browser -class LCLBrowser(Browser): - PROTOCOL = 'https' - DOMAIN = 'particuliers.secure.lcl.fr' - CERTHASH = ['825a1cda9f3c7176af327013a20145ad587d1f7e2a7e226a1cb5c522e6e00b84'] - ENCODING = 'utf-8' - USER_AGENT = Browser.USER_AGENTS['wget'] - PAGES = { - 'https://particuliers.secure.lcl.fr/outil/UAUT/Authentication/authenticate': LoginPage, - 'https://particuliers.secure.lcl.fr/outil/UAUT\?from=.*': LoginPage, - 'https://particuliers.secure.lcl.fr/outil/UAUT/Accueil/preRoutageLogin': LoginPage, - 'https://particuliers.secure.lcl.fr//outil/UAUT/Contract/routing': LoginPage, - 'https://particuliers.secure.lcl.fr/outil/UWER/Accueil/majicER': LoginPage, - 'https://particuliers.secure.lcl.fr/outil/UWER/Enregistrement/forwardAcc': LoginPage, - 'https://particuliers.secure.lcl.fr/outil/UAUT/Contrat/choixContrat.*': ContractsPage, - 'https://particuliers.secure.lcl.fr/outil/UAUT/Contract/getContract.*': ContractsPage, - 'https://particuliers.secure.lcl.fr/outil/UAUT/Contract/selectContracts.*': ContractsPage, - 'https://particuliers.secure.lcl.fr/outil/UWSP/Synthese': AccountsPage, - 'https://particuliers.secure.lcl.fr/outil/UWLM/ListeMouvements.*/accesListeMouvements.*': AccountHistoryPage, - 'https://particuliers.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeCBCompte.*': CBListPage, - 'https://particuliers.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeOperations.*': CBHistoryPage, - 'https://particuliers.secure.lcl.fr/outil/UAUT/Contrat/selectionnerContrat.*': SkipPage, - 'https://particuliers.secure.lcl.fr/index.html': SkipPage - } +class LCLBrowser(LoginBrowser): + BASEURL = 'https://particuliers.secure.lcl.fr' - def is_logged(self): - return not self.is_on_page(LoginPage) + login = URL('/outil/UAUT/Authentication/authenticate', + '/outil/UAUT\?from=.*', + '/outil/UAUT/Accueil/preRoutageLogin', + '.*outil/UAUT/Contract/routing', + '/outil/UWER/Accueil/majicER', + '/outil/UWER/Enregistrement/forwardAcc', + LoginPage) + contracts = URL('/outil/UAUT/Contrat/choixContrat.*', + '/outil/UAUT/Contract/getContract.*', + '/outil/UAUT/Contract/selectContracts.*', + ContractsPage) + accounts = URL('/outil/UWSP/Synthese', AccountsPage) + history = URL('/outil/UWLM/ListeMouvements.*/accesListeMouvements.*', AccountHistoryPage) + cb_list = URL('/outil/UWCB/UWCBEncours.*/listeCBCompte.*', CBListPage) + cb_history = URL('/outil/UWCB/UWCBEncours.*/listeOperations.*', CBHistoryPage) + skip = URL('/outil/UAUT/Contrat/selectionnerContrat.*', + '/index.html') - def login(self): + def deinit(self): + pass + + def do_login(self): assert isinstance(self.username, basestring) assert isinstance(self.password, basestring) assert self.password.isdigit() - if not self.is_on_page(LoginPage): - self.location('%s://%s/outil/UAUT/Authentication/authenticate' - % (self.PROTOCOL, self.DOMAIN), - no_login=True) + self.login.stay_or_go() if not self.page.login(self.username, self.password) or \ - (self.is_on_page(LoginPage) and self.page.is_error()) : + (self.login.is_here() and self.page.is_error()) : raise BrowserIncorrectPassword("invalid login/password.\nIf you did not change anything, be sure to check for password renewal request\non the original web site.\nAutomatic renewal will be implemented later.") - self.location('%s://%s/outil/UWSP/Synthese' - % (self.PROTOCOL, self.DOMAIN), - no_login=True) + self.accounts.stay_or_go() + + @need_login def get_accounts_list(self): - if not self.is_on_page(AccountsPage): - self.location('%s://%s/outil/UWSP/Synthese' - % (self.PROTOCOL, self.DOMAIN)) - + self.accounts.stay_or_go() return self.page.get_list() - def get_account(self, id): - assert isinstance(id, basestring) - - l = self.get_accounts_list() - for a in l: - if a.id == id: - return a - - return None - + @need_login def get_history(self, account): self.location(account._link_id) for tr in self.page.get_operations(): @@ -100,6 +83,7 @@ class LCLBrowser(Browser): for tr in self.get_cb_operations(account, 1): yield tr + @need_login def get_cb_operations(self, account, month=0): """ Get CB operations. @@ -112,7 +96,7 @@ class LCLBrowser(Browser): args = dict(parse_qsl(v.query)) args['MOIS'] = month - self.location(self.buildurl(v.path, **args)) + self.location('%s?%s' % (v.path, urllib.urlencode(args))) for tr in self.page.get_operations(): yield tr @@ -124,45 +108,11 @@ class LCLBrowser(Browser): class LCLProBrowser(LCLBrowser): - PROTOCOL = 'https' - DOMAIN = 'professionnels.secure.lcl.fr' - CERTHASH = ['6ae7053ef30f7c7810673115b021a42713f518f3a87b2e73ef565c16ead79f81'] - ENCODING = 'utf-8' - USER_AGENT = Browser.USER_AGENTS['wget'] - PAGES = { - 'https://professionnels.secure.lcl.fr/outil/UAUT?from=/outil/UWHO/Accueil/': LoginPage, - 'https://professionnels.secure.lcl.fr/outil/UAUT\?from=.*': LoginPage, - 'https://professionnels.secure.lcl.fr/outil/UAUT/Accueil/preRoutageLogin': LoginPage, - 'https://professionnels.secure.lcl.fr//outil/UAUT/Contract/routing': LoginPage, - 'https://professionnels.secure.lcl.fr/outil/UWER/Accueil/majicER': LoginPage, - 'https://professionnels.secure.lcl.fr/outil/UWER/Enregistrement/forwardAcc': LoginPage, - 'https://professionnels.secure.lcl.fr/outil/UAUT/Contrat/choixContrat.*': ContractsPage, - 'https://professionnels.secure.lcl.fr/outil/UAUT/Contract/getContract.*': ContractsPage, - 'https://professionnels.secure.lcl.fr/outil/UAUT/Contract/selectContracts.*': ContractsPage, - 'https://professionnels.secure.lcl.fr/outil/UWSP/Synthese': AccountsPage, - 'https://professionnels.secure.lcl.fr/outil/UWLM/ListeMouvements.*/accesListeMouvements.*': AccountHistoryPage, - 'https://professionnels.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeCBCompte.*': CBListPage, - 'https://professionnels.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeOperations.*': CBHistoryPage, - 'https://professionnels.secure.lcl.fr/outil/UAUT/Contrat/selectionnerContrat.*': SkipPage, - 'https://professionnels.secure.lcl.fr/index.html': SkipPage - } + BASEURL = 'https://professionnels.secure.lcl.fr' + #We need to add this on the login form IDENTIFIANT_ROUTING = 'CLA' - def add_cookie(self, name, value): - c = Cookie(0, name, value, - None, False, - '.' + self.DOMAIN, True, True, - '/', False, - False, - None, - False, - None, - None, - {}) - cookiejar = self._ua_handlers["_cookies"].cookiejar - cookiejar.set_cookie(c) - def __init__(self, *args, **kwargs): - Browser.__init__(self, *args, **kwargs) - self.add_cookie("lclgen","professionnels") + super(LCLProBrowser, self).__init__(*args, **kwargs) + self.session.cookies.set("lclgen","professionnels") diff --git a/modules/lcl/enterprise/browser.py b/modules/lcl/enterprise/browser.py index a0e2e563..4ba6d5d0 100644 --- a/modules/lcl/enterprise/browser.py +++ b/modules/lcl/enterprise/browser.py @@ -118,6 +118,9 @@ class LCLEnterpriseBrowser(Browser): for tr in self.page.get_operations(): yield tr + def get_cb_operations(self, account): + raise NotImplementedError() + class LCLEspaceProBrowser(LCLEnterpriseBrowser): BASEURL = 'https://espacepro.secure.lcl.fr' diff --git a/modules/lcl/module.py b/modules/lcl/module.py index 2bf38ae4..3a7bea32 100644 --- a/modules/lcl/module.py +++ b/modules/lcl/module.py @@ -21,6 +21,7 @@ from weboob.capabilities.bank import CapBank, AccountNotFound from weboob.tools.backend import Module, BackendConfig from weboob.tools.value import ValueBackendPassword, Value +from weboob.capabilities.base import find_object from .browser import LCLBrowser, LCLProBrowser from .enterprise.browser import LCLEnterpriseBrowser, LCLEspaceProBrowser @@ -64,36 +65,20 @@ class LCLModule(Module, CapBank): if not self._browser: return - try: - deinit = self.browser.deinit - except AttributeError: - pass - else: - deinit() + self.browser.deinit() def iter_accounts(self): - for account in self.browser.get_accounts_list(): - yield account + return self.browser.get_accounts_list() def get_account(self, _id): - with self.browser: - account = self.browser.get_account(_id) - if account: - return account - else: - raise AccountNotFound() + return find_object(self.browser.get_accounts_list(), id=_id, error=AccountNotFound) def iter_coming(self, account): - if self.BROWSER != LCLBrowser: - raise NotImplementedError() - - with self.browser: - transactions = list(self.browser.get_cb_operations(account)) - transactions.sort(key=lambda tr: tr.rdate, reverse=True) - return transactions + transactions = list(self.browser.get_cb_operations(account)) + transactions.sort(key=lambda tr: tr.rdate, reverse=True) + return transactions def iter_history(self, account): - with self.browser: - transactions = list(self.browser.get_history(account)) - transactions.sort(key=lambda tr: tr.rdate, reverse=True) - return transactions + transactions = list(self.browser.get_history(account)) + transactions.sort(key=lambda tr: tr.rdate, reverse=True) + return transactions diff --git a/modules/lcl/pages.py b/modules/lcl/pages.py index c4dae7c3..d26ea63b 100644 --- a/modules/lcl/pages.py +++ b/modules/lcl/pages.py @@ -20,15 +20,20 @@ import re import base64 from decimal import Decimal -from logging import error import math import random +from cStringIO import StringIO from weboob.capabilities.bank import Account -from weboob.deprecated.browser import Page, BrowserUnavailable -from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError +from weboob.browser.elements import method, ListElement, ItemElement, SkipItem +from weboob.exceptions import ParseError +from weboob.browser.pages import LoggedPage, HTMLPage, FormNotFound +from weboob.browser.filters.standard import CleanText, Field, Regexp, Format, \ + CleanDecimal, Map +from weboob.exceptions import BrowserUnavailable from weboob.tools.capabilities.bank.transactions import FrenchTransaction +from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError class LCLVirtKeyboard(MappedVirtKeyboard): @@ -48,169 +53,140 @@ class LCLVirtKeyboard(MappedVirtKeyboard): color=(255,255,255,255) - def __init__(self,basepage): - img=basepage.document.find("//img[@id='idImageClavier']") + def __init__(self, basepage): + img=basepage.doc.find("//img[@id='idImageClavier']") random.seed() - self.url+="%s"%str(long(math.floor(long(random.random()*1000000000000000000000)))) - MappedVirtKeyboard.__init__(self,basepage.browser.openurl(self.url), - basepage.document,img,self.color,"id") + self.url += "%s"%str(long(math.floor(long(random.random()*1000000000000000000000)))) + super(LCLVirtKeyboard, self).__init__(StringIO(basepage.browser.open(self.url).content), basepage.doc,img,self.color, "id") self.check_symbols(self.symbols,basepage.browser.responses_dirname) - def get_symbol_code(self,md5sum): - code=MappedVirtKeyboard.get_symbol_code(self,md5sum) + def get_symbol_code(self, md5sum): + code=MappedVirtKeyboard.get_symbol_code(self, md5sum) return code[-2:] - def get_string_code(self,string): + def get_string_code(self, string): code='' for c in string: - code+=self.get_symbol_code(self.symbols[c]) + code += self.get_symbol_code(self.symbols[c]) return code -class SkipPage(Page): - pass - - -class LoginPage(Page): - def on_loaded(self): +class LoginPage(HTMLPage): + def on_load(self): try: - self.browser.select_form(name='form') - except: - try: - self.browser.select_form(predicate=lambda x: x.attrs.get('id','')=='setInfosCGS') - except: - return + form = self.get_form(xpath='//form[@id="setInfosCGS" or @name="form"]') + except FormNotFound: + return - self.browser.submit(nologin=True) + form.submit() def myXOR(self,value,seed): - s='' + s = '' for i in xrange(len(value)): - s+=chr(seed^ord(value[i])) + s += chr(seed^ord(value[i])) return s def login(self, login, passwd): try: - vk=LCLVirtKeyboard(self) + vk = LCLVirtKeyboard(self) except VirtKeyboardError as err: - error("Error: %s"%err) + self.logger.exception(err) return False - password=vk.get_string_code(passwd) + password = vk.get_string_code(passwd) - seed=-1 - str="var aleatoire = " - for script in self.document.findall("//script"): - if(script.text is None or len(script.text)==0): + seed = -1 + s = "var aleatoire = " + for script in self.doc.findall("//script"): + if script.text is None or len(script.text) == 0: continue - offset=script.text.find(str) - if offset!=-1: - seed=int(script.text[offset+len(str)+1:offset+len(str)+2]) + offset = script.text.find(s) + if offset != -1: + seed = int(script.text[offset+len(s)+1:offset+len(s)+2]) break if seed==-1: - error("Variable 'aleatoire' not found") - return False + raise ParseError("Variable 'aleatoire' not found") - self.browser.select_form( - predicate=lambda x: x.attrs.get('id','')=='formAuthenticate') - self.browser.form.set_all_readonly(False) - self.browser['identifiant'] = login.encode('utf-8') - self.browser['postClavierXor'] = base64.b64encode(self.myXOR(password,seed)) + form = self.get_form('//form[@id="formAuthenticate"]') + form['identifiant'] = login + form['postClavierXor'] = base64.b64encode(self.myXOR(password,seed)) try: - self.browser['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING + form['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING except AttributeError: pass try: - self.browser.submit(nologin=True) + form.submit() except BrowserUnavailable: # Login is not valid return False return True def is_error(self): - errors = self.document.xpath(u'//div[@class="erreur" or @class="messError"]') + errors = self.doc.xpath(u'//div[@class="erreur" or @class="messError"]') return len(errors) > 0 -class ContractsPage(Page): - def on_loaded(self): +class ContractsPage(LoggedPage, HTMLPage): + def on_load(self): self.select_contract() def select_contract(self): # XXX We select automatically the default contract in list. We should let user # ask what contract he wants to see, or display accounts for all contracts. - self.browser.select_form(nr=0) - self.browser.submit(nologin=True) + form = self.get_form(nr=0) + form.submit() -class AccountsPage(Page): - def on_loaded(self): - warn = self.document.xpath('//div[@id="attTxt"]') +class AccountsPage(LoggedPage, HTMLPage): + def on_load(self): + warn = self.doc.xpath('//div[@id="attTxt"]') if len(warn) > 0: raise BrowserUnavailable(warn[0].text) - def get_list(self): - l = [] - ids = set() - for a in self.document.getiterator('a'): - link=a.attrib.get('href') - if link is None: - continue - if link.startswith("/outil/UWLM/ListeMouvements"): - account = Account() - #by default the website propose the last 7 days or last 45 days but we can force to have the last 55days - account._link_id=link+"&mode=55" - account._coming_links = [] - parameters=link.split("?").pop().split("&") - for parameter in parameters: - list=parameter.split("=") - value=list.pop() - name=list.pop() - if name=="agence": - account.id=value - elif name=="compte": - account.id+=value - elif name=="nature": - # TODO parse this string to get the right Account.TYPE_* to - # store in account.type. - account._type=value + @method + class get_list(ListElement): + item_xpath = '//tr[contains(@onclick, "redirect")]' + flush_at_end = True - if account.id in ids: - continue + class account(ItemElement): + klass = Account - ids.add(account.id) - div = a.getparent().getprevious() - if not div.text.strip(): - div = div.find('div') - account.label=u''+div.text.strip() - balance = FrenchTransaction.clean_amount(a.text) - if '-' in balance: - balance='-'+balance.replace('-', '') - account.balance=Decimal(balance) - account.currency = account.get_currency(a.text) - self.logger.debug('%s Type: %s' % (account.label, account._type)) - l.append(account) - if link.startswith('/outil/UWCB/UWCBEncours'): - if len(l) == 0: - self.logger.warning('There is a card account but not any check account') - continue + def condition(self): + return '/outil/UWLM/ListeMouvement' in self.el.attrib['onclick'] - account = l[-1] + NATURE2TYPE = {'006': Account.TYPE_CHECKING, + '049': Account.TYPE_SAVINGS, + '068': Account.TYPE_MARKET, + '069': Account.TYPE_SAVINGS, + } - coming = FrenchTransaction.clean_amount(a.text) - if '-' in coming: - coming = '-'+coming.replace('-', '') + obj__link_id = Format('%s&mode=55', Regexp(CleanText('./@onclick'), "'(.*)'")) + obj_id = Regexp(Field('_link_id'), r'.*agence=(\w+).*compte=(\w+)', r'\1\2') + obj__coming_links = [] + obj_label = CleanText('.//div[@class="libelleCompte"]') + obj_balance = CleanDecimal('.//td[has-class("right")]', replace_dots=True) + obj_currency = FrenchTransaction.Currency('.//td[has-class("right")]') + obj_type = Map(Regexp(Field('_link_id'), r'.*nature=(\w+)'), NATURE2TYPE, default=Account.TYPE_UNKNOWN) + + class card(ItemElement): + def condition(self): + return '/outil/UWCB/UWCBEncours' in self.el.attrib['onclick'] + + def parse(self, el): + link = Regexp(CleanText('./@onclick'), "'(.*)'")(el) + id = Regexp(CleanText('./@onclick'), r'.*AGENCE=(\w+).*COMPTE=(\w+).*CLE=(\w+)', r'\1\2\3')(el) + + account = self.parent.objects[id] if not account.coming: account.coming = Decimal('0') - account.coming += Decimal(coming) + + account.coming += CleanDecimal('.//td[has-class("right")]', replace_dots=True)(el) account._coming_links.append(link) - - return l - + raise SkipItem() class Transaction(FrenchTransaction): - PATTERNS = [(re.compile('^(?PCB) (?PRETRAIT) DU (?P
\d+)/(?P\d+)'), + PATTERNS = [(re.compile('^(?PCB) (?PRETRAIT) DU (?P
\d+)/(?P\d+)'), FrenchTransaction.TYPE_WITHDRAWAL), (re.compile('^(?P(PRLV|PE)) (?P.*)'), FrenchTransaction.TYPE_ORDER), @@ -235,103 +211,26 @@ class Transaction(FrenchTransaction): ] -class AccountHistoryPage(Page): - def get_table(self): - tables=self.document.findall("//table[@class='tagTab pyjama']") - for table in tables: - # Look for the relevant table in the Pro version - header=table.getprevious() - while header is not None and str(header.tag) != 'div': - header=header.getprevious() - if header is not None: - header=header.find("div") - if header is not None: - header=header.find("span") +class AccountHistoryPage(LoggedPage, HTMLPage): + @method + class _get_operations(Transaction.TransactionsElement): + item_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr' + head_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr/th' - if header is not None and \ - header.text.strip().startswith("Opérations effectuées".decode('utf-8')): - return table + col_raw = [u'Vos opérations', u'Libellé'] - # Look for the relevant table in the Particulier version - header=table.find("thead").find("tr").find("th[@class='titleTab titleTableft']") - if header is not None and\ - header.text.strip().startswith("Solde au"): - return table + class item(Transaction.TransactionElement): + def condition(self): + return self.parent.get_colnum('date') is not None and len(self.el.findall('td')) >= 3 - def strip_label(self, s): - return s + def validate(self, obj): + return obj.category != 'RELEVE CB' def get_operations(self): - table = self.get_table() - operations = [] - - if table is None: - return operations - - for tr in table.iter('tr'): - # skip headers and empty rows - if len(tr.findall("th"))!=0 or\ - len(tr.findall("td"))<=1: - continue - mntColumn = 0 - - date = None - raw = None - credit = '' - debit = '' - for td in tr.iter('td'): - value = td.attrib.get('id') - if value is None: - # if tag has no id nor class, assume it's a label - value = td.attrib.get('class', 'opLib') - - if value.startswith("date") or value.endswith('center'): - # some transaction are included in a tag - date = u''.join([txt.strip() for txt in td.itertext()]) - elif value.startswith("lib") or value.startswith("opLib"): - # misclosed A tag requires to grab text from td - tooltip = td.xpath('./div[@class="autoTooltip"]') - if len(tooltip) > 0: - td.remove(tooltip[0]) - raw = self.parser.tocleanstring(td) - elif value.startswith("solde") or value.startswith("mnt") or \ - value.startswith('debit') or value.startswith('credit'): - mntColumn += 1 - amount = u''.join([txt.strip() for txt in td.itertext()]) - if amount != "": - if value.startswith("soldeDeb") or value.startswith('debit') or mntColumn==1: - debit = amount - else: - credit = amount - - if date is None: - # skip non-transaction - continue - - operation = Transaction(len(operations)) - operation.parse(date, raw) - operation.set_amount(credit, debit) - - if operation.category == 'RELEVE CB': - # strip that transaction which is detailled in CBListPage. - continue - - operations.append(operation) - return operations + return self._get_operations() class CBHistoryPage(AccountHistoryPage): - def get_table(self): - # there is only one table on the page - try: - return self.document.findall("//table[@class='tagTab pyjama']")[0] - except IndexError: - return None - - def strip_label(self, label): - # prevent to be considered as a category if there are two spaces. - return re.sub(r'[ ]+', ' ', label).strip() - def get_operations(self): for tr in AccountHistoryPage.get_operations(self): tr.type = tr.TYPE_CARD @@ -341,8 +240,8 @@ class CBHistoryPage(AccountHistoryPage): class CBListPage(CBHistoryPage): def get_cards(self): cards = [] - for a in self.document.getiterator('a'): - link = a.attrib.get('href', '') - if link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link: + for tr in self.doc.getiterator('tr'): + link = Regexp(CleanText('./@onclick'), "'(.*)'", default=None)(tr) + if link is not None and link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link: cards.append(link) return cards