lcl: website change, rewrite with browser2

This commit is contained in:
Romain Bignon 2014-12-03 23:22:29 +01:00
commit 446bb3416c
4 changed files with 157 additions and 320 deletions

View file

@ -18,12 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from urlparse import urlsplit, parse_qsl
from mechanize import Cookie
from weboob.deprecated.browser import Browser, BrowserIncorrectPassword
from weboob.exceptions import BrowserIncorrectPassword
from weboob.browser import LoginBrowser, URL, need_login
from .pages import SkipPage, LoginPage, AccountsPage, AccountHistoryPage, \
from .pages import LoginPage, AccountsPage, AccountHistoryPage, \
CBListPage, CBHistoryPage, ContractsPage
@ -31,67 +32,49 @@ __all__ = ['LCLBrowser','LCLProBrowser']
# Browser
class LCLBrowser(Browser):
PROTOCOL = 'https'
DOMAIN = 'particuliers.secure.lcl.fr'
CERTHASH = ['825a1cda9f3c7176af327013a20145ad587d1f7e2a7e226a1cb5c522e6e00b84']
ENCODING = 'utf-8'
USER_AGENT = Browser.USER_AGENTS['wget']
PAGES = {
'https://particuliers.secure.lcl.fr/outil/UAUT/Authentication/authenticate': LoginPage,
'https://particuliers.secure.lcl.fr/outil/UAUT\?from=.*': LoginPage,
'https://particuliers.secure.lcl.fr/outil/UAUT/Accueil/preRoutageLogin': LoginPage,
'https://particuliers.secure.lcl.fr//outil/UAUT/Contract/routing': LoginPage,
'https://particuliers.secure.lcl.fr/outil/UWER/Accueil/majicER': LoginPage,
'https://particuliers.secure.lcl.fr/outil/UWER/Enregistrement/forwardAcc': LoginPage,
'https://particuliers.secure.lcl.fr/outil/UAUT/Contrat/choixContrat.*': ContractsPage,
'https://particuliers.secure.lcl.fr/outil/UAUT/Contract/getContract.*': ContractsPage,
'https://particuliers.secure.lcl.fr/outil/UAUT/Contract/selectContracts.*': ContractsPage,
'https://particuliers.secure.lcl.fr/outil/UWSP/Synthese': AccountsPage,
'https://particuliers.secure.lcl.fr/outil/UWLM/ListeMouvements.*/accesListeMouvements.*': AccountHistoryPage,
'https://particuliers.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeCBCompte.*': CBListPage,
'https://particuliers.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeOperations.*': CBHistoryPage,
'https://particuliers.secure.lcl.fr/outil/UAUT/Contrat/selectionnerContrat.*': SkipPage,
'https://particuliers.secure.lcl.fr/index.html': SkipPage
}
class LCLBrowser(LoginBrowser):
BASEURL = 'https://particuliers.secure.lcl.fr'
def is_logged(self):
return not self.is_on_page(LoginPage)
login = URL('/outil/UAUT/Authentication/authenticate',
'/outil/UAUT\?from=.*',
'/outil/UAUT/Accueil/preRoutageLogin',
'.*outil/UAUT/Contract/routing',
'/outil/UWER/Accueil/majicER',
'/outil/UWER/Enregistrement/forwardAcc',
LoginPage)
contracts = URL('/outil/UAUT/Contrat/choixContrat.*',
'/outil/UAUT/Contract/getContract.*',
'/outil/UAUT/Contract/selectContracts.*',
ContractsPage)
accounts = URL('/outil/UWSP/Synthese', AccountsPage)
history = URL('/outil/UWLM/ListeMouvements.*/accesListeMouvements.*', AccountHistoryPage)
cb_list = URL('/outil/UWCB/UWCBEncours.*/listeCBCompte.*', CBListPage)
cb_history = URL('/outil/UWCB/UWCBEncours.*/listeOperations.*', CBHistoryPage)
skip = URL('/outil/UAUT/Contrat/selectionnerContrat.*',
'/index.html')
def login(self):
def deinit(self):
pass
def do_login(self):
assert isinstance(self.username, basestring)
assert isinstance(self.password, basestring)
assert self.password.isdigit()
if not self.is_on_page(LoginPage):
self.location('%s://%s/outil/UAUT/Authentication/authenticate'
% (self.PROTOCOL, self.DOMAIN),
no_login=True)
self.login.stay_or_go()
if not self.page.login(self.username, self.password) or \
(self.is_on_page(LoginPage) and self.page.is_error()) :
(self.login.is_here() and self.page.is_error()) :
raise BrowserIncorrectPassword("invalid login/password.\nIf you did not change anything, be sure to check for password renewal request\non the original web site.\nAutomatic renewal will be implemented later.")
self.location('%s://%s/outil/UWSP/Synthese'
% (self.PROTOCOL, self.DOMAIN),
no_login=True)
self.accounts.stay_or_go()
@need_login
def get_accounts_list(self):
if not self.is_on_page(AccountsPage):
self.location('%s://%s/outil/UWSP/Synthese'
% (self.PROTOCOL, self.DOMAIN))
self.accounts.stay_or_go()
return self.page.get_list()
def get_account(self, id):
    """Return the first account whose ``id`` attribute equals ``id``.

    The accounts come from ``self.get_accounts_list()`` and are examined in
    the order that call yields them. ``None`` is returned when no account
    matches.
    """
    assert isinstance(id, basestring)

    matching = (acc for acc in self.get_accounts_list() if acc.id == id)
    return next(matching, None)
@need_login
def get_history(self, account):
self.location(account._link_id)
for tr in self.page.get_operations():
@ -100,6 +83,7 @@ class LCLBrowser(Browser):
for tr in self.get_cb_operations(account, 1):
yield tr
@need_login
def get_cb_operations(self, account, month=0):
"""
Get CB operations.
@ -112,7 +96,7 @@ class LCLBrowser(Browser):
args = dict(parse_qsl(v.query))
args['MOIS'] = month
self.location(self.buildurl(v.path, **args))
self.location('%s?%s' % (v.path, urllib.urlencode(args)))
for tr in self.page.get_operations():
yield tr
@ -124,45 +108,11 @@ class LCLBrowser(Browser):
class LCLProBrowser(LCLBrowser):
PROTOCOL = 'https'
DOMAIN = 'professionnels.secure.lcl.fr'
CERTHASH = ['6ae7053ef30f7c7810673115b021a42713f518f3a87b2e73ef565c16ead79f81']
ENCODING = 'utf-8'
USER_AGENT = Browser.USER_AGENTS['wget']
PAGES = {
'https://professionnels.secure.lcl.fr/outil/UAUT?from=/outil/UWHO/Accueil/': LoginPage,
'https://professionnels.secure.lcl.fr/outil/UAUT\?from=.*': LoginPage,
'https://professionnels.secure.lcl.fr/outil/UAUT/Accueil/preRoutageLogin': LoginPage,
'https://professionnels.secure.lcl.fr//outil/UAUT/Contract/routing': LoginPage,
'https://professionnels.secure.lcl.fr/outil/UWER/Accueil/majicER': LoginPage,
'https://professionnels.secure.lcl.fr/outil/UWER/Enregistrement/forwardAcc': LoginPage,
'https://professionnels.secure.lcl.fr/outil/UAUT/Contrat/choixContrat.*': ContractsPage,
'https://professionnels.secure.lcl.fr/outil/UAUT/Contract/getContract.*': ContractsPage,
'https://professionnels.secure.lcl.fr/outil/UAUT/Contract/selectContracts.*': ContractsPage,
'https://professionnels.secure.lcl.fr/outil/UWSP/Synthese': AccountsPage,
'https://professionnels.secure.lcl.fr/outil/UWLM/ListeMouvements.*/accesListeMouvements.*': AccountHistoryPage,
'https://professionnels.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeCBCompte.*': CBListPage,
'https://professionnels.secure.lcl.fr/outil/UWCB/UWCBEncours.*/listeOperations.*': CBHistoryPage,
'https://professionnels.secure.lcl.fr/outil/UAUT/Contrat/selectionnerContrat.*': SkipPage,
'https://professionnels.secure.lcl.fr/index.html': SkipPage
}
BASEURL = 'https://professionnels.secure.lcl.fr'
#We need to add this on the login form
IDENTIFIANT_ROUTING = 'CLA'
def add_cookie(self, name, value):
c = Cookie(0, name, value,
None, False,
'.' + self.DOMAIN, True, True,
'/', False,
False,
None,
False,
None,
None,
{})
cookiejar = self._ua_handlers["_cookies"].cookiejar
cookiejar.set_cookie(c)
def __init__(self, *args, **kwargs):
Browser.__init__(self, *args, **kwargs)
self.add_cookie("lclgen","professionnels")
super(LCLProBrowser, self).__init__(*args, **kwargs)
self.session.cookies.set("lclgen","professionnels")

View file

@ -118,6 +118,9 @@ class LCLEnterpriseBrowser(Browser):
for tr in self.page.get_operations():
yield tr
def get_cb_operations(self, account):
    """Unconditionally raise ``NotImplementedError``; ``account`` is ignored."""
    raise NotImplementedError()
class LCLEspaceProBrowser(LCLEnterpriseBrowser):
BASEURL = 'https://espacepro.secure.lcl.fr'

View file

@ -21,6 +21,7 @@
from weboob.capabilities.bank import CapBank, AccountNotFound
from weboob.tools.backend import Module, BackendConfig
from weboob.tools.value import ValueBackendPassword, Value
from weboob.capabilities.base import find_object
from .browser import LCLBrowser, LCLProBrowser
from .enterprise.browser import LCLEnterpriseBrowser, LCLEspaceProBrowser
@ -64,36 +65,20 @@ class LCLModule(Module, CapBank):
if not self._browser:
return
try:
deinit = self.browser.deinit
except AttributeError:
pass
else:
deinit()
self.browser.deinit()
def iter_accounts(self):
for account in self.browser.get_accounts_list():
yield account
return self.browser.get_accounts_list()
def get_account(self, _id):
with self.browser:
account = self.browser.get_account(_id)
if account:
return account
else:
raise AccountNotFound()
return find_object(self.browser.get_accounts_list(), id=_id, error=AccountNotFound)
def iter_coming(self, account):
if self.BROWSER != LCLBrowser:
raise NotImplementedError()
with self.browser:
transactions = list(self.browser.get_cb_operations(account))
transactions.sort(key=lambda tr: tr.rdate, reverse=True)
return transactions
transactions = list(self.browser.get_cb_operations(account))
transactions.sort(key=lambda tr: tr.rdate, reverse=True)
return transactions
def iter_history(self, account):
    """Return the transactions of ``account`` as a list, latest ``rdate`` first.

    The transactions are read from ``self.browser.get_history(account)``;
    entries sharing the same ``rdate`` keep the order in which the browser
    produced them.
    """
    return sorted(self.browser.get_history(account),
                  key=lambda tr: tr.rdate,
                  reverse=True)

View file

@ -20,15 +20,20 @@
import re
import base64
from decimal import Decimal
from logging import error
import math
import random
from cStringIO import StringIO
from weboob.capabilities.bank import Account
from weboob.deprecated.browser import Page, BrowserUnavailable
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
from weboob.exceptions import ParseError
from weboob.browser.pages import LoggedPage, HTMLPage, FormNotFound
from weboob.browser.filters.standard import CleanText, Field, Regexp, Format, \
CleanDecimal, Map
from weboob.exceptions import BrowserUnavailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
class LCLVirtKeyboard(MappedVirtKeyboard):
@ -48,169 +53,140 @@ class LCLVirtKeyboard(MappedVirtKeyboard):
color=(255,255,255,255)
def __init__(self,basepage):
img=basepage.document.find("//img[@id='idImageClavier']")
def __init__(self, basepage):
img=basepage.doc.find("//img[@id='idImageClavier']")
random.seed()
self.url+="%s"%str(long(math.floor(long(random.random()*1000000000000000000000))))
MappedVirtKeyboard.__init__(self,basepage.browser.openurl(self.url),
basepage.document,img,self.color,"id")
self.url += "%s"%str(long(math.floor(long(random.random()*1000000000000000000000))))
super(LCLVirtKeyboard, self).__init__(StringIO(basepage.browser.open(self.url).content), basepage.doc,img,self.color, "id")
self.check_symbols(self.symbols,basepage.browser.responses_dirname)
def get_symbol_code(self, md5sum):
    """Return the last two characters of the base keyboard's code for ``md5sum``.

    The lookup itself is delegated to ``MappedVirtKeyboard.get_symbol_code``;
    only the tail of its result is kept.
    """
    full_code = MappedVirtKeyboard.get_symbol_code(self, md5sum)
    return full_code[-2:]
def get_string_code(self, string):
    """Translate ``string`` into the concatenation of its per-character codes.

    Each character is looked up in ``self.symbols`` and the value found there
    is passed to ``self.get_symbol_code``; the resulting pieces are joined in
    order. An empty ``string`` gives an empty result.

    Raises:
        KeyError: if a character of ``string`` is not a key of ``self.symbols``.
    """
    # One join instead of repeated ``+=`` avoids rebuilding the string per character.
    return ''.join(self.get_symbol_code(self.symbols[c]) for c in string)
class SkipPage(Page):
pass
class LoginPage(Page):
def on_loaded(self):
class LoginPage(HTMLPage):
def on_load(self):
try:
self.browser.select_form(name='form')
except:
try:
self.browser.select_form(predicate=lambda x: x.attrs.get('id','')=='setInfosCGS')
except:
return
form = self.get_form(xpath='//form[@id="setInfosCGS" or @name="form"]')
except FormNotFound:
return
self.browser.submit(nologin=True)
form.submit()
def myXOR(self, value, seed):
    """XOR every character of ``value`` with the integer ``seed``.

    Args:
        value: string whose characters are transformed one by one.
        seed: integer XOR-ed with the ordinal of each character.

    Returns:
        A string of the same length as ``value`` built from
        ``chr(seed ^ ord(c))`` for each character ``c``; ``''`` when
        ``value`` is empty.
    """
    # Iterate the characters directly and join once, rather than indexing
    # through a range and growing the string with ``+=``.
    return ''.join(chr(seed ^ ord(ch)) for ch in value)
def login(self, login, passwd):
try:
vk=LCLVirtKeyboard(self)
vk = LCLVirtKeyboard(self)
except VirtKeyboardError as err:
error("Error: %s"%err)
self.logger.exception(err)
return False
password=vk.get_string_code(passwd)
password = vk.get_string_code(passwd)
seed=-1
str="var aleatoire = "
for script in self.document.findall("//script"):
if(script.text is None or len(script.text)==0):
seed = -1
s = "var aleatoire = "
for script in self.doc.findall("//script"):
if script.text is None or len(script.text) == 0:
continue
offset=script.text.find(str)
if offset!=-1:
seed=int(script.text[offset+len(str)+1:offset+len(str)+2])
offset = script.text.find(s)
if offset != -1:
seed = int(script.text[offset+len(s)+1:offset+len(s)+2])
break
if seed==-1:
error("Variable 'aleatoire' not found")
return False
raise ParseError("Variable 'aleatoire' not found")
self.browser.select_form(
predicate=lambda x: x.attrs.get('id','')=='formAuthenticate')
self.browser.form.set_all_readonly(False)
self.browser['identifiant'] = login.encode('utf-8')
self.browser['postClavierXor'] = base64.b64encode(self.myXOR(password,seed))
form = self.get_form('//form[@id="formAuthenticate"]')
form['identifiant'] = login
form['postClavierXor'] = base64.b64encode(self.myXOR(password,seed))
try:
self.browser['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
form['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
except AttributeError:
pass
try:
self.browser.submit(nologin=True)
form.submit()
except BrowserUnavailable:
# Login is not valid
return False
return True
def is_error(self):
    """Tell whether the page contains an error block.

    Returns True when the document has at least one ``<div>`` whose ``class``
    attribute is exactly ``erreur`` or ``messError``, False otherwise.
    """
    return bool(self.doc.xpath(u'//div[@class="erreur" or @class="messError"]'))
class ContractsPage(Page):
def on_loaded(self):
class ContractsPage(LoggedPage, HTMLPage):
def on_load(self):
self.select_contract()
def select_contract(self):
# XXX We select automatically the default contract in list. We should let user
# ask what contract he wants to see, or display accounts for all contracts.
self.browser.select_form(nr=0)
self.browser.submit(nologin=True)
form = self.get_form(nr=0)
form.submit()
class AccountsPage(Page):
def on_loaded(self):
warn = self.document.xpath('//div[@id="attTxt"]')
class AccountsPage(LoggedPage, HTMLPage):
def on_load(self):
warn = self.doc.xpath('//div[@id="attTxt"]')
if len(warn) > 0:
raise BrowserUnavailable(warn[0].text)
def get_list(self):
l = []
ids = set()
for a in self.document.getiterator('a'):
link=a.attrib.get('href')
if link is None:
continue
if link.startswith("/outil/UWLM/ListeMouvements"):
account = Account()
#by default the website propose the last 7 days or last 45 days but we can force to have the last 55days
account._link_id=link+"&mode=55"
account._coming_links = []
parameters=link.split("?").pop().split("&")
for parameter in parameters:
list=parameter.split("=")
value=list.pop()
name=list.pop()
if name=="agence":
account.id=value
elif name=="compte":
account.id+=value
elif name=="nature":
# TODO parse this string to get the right Account.TYPE_* to
# store in account.type.
account._type=value
@method
class get_list(ListElement):
item_xpath = '//tr[contains(@onclick, "redirect")]'
flush_at_end = True
if account.id in ids:
continue
class account(ItemElement):
klass = Account
ids.add(account.id)
div = a.getparent().getprevious()
if not div.text.strip():
div = div.find('div')
account.label=u''+div.text.strip()
balance = FrenchTransaction.clean_amount(a.text)
if '-' in balance:
balance='-'+balance.replace('-', '')
account.balance=Decimal(balance)
account.currency = account.get_currency(a.text)
self.logger.debug('%s Type: %s' % (account.label, account._type))
l.append(account)
if link.startswith('/outil/UWCB/UWCBEncours'):
if len(l) == 0:
self.logger.warning('There is a card account but not any check account')
continue
def condition(self):
return '/outil/UWLM/ListeMouvement' in self.el.attrib['onclick']
account = l[-1]
NATURE2TYPE = {'006': Account.TYPE_CHECKING,
'049': Account.TYPE_SAVINGS,
'068': Account.TYPE_MARKET,
'069': Account.TYPE_SAVINGS,
}
coming = FrenchTransaction.clean_amount(a.text)
if '-' in coming:
coming = '-'+coming.replace('-', '')
obj__link_id = Format('%s&mode=55', Regexp(CleanText('./@onclick'), "'(.*)'"))
obj_id = Regexp(Field('_link_id'), r'.*agence=(\w+).*compte=(\w+)', r'\1\2')
obj__coming_links = []
obj_label = CleanText('.//div[@class="libelleCompte"]')
obj_balance = CleanDecimal('.//td[has-class("right")]', replace_dots=True)
obj_currency = FrenchTransaction.Currency('.//td[has-class("right")]')
obj_type = Map(Regexp(Field('_link_id'), r'.*nature=(\w+)'), NATURE2TYPE, default=Account.TYPE_UNKNOWN)
class card(ItemElement):
def condition(self):
return '/outil/UWCB/UWCBEncours' in self.el.attrib['onclick']
def parse(self, el):
link = Regexp(CleanText('./@onclick'), "'(.*)'")(el)
id = Regexp(CleanText('./@onclick'), r'.*AGENCE=(\w+).*COMPTE=(\w+).*CLE=(\w+)', r'\1\2\3')(el)
account = self.parent.objects[id]
if not account.coming:
account.coming = Decimal('0')
account.coming += Decimal(coming)
account.coming += CleanDecimal('.//td[has-class("right")]', replace_dots=True)(el)
account._coming_links.append(link)
return l
raise SkipItem()
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile('^(?P<category>CB) (?P<text>RETRAIT) DU (?P<dd>\d+)/(?P<mm>\d+)'),
PATTERNS = [(re.compile('^(?P<category>CB) (?P<text>RETRAIT) DU (?P<dd>\d+)/(?P<mm>\d+)'),
FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^(?P<category>(PRLV|PE)) (?P<text>.*)'),
FrenchTransaction.TYPE_ORDER),
@ -235,103 +211,26 @@ class Transaction(FrenchTransaction):
]
class AccountHistoryPage(Page):
def get_table(self):
tables=self.document.findall("//table[@class='tagTab pyjama']")
for table in tables:
# Look for the relevant table in the Pro version
header=table.getprevious()
while header is not None and str(header.tag) != 'div':
header=header.getprevious()
if header is not None:
header=header.find("div")
if header is not None:
header=header.find("span")
class AccountHistoryPage(LoggedPage, HTMLPage):
@method
class _get_operations(Transaction.TransactionsElement):
item_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr'
head_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr/th'
if header is not None and \
header.text.strip().startswith("Opérations effectuées".decode('utf-8')):
return table
col_raw = [u'Vos opérations', u'Libellé']
# Look for the relevant table in the Particulier version
header=table.find("thead").find("tr").find("th[@class='titleTab titleTableft']")
if header is not None and\
header.text.strip().startswith("Solde au"):
return table
class item(Transaction.TransactionElement):
def condition(self):
return self.parent.get_colnum('date') is not None and len(self.el.findall('td')) >= 3
def strip_label(self, s):
return s
def validate(self, obj):
return obj.category != 'RELEVE CB'
def get_operations(self):
table = self.get_table()
operations = []
if table is None:
return operations
for tr in table.iter('tr'):
# skip headers and empty rows
if len(tr.findall("th"))!=0 or\
len(tr.findall("td"))<=1:
continue
mntColumn = 0
date = None
raw = None
credit = ''
debit = ''
for td in tr.iter('td'):
value = td.attrib.get('id')
if value is None:
# if tag has no id nor class, assume it's a label
value = td.attrib.get('class', 'opLib')
if value.startswith("date") or value.endswith('center'):
# some transaction are included in a <strong> tag
date = u''.join([txt.strip() for txt in td.itertext()])
elif value.startswith("lib") or value.startswith("opLib"):
# misclosed A tag requires to grab text from td
tooltip = td.xpath('./div[@class="autoTooltip"]')
if len(tooltip) > 0:
td.remove(tooltip[0])
raw = self.parser.tocleanstring(td)
elif value.startswith("solde") or value.startswith("mnt") or \
value.startswith('debit') or value.startswith('credit'):
mntColumn += 1
amount = u''.join([txt.strip() for txt in td.itertext()])
if amount != "":
if value.startswith("soldeDeb") or value.startswith('debit') or mntColumn==1:
debit = amount
else:
credit = amount
if date is None:
# skip non-transaction
continue
operation = Transaction(len(operations))
operation.parse(date, raw)
operation.set_amount(credit, debit)
if operation.category == 'RELEVE CB':
# strip that transaction which is detailled in CBListPage.
continue
operations.append(operation)
return operations
return self._get_operations()
class CBHistoryPage(AccountHistoryPage):
def get_table(self):
# there is only one table on the page
try:
return self.document.findall("//table[@class='tagTab pyjama']")[0]
except IndexError:
return None
def strip_label(self, label):
# prevent to be considered as a category if there are two spaces.
return re.sub(r'[ ]+', ' ', label).strip()
def get_operations(self):
for tr in AccountHistoryPage.get_operations(self):
tr.type = tr.TYPE_CARD
@ -341,8 +240,8 @@ class CBHistoryPage(AccountHistoryPage):
class CBListPage(CBHistoryPage):
def get_cards(self):
    """Collect the card-operations links found on the page.

    For every ``tr`` element of the document, the text between single quotes
    in its ``onclick`` attribute is extracted (``None`` when the pattern does
    not match). Links that start with ``/outil/UWCB/UWCBEncours`` and contain
    ``listeOperations`` are returned, in document order.
    """
    cards = []
    for row in self.doc.getiterator('tr'):
        link = Regexp(CleanText('./@onclick'), "'(.*)'", default=None)(row)
        if link is None:
            # Row without a quoted target in its onclick handler.
            continue
        if link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link:
            cards.append(link)
    return cards