lcl: website change, rewrite with browser2

This commit is contained in:
Romain Bignon 2014-12-03 23:22:29 +01:00
commit 446bb3416c
4 changed files with 157 additions and 320 deletions

View file

@ -20,15 +20,20 @@
import re
import base64
from decimal import Decimal
from logging import error
import math
import random
from cStringIO import StringIO
from weboob.capabilities.bank import Account
from weboob.deprecated.browser import Page, BrowserUnavailable
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
from weboob.browser.elements import method, ListElement, ItemElement, SkipItem
from weboob.exceptions import ParseError
from weboob.browser.pages import LoggedPage, HTMLPage, FormNotFound
from weboob.browser.filters.standard import CleanText, Field, Regexp, Format, \
CleanDecimal, Map
from weboob.exceptions import BrowserUnavailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
class LCLVirtKeyboard(MappedVirtKeyboard):
@ -48,169 +53,140 @@ class LCLVirtKeyboard(MappedVirtKeyboard):
color=(255,255,255,255)
# Constructor: finds the keypad image (<img id="idImageClavier">) in the page,
# downloads the keyboard picture from self.url with a random number appended,
# and initialises the MappedVirtKeyboard base class with it.
# NOTE(review): indentation is missing and two revisions of each statement are
# interleaved below (basepage.document / browser.openurl versus basepage.doc /
# browser.open wrapped in StringIO) -- this looks like rendered diff output;
# confirm which revision is the live one before changing anything.
def __init__(self,basepage):
img=basepage.document.find("//img[@id='idImageClavier']")
def __init__(self, basepage):
img=basepage.doc.find("//img[@id='idImageClavier']")
# Reseed the generator, then append a random integer to the URL
# (presumably a cache-buster -- TODO confirm).
random.seed()
self.url+="%s"%str(long(math.floor(long(random.random()*1000000000000000000000))))
MappedVirtKeyboard.__init__(self,basepage.browser.openurl(self.url),
basepage.document,img,self.color,"id")
self.url += "%s"%str(long(math.floor(long(random.random()*1000000000000000000000))))
super(LCLVirtKeyboard, self).__init__(StringIO(basepage.browser.open(self.url).content), basepage.doc,img,self.color, "id")
# Hand the symbol table and the browser's responses directory to check_symbols.
self.check_symbols(self.symbols,basepage.browser.responses_dirname)
def get_symbol_code(self, md5sum):
    """Return the short code of the key whose image hash is *md5sum*.

    The lookup itself is delegated to ``MappedVirtKeyboard.get_symbol_code``;
    only the last two characters of the code it returns are kept.
    """
    code = MappedVirtKeyboard.get_symbol_code(self, md5sum)
    return code[-2:]
def get_string_code(self, string):
    """Translate *string* into the concatenated virtual-keyboard codes.

    Each character is looked up in ``self.symbols`` and the resulting
    value is converted with ``self.get_symbol_code``; the per-character
    codes are joined in order.  An empty *string* yields ``''``.

    Raises ``KeyError`` when a character has no entry in ``self.symbols``.
    """
    return ''.join(self.get_symbol_code(self.symbols[c]) for c in string)
# Empty page class: it adds nothing to its Page base.
# NOTE(review): Page is imported from weboob.deprecated.browser, so this class
# presumably belongs to the pre-rewrite revision -- confirm whether it is still used.
class SkipPage(Page):
pass
# Login page.
# NOTE(review): two class headers and two load hooks (on_loaded / on_load) appear
# below with their bodies interleaved and unindented -- this looks like rendered
# diff output; confirm which revision is live.
class LoginPage(Page):
def on_loaded(self):
class LoginPage(HTMLPage):
def on_load(self):
# Load hook: look for a form named "form" or with id "setInfosCGS".
# When neither is found the hook returns without doing anything;
# otherwise the form is submitted as-is.
try:
self.browser.select_form(name='form')
except:
try:
self.browser.select_form(predicate=lambda x: x.attrs.get('id','')=='setInfosCGS')
except:
return
form = self.get_form(xpath='//form[@id="setInfosCGS" or @name="form"]')
except FormNotFound:
return
self.browser.submit(nologin=True)
form.submit()
def myXOR(self, value, seed):
    """XOR every character of *value* with the integer *seed*.

    Returns a string of the same length as *value* in which each
    character's code point has been XORed with *seed*.  An empty
    *value* yields ``''``.  Applying the method twice with the same
    seed gives back the original string.
    """
    return ''.join(chr(seed ^ ord(c)) for c in value)
# Authenticate with `login` / `passwd`.
# The password is translated to virtual-keyboard codes, XORed character by
# character with the page's "aleatoire" seed (see myXOR), base64-encoded and
# posted as 'postClavierXor' on the form whose id is "formAuthenticate".
# Returns False when the virtual keyboard cannot be built or when submitting
# raises BrowserUnavailable, True otherwise.
# NOTE(review): two revisions are interleaved below without indentation; in
# particular a missing seed is handled both by error() + return False and by
# raise ParseError -- confirm which behaviour is the live one.
def login(self, login, passwd):
try:
vk=LCLVirtKeyboard(self)
vk = LCLVirtKeyboard(self)
except VirtKeyboardError as err:
error("Error: %s"%err)
self.logger.exception(err)
return False
# Convert the clear-text password to the keyboard's per-key codes.
password=vk.get_string_code(passwd)
password = vk.get_string_code(passwd)
# Scan every <script> for the "var aleatoire = " marker; -1 means "not found yet".
seed=-1
str="var aleatoire = "
for script in self.document.findall("//script"):
if(script.text is None or len(script.text)==0):
seed = -1
s = "var aleatoire = "
for script in self.doc.findall("//script"):
if script.text is None or len(script.text) == 0:
continue
# The seed is exactly one character, taken one position past the end of the
# marker (the skipped character is presumably a quote -- TODO confirm).
offset=script.text.find(str)
if offset!=-1:
seed=int(script.text[offset+len(str)+1:offset+len(str)+2])
offset = script.text.find(s)
if offset != -1:
seed = int(script.text[offset+len(s)+1:offset+len(s)+2])
break
if seed==-1:
error("Variable 'aleatoire' not found")
return False
raise ParseError("Variable 'aleatoire' not found")
# Fill the authentication form with the identifier and the obfuscated password.
self.browser.select_form(
predicate=lambda x: x.attrs.get('id','')=='formAuthenticate')
self.browser.form.set_all_readonly(False)
self.browser['identifiant'] = login.encode('utf-8')
self.browser['postClavierXor'] = base64.b64encode(self.myXOR(password,seed))
form = self.get_form('//form[@id="formAuthenticate"]')
form['identifiant'] = login
form['postClavierXor'] = base64.b64encode(self.myXOR(password,seed))
# 'identifiantRouting' is only set when the browser defines IDENTIFIANT_ROUTING;
# a missing attribute is ignored.
try:
self.browser['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
form['identifiantRouting'] = self.browser.IDENTIFIANT_ROUTING
except AttributeError:
pass
try:
self.browser.submit(nologin=True)
form.submit()
except BrowserUnavailable:
# Login is not valid
return False
return True
# Return True when the page contains at least one <div> whose class attribute
# is exactly "erreur" or "messError".
# NOTE(review): the xpath query appears twice (self.document and self.doc) --
# two interleaved revisions; confirm which one is live.
def is_error(self):
errors = self.document.xpath(u'//div[@class="erreur" or @class="messError"]')
errors = self.doc.xpath(u'//div[@class="erreur" or @class="messError"]')
return len(errors) > 0
# Contract selection page: as soon as it is loaded, the first form of the page
# is submitted unchanged.
# NOTE(review): two class headers / load hooks and two ways of submitting the
# form are interleaved below without indentation -- confirm which revision is live.
class ContractsPage(Page):
def on_loaded(self):
class ContractsPage(LoggedPage, HTMLPage):
def on_load(self):
self.select_contract()
def select_contract(self):
# XXX The default contract of the list is selected automatically. We should
# let the user choose which contract to view, or display accounts for all contracts.
self.browser.select_form(nr=0)
self.browser.submit(nologin=True)
form = self.get_form(nr=0)
form.submit()
# Accounts overview page.
# NOTE(review): this span interleaves two revisions without indentation -- an
# imperative get_list() that walks <a href> links and a declarative
# get_list ListElement that walks <tr onclick> rows.  Confirm which one is
# live before editing; the comments below describe each group of lines as written.
class AccountsPage(Page):
def on_loaded(self):
warn = self.document.xpath('//div[@id="attTxt"]')
class AccountsPage(LoggedPage, HTMLPage):
def on_load(self):
warn = self.doc.xpath('//div[@id="attTxt"]')
# A <div id="attTxt"> is treated as a site warning: its text is raised as BrowserUnavailable.
if len(warn) > 0:
raise BrowserUnavailable(warn[0].text)
# --- imperative variant: builds Account objects from links starting with
# "/outil/UWLM/ListeMouvements"; `ids` prevents the same account id being listed twice.
def get_list(self):
l = []
ids = set()
for a in self.document.getiterator('a'):
link=a.attrib.get('href')
if link is None:
continue
if link.startswith("/outil/UWLM/ListeMouvements"):
account = Account()
# By default the website offers the last 7 or 45 days, but the last 55 days can be forced.
account._link_id=link+"&mode=55"
account._coming_links = []
# The account id is assembled from the "agence" and "compte" query parameters
# ("compte" is appended, so "agence" is expected to come first in the link).
parameters=link.split("?").pop().split("&")
for parameter in parameters:
list=parameter.split("=")
value=list.pop()
name=list.pop()
if name=="agence":
account.id=value
elif name=="compte":
account.id+=value
elif name=="nature":
# TODO: parse this string to get the right Account.TYPE_* to
# store in account.type.
account._type=value
# --- declarative variant: one item per <tr> whose onclick contains "redirect".
@method
class get_list(ListElement):
item_xpath = '//tr[contains(@onclick, "redirect")]'
flush_at_end = True
if account.id in ids:
continue
class account(ItemElement):
klass = Account
ids.add(account.id)
# Imperative variant: the label is read from the element preceding the link's
# parent (or from its first <div> when that element has no text of its own).
div = a.getparent().getprevious()
if not div.text.strip():
div = div.find('div')
account.label=u''+div.text.strip()
# A minus sign found anywhere in the cleaned amount is moved to the front before Decimal().
balance = FrenchTransaction.clean_amount(a.text)
if '-' in balance:
balance='-'+balance.replace('-', '')
account.balance=Decimal(balance)
account.currency = account.get_currency(a.text)
self.logger.debug('%s Type: %s' % (account.label, account._type))
l.append(account)
# Imperative variant: a card link ("/outil/UWCB/UWCBEncours") is attached to the
# most recently listed account; it is skipped with a warning when none exists yet.
if link.startswith('/outil/UWCB/UWCBEncours'):
if len(l) == 0:
self.logger.warning('There is a card account but not any check account')
continue
# Declarative variant: only rows whose onclick targets the movements list are accounts.
def condition(self):
return '/outil/UWLM/ListeMouvement' in self.el.attrib['onclick']
account = l[-1]
# Maps the "nature" query parameter of the link to an Account type.
NATURE2TYPE = {'006': Account.TYPE_CHECKING,
'049': Account.TYPE_SAVINGS,
'068': Account.TYPE_MARKET,
'069': Account.TYPE_SAVINGS,
}
coming = FrenchTransaction.clean_amount(a.text)
if '-' in coming:
coming = '-'+coming.replace('-', '')
# Declarative variant: the link is the quoted part of onclick with "&mode=55" appended;
# the id concatenates the "agence" and "compte" parameters of that link.
obj__link_id = Format('%s&mode=55', Regexp(CleanText('./@onclick'), "'(.*)'"))
obj_id = Regexp(Field('_link_id'), r'.*agence=(\w+).*compte=(\w+)', r'\1\2')
obj__coming_links = []
obj_label = CleanText('.//div[@class="libelleCompte"]')
obj_balance = CleanDecimal('.//td[has-class("right")]', replace_dots=True)
obj_currency = FrenchTransaction.Currency('.//td[has-class("right")]')
obj_type = Map(Regexp(Field('_link_id'), r'.*nature=(\w+)'), NATURE2TYPE, default=Account.TYPE_UNKNOWN)
# Declarative variant: card rows are never emitted as objects (parse always ends with
# SkipItem); their amount is added to the matching account's `coming` instead.
# NOTE(review): the lookup key concatenates AGENCE+COMPTE+CLE while obj_id above
# concatenates agence+compte only -- presumably "compte" already carries the key
# in account links; verify against real pages.
class card(ItemElement):
def condition(self):
return '/outil/UWCB/UWCBEncours' in self.el.attrib['onclick']
def parse(self, el):
link = Regexp(CleanText('./@onclick'), "'(.*)'")(el)
id = Regexp(CleanText('./@onclick'), r'.*AGENCE=(\w+).*COMPTE=(\w+).*CLE=(\w+)', r'\1\2\3')(el)
account = self.parent.objects[id]
# Start from zero when no coming amount has been accumulated yet.
if not account.coming:
account.coming = Decimal('0')
account.coming += Decimal(coming)
account.coming += CleanDecimal('.//td[has-class("right")]', replace_dots=True)(el)
account._coming_links.append(link)
return l
raise SkipItem()
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile('^(?P<category>CB) (?P<text>RETRAIT) DU (?P<dd>\d+)/(?P<mm>\d+)'),
PATTERNS = [(re.compile('^(?P<category>CB) (?P<text>RETRAIT) DU (?P<dd>\d+)/(?P<mm>\d+)'),
FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^(?P<category>(PRLV|PE)) (?P<text>.*)'),
FrenchTransaction.TYPE_ORDER),
@ -235,103 +211,26 @@ class Transaction(FrenchTransaction):
]
# Account history page: exposes get_operations(), which returns the transactions
# listed on the page.
# NOTE(review): this span interleaves two revisions without indentation -- a
# hand-written table walker (get_table + a long get_operations body) and a
# declarative _get_operations element.  The nesting of strip_label/validate
# cannot be determined from here; confirm which revision is live before editing.
class AccountHistoryPage(Page):
def get_table(self):
tables=self.document.findall("//table[@class='tagTab pyjama']")
for table in tables:
# Look for the relevant table in the Pro version:
# walk back to the closest preceding <div>, then into its div/span header.
header=table.getprevious()
while header is not None and str(header.tag) != 'div':
header=header.getprevious()
if header is not None:
header=header.find("div")
if header is not None:
header=header.find("span")
class AccountHistoryPage(LoggedPage, HTMLPage):
# Declarative variant: rows and header cells are taken from tables having the
# "tagTab" class and no (or an empty) style attribute.
@method
class _get_operations(Transaction.TransactionsElement):
item_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr'
head_xpath = '//table[has-class("tagTab") and (not(@style) or @style="")]/tr/th'
if header is not None and \
header.text.strip().startswith("Opérations effectuées".decode('utf-8')):
return table
# Header captions accepted for the raw-label column.
col_raw = [u'Vos opérations', u'Libellé']
# Look for the relevant table in the Particulier version.
header=table.find("thead").find("tr").find("th[@class='titleTab titleTableft']")
if header is not None and\
header.text.strip().startswith("Solde au"):
return table
class item(Transaction.TransactionElement):
# A row is a transaction only when a date column was found and it has at least three cells.
def condition(self):
return self.parent.get_colnum('date') is not None and len(self.el.findall('td')) >= 3
# Returns the label unchanged.
def strip_label(self, s):
return s
# Drop transactions whose category is 'RELEVE CB'.
def validate(self, obj):
return obj.category != 'RELEVE CB'
def get_operations(self):
table = self.get_table()
operations = []
if table is None:
return operations
for tr in table.iter('tr'):
# Skip header rows and rows with at most one cell.
if len(tr.findall("th"))!=0 or\
len(tr.findall("td"))<=1:
continue
mntColumn = 0
date = None
raw = None
credit = ''
debit = ''
# Each cell is classified by its id, falling back to its class.
for td in tr.iter('td'):
value = td.attrib.get('id')
if value is None:
# If the tag has neither id nor class, assume it is a label.
value = td.attrib.get('class', 'opLib')
if value.startswith("date") or value.endswith('center'):
# Some transactions are wrapped in a <strong> tag.
date = u''.join([txt.strip() for txt in td.itertext()])
elif value.startswith("lib") or value.startswith("opLib"):
# A misclosed <a> tag forces us to grab the text from the whole td;
# the tooltip div, when present, is removed first.
tooltip = td.xpath('./div[@class="autoTooltip"]')
if len(tooltip) > 0:
td.remove(tooltip[0])
raw = self.parser.tocleanstring(td)
elif value.startswith("solde") or value.startswith("mnt") or \
value.startswith('debit') or value.startswith('credit'):
# Amount cell: it is a debit when its id/class says so or when it is the
# first amount column of the row; otherwise it is a credit.
mntColumn += 1
amount = u''.join([txt.strip() for txt in td.itertext()])
if amount != "":
if value.startswith("soldeDeb") or value.startswith('debit') or mntColumn==1:
debit = amount
else:
credit = amount
if date is None:
# Not a transaction row: skip it.
continue
operation = Transaction(len(operations))
operation.parse(date, raw)
operation.set_amount(credit, debit)
if operation.category == 'RELEVE CB':
# Drop this transaction: it is detailed in CBListPage.
continue
operations.append(operation)
return operations
return self._get_operations()
class CBHistoryPage(AccountHistoryPage):
# Return the first table whose class is exactly "tagTab pyjama", or None when
# the page has none.
# NOTE(review): relies on self.document, which other lines in this file replace
# with self.doc -- presumably this method belongs to the pre-rewrite revision; confirm.
def get_table(self):
# There is only one table on the page.
try:
return self.document.findall("//table[@class='tagTab pyjama']")[0]
except IndexError:
return None
def strip_label(self, label):
    """Return *label* with runs of spaces collapsed and the ends trimmed.

    Every run of one or more space characters becomes a single space,
    then leading and trailing whitespace is stripped.  Other whitespace
    inside the label (tabs, newlines) is left untouched.
    """
    # Two consecutive spaces would otherwise make the start of the label
    # be taken for a category.
    return re.sub(r'[ ]+', ' ', label).strip()
def get_operations(self):
for tr in AccountHistoryPage.get_operations(self):
tr.type = tr.TYPE_CARD
@ -341,8 +240,8 @@ class CBHistoryPage(AccountHistoryPage):
class CBListPage(CBHistoryPage):
# Return the list of card links found on the page: links that start with
# '/outil/UWCB/UWCBEncours' and contain 'listeOperations'.
# NOTE(review): two revisions of the loop are interleaved without indentation --
# one reads the href of every <a>, the other the quoted part of every <tr>'s
# onclick attribute (None when absent); confirm which one is live.
def get_cards(self):
cards = []
for a in self.document.getiterator('a'):
link = a.attrib.get('href', '')
if link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link:
for tr in self.doc.getiterator('tr'):
link = Regexp(CleanText('./@onclick'), "'(.*)'", default=None)(tr)
if link is not None and link.startswith('/outil/UWCB/UWCBEncours') and 'listeOperations' in link:
cards.append(link)
return cards