upgrade to browser2

This commit is contained in:
Romain Bignon 2014-03-09 15:44:18 +01:00
commit 1b2d3cfe48
3 changed files with 233 additions and 275 deletions

View file

@ -18,56 +18,50 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import urllib
from urlparse import urlparse, parse_qs
from decimal import Decimal
import re
from dateutil.relativedelta import relativedelta
from weboob.tools.browser import BasePage, BrowserIncorrectPassword, BrokenPageError
from weboob.tools.ordereddict import OrderedDict
from weboob.tools.browser2.page import HTMLPage, method, ListElement, ItemElement, SkipItem, FormNotFound, TableElement
from weboob.tools.browser2.filters import Filter, Env, CleanText, CleanDecimal, Link, TableCell
from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities import NotAvailable
from weboob.capabilities.bank import Account
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.date import parse_french_date
class LoginPage(BasePage):
class LoggedPage(object):
    """Mixin that flags a page class as one seen by an authenticated user.

    It only carries the class attribute ``logged``; page classes in this
    module list it among their bases.
    """

    # Flag exposed on every page class that inherits from this mixin.
    logged = True
class LoginPage(HTMLPage):
def login(self, login, passwd):
self.browser.select_form(nr=0)
self.browser['_cm_user'] = login.encode(self.browser.ENCODING)
self.browser['_cm_pwd'] = passwd.encode(self.browser.ENCODING)
self.browser.submit(nologin=True)
form = self.get_form(nr=0)
form['_cm_user'] = login
form['_cm_pwd'] = passwd
form.submit()
class LoginErrorPage(BasePage):
class LoginErrorPage(HTMLPage):
    # Empty page class: it defines no parsing logic of its own.
    # Presumably it exists so that a failed login can be recognised by the
    # type of the page that was loaded -- TODO confirm against the browser.
    pass
class ChangePasswordPage(BasePage):
def on_loaded(self):
class ChangePasswordPage(LoggedPage, HTMLPage):
    """Page whose loading is reported as a credentials problem.

    Presumably served when the website forces the user to renew the
    password before going any further -- verify against the browser's URL
    mapping.
    """

    def on_load(self):
        """Abort by raising ``BrowserIncorrectPassword``.

        :raises BrowserIncorrectPassword: always.
        """
        message = 'Please change your password'
        raise BrowserIncorrectPassword(message)
class VerifCodePage(BasePage):
def on_loaded(self):
class VerifCodePage(LoggedPage, HTMLPage):
    """Page whose loading is reported as a login failure.

    According to the error message, the website is asking for a code taken
    from a card, which this module cannot provide.
    """

    def on_load(self):
        """Abort by raising ``BrowserIncorrectPassword``.

        :raises BrowserIncorrectPassword: always.
        """
        message = 'Unable to login: website asks a code from a card'
        raise BrowserIncorrectPassword(message)
class InfoPage(BasePage):
class TransfertPage(LoggedPage, HTMLPage):
    # Empty page class: nothing is parsed from it here.
    # Presumably only declared so the browser can recognise the page as a
    # logged-in one -- TODO confirm.
    pass
class EmptyPage(BasePage):
pass
class TransfertPage(BasePage):
pass
class UserSpacePage(BasePage):
pass
class AccountsPage(BasePage):
class AccountsPage(LoggedPage, HTMLPage):
TYPES = {'C/C': Account.TYPE_CHECKING,
'Livret': Account.TYPE_SAVINGS,
'Pret': Account.TYPE_LOAN,
@ -76,55 +70,68 @@ class AccountsPage(BasePage):
'Compte Epargne': Account.TYPE_SAVINGS,
}
def get_list(self):
accounts = OrderedDict()
@method
class iter_accounts(ListElement):
item_xpath = '//tr'
flush_at_end = True
for tr in self.document.getiterator('tr'):
first_td = tr.getchildren()[0]
if (first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g') \
and first_td.find('a') is not None:
class item(ItemElement):
klass = Account
a = first_td.find('a')
link = a.get('href', '')
def __filter__(self, el):
if len(el.xpath('./td')) < 2:
return False
first_td = el.xpath('./td')[0]
return ((first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g')
and first_td.find('a') is not None)
class Label(Filter):
    """Filter turning a raw account caption into a display label."""

    def filter(self, text):
        """Return ``text`` without its leading digits/spaces, title-cased.

        :param text: caption text produced by the wrapped filter.
        :returns: the cleaned label.
        """
        # Drop the leading run of spaces and digits before the caption.
        without_prefix = text.lstrip(' 0123456789')
        return without_prefix.title()
obj_id = Env('id')
obj_label = Label(CleanText('./td[1]/a'))
obj_balance = CleanDecimal('./td[2] | ./td[3]')
obj_coming = Env('coming')
obj_balance = Env('balance')
obj_currency = FrenchTransaction.Currency('./td[2] | ./td[3]')
obj__link_id = Link('./td[1]/a')
obj__card_links = []
def obj_type(self):
for pattern, actype in AccountsPage.TYPES.iteritems():
if self.obj.label.startswith(pattern):
return actype
def parse(self, el):
link = el.xpath('./td[1]/a')[0].get('href', '')
if link.startswith('POR_SyntheseLst'):
continue
raise SkipItem()
url = urlparse(link)
p = parse_qs(url.query)
if not 'rib' in p:
continue
for i in (2,1):
balance = FrenchTransaction.clean_amount(tr.getchildren()[i].text)
currency = Account.get_currency(tr.getchildren()[i].text)
if len(balance) > 0:
break
balance = Decimal(balance)
raise SkipItem()
balance = CleanDecimal('./td[2] | ./td[3]')(self)
id = p['rib'][0]
if id in accounts:
account = accounts[id]
# Handle cards
if id in self.parent.objects:
account = self.parent.objects[id]
if not account.coming:
account.coming = Decimal('0.0')
account.coming += balance
account._card_links.append(link)
continue
raise SkipItem()
account = Account()
account.id = id
account.label = unicode(a.text).strip().lstrip(' 0123456789').title()
self.env['id'] = id
for pattern, actype in self.TYPES.iteritems():
if account.label.startswith(pattern):
account.type = actype
account._link_id = link
account._card_links = []
# Find accounting amount
page = self.browser.get_document(self.browser.openurl(link))
coming = self.find_amount(page, u"Opérations à venir")
accounting = self.find_amount(page, u"Solde comptable")
# Handle real balances
page = self.page.browser.open(link)
coming = page.find_amount(u"Opérations à venir")
accounting = page.find_amount(u"Solde comptable")
if accounting is not None and accounting + (coming or Decimal('0')) != balance:
self.logger.warning('%s + %s != %s' % (accounting, coming, balance))
@ -132,22 +139,8 @@ class AccountsPage(BasePage):
if accounting is not None:
balance = accounting
if coming is not None:
account.coming = coming
account.balance = balance
account.currency = currency
accounts[account.id] = account
return accounts.itervalues()
def find_amount(self, page, title):
try:
td = page.xpath(u'//th[contains(text(), "%s")]/../td' % title)[0]
except IndexError:
return None
else:
return Decimal(FrenchTransaction.clean_amount(td.text))
self.env['balance'] = balance
self.env['coming'] = coming or NotAvailable
class Transaction(FrenchTransaction):
@ -165,144 +158,130 @@ class Transaction(FrenchTransaction):
_is_coming = False
class OperationsPage(BasePage):
def get_history(self):
index = 0
for tr in self.document.getiterator('tr'):
# columns can be:
# - date | value | operation | debit | credit | contre-valeur
# - date | value | operation | debit | credit
# - date | operation | debit | credit
# That's why we skip any extra columns, and take operation, debit
# and credit from last instead of first indexes.
tds = tr.getchildren()[:5]
if len(tds) < 4:
continue
class Pagination(object):
def next_page(self):
try:
form = self.page.get_form('//form[@id="paginationForm"]')
except FormNotFound:
return
if tds[0].attrib.get('class', '') == 'i g' or \
tds[0].attrib.get('class', '') == 'p g' or \
tds[0].attrib.get('class', '').endswith('_c1 c _c1'):
operation = Transaction(index)
index += 1
parts = [txt.strip() for txt in tds[-3].itertext() if len(txt.strip()) > 0]
# To simplify categorization of CB, reverse order of parts to separate
# location and institution.
if parts[0].startswith('PAIEMENT CB'):
parts.reverse()
date = tds[0].text
vdate = tds[1].text if len(tds) >= 5 else None
raw = u' '.join(parts)
operation.parse(date=date, vdate=vdate, raw=raw)
credit = self.parser.tocleanstring(tds[-1])
debit = self.parser.tocleanstring(tds[-2])
operation.set_amount(credit, debit)
yield operation
def go_next(self):
form = self.document.xpath('//form[@id="paginationForm"]')
if len(form) == 0:
return False
form = form[0]
text = self.parser.tocleanstring(form)
text = CleanText.clean(form.el)
m = re.search(u'(\d+) / (\d+)', text or '', flags=re.MULTILINE)
if not m:
return False
return
cur = int(m.group(1))
last = int(m.group(2))
if cur == last:
return False
return
inputs = {}
for elm in form.xpath('.//input[@type="input"]'):
key = elm.attrib['name']
value = elm.attrib['value']
inputs[key] = value
form['page'] = str(cur + 1)
return form.request
inputs['page'] = str(cur + 1)
self.browser.location(form.attrib['action'], urllib.urlencode(inputs))
class OperationsPage(LoggedPage, HTMLPage):
@method
class get_history(Pagination, TableElement):
head_xpath = '//table[@class="liste"]//thead//tr/th'
item_xpath = '//table[@class="liste"]//tbody/tr'
return True
columns = {'date': u'Date',
'vdate': u'Valeur',
'raw': u'Opération',
'debit': u'Débit',
'credit': u'Crédit',
}
class item(ItemElement):
    """Build one ``Transaction`` from a row of the operations table."""

    klass = Transaction

    # Keep only rows that have at least four cells and at least one cell
    # carrying one of the known data-row classes.
    # NOTE(review): this filter takes only ``el`` while the accounts item in
    # this file declares ``__filter__(self, el)`` -- confirm which signature
    # the framework actually calls.
    __filter__ = lambda el: len(el.xpath('./td')) >= 4 and len(el.xpath('./td[@class="i g" or @class="p g" or contains(@class, "_c1 c _c1")]')) > 0

    class OwnRaw(Filter):
        """Assemble the raw label from the third-from-last cell of the row."""

        def __call__(self, item):
            """Return the non-empty text fragments of the cell, space-joined.

            :param item: element wrapper whose ``el`` is the table row.
            :returns: the joined label; an empty string when the cell
                      holds no text at all.
            """
            cell = item.el.xpath('./td[last()-2]')[0]
            parts = [txt.strip() for txt in cell.itertext() if len(txt.strip()) > 0]
            # To simplify categorization of CB, reverse order of parts to separate
            # location and institution.
            # ``parts`` may be empty when the cell has no text; guard the
            # index so such a row does not raise IndexError.
            if parts and parts[0].startswith('PAIEMENT CB'):
                parts.reverse()
            return u' '.join(parts)

    obj_raw = Transaction.Raw(OwnRaw())
    obj_date = Transaction.Date(TableCell('date'))
    # Two column names are given for the value date: 'vdate', then 'date'.
    # Presumably 'date' is the fallback when the table has no 'vdate'
    # column -- verify against TableCell.
    obj_vdate = Transaction.Date(TableCell('vdate', 'date'))
    obj_amount = Transaction.Amount(TableCell('credit'), TableCell('debit'))
def find_amount(self, title):
    """Look up the amount displayed next to a table header.

    :param title: text that the ``<th>`` element must contain.
    :returns: a ``Decimal`` parsed from the text of the first ``<td>``
              found under the same parent as the matching header, or
              ``None`` when no such cell exists.
    """
    cells = self.doc.xpath(u'//th[contains(text(), "%s")]/../td' % title)
    if not cells:
        # No header with that title on this page.
        return None
    amount_text = cells[0].text
    return Decimal(FrenchTransaction.clean_amount(amount_text))
def get_coming_link(self):
try:
a = self.parser.select(self.document, u'//a[contains(text(), "Opérations à venir")]', 1, 'xpath')
except BrokenPageError:
a = self.doc.xpath(u'//a[contains(text(), "Opérations à venir")]')[0]
except IndexError:
return None
else:
return a.attrib['href']
class ComingPage(OperationsPage):
def get_history(self):
index = 0
for tr in self.document.xpath('//table[@class="liste"]/tbody/tr'):
tds = tr.findall('td')
if len(tds) < 3:
continue
class ComingPage(OperationsPage, LoggedPage):
@method
class get_history(Pagination, ListElement):
item_xpath = '//table[@class="liste"]/tbody/tr'
tr = Transaction(index)
class item(ItemElement):
klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 3
date = self.parser.tocleanstring(tds[0])
raw = self.parser.tocleanstring(tds[1])
amount = self.parser.tocleanstring(tds[-1])
tr.parse(date=date, raw=raw)
tr.set_amount(amount)
tr._is_coming = True
yield tr
obj_date = Transaction.Date('./td[1]')
obj_raw = Transaction.Raw('./td[2]')
obj_amount = Transaction.Amount('./td[last()]')
obj__is_coming = True
class CardPage(OperationsPage):
def get_history(self):
index = 0
class CardPage(OperationsPage, LoggedPage):
@method
class get_history(Pagination, ListElement):
class list_cards(ListElement):
item_xpath = '//table[@class="liste"]/tbody/tr/td/a'
# Check if this is a multi-cards page
pages = []
for a in self.document.xpath('//table[@class="liste"]/tbody/tr/td/a'):
card_link = a.get('href')
history_url = 'https://%s/%s/fr/banque/%s' % (self.browser.DOMAIN, self.browser.currentSubBank, card_link)
page = self.browser.get_document(self.browser.openurl(history_url))
pages.append(page)
class item(ItemElement):
def __iter__(self):
card_link = self.el.get('href')
history_url = '%s/%s/fr/banque/%s' % (self.browser.BASEURL, self.browser.currentSubBank, card_link)
page = self.browser.location(history_url)
if len(pages) == 0:
# If not, add this page as transactions list
pages.append(self.document)
for op in page.get_history():
yield op
for page in pages:
label = self.parser.tocleanstring(self.parser.select(page.getroot(), 'div.lister p.c', 1))
label = re.findall('(\d+ [^ ]+ \d+)', label)[-1]
# use the trick of relativedelta to get the last day of month.
debit_date = parse_french_date(label) + relativedelta(day=31)
class list_history(ListElement):
item_xpath = '//table[@class="liste"]/tbody/tr'
for tr in page.xpath('//table[@class="liste"]/tbody/tr'):
tds = tr.findall('td')[:4]
if len(tds) < 4:
continue
def parse(self, el):
label = CleanText('//div[contains(@class, "lister")]//p[@class="c"]')(el)
label = re.findall('(\d+ [^ ]+ \d+)', label)[-1]
# use the trick of relativedelta to get the last day of month.
self.env['debit_date'] = parse_french_date(label) + relativedelta(day=31)
tr = Transaction(index)
class item(ItemElement):
klass = Transaction
__filter__ = lambda el: len(el.xpath('./td')) >= 4
parts = [txt.strip() for txt in list(tds[-3].itertext()) + list(tds[-2].itertext()) if len(txt.strip()) > 0]
obj_raw = Transaction.Raw('./td[last()-2] | ./td[last()-1]')
obj_type = Transaction.TYPE_CARD
obj_date = Env('debit_date')
obj_rdate = Transaction.Date('./td[1]')
obj_vdate = Transaction.Date('./td[1]')
obj_amount = Transaction.Amount('./td[last()]')
tr.parse(date=tds[0].text.strip(' \xa0'),
raw=u' '.join(parts))
tr.date = debit_date
tr.type = tr.TYPE_CARD
# Don't take all of the content (with tocleanstring for example),
# because there is a span.aide.
tr.set_amount(tds[-1].text)
yield tr
class NoOperationsPage(OperationsPage):
class NoOperationsPage(OperationsPage, LoggedPage):
    """Operations page variant that yields no transaction at all."""

    def get_history(self):
        """Return an iterator over an empty list."""
        nothing = []
        return iter(nothing)