weboob-devel/modules/bnporc/enterprise/pages.py
2015-02-21 14:23:10 +01:00

273 lines
11 KiB
Python

# -*- coding: utf-8 -*-
# Copyright(C) 2013 Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from decimal import Decimal
import hashlib
from urlparse import parse_qs
from datetime import datetime
import re
from weboob.capabilities.bank import Account
from weboob.deprecated.browser import Page, BrokenPageError
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError
from weboob.tools.misc import to_unicode
class Transaction(FrenchTransaction):
    """Transaction whose label is classified through ``PATTERNS``.

    ``PATTERNS`` is a list of ``(compiled regex, FrenchTransaction.TYPE_*)``
    pairs.  Every regex exposes a ``category`` and a ``text`` named group;
    some also expose date/time groups (``dd``, ``mm``, ``yy``, ``HH``, ``MM``).

    NOTE(review): the list is presumably consumed in order by
    FrenchTransaction.parse(), first match winning -- which is why the
    specific 'REMBOURST CB DU' entry precedes the generic 'REMBOURST' one.
    Confirm against the parent class.
    """

    # Patterns are written as raw strings so that regex escapes such as \d
    # are never interpreted as (invalid) string escapes.
    PATTERNS = [
        (re.compile(u'^(?P<category>CHEQUE)(?P<text>.*)'),
         FrenchTransaction.TYPE_CHECK),
        (re.compile(r'^(?P<category>FACTURE CARTE) DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*?)( CA?R?T?E? ?\d*X*\d*)?$'),
         FrenchTransaction.TYPE_CARD),
        (re.compile(r'^(?P<category>(PRELEVEMENT|TELEREGLEMENT|TIP)) (?P<text>.*)'),
         FrenchTransaction.TYPE_ORDER),
        (re.compile(r'^(?P<category>ECHEANCEPRET)(?P<text>.*)'),
         FrenchTransaction.TYPE_LOAN_PAYMENT),
        (re.compile(r'^(?P<category>RETRAIT DAB) (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2})( (?P<HH>\d+)H(?P<MM>\d+))? (?P<text>.*)'),
         FrenchTransaction.TYPE_WITHDRAWAL),
        (re.compile(r'^(?P<category>VIR(EMEN)?T? ((RECU|FAVEUR) TIERS|SEPA RECU)?)( /FRM)?(?P<text>.*)'),
         FrenchTransaction.TYPE_TRANSFER),
        (re.compile(r'^(?P<category>REMBOURST) CB DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'),
         FrenchTransaction.TYPE_PAYBACK),
        (re.compile(r'^(?P<category>REMBOURST)(?P<text>.*)'),
         FrenchTransaction.TYPE_PAYBACK),
        (re.compile(r'^(?P<category>COMMISSIONS)(?P<text>.*)'),
         FrenchTransaction.TYPE_BANK),
        # Here the 'text' group wraps the category, so the keyword stays
        # part of the captured text.
        (re.compile(r'^(?P<text>(?P<category>REMUNERATION).*)'),
         FrenchTransaction.TYPE_BANK),
        (re.compile(r'^(?P<category>REMISE CHEQUES)(?P<text>.*)'),
         FrenchTransaction.TYPE_DEPOSIT),
    ]
class BEPage(Page):
    """Base class of the pages handled in this module."""

    def get_error(self):
        """Return the error message carried by the page, if any.

        The page is considered an error page when a ``<title>`` of the
        document head contains 'erreur' or 'error' (case-sensitive).  The
        message is then read from the ``value`` of the ``titre_page`` input.

        :returns: the error message, or None when no title flags an error.

        NOTE(review): ``parser.select(..., 1, 'xpath')`` presumably raises
        BrokenPageError when the input is not found exactly once -- confirm.
        """
        for title in self.document.xpath('/html/head/title'):
            # An element without text has ``text`` set to None; fall back to
            # an empty string so the membership tests cannot raise TypeError.
            text = title.text or ''
            if 'erreur' in text or 'error' in text:
                return self.parser.select(self.document.getroot(),
                                          '//input[@name="titre_page"]', 1, 'xpath').value
        return None
class BNPVirtKeyboard(MappedVirtKeyboard):
    """Virtual keyboard built from the image mapped by ``#MapGril``."""

    # Expected md5 checksum (as produced by checksum() below) per symbol.
    # The '-1' entry is the md5 of an empty string, i.e. a cell in which
    # checksum() kept no row at all.
    symbols = {
        '0': '97a2b5816f2db74851fe05afd17dc9fe',
        '1': '0a24fe3a35efeb0a89aa5e7b098e6842',
        '2': '65ff550debf85eacf8efaadd6cd80aa5',
        '3': '2bd67143fcd4207ac14d0ea8afdf4ebb',
        '4': 'a46bfd21636805a31a579b253c3b23d5',
        '5': '3f644894037255bc0feaba9abb1facfa',
        '6': '40d91064a749563fa4dd31fb52e880f0',
        '7': 'cd3af65da74d57df1e6a91ca946c09b7',
        '8': '85b718e032a02e887c757a7745a1f0bd',
        '9': 'c2cdc08c8c68855d83c0899d7e8c6719',
        '-1': 'd41d8cd98f00b204e9800998ecf8427e',
    }

    # Colour argument handed to MappedVirtKeyboard.__init__.
    color = 45

    def __init__(self, basepage):
        """Download the keyboard image of ``basepage`` and check its symbols."""
        document = basepage.document
        browser = basepage.browser
        img = document.find("//img[@usemap='#MapGril']")
        imgdata = browser.openurl(img.attrib['src'])
        MappedVirtKeyboard.__init__(self, imgdata, document, img, self.color)
        self.check_symbols(self.symbols, browser.responses_dirname)

    def get_symbol_coords(self, coords):
        """Delegate to the parent after shrinking ``coords`` to strip borders."""
        left, top, right, bottom = coords
        inner = (left + 6, top + 1, right - 6, bottom - 4)
        return MappedVirtKeyboard.get_symbol_coords(self, inner)

    def get_symbol_code(self, md5sum):
        """Return the digit code of the symbol whose checksum is ``md5sum``.

        The value returned by the parent is split on single quotes and the
        second piece is kept; it must be made of digits only.
        """
        raw_code = MappedVirtKeyboard.get_symbol_code(self, md5sum)
        code = raw_code.split("'")[1]
        assert code.isdigit()
        return code

    def get_string_code(self, string):
        """Return the concatenated symbol codes of every character of ``string``."""
        return ''.join(self.get_symbol_code(self.symbols[char]) for char in string)

    def checksum(self, coords):
        """Copy of parent checksum(), but cropping (removes empty lines)"""
        left, top, right, bottom = coords
        last_row = min(bottom + 1, self.height)
        last_col = min(right + 1, self.width)
        # One text row per pixel row: " " where check_color() accepts the
        # pixel, "O" otherwise.
        rows = [
            ''.join(' ' if self.check_color(self.pixar[x, y]) else 'O'
                    for x in range(left, last_col))
            for y in range(top, last_row)
        ]
        # Rows made only of blanks are dropped before hashing.
        cropped = '\n'.join(row for row in rows if row.strip())
        return hashlib.md5(cropped).hexdigest()
class LoginPage(BEPage):
    """Login page; the password is typed through a BNPVirtKeyboard."""

    def login(self, login, password):
        """Fill in the ``ident`` form and submit it.

        :param login: user identifier, sent encoded as iso-8859-1.
        :param password: each character is translated to its keyboard code.
        :returns: False when the virtual keyboard cannot be built (the error
                  is logged); nothing is returned once the form is submitted.
        """
        try:
            keyboard = BNPVirtKeyboard(self)
        except VirtKeyboardError as exc:
            self.logger.error("Error: %s" % exc)
            return False

        browser = self.browser
        browser.select_form(name='ident')
        # Fields are made writable before being assigned.
        browser.set_all_readonly(False)
        browser['ch1'] = login.encode('iso-8859-1')
        browser['chgrille'] = keyboard.get_string_code(password)
        browser.submit()
class AccountsPage(BEPage):
    """Page listing the accounts."""

    def find_table(self):
        """Return the first table having a cell whose text starts with 'COMPTES '.

        Nothing (None) is returned when no table qualifies.
        """
        root = self.document.getroot()
        for candidate in self.parser.select(root, 'table', 'many'):
            cells = self.parser.select(candidate, 'tr td')
            if any(cell.text and cell.text.strip().startswith('COMPTES ')
                   for cell in cells):
                return candidate

    def get_list(self):
        """Yield an Account for every six-cell row of the accounts table.

        Rows whose balance cell reads 'indisponible' are skipped.
        """
        accounts_table = self.find_table()
        for row in self.parser.select(accounts_table, 'tr', 'many'):
            cells = self.parser.select(row, 'td')
            if len(cells) != 6:
                continue
            (label_cell, id_cell, currency_cell,
             updated_cell, balance_cell, balance_currency_cell) = cells

            account = Account()
            account.label = to_unicode(label_cell.text_content().strip())

            # this is important - and is also the last part of the id (considering spaces)
            # we can't use only the link as it does not go where we want
            try:
                link = self.parser.select(label_cell, 'a', 1)
            except BrokenPageError:
                # probably an account whose history can't be displayed
                account._link_id = None
            else:
                query = parse_qs(link.attrib['href'])
                account._link_id = query['ch4'][0]

            account.id = to_unicode(id_cell.text.strip().replace(' ', ''))
            account.iban = 'FR76' + account.id

            # just in case we are showing the converted balances
            account._main_currency = Account.get_currency(currency_cell.text)

            # we have to ignore those accounts, because using NotAvailable
            # makes boobank and probably many others crash
            balance_text = balance_cell.text_content()
            if balance_text.strip() == 'indisponible':
                continue

            account.balance = Decimal(Transaction.clean_amount(balance_text))
            account.currency = Account.get_currency(balance_currency_cell.text)
            account._updated = datetime.strptime(updated_cell.text, '%d/%m/%Y')
            yield account
class HistoryPage(BEPage):
    """Page listing the operations of an account."""

    def is_empty(self):
        """Return True when the page states that no operation is recorded."""
        for td in self.parser.select(self.document.getroot(), 'td.V11vertGras'):
            if u'Aucune opération enregistrée' in to_unicode(td.text_content()):
                return True
        return False

    def find_table(self):
        """Return the first table having a cell whose text matches '^OP.RATION'.

        Nothing (None) is returned when no table qualifies.
        """
        for table in self.parser.select(self.document.getroot(), 'table', 'many'):
            for td in self.parser.select(table, 'tr td'):
                if re.search('^OP.RATION', td.text_content().strip()):
                    return table

    def get_date_range(self):
        """Return the ``(d1, d2)`` pair of date strings shown by the page.

        The dates come from the ``br_tout_date`` input when it exists (its
        value holds two 10-character dates back to back), otherwise from the
        text following the ``chB`` input.

        :raises ValueError: in the fallback case, when the text does not
                            contain exactly two dates.
        """
        try:
            radio = self.parser.select(self.document, '//input[@name="br_tout_date"]', 1, 'xpath')
        except BrokenPageError:
            # Local renamed so that the ``input`` builtin is not shadowed.
            field = self.document.xpath('//input[@name="chB"]')[0]
            d1, d2 = re.findall(r'(\d+/\d+/\d+)', field.tail)
        else:
            d1 = radio.attrib['value'][0:10]
            d2 = radio.attrib['value'][10:20]
        return (d1, d2)

    # Header text (regex, '.' standing for any single character) -> column name.
    TXT2CONST = {u'DATE VALEUR': 'vdate',
                 u'DATE D\'OPE': 'date',
                 u'OP.RATION': 'label',
                 u'D.BIT': 'debit',
                 u'CR.DIT': 'credit',
                 }

    def iter_history(self):
        """Yield a Transaction for every usable five-cell row of the table.

        A row is usable when it has a date, a label and a debit or a credit.
        The transaction is built with the index of its row in the table.
        """
        if self.is_empty():
            return

        # Default layout; overridden by any header row met while iterating.
        columns = {'date': 0, 'vdate': 1, 'label': 2, 'debit': 3, 'credit': 4}

        table = self.find_table()
        for i, tr in enumerate(self.parser.select(table, 'tr', 'many')):
            tds = self.parser.select(tr, 'td')
            if len(tds) != 5:
                continue

            if self.parser.select(tr, 'td.thtitrefondbleu'):
                # Header row: record which column holds which field.  The
                # column index has its own name so that the row index ``i``
                # (used to build transactions) is never overwritten.
                for col, td in enumerate(tds):
                    txt = self.parser.tocleanstring(td)
                    for part, const in self.TXT2CONST.iteritems():
                        if re.search(part, txt):
                            columns[const] = col
                            break
                continue

            tddate = self.parser.tocleanstring(tds[columns['date']])
            tdval = self.parser.tocleanstring(tds[columns['vdate']])
            tdlabel = self.parser.tocleanstring(tds[columns['label']])
            tddebit = self.parser.tocleanstring(tds[columns['debit']])
            tdcredit = self.parser.tocleanstring(tds[columns['credit']])

            if all((tddate, tdlabel, any((tddebit, tdcredit)))):
                # A debit is turned into a negative amount.
                if tddebit:
                    tdamount = '- %s' % tddebit
                else:
                    tdamount = tdcredit
                t = Transaction(i)
                t.set_amount(tdamount)
                t.parse(tddate, tdlabel, tdval)
                # Flagged when the first cell of the row contains a <span>.
                t._coming = (tds[0].find('span') is not None)
                yield t

    def get_next_numpage(self):
        """Return the number of the page following the current one, or None.

        The current page is read from the ``chP`` parameter of the URL
        (1 when absent); the known pages are all the numbers found in the
        first cell whose text contains "Page".

        None is returned when there is no pagination, when the current page
        is not among the listed numbers (a warning is logged), or when the
        current page is the last one.
        """
        current = 1
        m = re.search(r'chP=(\d+)', self.url)
        if m:
            current = int(m.group(1))

        try:
            pages = self.parser.tocleanstring(self.document.xpath('.//td[contains(text(), "Page")]')[0])
        except IndexError:
            # No pagination
            return None

        # We get list of all page numbers, de-duplicated so that a number
        # appearing twice in the cell can never make the "next" page equal
        # to the current one...
        pages = sorted(set(map(int, re.findall(r'(\d+)', pages))))

        try:
            # ...find position of the current page...
            curidx = pages.index(current)
        except ValueError:
            self.logger.warning('Unable to find the current page (%d)' % current)
            return None

        try:
            # ... and return number of the next page
            return pages[curidx + 1]
        except IndexError:
            # Last page
            return None
class UnknownPage(BEPage):
    """Placeholder page class that adds nothing to BEPage."""