add a class FrenchTransaction to factorize common parsing of french banks

This commit is contained in:
Romain Bignon 2012-03-22 16:21:38 +01:00
commit 2675510f32
5 changed files with 188 additions and 112 deletions

View file

@ -19,54 +19,31 @@
import re import re
from datetime import date
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.capabilities.bank import Transaction from weboob.tools.capabilities.bank.transactions import FrenchTransaction
from weboob.capabilities.base import NotAvailable
__all__ = ['AccountHistory', 'AccountComing'] __all__ = ['AccountHistory', 'AccountComing']
class TransactionsBasePage(BasePage): class Transaction(FrenchTransaction):
LABEL_PATTERNS = [(re.compile(u'^CHEQUEN°(?P<no>.*)'), PATTERNS = [(re.compile(u'^CHEQUE(?P<text>.*)'), FrenchTransaction.TYPE_CHECK),
Transaction.TYPE_CHECK, u'%(no)s'), (re.compile('^FACTURE CARTE DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'),
(re.compile('^FACTURE CARTE DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'), FrenchTransaction.TYPE_CARD),
Transaction.TYPE_CARD, u'20%(yy)s-%(mm)s-%(dd)s: %(text)s'), (re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P<text>.*)'),
(re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
Transaction.TYPE_ORDER, '%(text)s'), (re.compile('^ECHEANCEPRET(?P<text>.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT),
(re.compile('^ECHEANCEPRET(?P<text>.*)'), (re.compile('^RETRAIT DAB (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2}) (?P<HH>\d+)H(?P<MM>\d+) (?P<text>.*)'),
Transaction.TYPE_LOAN_PAYMENT, u'%(text)s'), FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2}) (?P<HH>\d+)H(?P<MM>\d+) (?P<text>.*)'), (re.compile('^VIR(EMEN)?T? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
Transaction.TYPE_WITHDRAWAL, u'20%(yy)s-%(mm)s-%(dd)s %(HH)s:%(MM)s: %(text)s'), (re.compile('^REMBOURST (?P<text>.*)'), FrenchTransaction.TYPE_PAYBACK),
(re.compile('^VIR(EMEN)?T (?P<text>.*)'), (re.compile('^COMMISSIONS (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
Transaction.TYPE_TRANSFER, u'%(text)s'), (re.compile('^(?P<text>REMUNERATION.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^REMBOURST (?P<text>.*)'), (re.compile('^REMISE CHEQUES(?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
Transaction.TYPE_PAYBACK, '%(text)s'), ]
(re.compile('^COMMISSIONS (?P<text>.*)'),
Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^(?P<text>REMUNERATION.*)'),
Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^REMISE CHEQUES(?P<text>.*)'),
Transaction.TYPE_DEPOSIT, '%(text)s'),
]
def parse_text(self, op): class AccountHistory(BasePage):
op.category = NotAvailable
if ' ' in op.raw:
op.category, useless, op.label = [part.strip() for part in op.label.partition(' ')]
else:
op.label = op.raw
for pattern, _type, _label in self.LABEL_PATTERNS:
m = pattern.match(op.raw)
if m:
op.type = _type
op.label = (_label % m.groupdict()).strip()
return
class AccountHistory(TransactionsBasePage):
def iter_operations(self): def iter_operations(self):
for tr in self.document.xpath('//table[@id="tableCompte"]//tr'): for tr in self.document.xpath('//table[@id="tableCompte"]//tr'):
if len(tr.xpath('td[@class="debit"]')) == 0: if len(tr.xpath('td[@class="debit"]')) == 0:
@ -74,21 +51,17 @@ class AccountHistory(TransactionsBasePage):
id = tr.find('td').find('input').attrib['value'] id = tr.find('td').find('input').attrib['value']
op = Transaction(id) op = Transaction(id)
op.raw = tr.findall('td')[2].text.replace(u'\xa0', u'').strip() op.parse(date=tr.findall('td')[1].text,
op.date = date(*reversed([int(x) for x in tr.findall('td')[1].text.split('/')])) raw=tr.findall('td')[2].text.replace(u'\xa0', u''))
self.parse_text(op) debit = tr.xpath('.//td[@class="debit"]')[0].text
credit = tr.xpath('.//td[@class="credit"]')[0].text
debit = tr.xpath('.//td[@class="debit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0\n\r') op.set_amount(credit, debit)
credit = tr.xpath('.//td[@class="credit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0\n\r')
if len(debit) > 0:
op.amount = - float(debit)
else:
op.amount = float(credit)
yield op yield op
class AccountComing(TransactionsBasePage): class AccountComing(BasePage):
def iter_operations(self): def iter_operations(self):
i = 0 i = 0
for tr in self.document.xpath('//table[@id="tableauOperations"]//tr'): for tr in self.document.xpath('//table[@id="tableauOperations"]//tr'):
@ -96,20 +69,16 @@ class AccountComing(TransactionsBasePage):
tds = tr.findall('td') tds = tr.findall('td')
if len(tds) != 3: if len(tds) != 3:
continue continue
d = tr.attrib['dateop']
d = date(int(d[4:8]), int(d[2:4]), int(d[0:2]))
text = tds[1].text or u'' text = tds[1].text or u''
text = text.replace(u'\xa0', u'') text = text.replace(u'\xa0', u'')
for child in tds[1].getchildren(): for child in tds[1].getchildren():
if child.text: text += child.text if child.text: text += child.text
if child.tail: text += child.tail if child.tail: text += child.tail
amount = tds[2].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0\n\r')
i += 1 i += 1
operation = Transaction(i) operation = Transaction(i)
operation.date = d operation.parse(date=tr.attrib['dateop'],
operation.raw = text.strip() raw=text)
self.parse_text(operation) operation.set_amount(tds[2].text)
operation.amount = float(amount)
yield operation yield operation

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Julien Veyssier # Copyright(C) 2010-2012 Julien Veyssier
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -19,12 +19,10 @@
import re import re
from datetime import date
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.misc import to_unicode
from weboob.capabilities.bank import Account from weboob.capabilities.bank import Account
from weboob.capabilities.bank import Transaction from weboob.tools.capabilities.bank.transactions import FrenchTransaction
class LoginPage(BasePage): class LoginPage(BasePage):
def login(self, login, passwd): def login(self, login, passwd):
@ -53,7 +51,7 @@ class AccountsPage(BasePage):
first_td = tr.getchildren()[0] first_td = tr.getchildren()[0]
if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g': if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g':
account = Account() account = Account()
account.label = u"%s"%first_td.find('a').text.strip() account.label = u"%s"%first_td.find('a').text.strip().lstrip(' 0123456789')
account._link_id = first_td.find('a').get('href', '') account._link_id = first_td.find('a').get('href', '')
if account._link_id.startswith('POR_SyntheseLst'): if account._link_id.startswith('POR_SyntheseLst'):
continue continue
@ -81,19 +79,20 @@ class AccountsPage(BasePage):
""" TODO pouvoir passer à la page des comptes suivante """ """ TODO pouvoir passer à la page des comptes suivante """
return 0 return 0
# Bank-specific label patterns consumed by FrenchTransaction.parse().
# Each entry pairs a compiled regexp, applied with match() (i.e. anchored at
# the start of the cleaned raw label), with the transaction type to assign.
# Named groups understood by parse():
#   - text:   becomes the simplified label
#   - dd, mm: day and month of the real operation date (no year is captured
#             by these patterns, so parse() has to infer it)
# The unnamed groups, such as (EMENT)? and the trailing ?(.*)$, are only used
# for matching.
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
(re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
FrenchTransaction.TYPE_CARD),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK),
(re.compile('^COTIS\.? (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^REMISE (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
]
class OperationsPage(BasePage): class OperationsPage(BasePage):
LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'),
(re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'),
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'),
(re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'),
(re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'),
]
def get_history(self): def get_history(self):
index = 0 index = 0
for tr in self.document.getiterator('tr'): for tr in self.document.getiterator('tr'):
@ -107,9 +106,6 @@ class OperationsPage(BasePage):
operation = Transaction(index) operation = Transaction(index)
index += 1 index += 1
d = tds[0].text.strip().split('/')
operation.date = date(*reversed([int(x) for x in d]))
# Find different parts of label # Find different parts of label
parts = [] parts = []
if len(tds[-3].findall('a')) > 0: if len(tds[-3].findall('a')) > 0:
@ -124,15 +120,8 @@ class OperationsPage(BasePage):
if parts[0].startswith('PAIEMENT CB'): if parts[0].startswith('PAIEMENT CB'):
parts.reverse() parts.reverse()
operation.raw = to_unicode(re.sub(u'[ ]+', u' ', u' '.join(parts).replace(u'\n', u' '))) operation.parse(date=tds[0].text,
raw=u' '.join(parts))
# Categorization
for pattern, _type, _label in self.LABEL_PATTERNS:
mm = pattern.match(operation.raw)
if mm:
operation.type = _type
operation.label = to_unicode(_label % mm.groupdict()).strip()
break
if tds[-1].text is not None and len(tds[-1].text) > 2: if tds[-1].text is not None and len(tds[-1].text) > 2:
s = tds[-1].text.strip() s = tds[-1].text.strip()

View file

@ -19,11 +19,11 @@
import re import re
from datetime import date
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.capabilities.bank import Account, Transaction from weboob.capabilities.bank import Account
from weboob.capabilities.base import NotAvailable from weboob.capabilities import NotAvailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
__all__ = ['AccountsListPage'] __all__ = ['AccountsListPage']
@ -54,18 +54,19 @@ class AccountsListPage(BasePage):
yield account yield account
class HistoryPage(BasePage): class Transaction(FrenchTransaction):
LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'), PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
(re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'), (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
(re.compile('^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'), (re.compile('^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'),
Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'), FrenchTransaction.TYPE_CARD),
(re.compile('^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) (?P<text>.*)'), (re.compile('^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) (?P<text>.*)'),
Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'), FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'), (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK),
(re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'), (re.compile('^COTIS\.? (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'), (re.compile('^REMISE (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
] ]
class HistoryPage(BasePage):
def get_operations(self): def get_operations(self):
for script in self.document.getiterator('script'): for script in self.document.getiterator('script'):
if script.text is None or script.text.find('\nCL(0') < 0: if script.text is None or script.text.find('\nCL(0') < 0:
@ -73,15 +74,6 @@ class HistoryPage(BasePage):
for m in re.finditer(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)','([\d -\.,]+)','\d+','\d+','[\w\s]+'\);", script.text, flags=re.MULTILINE): for m in re.finditer(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)','([\d -\.,]+)','\d+','\d+','[\w\s]+'\);", script.text, flags=re.MULTILINE):
op = Transaction(m.group(1)) op = Transaction(m.group(1))
op.raw = m.group(4) op.parse(date=m.group(3), raw=m.group(4))
for pattern, _type, _label in self.LABEL_PATTERNS: op.set_amount(m.group(5))
mm = pattern.match(op.raw)
if mm:
op.type = _type
op.label = re.sub('[ ]+', ' ', _label % mm.groupdict()).strip()
break
op.amount = float(m.group(5).replace('.','').replace(',','.').replace(' ', '').strip(u' \t\u20ac\xa0\n\r'))
op.date = date(*reversed([int(x) for x in m.group(3).split('/')]))
op.category = NotAvailable
yield op yield op

View file

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2009-2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
from weboob.capabilities.bank import Transaction
from weboob.capabilities import NotAvailable
from weboob.tools.misc import to_unicode
__all__ = ['FrenchTransaction']
class FrenchTransaction(Transaction):
    """
    Transaction with helpers to parse the statements of French banks.

    Subclasses set the PATTERNS class attribute to a list of
    (compiled regexp, transaction type) tuples; see parse() for the named
    groups that are understood.
    """
    PATTERNS = []

    def clean_amount(self, text):
        """
        Clean a string containing an amount.

        Spaces (regular and non-breaking) and dots are removed, the decimal
        comma becomes a dot, and surrounding whitespace or euro signs are
        stripped, so that the result can be given to float().

        None is treated as an empty string, which allows passing the text of
        an empty table cell as is.
        """
        if text is None:
            return u''
        # Non-breaking spaces are removed everywhere, not only at both ends,
        # because float() refuses them inside a number.
        return text.replace(u'\xa0', u'').replace(' ', '').replace('.', '') \
                   .replace(',', '.').strip(u' \t\u20ac\xa0\n\r')

    def set_amount(self, credit='', debit=''):
        """
        Set an amount value from a string.

        Can take two strings if there are both credit and debit
        columns. A non-empty debit wins and is stored negated.

        Raises ValueError if the string which is used is not a number
        (including when both credit and debit are empty).
        """
        credit = self.clean_amount(credit)
        debit = self.clean_amount(debit)

        if debit:
            self.amount = - float(debit)
        else:
            self.amount = float(credit)

    def _guess_year(self, mm, dd):
        """
        Return the most recent year in which dd/mm is a valid date that is
        not in the future.

        Raises ValueError if there is no such year (for example month 13).
        """
        today = datetime.date.today()
        # Two leap years are at most 8 years apart, so this range is enough
        # to find a year in which 29/02 exists.
        for year in range(today.year, today.year - 9, -1):
            try:
                candidate = datetime.date(year, mm, dd)
            except ValueError:
                continue
            if candidate <= today:
                return year
        raise ValueError('invalid day/month in label: %02d/%02d' % (dd, mm))

    def parse(self, date, raw):
        """
        Parse date and raw strings to create datetime.date objects,
        determine the type of transaction, and create a simplified label.

        date can be a datetime.date, a datetime.datetime, or a string
        formatted as 'ddmmyyyy' or 'dd/mm/yyyy'. A string in any other
        format is stored unchanged.

        When calling this method, you should have defined patterns (in the
        PATTERNS class attribute) with a list containing tuples of regexp
        and the associated type, for example:

        PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                    (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
                    (re.compile('^(?P<text>.*) CARTE \\d+ PAIEMENT CB (?P<dd>\\d{2})(?P<mm>\\d{2}) ?(.*)$'),
                     FrenchTransaction.TYPE_CARD)
                   ]

        In regexps, you can define these named groups:

        - text: part of label to store in simplified label
        - yy, mm, dd, HH, MM: date and time parts

        Only the first matching pattern is used. Raises ValueError if the
        date parts found in the label do not form a valid date.
        """
        if not isinstance(date, (datetime.date, datetime.datetime)):
            # Surrounding whitespace (text taken from a table cell) must not
            # prevent the recognition of the 'ddmmyyyy' form.
            date = date.strip()
            if date.isdigit() and len(date) == 8:
                date = datetime.date(int(date[4:8]), int(date[2:4]), int(date[0:2]))
            elif '/' in date:
                date = datetime.date(*reversed([int(x) for x in date.split('/')]))

        self.date = date
        self.rdate = date
        self.raw = to_unicode(re.sub(u'[ ]+', u' ', raw.replace(u'\n', u' ')).strip())
        self.category = NotAvailable

        # Default label, used when no pattern matches or when the matching
        # pattern has no 'text' group.
        if ' ' in self.raw:
            self.category, _sep, self.label = [part.strip() for part in self.raw.partition(' ')]
        else:
            self.label = self.raw

        for pattern, _type in self.PATTERNS:
            m = pattern.match(self.raw)
            if not m:
                continue

            # groupdict() has a key for every named group of the pattern,
            # with None for optional groups which did not take part in the
            # match: test the values rather than the presence of the keys.
            args = m.groupdict()

            self.type = _type
            if args.get('text') is not None:
                self.label = args['text'].strip()

            # Set date from information in raw label.
            if args.get('dd') is not None and args.get('mm') is not None:
                dd = int(args['dd'])
                mm = int(args['mm'])

                if args.get('yy') is not None:
                    yy = int(args['yy'])
                    if yy < 100:
                        yy += 2000
                else:
                    yy = self._guess_year(mm, dd)

                if args.get('HH') is not None and args.get('MM') is not None:
                    self.rdate = datetime.datetime(yy, mm, dd, int(args['HH']), int(args['MM']))
                else:
                    self.rdate = datetime.date(yy, mm, dd)

            return