Add a FrenchTransaction class to factor out the transaction parsing shared by French bank modules

This commit is contained in:
Romain Bignon 2012-03-22 16:21:38 +01:00
commit 2675510f32
5 changed files with 188 additions and 112 deletions

View file

@ -19,54 +19,31 @@
import re
from datetime import date
from weboob.tools.browser import BasePage
from weboob.capabilities.bank import Transaction
from weboob.capabilities.base import NotAvailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
__all__ = ['AccountHistory', 'AccountComing']
class TransactionsBasePage(BasePage):
LABEL_PATTERNS = [(re.compile(u'^CHEQUEN°(?P<no>.*)'),
Transaction.TYPE_CHECK, u'%(no)s'),
(re.compile('^FACTURE CARTE DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'),
Transaction.TYPE_CARD, u'20%(yy)s-%(mm)s-%(dd)s: %(text)s'),
(re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P<text>.*)'),
Transaction.TYPE_ORDER, '%(text)s'),
(re.compile('^ECHEANCEPRET(?P<text>.*)'),
Transaction.TYPE_LOAN_PAYMENT, u'%(text)s'),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2}) (?P<HH>\d+)H(?P<MM>\d+) (?P<text>.*)'),
Transaction.TYPE_WITHDRAWAL, u'20%(yy)s-%(mm)s-%(dd)s %(HH)s:%(MM)s: %(text)s'),
(re.compile('^VIR(EMEN)?T (?P<text>.*)'),
Transaction.TYPE_TRANSFER, u'%(text)s'),
(re.compile('^REMBOURST (?P<text>.*)'),
Transaction.TYPE_PAYBACK, '%(text)s'),
(re.compile('^COMMISSIONS (?P<text>.*)'),
Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^(?P<text>REMUNERATION.*)'),
Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^REMISE CHEQUES(?P<text>.*)'),
Transaction.TYPE_DEPOSIT, '%(text)s'),
]
class Transaction(FrenchTransaction):
    """
    Transaction whose type and label are recognized from the raw label.

    PATTERNS is a list of (compiled regexp, transaction type) tuples that
    FrenchTransaction.parse() tries, in list order, against the raw label.
    The named groups used here are:

    - text: part of the raw label kept as the simplified label
    - dd, mm, yy, HH, MM: date and time of the operation itself
    """
    # Raw string literals are used wherever the regexp contains a backslash,
    # so that '\d' reaches the re module untouched.
    PATTERNS = [(re.compile(u'^CHEQUE(?P<text>.*)'), FrenchTransaction.TYPE_CHECK),
                (re.compile(r'^FACTURE CARTE DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'),
                 FrenchTransaction.TYPE_CARD),
                (re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P<text>.*)'),
                 FrenchTransaction.TYPE_ORDER),
                (re.compile('^ECHEANCEPRET(?P<text>.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT),
                (re.compile(r'^RETRAIT DAB (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2}) (?P<HH>\d+)H(?P<MM>\d+) (?P<text>.*)'),
                 FrenchTransaction.TYPE_WITHDRAWAL),
                (re.compile('^VIR(EMEN)?T? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                (re.compile('^REMBOURST (?P<text>.*)'), FrenchTransaction.TYPE_PAYBACK),
                (re.compile('^COMMISSIONS (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
                (re.compile('^(?P<text>REMUNERATION.*)'), FrenchTransaction.TYPE_BANK),
                (re.compile('^REMISE CHEQUES(?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
               ]
def parse_text(self, op):
op.category = NotAvailable
if ' ' in op.raw:
op.category, useless, op.label = [part.strip() for part in op.label.partition(' ')]
else:
op.label = op.raw
for pattern, _type, _label in self.LABEL_PATTERNS:
m = pattern.match(op.raw)
if m:
op.type = _type
op.label = (_label % m.groupdict()).strip()
return
class AccountHistory(TransactionsBasePage):
class AccountHistory(BasePage):
def iter_operations(self):
for tr in self.document.xpath('//table[@id="tableCompte"]//tr'):
if len(tr.xpath('td[@class="debit"]')) == 0:
@ -74,21 +51,17 @@ class AccountHistory(TransactionsBasePage):
id = tr.find('td').find('input').attrib['value']
op = Transaction(id)
op.raw = tr.findall('td')[2].text.replace(u'\xa0', u'').strip()
op.date = date(*reversed([int(x) for x in tr.findall('td')[1].text.split('/')]))
op.parse(date=tr.findall('td')[1].text,
raw=tr.findall('td')[2].text.replace(u'\xa0', u''))
self.parse_text(op)
debit = tr.xpath('.//td[@class="debit"]')[0].text
credit = tr.xpath('.//td[@class="credit"]')[0].text
debit = tr.xpath('.//td[@class="debit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0\n\r')
credit = tr.xpath('.//td[@class="credit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0\n\r')
if len(debit) > 0:
op.amount = - float(debit)
else:
op.amount = float(credit)
op.set_amount(credit, debit)
yield op
class AccountComing(TransactionsBasePage):
class AccountComing(BasePage):
def iter_operations(self):
i = 0
for tr in self.document.xpath('//table[@id="tableauOperations"]//tr'):
@ -96,20 +69,16 @@ class AccountComing(TransactionsBasePage):
tds = tr.findall('td')
if len(tds) != 3:
continue
d = tr.attrib['dateop']
d = date(int(d[4:8]), int(d[2:4]), int(d[0:2]))
text = tds[1].text or u''
text = text.replace(u'\xa0', u'')
for child in tds[1].getchildren():
if child.text: text += child.text
if child.tail: text += child.tail
amount = tds[2].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0\n\r')
i += 1
operation = Transaction(i)
operation.date = d
operation.raw = text.strip()
self.parse_text(operation)
operation.amount = float(amount)
operation.parse(date=tr.attrib['dateop'],
raw=text)
operation.set_amount(tds[2].text)
yield operation

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Julien Veyssier
# Copyright(C) 2010-2012 Julien Veyssier
#
# This file is part of weboob.
#
@ -19,12 +19,10 @@
import re
from datetime import date
from weboob.tools.browser import BasePage
from weboob.tools.misc import to_unicode
from weboob.capabilities.bank import Account
from weboob.capabilities.bank import Transaction
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
class LoginPage(BasePage):
def login(self, login, passwd):
@ -53,7 +51,7 @@ class AccountsPage(BasePage):
first_td = tr.getchildren()[0]
if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g':
account = Account()
account.label = u"%s"%first_td.find('a').text.strip()
account.label = u"%s"%first_td.find('a').text.strip().lstrip(' 0123456789')
account._link_id = first_td.find('a').get('href', '')
if account._link_id.startswith('POR_SyntheseLst'):
continue
@ -81,19 +79,20 @@ class AccountsPage(BasePage):
""" TODO pouvoir passer à la page des comptes suivante """
return 0
class Transaction(FrenchTransaction):
    """
    Transaction whose type and label are recognized from the raw label.

    PATTERNS is a list of (compiled regexp, transaction type) tuples that
    FrenchTransaction.parse() tries, in list order, against the raw label.
    The named groups used here are:

    - text: part of the raw label kept as the simplified label
    - dd, mm: day and month of the operation itself (no year in the label)
    """
    # Raw string literals are used wherever the regexp contains a backslash,
    # so that '\d' and '\.' reach the re module untouched.
    PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
                (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
                 FrenchTransaction.TYPE_CARD),
                (re.compile(r'^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
                 FrenchTransaction.TYPE_WITHDRAWAL),
                (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK),
                (re.compile(r'^COTIS\.? (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
                (re.compile('^REMISE (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
               ]
class OperationsPage(BasePage):
LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'),
(re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'),
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'),
(re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'),
(re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'),
]
def get_history(self):
index = 0
for tr in self.document.getiterator('tr'):
@ -107,9 +106,6 @@ class OperationsPage(BasePage):
operation = Transaction(index)
index += 1
d = tds[0].text.strip().split('/')
operation.date = date(*reversed([int(x) for x in d]))
# Find different parts of label
parts = []
if len(tds[-3].findall('a')) > 0:
@ -124,15 +120,8 @@ class OperationsPage(BasePage):
if parts[0].startswith('PAIEMENT CB'):
parts.reverse()
operation.raw = to_unicode(re.sub(u'[ ]+', u' ', u' '.join(parts).replace(u'\n', u' ')))
# Categorization
for pattern, _type, _label in self.LABEL_PATTERNS:
mm = pattern.match(operation.raw)
if mm:
operation.type = _type
operation.label = to_unicode(_label % mm.groupdict()).strip()
break
operation.parse(date=tds[0].text,
raw=u' '.join(parts))
if tds[-1].text is not None and len(tds[-1].text) > 2:
s = tds[-1].text.strip()

View file

@ -19,11 +19,11 @@
import re
from datetime import date
from weboob.tools.browser import BasePage
from weboob.capabilities.bank import Account, Transaction
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.bank import Account
from weboob.capabilities import NotAvailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
__all__ = ['AccountsListPage']
@ -54,18 +54,19 @@ class AccountsListPage(BasePage):
yield account
class HistoryPage(BasePage):
LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'),
(re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'),
(re.compile('^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'),
Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'),
(re.compile('^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) (?P<text>.*)'),
Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'),
(re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'),
(re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'),
(re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'),
]
class Transaction(FrenchTransaction):
    """
    Transaction whose type and label are recognized from the raw label.

    PATTERNS is a list of (compiled regexp, transaction type) tuples that
    FrenchTransaction.parse() tries, in list order, against the raw label.
    The named groups used here are:

    - text: part of the raw label kept as the simplified label
    - dd, mm: day and month of the operation itself (no year in the label)
    - loc: trailing part of a card label; captured by the regexp only
    """
    # Raw string literals are used wherever the regexp contains a backslash,
    # so that '\s', '\d' and '\.' reach the re module untouched.
    PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                (re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
                (re.compile(r'^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'),
                 FrenchTransaction.TYPE_CARD),
                (re.compile(r'^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) (?P<text>.*)'),
                 FrenchTransaction.TYPE_WITHDRAWAL),
                (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK),
                (re.compile(r'^COTIS\.? (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
                (re.compile('^REMISE (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
               ]
class HistoryPage(BasePage):
def get_operations(self):
for script in self.document.getiterator('script'):
if script.text is None or script.text.find('\nCL(0') < 0:
@ -73,15 +74,6 @@ class HistoryPage(BasePage):
for m in re.finditer(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)','([\d -\.,]+)','\d+','\d+','[\w\s]+'\);", script.text, flags=re.MULTILINE):
op = Transaction(m.group(1))
op.raw = m.group(4)
for pattern, _type, _label in self.LABEL_PATTERNS:
mm = pattern.match(op.raw)
if mm:
op.type = _type
op.label = re.sub('[ ]+', ' ', _label % mm.groupdict()).strip()
break
op.amount = float(m.group(5).replace('.','').replace(',','.').replace(' ', '').strip(u' \t\u20ac\xa0\n\r'))
op.date = date(*reversed([int(x) for x in m.group(3).split('/')]))
op.category = NotAvailable
op.parse(date=m.group(3), raw=m.group(4))
op.set_amount(m.group(5))
yield op

View file

@ -0,0 +1,126 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2009-2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import datetime
from weboob.capabilities.bank import Transaction
from weboob.capabilities import NotAvailable
from weboob.tools.misc import to_unicode
__all__ = ['FrenchTransaction']
class FrenchTransaction(Transaction):
    """
    Transaction with helpers to parse the amounts, dates and labels found
    on bank websites.

    Subclasses are expected to override PATTERNS with a list of
    (compiled regexp, transaction type) tuples; see parse().
    """
    PATTERNS = []

    def clean_amount(self, text):
        """
        Clean a string containing an amount.

        Spaces and '.' characters are removed, ',' is turned into the
        decimal point, then surrounding whitespace, euro signs and
        non-breaking spaces are stripped.

        None (e.g. the text of an empty cell) is treated as an empty string.
        """
        if text is None:
            return u''

        return text.replace(' ', '').replace('.', '') \
                   .replace(',', '.').strip(u' \t\u20ac\xa0\n\r')

    def set_amount(self, credit='', debit=''):
        """
        Set an amount value from a string.

        Can take two strings if there are both credit and debit
        columns. A non-empty debit wins and is stored as a negative
        amount; otherwise the credit is stored as is.

        Raises ValueError if the selected string is not a number once
        cleaned (this includes the case where both are empty).
        """
        credit = self.clean_amount(credit)
        debit = self.clean_amount(debit)

        if debit:
            self.amount = -float(debit)
        else:
            self.amount = float(credit)

    def parse(self, date, raw):
        r"""
        Parse date and raw strings to create datetime.date objects,
        determine the type of transaction, and create a simplified label.

        'date' is either a date/datetime object, a 'DDMMYYYY' string or a
        'DD/MM/YYYY' string. A string in any other format is stored
        unchanged.

        When calling this method, you should have defined patterns (in the
        PATTERNS class attribute) with a list containing tuples of regexp
        and the associated type, for example:

        PATTERNS = [(re.compile(r'^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                    (re.compile(r'^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
                    (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
                     FrenchTransaction.TYPE_CARD)
                   ]

        Patterns are tried in order and only the first one matching the
        raw label is used.

        In regexps, you can define these named groups:

        - text: part of label to store in simplified label
        - yy, mm, dd, HH, MM: date and time parts
        """
        if not isinstance(date, (datetime.date, datetime.datetime)):
            # Surrounding whitespace would defeat the isdigit() test below.
            date = date.strip()
            if date.isdigit() and len(date) == 8:
                date = datetime.date(int(date[4:8]), int(date[2:4]), int(date[0:2]))
            elif '/' in date:
                date = datetime.date(*reversed([int(x) for x in date.split('/')]))

        self.date = date
        # Default value, overridden below when the label carries its own date.
        self.rdate = date
        self.raw = to_unicode(re.sub(u'[ ]+', u' ', raw.replace(u'\n', u' ')).strip())
        self.category = NotAvailable

        # Fallback used when no pattern matches: the first word becomes the
        # category and the remainder the label.
        if ' ' in self.raw:
            self.category, _sep, self.label = [part.strip() for part in self.raw.partition(' ')]
        else:
            self.label = self.raw

        for pattern, _type in self.PATTERNS:
            m = pattern.match(self.raw)
            if not m:
                continue

            # groupdict() maps a named group that did not take part in the
            # match to None, hence the 'is not None' tests below.
            args = m.groupdict()

            self.type = _type
            if args.get('text') is not None:
                self.label = args['text'].strip()

            # Set date from information in raw label.
            if args.get('dd') is not None and args.get('mm') is not None:
                dd = int(args['dd'])
                mm = int(args['mm'])

                if args.get('yy') is not None:
                    yy = int(args['yy'])
                else:
                    # No year in the label: take the current year, or the
                    # previous one if the day does not exist this year or
                    # would lie in the future.
                    today = datetime.date.today()
                    try:
                        d = today.replace(month=mm, day=dd)
                    except ValueError:
                        d = today.replace(year=today.year - 1, month=mm, day=dd)

                    yy = d.year
                    if d > today:
                        yy -= 1

                # Two-digit years are taken as 20xx.
                if yy < 100:
                    yy += 2000

                if args.get('HH') is not None and args.get('MM') is not None:
                    self.rdate = datetime.datetime(yy, mm, dd, int(args['HH']), int(args['MM']))
                else:
                    self.rdate = datetime.date(yy, mm, dd)

            return