Add a FrenchTransaction class to factor out the parsing code common to French banks
This commit is contained in:
parent
e6d5fd019f
commit
2675510f32
5 changed files with 188 additions and 112 deletions
|
|
@ -19,54 +19,31 @@
|
|||
|
||||
|
||||
import re
|
||||
from datetime import date
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.capabilities.bank import Transaction
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
|
||||
|
||||
|
||||
__all__ = ['AccountHistory', 'AccountComing']
|
||||
|
||||
|
||||
class TransactionsBasePage(BasePage):
|
||||
LABEL_PATTERNS = [(re.compile(u'^CHEQUEN°(?P<no>.*)'),
|
||||
Transaction.TYPE_CHECK, u'N°%(no)s'),
|
||||
(re.compile('^FACTURE CARTE DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'),
|
||||
Transaction.TYPE_CARD, u'20%(yy)s-%(mm)s-%(dd)s: %(text)s'),
|
||||
(re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P<text>.*)'),
|
||||
Transaction.TYPE_ORDER, '%(text)s'),
|
||||
(re.compile('^ECHEANCEPRET(?P<text>.*)'),
|
||||
Transaction.TYPE_LOAN_PAYMENT, u'n°%(text)s'),
|
||||
(re.compile('^RETRAIT DAB (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2}) (?P<HH>\d+)H(?P<MM>\d+) (?P<text>.*)'),
|
||||
Transaction.TYPE_WITHDRAWAL, u'20%(yy)s-%(mm)s-%(dd)s %(HH)s:%(MM)s: %(text)s'),
|
||||
(re.compile('^VIR(EMEN)?T (?P<text>.*)'),
|
||||
Transaction.TYPE_TRANSFER, u'%(text)s'),
|
||||
(re.compile('^REMBOURST (?P<text>.*)'),
|
||||
Transaction.TYPE_PAYBACK, '%(text)s'),
|
||||
(re.compile('^COMMISSIONS (?P<text>.*)'),
|
||||
Transaction.TYPE_BANK, '%(text)s'),
|
||||
(re.compile('^(?P<text>REMUNERATION.*)'),
|
||||
Transaction.TYPE_BANK, '%(text)s'),
|
||||
(re.compile('^REMISE CHEQUES(?P<text>.*)'),
|
||||
Transaction.TYPE_DEPOSIT, '%(text)s'),
|
||||
]
|
||||
class Transaction(FrenchTransaction):
    """
    Transaction whose raw label is classified with the patterns below.

    Each entry pairs a compiled regexp with the FrenchTransaction.TYPE_*
    constant to assign when it matches. FrenchTransaction.parse() tries the
    entries in order and stops at the first match; it reads the named groups
    ``text`` (simplified label) and ``dd``, ``mm``, ``yy``, ``HH``, ``MM``
    (date and time of the operation).
    """
    # Raw strings are used wherever a pattern contains a backslash, so that
    # regexp escapes such as \d are never interpreted as string escapes.
    PATTERNS = [
        (re.compile(u'^CHEQUE(?P<text>.*)'),
         FrenchTransaction.TYPE_CHECK),
        (re.compile(r'^FACTURE CARTE DU (?P<dd>\d{2})(?P<mm>\d{2})(?P<yy>\d{2}) (?P<text>.*)'),
         FrenchTransaction.TYPE_CARD),
        (re.compile(r'^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P<text>.*)'),
         FrenchTransaction.TYPE_ORDER),
        (re.compile(r'^ECHEANCEPRET(?P<text>.*)'),
         FrenchTransaction.TYPE_LOAN_PAYMENT),
        (re.compile(r'^RETRAIT DAB (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2}) (?P<HH>\d+)H(?P<MM>\d+) (?P<text>.*)'),
         FrenchTransaction.TYPE_WITHDRAWAL),
        (re.compile(r'^VIR(EMEN)?T? (?P<text>.*)'),
         FrenchTransaction.TYPE_TRANSFER),
        (re.compile(r'^REMBOURST (?P<text>.*)'),
         FrenchTransaction.TYPE_PAYBACK),
        (re.compile(r'^COMMISSIONS (?P<text>.*)'),
         FrenchTransaction.TYPE_BANK),
        # The whole label, keyword included, is kept as text here.
        (re.compile(r'^(?P<text>REMUNERATION.*)'),
         FrenchTransaction.TYPE_BANK),
        (re.compile(r'^REMISE CHEQUES(?P<text>.*)'),
         FrenchTransaction.TYPE_DEPOSIT),
    ]
|
||||
|
||||
def parse_text(self, op):
|
||||
op.category = NotAvailable
|
||||
if ' ' in op.raw:
|
||||
op.category, useless, op.label = [part.strip() for part in op.label.partition(' ')]
|
||||
else:
|
||||
op.label = op.raw
|
||||
|
||||
for pattern, _type, _label in self.LABEL_PATTERNS:
|
||||
m = pattern.match(op.raw)
|
||||
if m:
|
||||
op.type = _type
|
||||
op.label = (_label % m.groupdict()).strip()
|
||||
return
|
||||
|
||||
class AccountHistory(TransactionsBasePage):
|
||||
class AccountHistory(BasePage):
|
||||
def iter_operations(self):
|
||||
for tr in self.document.xpath('//table[@id="tableCompte"]//tr'):
|
||||
if len(tr.xpath('td[@class="debit"]')) == 0:
|
||||
|
|
@ -74,21 +51,17 @@ class AccountHistory(TransactionsBasePage):
|
|||
|
||||
id = tr.find('td').find('input').attrib['value']
|
||||
op = Transaction(id)
|
||||
op.raw = tr.findall('td')[2].text.replace(u'\xa0', u'').strip()
|
||||
op.date = date(*reversed([int(x) for x in tr.findall('td')[1].text.split('/')]))
|
||||
op.parse(date=tr.findall('td')[1].text,
|
||||
raw=tr.findall('td')[2].text.replace(u'\xa0', u''))
|
||||
|
||||
self.parse_text(op)
|
||||
debit = tr.xpath('.//td[@class="debit"]')[0].text
|
||||
credit = tr.xpath('.//td[@class="credit"]')[0].text
|
||||
|
||||
debit = tr.xpath('.//td[@class="debit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0€\n\r')
|
||||
credit = tr.xpath('.//td[@class="credit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0€\n\r')
|
||||
if len(debit) > 0:
|
||||
op.amount = - float(debit)
|
||||
else:
|
||||
op.amount = float(credit)
|
||||
op.set_amount(credit, debit)
|
||||
|
||||
yield op
|
||||
|
||||
class AccountComing(TransactionsBasePage):
|
||||
class AccountComing(BasePage):
|
||||
def iter_operations(self):
|
||||
i = 0
|
||||
for tr in self.document.xpath('//table[@id="tableauOperations"]//tr'):
|
||||
|
|
@ -96,20 +69,16 @@ class AccountComing(TransactionsBasePage):
|
|||
tds = tr.findall('td')
|
||||
if len(tds) != 3:
|
||||
continue
|
||||
d = tr.attrib['dateop']
|
||||
d = date(int(d[4:8]), int(d[2:4]), int(d[0:2]))
|
||||
|
||||
text = tds[1].text or u''
|
||||
text = text.replace(u'\xa0', u'')
|
||||
for child in tds[1].getchildren():
|
||||
if child.text: text += child.text
|
||||
if child.tail: text += child.tail
|
||||
|
||||
amount = tds[2].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0€\n\r')
|
||||
|
||||
i += 1
|
||||
operation = Transaction(i)
|
||||
operation.date = d
|
||||
operation.raw = text.strip()
|
||||
self.parse_text(operation)
|
||||
operation.amount = float(amount)
|
||||
operation.parse(date=tr.attrib['dateop'],
|
||||
raw=text)
|
||||
operation.set_amount(tds[2].text)
|
||||
yield operation
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Julien Veyssier
|
||||
# Copyright(C) 2010-2012 Julien Veyssier
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
|
|
@ -19,12 +19,10 @@
|
|||
|
||||
|
||||
import re
|
||||
from datetime import date
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.tools.misc import to_unicode
|
||||
from weboob.capabilities.bank import Account
|
||||
from weboob.capabilities.bank import Transaction
|
||||
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
|
||||
|
||||
class LoginPage(BasePage):
|
||||
def login(self, login, passwd):
|
||||
|
|
@ -53,7 +51,7 @@ class AccountsPage(BasePage):
|
|||
first_td = tr.getchildren()[0]
|
||||
if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g':
|
||||
account = Account()
|
||||
account.label = u"%s"%first_td.find('a').text.strip()
|
||||
account.label = u"%s"%first_td.find('a').text.strip().lstrip(' 0123456789')
|
||||
account._link_id = first_td.find('a').get('href', '')
|
||||
if account._link_id.startswith('POR_SyntheseLst'):
|
||||
continue
|
||||
|
|
@ -81,19 +79,20 @@ class AccountsPage(BasePage):
|
|||
""" TODO pouvoir passer à la page des comptes suivante """
|
||||
return 0
|
||||
|
||||
class Transaction(FrenchTransaction):
    """
    Transaction whose raw label is classified with the patterns below.

    Each entry pairs a compiled regexp with the FrenchTransaction.TYPE_*
    constant to assign when it matches. FrenchTransaction.parse() tries the
    entries in order and stops at the first match; it reads the named groups
    ``text`` (simplified label) and ``dd``, ``mm`` (day and month of the
    operation). Unnamed groups are ignored by parse().
    """
    # Raw strings are used so that regexp escapes such as \d and \. are
    # never interpreted as string escapes.
    PATTERNS = [
        (re.compile(r'^VIR(EMENT)? (?P<text>.*)'),
         FrenchTransaction.TYPE_TRANSFER),
        (re.compile(r'^PRLV (?P<text>.*)'),
         FrenchTransaction.TYPE_ORDER),
        (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
         FrenchTransaction.TYPE_CARD),
        (re.compile(r'^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
         FrenchTransaction.TYPE_WITHDRAWAL),
        # No text group: parse() keeps the label it derived from the raw string.
        (re.compile(r'^CHEQUE$'),
         FrenchTransaction.TYPE_CHECK),
        (re.compile(r'^COTIS\.? (?P<text>.*)'),
         FrenchTransaction.TYPE_BANK),
        (re.compile(r'^REMISE (?P<text>.*)'),
         FrenchTransaction.TYPE_DEPOSIT),
    ]
|
||||
|
||||
|
||||
class OperationsPage(BasePage):
|
||||
LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'),
|
||||
(re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'),
|
||||
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
|
||||
Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'),
|
||||
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
|
||||
Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'),
|
||||
(re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'),
|
||||
(re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'),
|
||||
(re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'),
|
||||
]
|
||||
|
||||
|
||||
def get_history(self):
|
||||
index = 0
|
||||
for tr in self.document.getiterator('tr'):
|
||||
|
|
@ -107,9 +106,6 @@ class OperationsPage(BasePage):
|
|||
operation = Transaction(index)
|
||||
index += 1
|
||||
|
||||
d = tds[0].text.strip().split('/')
|
||||
operation.date = date(*reversed([int(x) for x in d]))
|
||||
|
||||
# Find different parts of label
|
||||
parts = []
|
||||
if len(tds[-3].findall('a')) > 0:
|
||||
|
|
@ -124,15 +120,8 @@ class OperationsPage(BasePage):
|
|||
if parts[0].startswith('PAIEMENT CB'):
|
||||
parts.reverse()
|
||||
|
||||
operation.raw = to_unicode(re.sub(u'[ ]+', u' ', u' '.join(parts).replace(u'\n', u' ')))
|
||||
|
||||
# Categorization
|
||||
for pattern, _type, _label in self.LABEL_PATTERNS:
|
||||
mm = pattern.match(operation.raw)
|
||||
if mm:
|
||||
operation.type = _type
|
||||
operation.label = to_unicode(_label % mm.groupdict()).strip()
|
||||
break
|
||||
operation.parse(date=tds[0].text,
|
||||
raw=u' '.join(parts))
|
||||
|
||||
if tds[-1].text is not None and len(tds[-1].text) > 2:
|
||||
s = tds[-1].text.strip()
|
||||
|
|
|
|||
|
|
@ -19,11 +19,11 @@
|
|||
|
||||
|
||||
import re
|
||||
from datetime import date
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.capabilities.bank import Account, Transaction
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.capabilities.bank import Account
|
||||
from weboob.capabilities import NotAvailable
|
||||
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
|
||||
|
||||
|
||||
__all__ = ['AccountsListPage']
|
||||
|
|
@ -54,18 +54,19 @@ class AccountsListPage(BasePage):
|
|||
|
||||
yield account
|
||||
|
||||
class HistoryPage(BasePage):
|
||||
LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'),
|
||||
(re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'),
|
||||
(re.compile('^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'),
|
||||
Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'),
|
||||
(re.compile('^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) (?P<text>.*)'),
|
||||
Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'),
|
||||
(re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'),
|
||||
(re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'),
|
||||
(re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'),
|
||||
]
|
||||
class Transaction(FrenchTransaction):
    """
    Transaction whose raw label is classified with the patterns below.

    Each entry pairs a compiled regexp with the FrenchTransaction.TYPE_*
    constant to assign when it matches. FrenchTransaction.parse() tries the
    entries in order and stops at the first match; it reads the named groups
    ``text`` (simplified label) and ``dd``, ``mm`` (day and month of the
    operation). The ``loc`` group of the card pattern is captured but not
    read by parse().
    """
    # Raw strings are used so that regexp escapes such as \d, \s and \. are
    # never interpreted as string escapes.
    PATTERNS = [
        (re.compile(r'^VIR(EMENT)? (?P<text>.*)'),
         FrenchTransaction.TYPE_TRANSFER),
        (re.compile(r'^PRLV (?P<text>.*)'),
         FrenchTransaction.TYPE_ORDER),
        (re.compile(r'^CB (?P<text>.*)\s+(?P<dd>\d+)/(?P<mm>\d+)\s*(?P<loc>.*)'),
         FrenchTransaction.TYPE_CARD),
        (re.compile(r'^DAB (?P<dd>\d{2})/(?P<mm>\d{2}) (?P<text>.*)'),
         FrenchTransaction.TYPE_WITHDRAWAL),
        # No text group: parse() keeps the label it derived from the raw string.
        (re.compile(r'^CHEQUE$'),
         FrenchTransaction.TYPE_CHECK),
        (re.compile(r'^COTIS\.? (?P<text>.*)'),
         FrenchTransaction.TYPE_BANK),
        (re.compile(r'^REMISE (?P<text>.*)'),
         FrenchTransaction.TYPE_DEPOSIT),
    ]
|
||||
|
||||
class HistoryPage(BasePage):
|
||||
def get_operations(self):
|
||||
for script in self.document.getiterator('script'):
|
||||
if script.text is None or script.text.find('\nCL(0') < 0:
|
||||
|
|
@ -73,15 +74,6 @@ class HistoryPage(BasePage):
|
|||
|
||||
for m in re.finditer(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)','([\d -\.,]+)','\d+','\d+','[\w\s]+'\);", script.text, flags=re.MULTILINE):
|
||||
op = Transaction(m.group(1))
|
||||
op.raw = m.group(4)
|
||||
for pattern, _type, _label in self.LABEL_PATTERNS:
|
||||
mm = pattern.match(op.raw)
|
||||
if mm:
|
||||
op.type = _type
|
||||
op.label = re.sub('[ ]+', ' ', _label % mm.groupdict()).strip()
|
||||
break
|
||||
|
||||
op.amount = float(m.group(5).replace('.','').replace(',','.').replace(' ', '').strip(u' \t\u20ac\xa0€\n\r'))
|
||||
op.date = date(*reversed([int(x) for x in m.group(3).split('/')]))
|
||||
op.category = NotAvailable
|
||||
op.parse(date=m.group(3), raw=m.group(4))
|
||||
op.set_amount(m.group(5))
|
||||
yield op
|
||||
|
|
|
|||
0
weboob/tools/capabilities/bank/__init__.py
Normal file
0
weboob/tools/capabilities/bank/__init__.py
Normal file
126
weboob/tools/capabilities/bank/transactions.py
Normal file
126
weboob/tools/capabilities/bank/transactions.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2009-2012 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import re
|
||||
import datetime
|
||||
|
||||
from weboob.capabilities.bank import Transaction
|
||||
from weboob.capabilities import NotAvailable
|
||||
from weboob.tools.misc import to_unicode
|
||||
|
||||
|
||||
__all__ = ['FrenchTransaction']
|
||||
|
||||
|
||||
class FrenchTransaction(Transaction):
    """
    Transaction with parsing helpers shared by French bank modules.

    Subclasses fill PATTERNS with (compiled regexp, transaction type)
    tuples; parse() tries them in order against the raw label.
    """
    # List of (compiled regexp, TYPE_* constant) tuples used by parse().
    PATTERNS = []

    def clean_amount(self, text):
        """
        Clean a string containing an amount.

        Spaces and '.' are removed, ',' is turned into the decimal point,
        then surrounding whitespace and euro signs are stripped.

        :param text: amount as found on the page; None is treated as an
                     empty string (e.g. the text of an empty cell)
        :returns: the cleaned string, possibly empty
        """
        if text is None:
            return ''
        return text.replace(' ', '').replace('.', '') \
                   .replace(',', '.').strip(u' \t\u20ac\xa0€\n\r')

    def set_amount(self, credit='', debit=''):
        """
        Set an amount value from a string.

        Can take two strings if there are both credit and debit
        columns. When the cleaned debit string is not empty, the amount is
        its negated value; otherwise the credit string is used as is.

        :raises ValueError: if the string used is not a valid number, which
                            includes the case where both are empty
        """
        credit = self.clean_amount(credit)
        debit = self.clean_amount(debit)

        if debit:
            self.amount = - float(debit)
        else:
            self.amount = float(credit)

    def parse(self, date, raw):
        r"""
        Parse date and raw strings to create datetime.date objects,
        determine the type of transaction, and create a simplified label.

        When calling this method, you should have defined patterns (in the
        PATTERNS class attribute) with a list containing tuples of regexp
        and the associated type, for example:

        PATTERNS = [(re.compile(r'^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                    (re.compile(r'^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
                    (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
                     FrenchTransaction.TYPE_CARD)
                   ]

        In regexps, you can define these named groups:
        - text: part of label to store in simplified label
        - yy, mm, dd, HH, MM: date and time parts

        Only the first matching pattern is applied.

        :param date: a datetime.date/datetime.datetime object, or a string
                     made of 8 digits (DDMMYYYY) or of '/'-separated parts
                     (day/month/year); any other string is stored unchanged
        :param raw: raw label of the transaction
        """
        if not isinstance(date, (datetime.date, datetime.datetime)):
            if date.isdigit() and len(date) == 8:
                date = datetime.date(int(date[4:8]), int(date[2:4]), int(date[0:2]))
            elif '/' in date:
                date = datetime.date(*reversed([int(x) for x in date.split('/')]))

        self.date = date
        self.rdate = date
        # Newlines become spaces and runs of spaces are collapsed.
        self.raw = to_unicode(re.sub(u'[ ]+', u' ', raw.replace(u'\n', u' ')).strip())
        self.category = NotAvailable

        # Default split: what precedes the first separator is the category,
        # the remainder is the label. A matching pattern may replace the label.
        if ' ' in self.raw:
            self.category, useless, self.label = [part.strip() for part in self.raw.partition(' ')]
        else:
            self.label = self.raw

        for pattern, _type in self.PATTERNS:
            m = pattern.match(self.raw)
            if not m:
                continue

            # groupdict() maps optional groups that did not take part in the
            # match to None; drop them so that a membership test really means
            # "this part was found in the label".
            args = dict((key, value) for key, value in m.groupdict().items()
                        if value is not None)
            self.type = _type
            if 'text' in args:
                self.label = args['text'].strip()

            # Set date from information in raw label.
            # Both parts are required: "'dd' and 'mm' in args" would only
            # test 'mm' and fail with KeyError on a pattern without 'dd'.
            if 'dd' in args and 'mm' in args:
                dd = int(args['dd'])
                mm = int(args['mm'])

                if 'yy' in args:
                    yy = int(args['yy'])
                else:
                    # No year in the label: take the current year, or the
                    # previous one when that day/month does not exist this
                    # year or would be in the future.
                    d = datetime.date.today()
                    try:
                        d = d.replace(month=mm, day=dd)
                    except ValueError:
                        d = d.replace(year=d.year-1, month=mm, day=dd)

                    yy = d.year
                    if d > datetime.date.today():
                        yy -= 1

                # Two-digit years are taken to be in the 2000s.
                if yy < 100:
                    yy += 2000

                if 'HH' in args and 'MM' in args:
                    self.rdate = datetime.datetime(yy, mm, dd, int(args['HH']), int(args['MM']))
                else:
                    self.rdate = datetime.date(yy, mm, dd)

            return
|
||||
Loading…
Add table
Add a link
Reference in a new issue