diff --git a/modules/bnporc/pages/transactions.py b/modules/bnporc/pages/transactions.py index ffc98828..944c7e2b 100644 --- a/modules/bnporc/pages/transactions.py +++ b/modules/bnporc/pages/transactions.py @@ -19,54 +19,31 @@ import re -from datetime import date from weboob.tools.browser import BasePage -from weboob.capabilities.bank import Transaction -from weboob.capabilities.base import NotAvailable +from weboob.tools.capabilities.bank.transactions import FrenchTransaction __all__ = ['AccountHistory', 'AccountComing'] -class TransactionsBasePage(BasePage): - LABEL_PATTERNS = [(re.compile(u'^CHEQUEN°(?P.*)'), - Transaction.TYPE_CHECK, u'N°%(no)s'), - (re.compile('^FACTURE CARTE DU (?P
\d{2})(?P\d{2})(?P\d{2}) (?P.*)'), - Transaction.TYPE_CARD, u'20%(yy)s-%(mm)s-%(dd)s: %(text)s'), - (re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P.*)'), - Transaction.TYPE_ORDER, '%(text)s'), - (re.compile('^ECHEANCEPRET(?P.*)'), - Transaction.TYPE_LOAN_PAYMENT, u'n°%(text)s'), - (re.compile('^RETRAIT DAB (?P
\d{2})/(?P\d{2})/(?P\d{2}) (?P\d+)H(?P\d+) (?P.*)'), - Transaction.TYPE_WITHDRAWAL, u'20%(yy)s-%(mm)s-%(dd)s %(HH)s:%(MM)s: %(text)s'), - (re.compile('^VIR(EMEN)?T (?P.*)'), - Transaction.TYPE_TRANSFER, u'%(text)s'), - (re.compile('^REMBOURST (?P.*)'), - Transaction.TYPE_PAYBACK, '%(text)s'), - (re.compile('^COMMISSIONS (?P.*)'), - Transaction.TYPE_BANK, '%(text)s'), - (re.compile('^(?PREMUNERATION.*)'), - Transaction.TYPE_BANK, '%(text)s'), - (re.compile('^REMISE CHEQUES(?P.*)'), - Transaction.TYPE_DEPOSIT, '%(text)s'), - ] +class Transaction(FrenchTransaction): + PATTERNS = [(re.compile(u'^CHEQUE(?P.*)'), FrenchTransaction.TYPE_CHECK), + (re.compile('^FACTURE CARTE DU (?P
\d{2})(?P\d{2})(?P\d{2}) (?P.*)'), + FrenchTransaction.TYPE_CARD), + (re.compile('^(PRELEVEMENT|TELEREGLEMENT|TIP) (?P.*)'), + FrenchTransaction.TYPE_ORDER), + (re.compile('^ECHEANCEPRET(?P.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT), + (re.compile('^RETRAIT DAB (?P
\d{2})/(?P\d{2})/(?P\d{2}) (?P\d+)H(?P\d+) (?P.*)'), + FrenchTransaction.TYPE_WITHDRAWAL), + (re.compile('^VIR(EMEN)?T? (?P.*)'), FrenchTransaction.TYPE_TRANSFER), + (re.compile('^REMBOURST (?P.*)'), FrenchTransaction.TYPE_PAYBACK), + (re.compile('^COMMISSIONS (?P.*)'), FrenchTransaction.TYPE_BANK), + (re.compile('^(?PREMUNERATION.*)'), FrenchTransaction.TYPE_BANK), + (re.compile('^REMISE CHEQUES(?P.*)'), FrenchTransaction.TYPE_DEPOSIT), + ] - def parse_text(self, op): - op.category = NotAvailable - if ' ' in op.raw: - op.category, useless, op.label = [part.strip() for part in op.label.partition(' ')] - else: - op.label = op.raw - - for pattern, _type, _label in self.LABEL_PATTERNS: - m = pattern.match(op.raw) - if m: - op.type = _type - op.label = (_label % m.groupdict()).strip() - return - -class AccountHistory(TransactionsBasePage): +class AccountHistory(BasePage): def iter_operations(self): for tr in self.document.xpath('//table[@id="tableCompte"]//tr'): if len(tr.xpath('td[@class="debit"]')) == 0: @@ -74,21 +51,17 @@ class AccountHistory(TransactionsBasePage): id = tr.find('td').find('input').attrib['value'] op = Transaction(id) - op.raw = tr.findall('td')[2].text.replace(u'\xa0', u'').strip() - op.date = date(*reversed([int(x) for x in tr.findall('td')[1].text.split('/')])) + op.parse(date=tr.findall('td')[1].text, + raw=tr.findall('td')[2].text.replace(u'\xa0', u'')) - self.parse_text(op) + debit = tr.xpath('.//td[@class="debit"]')[0].text + credit = tr.xpath('.//td[@class="credit"]')[0].text - debit = tr.xpath('.//td[@class="debit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0€\n\r') - credit = tr.xpath('.//td[@class="credit"]')[0].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0€\n\r') - if len(debit) > 0: - op.amount = - float(debit) - else: - op.amount = float(credit) + op.set_amount(credit, debit) yield op -class AccountComing(TransactionsBasePage): +class AccountComing(BasePage): def iter_operations(self): i = 0 for tr in 
self.document.xpath('//table[@id="tableauOperations"]//tr'): @@ -96,20 +69,16 @@ class AccountComing(TransactionsBasePage): tds = tr.findall('td') if len(tds) != 3: continue - d = tr.attrib['dateop'] - d = date(int(d[4:8]), int(d[2:4]), int(d[0:2])) + text = tds[1].text or u'' text = text.replace(u'\xa0', u'') for child in tds[1].getchildren(): if child.text: text += child.text if child.tail: text += child.tail - amount = tds[2].text.replace('.','').replace(',','.').strip(u' \t\u20ac\xa0€\n\r') - i += 1 operation = Transaction(i) - operation.date = d - operation.raw = text.strip() - self.parse_text(operation) - operation.amount = float(amount) + operation.parse(date=tr.attrib['dateop'], + raw=text) + operation.set_amount(tds[2].text) yield operation diff --git a/modules/creditmutuel/pages.py b/modules/creditmutuel/pages.py index b952f9d3..d3f99b56 100644 --- a/modules/creditmutuel/pages.py +++ b/modules/creditmutuel/pages.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright(C) 2010-2011 Julien Veyssier +# Copyright(C) 2010-2012 Julien Veyssier # # This file is part of weboob. 
# @@ -19,12 +19,10 @@ import re -from datetime import date from weboob.tools.browser import BasePage -from weboob.tools.misc import to_unicode from weboob.capabilities.bank import Account -from weboob.capabilities.bank import Transaction +from weboob.tools.capabilities.bank.transactions import FrenchTransaction class LoginPage(BasePage): def login(self, login, passwd): @@ -53,7 +51,7 @@ class AccountsPage(BasePage): first_td = tr.getchildren()[0] if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g': account = Account() - account.label = u"%s"%first_td.find('a').text.strip() + account.label = u"%s"%first_td.find('a').text.strip().lstrip(' 0123456789') account._link_id = first_td.find('a').get('href', '') if account._link_id.startswith('POR_SyntheseLst'): continue @@ -81,19 +79,20 @@ class AccountsPage(BasePage): """ TODO pouvoir passer à la page des comptes suivante """ return 0 +class Transaction(FrenchTransaction): + PATTERNS = [(re.compile('^VIR(EMENT)? (?P.*)'), FrenchTransaction.TYPE_TRANSFER), + (re.compile('^PRLV (?P.*)'), FrenchTransaction.TYPE_ORDER), + (re.compile('^(?P.*) CARTE \d+ PAIEMENT CB (?P
\d{2})(?P\d{2}) ?(.*)$'), + FrenchTransaction.TYPE_CARD), + (re.compile('^RETRAIT DAB (?P
\d{2})(?P\d{2}) (?P.*) CARTE \d+'), + FrenchTransaction.TYPE_WITHDRAWAL), + (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK), + (re.compile('^COTIS\.? (?P.*)'), FrenchTransaction.TYPE_BANK), + (re.compile('^REMISE (?P.*)'), FrenchTransaction.TYPE_DEPOSIT), + ] + + class OperationsPage(BasePage): - LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P.*)'), Transaction.TYPE_TRANSFER, '%(text)s'), - (re.compile('^PRLV (?P.*)'), Transaction.TYPE_ORDER, '%(text)s'), - (re.compile('^(?P.*) CARTE \d+ PAIEMENT CB (?P
\d{2})(?P\d{2}) ?(.*)$'), - Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'), - (re.compile('^RETRAIT DAB (?P
\d{2})(?P\d{2}) (?P.*) CARTE \d+'), - Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'), - (re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'), - (re.compile('^COTIS\.? (?P.*)'), Transaction.TYPE_BANK, '%(text)s'), - (re.compile('^REMISE (?P.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'), - ] - - def get_history(self): index = 0 for tr in self.document.getiterator('tr'): @@ -107,9 +106,6 @@ class OperationsPage(BasePage): operation = Transaction(index) index += 1 - d = tds[0].text.strip().split('/') - operation.date = date(*reversed([int(x) for x in d])) - # Find different parts of label parts = [] if len(tds[-3].findall('a')) > 0: @@ -124,15 +120,8 @@ class OperationsPage(BasePage): if parts[0].startswith('PAIEMENT CB'): parts.reverse() - operation.raw = to_unicode(re.sub(u'[ ]+', u' ', u' '.join(parts).replace(u'\n', u' '))) - - # Categorization - for pattern, _type, _label in self.LABEL_PATTERNS: - mm = pattern.match(operation.raw) - if mm: - operation.type = _type - operation.label = to_unicode(_label % mm.groupdict()).strip() - break + operation.parse(date=tds[0].text, + raw=u' '.join(parts)) if tds[-1].text is not None and len(tds[-1].text) > 2: s = tds[-1].text.strip() diff --git a/modules/hsbc/pages/accounts.py b/modules/hsbc/pages/accounts.py index 8f93a1a0..6e602b6d 100644 --- a/modules/hsbc/pages/accounts.py +++ b/modules/hsbc/pages/accounts.py @@ -19,11 +19,11 @@ import re -from datetime import date from weboob.tools.browser import BasePage -from weboob.capabilities.bank import Account, Transaction -from weboob.capabilities.base import NotAvailable +from weboob.capabilities.bank import Account +from weboob.capabilities import NotAvailable +from weboob.tools.capabilities.bank.transactions import FrenchTransaction __all__ = ['AccountsListPage'] @@ -54,18 +54,19 @@ class AccountsListPage(BasePage): yield account -class HistoryPage(BasePage): - LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? 
(?P.*)'), Transaction.TYPE_TRANSFER, '%(text)s'), - (re.compile('^PRLV (?P.*)'), Transaction.TYPE_ORDER, '%(text)s'), - (re.compile('^CB (?P.*)\s+(?P
\d+)/(?P\d+)\s*(?P.*)'), - Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'), - (re.compile('^DAB (?P
\d{2})/(?P\d{2}) (?P.*)'), - Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'), - (re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'), - (re.compile('^COTIS\.? (?P.*)'), Transaction.TYPE_BANK, '%(text)s'), - (re.compile('^REMISE (?P.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'), - ] +class Transaction(FrenchTransaction): + PATTERNS = [(re.compile('^VIR(EMENT)? (?P.*)'), FrenchTransaction.TYPE_TRANSFER), + (re.compile('^PRLV (?P.*)'), FrenchTransaction.TYPE_ORDER), + (re.compile('^CB (?P.*)\s+(?P
\d+)/(?P\d+)\s*(?P.*)'), + FrenchTransaction.TYPE_CARD), + (re.compile('^DAB (?P
\d{2})/(?P\d{2}) (?P.*)'), + FrenchTransaction.TYPE_WITHDRAWAL), + (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK), + (re.compile('^COTIS\.? (?P.*)'), FrenchTransaction.TYPE_BANK), + (re.compile('^REMISE (?P.*)'), FrenchTransaction.TYPE_DEPOSIT), + ] +class HistoryPage(BasePage): def get_operations(self): for script in self.document.getiterator('script'): if script.text is None or script.text.find('\nCL(0') < 0: @@ -73,15 +74,6 @@ class HistoryPage(BasePage): for m in re.finditer(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)','([\d -\.,]+)','\d+','\d+','[\w\s]+'\);", script.text, flags=re.MULTILINE): op = Transaction(m.group(1)) - op.raw = m.group(4) - for pattern, _type, _label in self.LABEL_PATTERNS: - mm = pattern.match(op.raw) - if mm: - op.type = _type - op.label = re.sub('[ ]+', ' ', _label % mm.groupdict()).strip() - break - - op.amount = float(m.group(5).replace('.','').replace(',','.').replace(' ', '').strip(u' \t\u20ac\xa0€\n\r')) - op.date = date(*reversed([int(x) for x in m.group(3).split('/')])) - op.category = NotAvailable + op.parse(date=m.group(3), raw=m.group(4)) + op.set_amount(m.group(5)) yield op diff --git a/weboob/tools/capabilities/bank/__init__.py b/weboob/tools/capabilities/bank/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/weboob/tools/capabilities/bank/transactions.py b/weboob/tools/capabilities/bank/transactions.py new file mode 100644 index 00000000..17cbce56 --- /dev/null +++ b/weboob/tools/capabilities/bank/transactions.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2009-2012 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


import re
import datetime

from weboob.capabilities.bank import Transaction
from weboob.capabilities import NotAvailable
from weboob.tools.misc import to_unicode


__all__ = ['FrenchTransaction']


class FrenchTransaction(Transaction):
    r"""
    Transaction with helpers to parse amounts, dates and labels written
    the way French bank websites display them.

    Subclasses are expected to override PATTERNS with a list of
    ``(compiled regexp, transaction type)`` tuples.  parse() tries them in
    order against the raw label and stops at the first one that matches.
    """

    PATTERNS = []

    def clean_amount(self, text):
        """
        Clean a string containing an amount.

        Spaces and '.' are removed, ',' becomes the decimal point, and
        surrounding whitespace / euro signs are stripped, so that the
        result can be given to float().

        :param text: amount as displayed, or None for an empty cell
        :returns: the cleaned string ('' when text is None)
        """
        if text is None:
            # An empty HTML cell has ``.text is None``: treat it as "no amount"
            # instead of failing with AttributeError.
            return ''

        text = text.replace(' ', '').replace('.', '').replace(',', '.')
        return text.strip(u' \t\u20ac\xa0€\n\r')

    def set_amount(self, credit='', debit=''):
        """
        Set self.amount from displayed amount strings.

        Can take two strings if there are both credit and debit
        columns.  A non-empty debit wins and is stored negated; otherwise
        the credit is stored as is.

        :raises ValueError: if the selected string is not a number once
                            cleaned (including when both are empty)
        """
        credit = self.clean_amount(credit)
        debit = self.clean_amount(debit)

        if debit:
            self.amount = -float(debit)
        else:
            self.amount = float(credit)

    def parse(self, date, raw):
        r"""
        Parse date and raw strings to set the date, rdate, raw, category,
        label and type attributes of this transaction.

        When calling this method, you should have defined patterns (in the
        PATTERNS class attribute) with a list containing tuples of regexp
        and the associated type, for example:

            PATTERNS = [(re.compile(r'^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
                        (re.compile(r'^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
                        (re.compile(r'^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
                         FrenchTransaction.TYPE_CARD)
                       ]

        In regexps, you can define these named groups:
         - text: part of label to store in simplified label
         - yy, mm, dd, HH, MM: date and time parts, used to set rdate

        :param date: a datetime.date/datetime.datetime object, or a string
                     formatted as 'DDMMYYYY' or 'DD/MM/YYYY'.  A string in
                     any other format is stored unchanged.
        :param raw: raw label of the transaction
        """
        date = self._parse_date(date)

        self.date = date
        # Until the label tells otherwise, the real date is the operation date.
        self.rdate = date
        # Newlines become spaces and runs of spaces are squeezed into one.
        self.raw = to_unicode(re.sub(u'[ ]+', u' ', raw.replace(u'\n', u' ')).strip())
        self.category = NotAvailable

        # NOTE(review): the separator is a single space as seen in this patch,
        # but whitespace in the patch was collapsed by extraction -- confirm
        # against upstream that it was not a wider separator.
        if ' ' in self.raw:
            category, _, label = self.raw.partition(' ')
            self.category = category.strip()
            self.label = label.strip()
        else:
            self.label = self.raw

        for pattern, _type in self.PATTERNS:
            m = pattern.match(self.raw)
            if not m:
                continue

            args = m.groupdict()
            self.type = _type

            # A named group that did not take part in the match is present
            # in groupdict() with the value None, hence the explicit checks.
            if args.get('text') is not None:
                self.label = args['text'].strip()

            # Set real date from information in raw label.  Both groups are
            # required ("'dd' and 'mm' in args" only tested for 'mm').
            if args.get('dd') is not None and args.get('mm') is not None:
                rdate = self._rdate_from_label(args)
                if rdate is not None:
                    self.rdate = rdate

            # First matching pattern wins.
            return

    @staticmethod
    def _parse_date(date):
        """Convert a 'DDMMYYYY' or 'DD/MM/YYYY' string to a datetime.date."""
        # datetime.datetime is a subclass of datetime.date.
        if isinstance(date, datetime.date):
            return date

        if date.isdigit() and len(date) == 8:
            return datetime.date(int(date[4:8]), int(date[2:4]), int(date[0:2]))
        if '/' in date:
            return datetime.date(*reversed([int(x) for x in date.split('/')]))

        # Unknown format: keep the value untouched, as before.
        return date

    @staticmethod
    def _guess_year(mm, dd):
        """Return the most recent year in which dd/mm is a date not after today."""
        today = datetime.date.today()
        # Looking back over 9 years (current year plus the 8 before it) is
        # always enough to find a 29 February: leap years are at most 8
        # years apart.
        for year in range(today.year, today.year - 9, -1):
            try:
                candidate = datetime.date(year, mm, dd)
            except ValueError:
                # This day does not exist that year (e.g. 29/02).
                continue
            if candidate <= today:
                return year

        raise ValueError('no valid year for day %r of month %r' % (dd, mm))

    def _rdate_from_label(self, args):
        """Build the real date from the date/time groups, or None if invalid."""
        try:
            dd = int(args['dd'])
            mm = int(args['mm'])

            if args.get('yy') is not None:
                yy = int(args['yy'])
                # Two-digit years are taken as 20yy.
                if yy < 100:
                    yy += 2000
            else:
                yy = self._guess_year(mm, dd)

            if args.get('HH') is not None and args.get('MM') is not None:
                return datetime.datetime(yy, mm, dd,
                                         int(args['HH']), int(args['MM']))
            return datetime.date(yy, mm, dd)
        except ValueError:
            # The label matched the regexp but does not hold a real date
            # (e.g. "99/99"): rdate is auxiliary, keep the operation date
            # rather than aborting the whole parsing.
            return None