From f23f849e05a4b4377ffc2043e8b471ecb570dbed Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Fri, 9 Mar 2012 12:48:50 +0100 Subject: [PATCH] several fixes and add categorization of transactions --- modules/creditmutuel/pages.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/modules/creditmutuel/pages.py b/modules/creditmutuel/pages.py index f42570c6..cb81aaaf 100644 --- a/modules/creditmutuel/pages.py +++ b/modules/creditmutuel/pages.py @@ -18,6 +18,7 @@ # along with weboob. If not, see <http://www.gnu.org/licenses/>. +import re from datetime import date from weboob.tools.browser import BasePage @@ -58,6 +59,10 @@ class AccountsPage(BasePage): continue account.id = first_td.find('a').text.split(' ')[0]+first_td.find('a').text.split(' ')[1] + + if not account.id.isdigit(): + continue + s = tr.getchildren()[2].text if s.strip() == "": s = tr.getchildren()[1].text @@ -77,6 +82,18 @@ class AccountsPage(BasePage): return 0 class OperationsPage(BasePage): + LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), Transaction.TYPE_TRANSFER, '%(text)s'), + (re.compile('^PRLV (?P<text>.*)'), Transaction.TYPE_ORDER, '%(text)s'), + (re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB (?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'), + Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'), + (re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'), + Transaction.TYPE_WITHDRAWAL, '%(mm)s/%(dd)s: %(text)s'), + (re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'), + (re.compile('^COTIS\.? (?P<text>.*)'), Transaction.TYPE_BANK, '%(text)s'), + (re.compile('^REMISE (?P<text>.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'), + ] + + def get_history(self): index = 0 for tr in self.document.getiterator('tr'): @@ -93,7 +110,30 @@ class OperationsPage(BasePage): d = tds[0].text.strip().split('/') operation.date = date(*reversed([int(x) for x in d])) - operation.raw = to_unicode(tds[-3].text.replace('\n',' ').strip()) + # Find different parts of label + parts = [] + if len(tds[-3].findall('a')) > 0: + parts = [a.text.strip() for a in tds[-3].findall('a')] + else: + parts.append(tds[-3].text.strip()) + if tds[-3].find('br') is not None: + parts.append(tds[-3].find('br').tail.strip()) + + # To simplify categorization of CB, reverse order of parts to separate + # location and institution. + if parts[0].startswith('PAIEMENT CB'): + parts.reverse() + + operation.raw = to_unicode(re.sub(u'[ ]+', u' ', u' '.join(parts).replace(u'\n', u' '))) + + # Categorization + for pattern, _type, _label in self.LABEL_PATTERNS: + mm = pattern.match(operation.raw) + if mm: + operation.type = _type + operation.label = to_unicode(_label % mm.groupdict()).strip() + break + if tds[-1].text is not None and len(tds[-1].text) > 2: s = tds[-1].text.strip() elif tds[-1].text is not None and len(tds[-2].text) > 2: