From 8fdbf330dd6444a43aadb9880fce2c09003fb048 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sun, 16 Feb 2014 19:14:58 +0100 Subject: [PATCH] improvements on transaction and account types detection --- modules/banquepopulaire/pages.py | 11 +++++++++- modules/bnporc/perso/transactions.py | 2 ++ modules/bp/pages/accounthistory.py | 7 ++++--- modules/bp/pages/accountlist.py | 11 +++++----- modules/cic/pages.py | 21 +++++++++++++++---- modules/cragr/web/pages.py | 9 ++++++++ modules/creditdunord/pages.py | 18 ++++++++++++++++ modules/creditmutuel/pages.py | 21 +++++++++++++++---- modules/ing/pages/accounts_list.py | 15 ++++++------- modules/lcl/pages.py | 9 +++++--- .../societegenerale/pages/accounts_list.py | 15 +++++++++++++ 11 files changed, 112 insertions(+), 27 deletions(-) diff --git a/modules/banquepopulaire/pages.py b/modules/banquepopulaire/pages.py index 1612dcd2..99a03baa 100644 --- a/modules/banquepopulaire/pages.py +++ b/modules/banquepopulaire/pages.py @@ -220,8 +220,11 @@ class HomePage(BasePage): class AccountsPage(BasePage): ACCOUNT_TYPES = {u'Mes comptes d\'épargne': Account.TYPE_SAVINGS, u'Mon épargne': Account.TYPE_SAVINGS, + u'Placements': Account.TYPE_SAVINGS, u'Mes comptes': Account.TYPE_CHECKING, + u'Comptes en euros': Account.TYPE_CHECKING, u'Mes emprunts': Account.TYPE_LOAN, + u'Financements': Account.TYPE_LOAN, u'Mes services': None, # ignore this kind of accounts (no bank ones) } @@ -257,6 +260,12 @@ class AccountsPage(BasePage): # ignore services accounts continue + currency = None + for th in div.getnext().xpath('.//thead//th'): + m = re.match('.*\((\w+)\)$', th.text) + if m and currency is None: + currency = Account.get_currency(m.group(1)) + for tr in div.getnext().xpath('.//tbody/tr'): if not 'id' in tr.attrib: continue @@ -276,7 +285,7 @@ class AccountsPage(BasePage): balance = FrenchTransaction.clean_amount(u''.join([txt.strip() for txt in tds[3].itertext()])) account.balance = Decimal(balance or '0.0') - account.currency = account.get_currency(balance) + account.currency = currency if account.type == account.TYPE_LOAN: account.balance = - abs(account.balance) diff --git a/modules/bnporc/perso/transactions.py b/modules/bnporc/perso/transactions.py index e196e418..2f8ee774 100644 --- a/modules/bnporc/perso/transactions.py +++ b/modules/bnporc/perso/transactions.py @@ -33,6 +33,8 @@ class Transaction(FrenchTransaction): FrenchTransaction.TYPE_CARD), (re.compile('^(?P(PRELEVEMENT|TELEREGLEMENT|TIP)) (?P.*)'), FrenchTransaction.TYPE_ORDER), + (re.compile('^(?PPRLV EUROPEEN SEP) (?P.*?)( ECH/\d+)?( ID EMET.*)?$'), + FrenchTransaction.TYPE_ORDER), (re.compile('^(?PECHEANCEPRET)(?P.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT), (re.compile('^(?PRETRAIT DAB) (?P
\d{2})/(?P\d{2})/(?P\d{2})( (?P\d+)H(?P\d+))? (?P.*)'), FrenchTransaction.TYPE_WITHDRAWAL), diff --git a/modules/bp/pages/accounthistory.py b/modules/bp/pages/accounthistory.py index a5fa82ef..989accda 100644 --- a/modules/bp/pages/accounthistory.py +++ b/modules/bp/pages/accounthistory.py @@ -29,8 +29,8 @@ __all__ = ['AccountHistory', 'CardsList'] class Transaction(FrenchTransaction): - PATTERNS = [(re.compile(u'^(?PCHEQUE) (?P.*)'), FrenchTransaction.TYPE_CHECK), - (re.compile(r'^(?PACHAT CB) (?P.*) (?P
\d{2})\.(?P\d{2}).(?P\d{2})'), + PATTERNS = [(re.compile(u'^(?PCHEQUE)( N)? (?P.*)'), FrenchTransaction.TYPE_CHECK), + (re.compile(r'^(?PACHAT CB) (?P.*) (?P
\d{2})\.(?P\d{2}).(?P\d{2}).*'), FrenchTransaction.TYPE_CARD), (re.compile('^(?P(PRELEVEMENT DE|TELEREGLEMENT|TIP)) (?P.*)'), FrenchTransaction.TYPE_ORDER), @@ -43,8 +43,9 @@ class Transaction(FrenchTransaction): FrenchTransaction.TYPE_TRANSFER), (re.compile('^(?PREMBOURST)(?P.*)'), FrenchTransaction.TYPE_PAYBACK), (re.compile('^(?PCOMMISSIONS)(?P.*)'), FrenchTransaction.TYPE_BANK), + (re.compile('^(?PFRAIS POUR)(?P.*)'), FrenchTransaction.TYPE_BANK), (re.compile('^(?P(?PREMUNERATION).*)'), FrenchTransaction.TYPE_BANK), - (re.compile('^(?PREMISE DE CHEQUE) (?P.*)'), FrenchTransaction.TYPE_DEPOSIT), + (re.compile('^(?PREMISE DE CHEQUES?) (?P.*)'), FrenchTransaction.TYPE_DEPOSIT), ] diff --git a/modules/bp/pages/accountlist.py b/modules/bp/pages/accountlist.py index 7df1b002..6f6c0de8 100644 --- a/modules/bp/pages/accountlist.py +++ b/modules/bp/pages/accountlist.py @@ -33,16 +33,16 @@ __all__ = ['AccountList'] class AccountList(BasePage): def on_loaded(self): self.accounts = OrderedDict() - self.parse_table('comptes') - self.parse_table('comptesEpargne') - self.parse_table('comptesTitres') - self.parse_table('comptesVie') + self.parse_table('comptes', Account.TYPE_CHECKING) + self.parse_table('comptesEpargne', Account.TYPE_SAVINGS) + self.parse_table('comptesTitres', Account.TYPE_MARKET) + self.parse_table('comptesVie', Account.TYPE_DEPOSIT) self.parse_table('comptesRetraireEuros') def get_accounts_list(self): return self.accounts.itervalues() - def parse_table(self, what): + def parse_table(self, what, actype=None): tables = self.document.xpath("//table[@id='%s']" % what, smart_strings=False) if len(tables) < 1: return @@ -52,6 +52,7 @@ class AccountList(BasePage): account = Account() tmp = line.xpath("./td//a")[0] account.label = to_unicode(tmp.text) + account.type = actype account._link_id = tmp.get("href") if 'BourseEnLigne' in account._link_id: account.type = Account.TYPE_MARKET diff --git a/modules/cic/pages.py b/modules/cic/pages.py index b27a82b6..bbe6719f 100644 --- a/modules/cic/pages.py +++ b/modules/cic/pages.py @@ -68,6 +68,14 @@ class UserSpacePage(BasePage): class AccountsPage(BasePage): + TYPES = {'C/C': Account.TYPE_CHECKING, + 'Livret': Account.TYPE_SAVINGS, + 'Pret': Account.TYPE_LOAN, + 'Compte Courant': Account.TYPE_CHECKING, + 'Compte Cheque': Account.TYPE_CHECKING, + 'Compte Epargne': Account.TYPE_SAVINGS, + } + def get_list(self): accounts = OrderedDict() @@ -105,6 +113,11 @@ class AccountsPage(BasePage): account = Account() account.id = id account.label = unicode(a.text).strip().lstrip(' 0123456789').title() + + for pattern, actype in self.TYPES.iteritems(): + if account.label.startswith(pattern): + account.type = actype + account._link_id = link account._card_links = [] @@ -142,11 +155,11 @@ class Transaction(FrenchTransaction): (re.compile('^PRLV (?P.*)'), FrenchTransaction.TYPE_ORDER), (re.compile('^(?P.*) CARTE \d+ PAIEMENT CB\s+(?P
\d{2})(?P\d{2}) ?(.*)$'), FrenchTransaction.TYPE_CARD), - (re.compile('^RETRAIT DAB (?P
\d{2})(?P\d{2}) (?P.*) CARTE \d+'), + (re.compile('^RETRAIT DAB (?P
\d{2})(?P\d{2}) (?P.*) CARTE [\*\d]+'), FrenchTransaction.TYPE_WITHDRAWAL), - (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK), - (re.compile('^COTIS\.? (?P.*)'), FrenchTransaction.TYPE_BANK), - (re.compile('^REMISE (?P.*)'), FrenchTransaction.TYPE_DEPOSIT), + (re.compile('^CHEQUE( (?P.*))?$'), FrenchTransaction.TYPE_CHECK), + (re.compile('^(F )?COTIS\.? (?P.*)'),FrenchTransaction.TYPE_BANK), + (re.compile('^(REMISE|REM CHQ) (?P.*)'),FrenchTransaction.TYPE_DEPOSIT), ] _is_coming = False diff --git a/modules/cragr/web/pages.py b/modules/cragr/web/pages.py index 7dd45254..eef2241c 100644 --- a/modules/cragr/web/pages.py +++ b/modules/cragr/web/pages.py @@ -75,6 +75,13 @@ class _AccountsPage(BasePage): COL_VALUE = 4 COL_CURRENCY = 5 + TYPES = {'CCHQ': Account.TYPE_CHECKING, + 'LIV A': Account.TYPE_SAVINGS, + 'LDD': Account.TYPE_SAVINGS, + 'PEL': Account.TYPE_MARKET, + 'TITR': Account.TYPE_MARKET, + } + def get_list(self): for tr in self.document.xpath('//table[@class="ca-table"]/tr'): if not tr.attrib.get('class', '').startswith('colcelligne'): @@ -87,6 +94,7 @@ class _AccountsPage(BasePage): account = Account() account.id = self.parser.tocleanstring(cols[self.COL_ID]) account.label = self.parser.tocleanstring(cols[self.COL_LABEL]) + account.type = self.TYPES.get(account.label, Account.TYPE_UNKNOWN) balance = self.parser.tocleanstring(cols[self.COL_VALUE]) # we have to ignore those accounts, because using NotAvailable # makes boobank and probably many others crash @@ -252,6 +260,7 @@ class TransactionsPage(BasePage): 'Remise De Cheque': Transaction.TYPE_DEPOSIT, 'Prelevement': Transaction.TYPE_ORDER, 'Prelevt': Transaction.TYPE_ORDER, + 'Prelevmnt': Transaction.TYPE_ORDER, } def get_history(self, date_guesser): diff --git a/modules/creditdunord/pages.py b/modules/creditdunord/pages.py index dc0a74b6..fff53fd5 100644 --- a/modules/creditdunord/pages.py +++ b/modules/creditdunord/pages.py @@ -79,6 +79,21 @@ class AccountsPage(CDNBasePage): COL_LABEL = 5 COL_BALANCE = -1 + TYPES = {'ASSURANCE VIE': Account.TYPE_DEPOSIT, + 'CARTE': Account.TYPE_CARD, + 'COMPTE COURANT': Account.TYPE_CHECKING, + 'COMPTE EPARGNE': Account.TYPE_SAVINGS, + 'COMPTE SUR LIVRET': Account.TYPE_SAVINGS, + 'LIVRET': Account.TYPE_SAVINGS, + 'P.E.A.': Account.TYPE_MARKET, + 'PEA': Account.TYPE_MARKET, + } + + def get_account_type(self, label): + for pattern, actype in self.TYPES.iteritems(): + if label.startswith(pattern): + return actype + def get_history_link(self): return self.parser.strip(self.get_from_js(",url: Ext.util.Format.htmlDecode('", "'")) @@ -98,6 +113,8 @@ class AccountsPage(CDNBasePage): fp = StringIO(unicode(line[self.COL_LABEL]).encode(self.browser.ENCODING)) a.label = self.parser.tocleanstring(self.parser.parse(fp, self.browser.ENCODING).xpath('//div[@class="libelleCompteTDB"]')[0]) a.balance = Decimal(FrenchTransaction.clean_amount(line[self.COL_BALANCE])) + a.currency = a.get_currency(line[self.COL_BALANCE]) + a.type = self.get_account_type(a.label) a._link = self.get_history_link() if line[self.COL_HISTORY] == 'true': a._args = {'_eventId': 'clicDetailCompte', @@ -158,6 +175,7 @@ class ProAccountsPage(AccountsPage): a = Account() a.id = cols[self.COL_ID].xpath('.//span[@class="right-underline"]')[0].text.strip() a.label = unicode(cols[self.COL_ID].xpath('.//span[@class="left-underline"]')[0].text.strip()) + a.type = self.get_account_type(a.label) balance = self.parser.tocleanstring(cols[self.COL_BALANCE]) a.balance = Decimal(FrenchTransaction.clean_amount(balance)) a.currency = a.get_currency(balance) diff --git a/modules/creditmutuel/pages.py b/modules/creditmutuel/pages.py index 6a93af31..767a86f6 100644 --- a/modules/creditmutuel/pages.py +++ b/modules/creditmutuel/pages.py @@ -68,6 +68,14 @@ class UserSpacePage(BasePage): class AccountsPage(BasePage): + TYPES = {'C/C': Account.TYPE_CHECKING, + 'Livret': Account.TYPE_SAVINGS, + 'Pret': Account.TYPE_LOAN, + 'Compte Courant': Account.TYPE_CHECKING, + 'Compte Cheque': Account.TYPE_CHECKING, + 'Compte Epargne': Account.TYPE_SAVINGS, + } + def get_list(self): accounts = OrderedDict() @@ -105,6 +113,11 @@ class AccountsPage(BasePage): account = Account() account.id = id account.label = unicode(a.text).strip().lstrip(' 0123456789').title() + + for pattern, actype in self.TYPES.iteritems(): + if account.label.startswith(pattern): + account.type = actype + account._link_id = link account._card_links = [] @@ -142,11 +155,11 @@ class Transaction(FrenchTransaction): (re.compile('^PRLV (?P.*)'), FrenchTransaction.TYPE_ORDER), (re.compile('^(?P.*) CARTE \d+ PAIEMENT CB\s+(?P
\d{2})(?P\d{2}) ?(.*)$'), FrenchTransaction.TYPE_CARD), - (re.compile('^RETRAIT DAB (?P
\d{2})(?P\d{2}) (?P.*) CARTE \d+'), + (re.compile('^RETRAIT DAB (?P
\d{2})(?P\d{2}) (?P.*) CARTE [\*\d]+'), FrenchTransaction.TYPE_WITHDRAWAL), - (re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK), - (re.compile('^COTIS\.? (?P.*)'), FrenchTransaction.TYPE_BANK), - (re.compile('^REMISE (?P.*)'), FrenchTransaction.TYPE_DEPOSIT), + (re.compile('^CHEQUE( (?P.*))?$'), FrenchTransaction.TYPE_CHECK), + (re.compile('^(F )?COTIS\.? (?P.*)'),FrenchTransaction.TYPE_BANK), + (re.compile('^(REMISE|REM CHQ) (?P.*)'),FrenchTransaction.TYPE_DEPOSIT), ] _is_coming = False diff --git a/modules/ing/pages/accounts_list.py b/modules/ing/pages/accounts_list.py index b9ca4712..cf4ad190 100644 --- a/modules/ing/pages/accounts_list.py +++ b/modules/ing/pages/accounts_list.py @@ -23,7 +23,7 @@ from datetime import date, timedelta import re import hashlib -from weboob.capabilities.bank import Account, Transaction +from weboob.capabilities.bank import Account from weboob.capabilities.base import NotAvailable from weboob.tools.browser import BasePage from weboob.tools.capabilities.bank.transactions import FrenchTransaction @@ -34,10 +34,11 @@ __all__ = ['AccountsList'] class Transaction(FrenchTransaction): PATTERNS = [(re.compile(u'^retrait dab (?P
\d{2})/(?P\d{2})/(?P\d{4}) (?P.*)'), FrenchTransaction.TYPE_WITHDRAWAL), - (re.compile(u'^carte (?P
\d{2})/(?P\d{2})/(?P\d{4}) (?P.*)'), Transaction.TYPE_CARD), - (re.compile(u'^virement ((sepa emis vers|emis vers|recu|sepa recu|emis)?) (?P.*)'), Transaction.TYPE_TRANSFER), - (re.compile(u'^prelevement (?P.*)'), Transaction.TYPE_ORDER), - (re.compile(u'^prélèvement sepa en faveur de (?P.*)'), Transaction.TYPE_ORDER), + (re.compile(u'^carte (?P
\d{2})/(?P\d{2})/(?P\d{4}) (?P.*)'), FrenchTransaction.TYPE_CARD), + (re.compile(u'^virement (sepa )?(emis vers|recu|emis)? (?P.*)'), FrenchTransaction.TYPE_TRANSFER), + (re.compile(u'^cheque (?P.*)'), FrenchTransaction.TYPE_CHECK), + (re.compile(u'^prelevement (?P.*)'), FrenchTransaction.TYPE_ORDER), + (re.compile(u'^prélèvement sepa en faveur de (?P.*)'), FrenchTransaction.TYPE_ORDER), ] @@ -108,7 +109,7 @@ class AccountsList(BasePage): textdate = textdate.replace(' ', '') textdate = textdate.replace(frenchmonth, '/%s/' %month) # We use lower for compatibility with old website - textraw = table.find('.//td[@class="lbl"]').text_content().strip().lower() + textraw = self.parser.tocleanstring(table.find('.//td[@class="lbl"]')).lower() # The id will be rewrite op = Transaction(1) amount = op.clean_amount(table.xpath('.//td[starts-with(@class, "amount")]')[0].text_content()) @@ -116,7 +117,7 @@ class AccountsList(BasePage): + amount.encode('utf-8')).hexdigest() op.id = id op.parse(date = date(*reversed([int(x) for x in textdate.split('/')])), - raw = textraw) + raw = textraw) category = table.find('.//td[@class="picto"]/span') category = unicode(category.attrib['class'].split('-')[0].lower()) try: diff --git a/modules/lcl/pages.py b/modules/lcl/pages.py index 3f61dffe..a608ca45 100644 --- a/modules/lcl/pages.py +++ b/modules/lcl/pages.py @@ -205,7 +205,7 @@ class AccountsPage(BasePage): class Transaction(FrenchTransaction): PATTERNS = [(re.compile('^(?PCB) (?PRETRAIT) DU (?P
\d+)/(?P\d+)'), FrenchTransaction.TYPE_WITHDRAWAL), - (re.compile('^(?PPRLV) (?P.*)'), + (re.compile('^(?P(PRLV|PE)) (?P.*)'), FrenchTransaction.TYPE_ORDER), (re.compile('^(?PCHQ\.) (?P.*)'), FrenchTransaction.TYPE_CHECK), @@ -216,7 +216,7 @@ class Transaction(FrenchTransaction): (re.compile('^(?P(PRELEVEMENT|TELEREGLEMENT|TIP)) (?P.*)'), FrenchTransaction.TYPE_ORDER), (re.compile('^(?PECHEANCEPRET)(?P.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT), - (re.compile('^(?PVIR(EM(EN)?)?T? ((RECU|FAVEUR) TIERS|SEPA RECU)?)( /FRM)?(?P.*)'), + (re.compile('^(?PVIR(EM(EN)?)?T?(.PERMANENT)? ((RECU|FAVEUR) TIERS|SEPA RECU)?)( /FRM)?(?P.*)'), FrenchTransaction.TYPE_TRANSFER), (re.compile('^(?PREMBOURST)(?P.*)'), FrenchTransaction.TYPE_PAYBACK), (re.compile('^(?PCOM(MISSIONS?)?)(?P.*)'), FrenchTransaction.TYPE_BANK), @@ -280,7 +280,10 @@ class AccountHistoryPage(BasePage): date = u''.join([txt.strip() for txt in td.itertext()]) elif value.startswith("lib") or value.startswith("opLib"): # misclosed A tag requires to grab text from td - raw = self.strip_label(u''.join([txt.strip() for txt in td.itertext()])) + tooltip = td.xpath('./div[@class="autoTooltip"]') + if len(tooltip) > 0: + td.remove(tooltip[0]) + raw = self.parser.tocleanstring(td) elif value.startswith("solde") or value.startswith("mnt") or \ value.startswith('debit') or value.startswith('credit'): mntColumn += 1 diff --git a/modules/societegenerale/pages/accounts_list.py b/modules/societegenerale/pages/accounts_list.py index a1da7a28..67e995ec 100644 --- a/modules/societegenerale/pages/accounts_list.py +++ b/modules/societegenerale/pages/accounts_list.py @@ -42,6 +42,18 @@ class AccountsList(BasePage): def on_loaded(self): pass + TYPES = {u'Compte Bancaire': Account.TYPE_CHECKING, + u'Compte Epargne': Account.TYPE_SAVINGS, + u'Compte Sur Livret': Account.TYPE_SAVINGS, + u'Compte Titres': Account.TYPE_MARKET, + u'Crédit': Account.TYPE_LOAN, + u'Livret': Account.TYPE_SAVINGS, + u'PEA': Account.TYPE_MARKET, + u'Plan Epargne': Account.TYPE_SAVINGS, + u'Prêt': Account.TYPE_LOAN, + } + + def get_list(self): accounts = [] for tr in self.document.getiterator('tr'): @@ -55,6 +67,9 @@ class AccountsList(BasePage): if a is None: break account.label = unicode(a.find("span").text) + for pattern, actype in self.TYPES.iteritems(): + if account.label.startswith(pattern): + account.type = type account._link_id = a.get('href', '') elif td.attrib.get('headers', '') == 'NumeroCompte':