Improve detection of transaction and account types

This commit is contained in:
Romain Bignon 2014-02-16 19:14:58 +01:00 committed by Romain Bignon
commit 8fdbf330dd
11 changed files with 112 additions and 27 deletions

View file

@ -220,8 +220,11 @@ class HomePage(BasePage):
class AccountsPage(BasePage):
ACCOUNT_TYPES = {u'Mes comptes d\'épargne': Account.TYPE_SAVINGS,
u'Mon épargne': Account.TYPE_SAVINGS,
u'Placements': Account.TYPE_SAVINGS,
u'Mes comptes': Account.TYPE_CHECKING,
u'Comptes en euros': Account.TYPE_CHECKING,
u'Mes emprunts': Account.TYPE_LOAN,
u'Financements': Account.TYPE_LOAN,
u'Mes services': None, # ignore this kind of accounts (no bank ones)
}
@ -257,6 +260,12 @@ class AccountsPage(BasePage):
# ignore services accounts
continue
currency = None
for th in div.getnext().xpath('.//thead//th'):
m = re.match('.*\((\w+)\)$', th.text)
if m and currency is None:
currency = Account.get_currency(m.group(1))
for tr in div.getnext().xpath('.//tbody/tr'):
if not 'id' in tr.attrib:
continue
@ -276,7 +285,7 @@ class AccountsPage(BasePage):
balance = FrenchTransaction.clean_amount(u''.join([txt.strip() for txt in tds[3].itertext()]))
account.balance = Decimal(balance or '0.0')
account.currency = account.get_currency(balance)
account.currency = currency
if account.type == account.TYPE_LOAN:
account.balance = - abs(account.balance)

View file

@ -33,6 +33,8 @@ class Transaction(FrenchTransaction):
FrenchTransaction.TYPE_CARD),
(re.compile('^(?P<category>(PRELEVEMENT|TELEREGLEMENT|TIP)) (?P<text>.*)'),
FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<category>PRLV EUROPEEN SEP) (?P<text>.*?)( ECH/\d+)?( ID EMET.*)?$'),
FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<category>ECHEANCEPRET)(?P<text>.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT),
(re.compile('^(?P<category>RETRAIT DAB) (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{2})( (?P<HH>\d+)H(?P<MM>\d+))? (?P<text>.*)'),
FrenchTransaction.TYPE_WITHDRAWAL),

View file

@ -29,8 +29,8 @@ __all__ = ['AccountHistory', 'CardsList']
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile(u'^(?P<category>CHEQUE) (?P<text>.*)'), FrenchTransaction.TYPE_CHECK),
(re.compile(r'^(?P<category>ACHAT CB) (?P<text>.*) (?P<dd>\d{2})\.(?P<mm>\d{2}).(?P<yy>\d{2})'),
PATTERNS = [(re.compile(u'^(?P<category>CHEQUE)( N)? (?P<text>.*)'), FrenchTransaction.TYPE_CHECK),
(re.compile(r'^(?P<category>ACHAT CB) (?P<text>.*) (?P<dd>\d{2})\.(?P<mm>\d{2}).(?P<yy>\d{2}).*'),
FrenchTransaction.TYPE_CARD),
(re.compile('^(?P<category>(PRELEVEMENT DE|TELEREGLEMENT|TIP)) (?P<text>.*)'),
FrenchTransaction.TYPE_ORDER),
@ -43,8 +43,9 @@ class Transaction(FrenchTransaction):
FrenchTransaction.TYPE_TRANSFER),
(re.compile('^(?P<category>REMBOURST)(?P<text>.*)'), FrenchTransaction.TYPE_PAYBACK),
(re.compile('^(?P<category>COMMISSIONS)(?P<text>.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^(?P<category>FRAIS POUR)(?P<text>.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^(?P<text>(?P<category>REMUNERATION).*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^(?P<category>REMISE DE CHEQUE) (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
(re.compile('^(?P<category>REMISE DE CHEQUES?) (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
]

View file

@ -33,16 +33,16 @@ __all__ = ['AccountList']
class AccountList(BasePage):
def on_loaded(self):
self.accounts = OrderedDict()
self.parse_table('comptes')
self.parse_table('comptesEpargne')
self.parse_table('comptesTitres')
self.parse_table('comptesVie')
self.parse_table('comptes', Account.TYPE_CHECKING)
self.parse_table('comptesEpargne', Account.TYPE_SAVINGS)
self.parse_table('comptesTitres', Account.TYPE_MARKET)
self.parse_table('comptesVie', Account.TYPE_DEPOSIT)
self.parse_table('comptesRetraireEuros')
def get_accounts_list(self):
return self.accounts.itervalues()
def parse_table(self, what):
def parse_table(self, what, actype=None):
tables = self.document.xpath("//table[@id='%s']" % what, smart_strings=False)
if len(tables) < 1:
return
@ -52,6 +52,7 @@ class AccountList(BasePage):
account = Account()
tmp = line.xpath("./td//a")[0]
account.label = to_unicode(tmp.text)
account.type = actype
account._link_id = tmp.get("href")
if 'BourseEnLigne' in account._link_id:
account.type = Account.TYPE_MARKET

View file

@ -68,6 +68,14 @@ class UserSpacePage(BasePage):
class AccountsPage(BasePage):
TYPES = {'C/C': Account.TYPE_CHECKING,
'Livret': Account.TYPE_SAVINGS,
'Pret': Account.TYPE_LOAN,
'Compte Courant': Account.TYPE_CHECKING,
'Compte Cheque': Account.TYPE_CHECKING,
'Compte Epargne': Account.TYPE_SAVINGS,
}
def get_list(self):
accounts = OrderedDict()
@ -105,6 +113,11 @@ class AccountsPage(BasePage):
account = Account()
account.id = id
account.label = unicode(a.text).strip().lstrip(' 0123456789').title()
for pattern, actype in self.TYPES.iteritems():
if account.label.startswith(pattern):
account.type = actype
account._link_id = link
account._card_links = []
@ -142,11 +155,11 @@ class Transaction(FrenchTransaction):
(re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB\s+(?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
FrenchTransaction.TYPE_CARD),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE [\*\d]+'),
FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK),
(re.compile('^COTIS\.? (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^REMISE (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
(re.compile('^CHEQUE( (?P<text>.*))?$'), FrenchTransaction.TYPE_CHECK),
(re.compile('^(F )?COTIS\.? (?P<text>.*)'),FrenchTransaction.TYPE_BANK),
(re.compile('^(REMISE|REM CHQ) (?P<text>.*)'),FrenchTransaction.TYPE_DEPOSIT),
]
_is_coming = False

View file

@ -75,6 +75,13 @@ class _AccountsPage(BasePage):
COL_VALUE = 4
COL_CURRENCY = 5
TYPES = {'CCHQ': Account.TYPE_CHECKING,
'LIV A': Account.TYPE_SAVINGS,
'LDD': Account.TYPE_SAVINGS,
'PEL': Account.TYPE_MARKET,
'TITR': Account.TYPE_MARKET,
}
def get_list(self):
for tr in self.document.xpath('//table[@class="ca-table"]/tr'):
if not tr.attrib.get('class', '').startswith('colcelligne'):
@ -87,6 +94,7 @@ class _AccountsPage(BasePage):
account = Account()
account.id = self.parser.tocleanstring(cols[self.COL_ID])
account.label = self.parser.tocleanstring(cols[self.COL_LABEL])
account.type = self.TYPES.get(account.label, Account.TYPE_UNKNOWN)
balance = self.parser.tocleanstring(cols[self.COL_VALUE])
# we have to ignore those accounts, because using NotAvailable
# makes boobank and probably many others crash
@ -252,6 +260,7 @@ class TransactionsPage(BasePage):
'Remise De Cheque': Transaction.TYPE_DEPOSIT,
'Prelevement': Transaction.TYPE_ORDER,
'Prelevt': Transaction.TYPE_ORDER,
'Prelevmnt': Transaction.TYPE_ORDER,
}
def get_history(self, date_guesser):

View file

@ -79,6 +79,21 @@ class AccountsPage(CDNBasePage):
COL_LABEL = 5
COL_BALANCE = -1
TYPES = {'ASSURANCE VIE': Account.TYPE_DEPOSIT,
'CARTE': Account.TYPE_CARD,
'COMPTE COURANT': Account.TYPE_CHECKING,
'COMPTE EPARGNE': Account.TYPE_SAVINGS,
'COMPTE SUR LIVRET': Account.TYPE_SAVINGS,
'LIVRET': Account.TYPE_SAVINGS,
'P.E.A.': Account.TYPE_MARKET,
'PEA': Account.TYPE_MARKET,
}
def get_account_type(self, label):
    """Return the account type whose TYPES key is a prefix of *label*.

    When several keys of ``self.TYPES`` are prefixes of the label, the
    longest one wins, so the result does not depend on the iteration
    order of the dictionary.

    :param label: account label to classify
    :returns: the matching value from ``self.TYPES``, or None when no
              key is a prefix of the label
    """
    best_type = None
    best_length = -1
    for pattern, actype in self.TYPES.iteritems():
        # Only a strictly longer prefix may replace the current match.
        if len(pattern) > best_length and label.startswith(pattern):
            best_type = actype
            best_length = len(pattern)
    return best_type
def get_history_link(self):
return self.parser.strip(self.get_from_js(",url: Ext.util.Format.htmlDecode('", "'"))
@ -98,6 +113,8 @@ class AccountsPage(CDNBasePage):
fp = StringIO(unicode(line[self.COL_LABEL]).encode(self.browser.ENCODING))
a.label = self.parser.tocleanstring(self.parser.parse(fp, self.browser.ENCODING).xpath('//div[@class="libelleCompteTDB"]')[0])
a.balance = Decimal(FrenchTransaction.clean_amount(line[self.COL_BALANCE]))
a.currency = a.get_currency(line[self.COL_BALANCE])
a.type = self.get_account_type(a.label)
a._link = self.get_history_link()
if line[self.COL_HISTORY] == 'true':
a._args = {'_eventId': 'clicDetailCompte',
@ -158,6 +175,7 @@ class ProAccountsPage(AccountsPage):
a = Account()
a.id = cols[self.COL_ID].xpath('.//span[@class="right-underline"]')[0].text.strip()
a.label = unicode(cols[self.COL_ID].xpath('.//span[@class="left-underline"]')[0].text.strip())
a.type = self.get_account_type(a.label)
balance = self.parser.tocleanstring(cols[self.COL_BALANCE])
a.balance = Decimal(FrenchTransaction.clean_amount(balance))
a.currency = a.get_currency(balance)

View file

@ -68,6 +68,14 @@ class UserSpacePage(BasePage):
class AccountsPage(BasePage):
TYPES = {'C/C': Account.TYPE_CHECKING,
'Livret': Account.TYPE_SAVINGS,
'Pret': Account.TYPE_LOAN,
'Compte Courant': Account.TYPE_CHECKING,
'Compte Cheque': Account.TYPE_CHECKING,
'Compte Epargne': Account.TYPE_SAVINGS,
}
def get_list(self):
accounts = OrderedDict()
@ -105,6 +113,11 @@ class AccountsPage(BasePage):
account = Account()
account.id = id
account.label = unicode(a.text).strip().lstrip(' 0123456789').title()
for pattern, actype in self.TYPES.iteritems():
if account.label.startswith(pattern):
account.type = actype
account._link_id = link
account._card_links = []
@ -142,11 +155,11 @@ class Transaction(FrenchTransaction):
(re.compile('^PRLV (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<text>.*) CARTE \d+ PAIEMENT CB\s+(?P<dd>\d{2})(?P<mm>\d{2}) ?(.*)$'),
FrenchTransaction.TYPE_CARD),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE \d+'),
(re.compile('^RETRAIT DAB (?P<dd>\d{2})(?P<mm>\d{2}) (?P<text>.*) CARTE [\*\d]+'),
FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^CHEQUE$'), FrenchTransaction.TYPE_CHECK),
(re.compile('^COTIS\.? (?P<text>.*)'), FrenchTransaction.TYPE_BANK),
(re.compile('^REMISE (?P<text>.*)'), FrenchTransaction.TYPE_DEPOSIT),
(re.compile('^CHEQUE( (?P<text>.*))?$'), FrenchTransaction.TYPE_CHECK),
(re.compile('^(F )?COTIS\.? (?P<text>.*)'),FrenchTransaction.TYPE_BANK),
(re.compile('^(REMISE|REM CHQ) (?P<text>.*)'),FrenchTransaction.TYPE_DEPOSIT),
]
_is_coming = False

View file

@ -23,7 +23,7 @@ from datetime import date, timedelta
import re
import hashlib
from weboob.capabilities.bank import Account, Transaction
from weboob.capabilities.bank import Account
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
@ -34,10 +34,11 @@ __all__ = ['AccountsList']
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile(u'^retrait dab (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{4}) (?P<text>.*)'), FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile(u'^carte (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{4}) (?P<text>.*)'), Transaction.TYPE_CARD),
(re.compile(u'^virement ((sepa emis vers|emis vers|recu|sepa recu|emis)?) (?P<text>.*)'), Transaction.TYPE_TRANSFER),
(re.compile(u'^prelevement (?P<text>.*)'), Transaction.TYPE_ORDER),
(re.compile(u'^prélèvement sepa en faveur de (?P<text>.*)'), Transaction.TYPE_ORDER),
(re.compile(u'^carte (?P<dd>\d{2})/(?P<mm>\d{2})/(?P<yy>\d{4}) (?P<text>.*)'), FrenchTransaction.TYPE_CARD),
(re.compile(u'^virement (sepa )?(emis vers|recu|emis)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
(re.compile(u'^cheque (?P<text>.*)'), FrenchTransaction.TYPE_CHECK),
(re.compile(u'^prelevement (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
(re.compile(u'^prélèvement sepa en faveur de (?P<text>.*)'), FrenchTransaction.TYPE_ORDER),
]
@ -108,7 +109,7 @@ class AccountsList(BasePage):
textdate = textdate.replace(' ', '')
textdate = textdate.replace(frenchmonth, '/%s/' %month)
# We use lower for compatibility with old website
textraw = table.find('.//td[@class="lbl"]').text_content().strip().lower()
textraw = self.parser.tocleanstring(table.find('.//td[@class="lbl"]')).lower()
# The id will be rewrite
op = Transaction(1)
amount = op.clean_amount(table.xpath('.//td[starts-with(@class, "amount")]')[0].text_content())
@ -116,7 +117,7 @@ class AccountsList(BasePage):
+ amount.encode('utf-8')).hexdigest()
op.id = id
op.parse(date = date(*reversed([int(x) for x in textdate.split('/')])),
raw = textraw)
raw = textraw)
category = table.find('.//td[@class="picto"]/span')
category = unicode(category.attrib['class'].split('-')[0].lower())
try:

View file

@ -205,7 +205,7 @@ class AccountsPage(BasePage):
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile('^(?P<category>CB) (?P<text>RETRAIT) DU (?P<dd>\d+)/(?P<mm>\d+)'),
FrenchTransaction.TYPE_WITHDRAWAL),
(re.compile('^(?P<category>PRLV) (?P<text>.*)'),
(re.compile('^(?P<category>(PRLV|PE)) (?P<text>.*)'),
FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<category>CHQ\.) (?P<text>.*)'),
FrenchTransaction.TYPE_CHECK),
@ -216,7 +216,7 @@ class Transaction(FrenchTransaction):
(re.compile('^(?P<category>(PRELEVEMENT|TELEREGLEMENT|TIP)) (?P<text>.*)'),
FrenchTransaction.TYPE_ORDER),
(re.compile('^(?P<category>ECHEANCEPRET)(?P<text>.*)'), FrenchTransaction.TYPE_LOAN_PAYMENT),
(re.compile('^(?P<category>VIR(EM(EN)?)?T? ((RECU|FAVEUR) TIERS|SEPA RECU)?)( /FRM)?(?P<text>.*)'),
(re.compile('^(?P<category>VIR(EM(EN)?)?T?(.PERMANENT)? ((RECU|FAVEUR) TIERS|SEPA RECU)?)( /FRM)?(?P<text>.*)'),
FrenchTransaction.TYPE_TRANSFER),
(re.compile('^(?P<category>REMBOURST)(?P<text>.*)'), FrenchTransaction.TYPE_PAYBACK),
(re.compile('^(?P<category>COM(MISSIONS?)?)(?P<text>.*)'), FrenchTransaction.TYPE_BANK),
@ -280,7 +280,10 @@ class AccountHistoryPage(BasePage):
date = u''.join([txt.strip() for txt in td.itertext()])
elif value.startswith("lib") or value.startswith("opLib"):
# misclosed A tag requires to grab text from td
raw = self.strip_label(u''.join([txt.strip() for txt in td.itertext()]))
tooltip = td.xpath('./div[@class="autoTooltip"]')
if len(tooltip) > 0:
td.remove(tooltip[0])
raw = self.parser.tocleanstring(td)
elif value.startswith("solde") or value.startswith("mnt") or \
value.startswith('debit') or value.startswith('credit'):
mntColumn += 1

View file

@ -42,6 +42,18 @@ class AccountsList(BasePage):
def on_loaded(self):
pass
TYPES = {u'Compte Bancaire': Account.TYPE_CHECKING,
u'Compte Epargne': Account.TYPE_SAVINGS,
u'Compte Sur Livret': Account.TYPE_SAVINGS,
u'Compte Titres': Account.TYPE_MARKET,
u'Crédit': Account.TYPE_LOAN,
u'Livret': Account.TYPE_SAVINGS,
u'PEA': Account.TYPE_MARKET,
u'Plan Epargne': Account.TYPE_SAVINGS,
u'Prêt': Account.TYPE_LOAN,
}
def get_list(self):
accounts = []
for tr in self.document.getiterator('tr'):
@ -55,6 +67,9 @@ class AccountsList(BasePage):
if a is None:
break
account.label = unicode(a.find("span").text)
for pattern, actype in self.TYPES.iteritems():
    if account.label.startswith(pattern):
        # Assign the TYPES value for the matched label prefix; the
        # previous code assigned the builtin ``type`` by mistake.
        account.type = actype
account._link_id = a.get('href', '')
elif td.attrib.get('headers', '') == 'NumeroCompte':