diff --git a/modules/bnporc/pages/transactions.py b/modules/bnporc/pages/transactions.py index 3da96564..9099a884 100644 --- a/modules/bnporc/pages/transactions.py +++ b/modules/bnporc/pages/transactions.py @@ -50,13 +50,13 @@ class TransactionsBasePage(BasePage): def parse_text(self, op): op.category = NotAvailable - if ' ' in op.text: + if ' ' in op.raw: op.category, useless, op.label = [part.strip() for part in op.label.partition(' ')] else: - op.label = op.text + op.label = op.raw for pattern, _type, _label in self.LABEL_PATTERNS: - m = pattern.match(op.text) + m = pattern.match(op.raw) if m: op.type = _type op.label = (_label % m.groupdict()).strip() @@ -70,7 +70,7 @@ class AccountHistory(TransactionsBasePage): id = tr.find('td').find('input').attrib['value'] op = Transaction(id) - op.text = tr.findall('td')[2].text.replace(u'\xa0', u'').strip() + op.raw = tr.findall('td')[2].text.replace(u'\xa0', u'').strip() op.date = date(*reversed([int(x) for x in tr.findall('td')[1].text.split('/')])) self.parse_text(op) @@ -105,7 +105,7 @@ class AccountComing(TransactionsBasePage): i += 1 operation = Transaction(i) operation.date = d - operation.text = text.strip() + operation.raw = text.strip() self.parse_text(operation) operation.amount = float(amount) yield operation diff --git a/modules/boursorama/pages/account_history.py b/modules/boursorama/pages/account_history.py index 996b710a..bfcd6c0d 100644 --- a/modules/boursorama/pages/account_history.py +++ b/modules/boursorama/pages/account_history.py @@ -22,7 +22,7 @@ from datetime import date from weboob.tools.browser import BasePage -from weboob.capabilities.bank import Operation +from weboob.capabilities.bank import Transaction __all__ = ['AccountHistory'] @@ -56,7 +56,7 @@ class AccountHistory(BasePage): amount = amount.strip(u' \n\t\x80').replace(' ', '').replace(',', '.') # if we don't have exactly one '.', this is not a floatm try the next - operation = Operation(len(self.operations)) + operation = Transaction(len(self.operations)) operation.amount = float(amount) operation.date = d diff --git a/modules/bp/pages/accounthistory.py b/modules/bp/pages/accounthistory.py index 4508fb6c..e7e4bc01 100644 --- a/modules/bp/pages/accounthistory.py +++ b/modules/bp/pages/accounthistory.py @@ -20,7 +20,7 @@ import re -from weboob.capabilities.bank import Operation +from weboob.capabilities.bank import Transaction from weboob.tools.browser import BasePage @@ -36,10 +36,10 @@ class AccountHistory(BasePage): operations = [] for mvt in mvt_ligne: - operation = Operation(len(operations)) + operation = Transaction(len(operations)) operation.date = mvt.xpath("./td/span")[0].text tmp = mvt.xpath("./td/span")[1] - operation.label = unicode(self.parser.tocleanstring(tmp)) + operation.raw = unicode(self.parser.tocleanstring(tmp)) r = re.compile(r'\d+') diff --git a/modules/cmb/backend.py b/modules/cmb/backend.py index 26c4cc84..c95db6d7 100644 --- a/modules/cmb/backend.py +++ b/modules/cmb/backend.py @@ -19,7 +19,7 @@ from weboob.capabilities.bank import ICapBank, AccountNotFound -from weboob.capabilities.bank import Account, Operation +from weboob.capabilities.bank import Account, Transaction from weboob.tools.backend import BaseBackend, BackendConfig from weboob.tools.value import ValueBackendPassword from weboob.capabilities.base import NotAvailable @@ -201,7 +201,7 @@ class CmbBackend(BaseBackend, ICapBank): for tr in table.getiterator('tr'): if (tr.get('class') != 'LnTit' and tr.get('class') != 'LnTot'): - operation = Operation(i) + operation = Transaction(i) td = tr.xpath('td') div = td[1].xpath('div') @@ -210,7 +210,7 @@ class CmbBackend(BaseBackend, ICapBank): div = td[2].xpath('div') label = div[0].xpath('a')[0].text.replace('\n','') - operation.label = unicode(' '.join(label.split())) + operation.raw = unicode(' '.join(label.split())) amount = td[3].text if amount.count(',') != 1: diff --git a/modules/cragr/pages/accounts_list.py b/modules/cragr/pages/accounts_list.py index edab3d2d..b22718d5 100644 --- a/modules/cragr/pages/accounts_list.py +++ b/modules/cragr/pages/accounts_list.py @@ -21,7 +21,7 @@ import re from weboob.capabilities.bank import Account from .base import CragrBasePage -from weboob.capabilities.bank import Operation +from weboob.capabilities.bank import Transaction def clean_amount(amount): """ @@ -187,7 +187,7 @@ class AccountsList(CragrBasePage): Returns the history of a specific account. Note that this function expects the current page to be the one dedicated to this history. start_index is the id used for the first created operation. - start_offset allows ignoring the `n' first Operations on the page. + start_offset allows ignoring the `n' first Transactions on the page. """ # tested on CA Lorraine, Paris, Toulouse # avoir parsing the page as an account-dedicated page if it is not the case @@ -244,17 +244,17 @@ class AccountsList(CragrBasePage): if skipped < start_offset: skipped += 1 continue - operation = Operation(index) + operation = Transaction(index) index += 1 operation.date = self.extract_text(line[0]) - operation.label = self.extract_text(line[1]) + operation.raw = self.extract_text(line[1]) operation.amount = clean_amount(self.extract_text(line[2])) yield operation elif (not alternate_layout): for body_elmt in interesting_divs: if skipped < start_offset: if self.is_right_aligned_div(body_elmt): - skipped += 1 + skipped += 1 continue if (self.is_right_aligned_div(body_elmt)): # this is the second line of an operation entry, displaying the amount @@ -264,20 +264,20 @@ class AccountsList(CragrBasePage): # this is the first line of an operation entry, displaying the date and label data = self.extract_text(body_elmt) matches = re.findall('^([012][0-9]|3[01])/(0[1-9]|1[012]).(.+)$', data) - operation = Operation(index) + operation = Transaction(index) index += 1 if (matches): operation.date = u'%s/%s' % (matches[0][0], matches[0][1]) - operation.label = u'%s' % matches[0][2] + operation.raw = u'%s' % matches[0][2] else: operation.date = u'01/01' - operation.label = u'Unknown' + operation.raw = u'Unknown' else: for i in range(0, len(interesting_divs)/3): if skipped < start_offset: skipped += 1 continue - operation = Operation(index) + operation = Transaction(index) index += 1 # amount operation.amount = clean_amount(self.extract_text(interesting_divs[(i*3)+1])) @@ -288,5 +288,5 @@ class AccountsList(CragrBasePage): #label data = self.extract_text(interesting_divs[(i*3)+2]) data = re.sub(' +', ' ', data) - operation.label = u'%s' % data + operation.raw = u'%s' % data yield operation diff --git a/modules/creditmutuel/pages.py b/modules/creditmutuel/pages.py index d050299b..211b0de8 100644 --- a/modules/creditmutuel/pages.py +++ b/modules/creditmutuel/pages.py @@ -20,7 +20,7 @@ from weboob.tools.browser import BasePage from weboob.capabilities.bank import Account -from weboob.capabilities.bank import Operation +from weboob.capabilities.bank import Transaction class LoginPage(BasePage): def login(self, login, passwd): @@ -44,7 +44,7 @@ class UserSpacePage(BasePage): class AccountsPage(BasePage): def get_list(self): l = [] - + for tr in self.document.getiterator('tr'): first_td = tr.getchildren()[0] if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g': @@ -76,10 +76,10 @@ class OperationsPage(BasePage): for tr in self.document.getiterator('tr'): first_td = tr.getchildren()[0] if first_td.attrib.get('class', '') == 'i g' or first_td.attrib.get('class', '') == 'p g': - operation = Operation(index) + operation = Transaction(index) index += 1 operation.date = first_td.text - operation.label = u"%s"%tr.getchildren()[2].text.replace('\n',' ') + operation.raw = u"%s"%tr.getchildren()[2].text.replace('\n',' ') if len(tr.getchildren()[3].text) > 2: s = tr.getchildren()[3].text elif len(tr.getchildren()[4].text) > 2: diff --git a/modules/hsbc/pages/accounts.py b/modules/hsbc/pages/accounts.py index 6f4c887f..1e5c5ec8 100644 --- a/modules/hsbc/pages/accounts.py +++ b/modules/hsbc/pages/accounts.py @@ -22,7 +22,7 @@ import re from datetime import date from weboob.tools.browser import BasePage -from weboob.capabilities.bank import Account, Operation +from weboob.capabilities.bank import Account, Transaction from weboob.capabilities.base import NotAvailable @@ -52,14 +52,31 @@ class AccountsListPage(BasePage): yield account class HistoryPage(BasePage): + LABEL_PATTERNS = [(re.compile('^VIR(EMENT)? (?P.*)'), Transaction.TYPE_TRANSFER, '%(text)s'), + (re.compile('^PRLV (?P.*)'), Transaction.TYPE_ORDER, '%(text)s'), + (re.compile('^CB (?P.*)\s+(?P
\d+)/(?P\d+)\s*(?P.*)'), + Transaction.TYPE_CARD, '%(mm)s/%(dd)s: %(text)s'), + (re.compile('^DAB (?P.*)'), Transaction.TYPE_WITHDRAWAL, '%(text)s'), + (re.compile('^CHEQUE$'), Transaction.TYPE_CHECK, 'CHEQUE'), + (re.compile('^COTIS\.? (?P.*)'), Transaction.TYPE_BANK, '%(text)s'), + (re.compile('^REMISE (?P.*)'), Transaction.TYPE_DEPOSIT, '%(text)s'), + ] + def get_operations(self): for script in self.document.getiterator('script'): if script.text is None or script.text.find('\nCL(0') < 0: continue for m in re.finditer(r"CL\((\d+),'(.+)','(.+)','(.+)','([\d -\.,]+)','([\d -\.,]+)','\d+','\d+','[\w\s]+'\);", script.text, flags=re.MULTILINE): - op = Operation(m.group(1)) - op.label = m.group(4) + op = Transaction(m.group(1)) + op.raw = m.group(4) + for pattern, _type, _label in self.LABEL_PATTERNS: + mm = pattern.match(op.raw) + if mm: + op.type = _type + op.label = re.sub('[ ]+', ' ', _label % mm.groupdict()).strip() + break + op.amount = float(m.group(5).replace('.','').replace(',','.').replace(' ', '').strip(u' \t\u20ac\xa0€\n\r')) op.date = date(*reversed([int(x) for x in m.group(3).split('/')])) op.category = NotAvailable diff --git a/modules/ing/pages/account_history.py b/modules/ing/pages/account_history.py index 64fd70a6..3759f6ad 100644 --- a/modules/ing/pages/account_history.py +++ b/modules/ing/pages/account_history.py @@ -21,7 +21,7 @@ from datetime import date from weboob.tools.browser import BasePage -from weboob.capabilities.bank import Operation +from weboob.capabilities.bank import Transaction from weboob.capabilities.base import NotAvailable __all__ = ['AccountHistoryCC', 'AccountHistoryLA'] @@ -36,7 +36,7 @@ class AccountHistoryCC(BasePage): for tr in table.xpath('tr'): id = i texte = tr.text_content().split('\n') - op = Operation(id) + op = Transaction(id) op.label = texte[2] op.date = date(*reversed([int(x) for x in texte[0].split('/')])) op.category = texte[4] @@ -50,30 +50,30 @@ class AccountHistoryCC(BasePage): def get_operations(self): return self.operations -class AccountHistoryLA(BasePage): - +class AccountHistoryLA(BasePage): + def on_loaded(self): self.operations = [] - i = 1 + i = 1 history = self.document.xpath('//tr[@align="center"]') history.pop(0) for tr in history: id = i texte = tr.text_content().strip().split('\n') - op = Operation(id) + op = Transaction(id) # The size is not the same if there are two dates or only one length = len(texte) - op.label = unicode(texte[length - 2].strip()) + op.raw = unicode(texte[length - 2].strip()) op.date = date(*reversed([int(x) for x in texte[0].split('/')])) op.category = NotAvailable - + amount = texte[length - 1].replace('\t','').strip().replace('.', '').replace(u'€', '').replace(',', '.').replace(u'\xa0', u'') op.amount = float(amount) self.operations.append(op) i += 1 - + def get_operations(self): return self.operations diff --git a/modules/lcl/pages.py b/modules/lcl/pages.py index 834e19ba..237ac452 100644 --- a/modules/lcl/pages.py +++ b/modules/lcl/pages.py @@ -19,7 +19,7 @@ import base64 from datetime import date -from weboob.capabilities.bank import Operation +from weboob.capabilities.bank import Transaction from weboob.capabilities.bank import Account from weboob.tools.browser import BasePage, BrowserUnavailable from weboob.tools.captcha.virtkeyboard import MappedVirtKeyboard, VirtKeyboardError @@ -174,7 +174,7 @@ class AccountHistoryPage(BasePage): if len(tr.findall("th"))!=0 or\ len(tr.findall("td"))==0: continue - operation=Operation(len(operations)) + operation=Transaction(len(operations)) mntColumn=0 for td in tr.iter('td'): value=td.attrib.get('id') @@ -184,7 +184,7 @@ class AccountHistoryPage(BasePage): operation.date=date(*reversed([int(x) for x in td.text.split('/')])) elif value.startswith("lib") or value.startswith("opLib"): # misclosed A tag requires to grab text from td - operation.label=u''.join([txt.strip() for txt in td.itertext()]) + operation.raw=u''.join([txt.strip() for txt in td.itertext()]) elif value.startswith("solde") or value.startswith("mnt"): mntColumn+=1 if td.text.strip() != "": diff --git a/weboob/capabilities/bank.py b/weboob/capabilities/bank.py index 84478912..aa0d05e6 100644 --- a/weboob/capabilities/bank.py +++ b/weboob/capabilities/bank.py @@ -68,7 +68,7 @@ class Transaction(CapBaseObject): CapBaseObject.__init__(self, id) self.add_field('date', (basestring, datetime, date)) self.add_field('type', int, self.TYPE_UNKNOWN) - self.add_field('text', unicode) + self.add_field('raw', unicode) self.add_field('category', unicode) self.add_field('label', unicode) self.add_field('amount', float)