support deferred cards

This commit is contained in:
Romain Bignon 2013-02-08 14:04:25 +01:00
commit a2fcc209f1
3 changed files with 109 additions and 25 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon # Copyright(C) 2012-2013 Romain Bignon
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -58,12 +58,13 @@ class HSBCBackend(BaseBackend, ICapBank):
def iter_history(self, account):
    """Iterate over the already-booked transactions of *account*.

    Transactions flagged ``_coming`` are never yielded here (they are
    served by ``iter_coming``).  In addition, when the account has
    deferred cards attached (``account._card_links`` is not empty), the
    lines whose raw label starts with ``'FACTURES CB'`` are dropped.

    The previous condition,
    ``not (raw.startswith('FACTURES CB') or len(_card_links) == 0)``,
    rejected *every* transaction of an account without deferred cards;
    the invoice lines must only be stripped when cards are present.

    :param account: account object carrying ``_card_links`` and accepted
                    by ``self.browser.get_history``
    :return: generator of transaction objects
    """
    with self.browser:
        for tr in self.browser.get_history(account):
            if tr._coming:
                continue
            # If there are deferred cards, strip CB invoices.
            if tr.raw.startswith('FACTURES CB') and account._card_links:
                continue
            yield tr
def iter_coming(self, account):
    """Iterate over the transactions of *account* flagged as coming.

    Every transaction returned by ``self.browser.get_history(account)``
    whose ``_coming`` attribute is true is yielded, in the order the
    browser returns them.

    :param account: account object accepted by the browser
    :return: generator of transaction objects
    """
    with self.browser:
        for operation in self.browser.get_history(account):
            if not operation._coming:
                continue
            yield operation

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon # Copyright(C) 2012-2013 Romain Bignon
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -18,11 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from datetime import timedelta
import urllib import urllib
import re import re
from weboob.tools.date import LinearDateGuesser
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BasePage, BrokenPageError from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BasePage, BrokenPageError
from .pages.accounts import AccountsListPage, HistoryPage from .pages.accounts import AccountsListPage, CPTHistoryPage, CardHistoryPage
__all__ = ['HSBC'] __all__ = ['HSBC']
@ -38,7 +40,8 @@ class HSBC(BaseBrowser):
ENCODING = None # refer to the HTML encoding ENCODING = None # refer to the HTML encoding
PAGES = {'https://client.hsbc.fr/session_absente.html': NotLoggedPage, PAGES = {'https://client.hsbc.fr/session_absente.html': NotLoggedPage,
'https://client.hsbc.fr/cgi-bin/emcgi\?.*debr=COMPTES_PAN': AccountsListPage, 'https://client.hsbc.fr/cgi-bin/emcgi\?.*debr=COMPTES_PAN': AccountsListPage,
'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': HistoryPage 'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': CPTHistoryPage,
'https://client.hsbc.fr/cgi-bin/emcgi\?.*CB_IdPrestation=.*': CardHistoryPage,
} }
_session = None _session = None
@ -91,9 +94,28 @@ class HSBC(BaseBrowser):
return None return None
def get_history(self, account):
    """Yield every transaction of *account*, cards included.

    Nothing is yielded when ``account._link_id`` is ``None``.  Otherwise
    the operations found behind ``account._link_id`` come first, followed
    by those of each link in ``account._card_links``, in list order.

    :param account: account object carrying ``_link_id`` and
                    ``_card_links``
    :return: generator of transaction objects
    """
    if account._link_id is None:
        return

    # Operations of the account itself.
    for operation in self._get_history(account._link_id):
        yield operation

    # Then the operations of each attached card.
    for card_link in account._card_links:
        for operation in self._get_history(card_link):
            yield operation
def _get_history(self, link):
    """Yield the operations of a paginated history starting at *link*.

    Each page is opened with ``self.location`` and asked for its
    operations, then for the link of the next page; iteration stops when
    that link is ``None`` or when no page object is available after
    navigation.  The 0-based index of the visited page and a single
    ``LinearDateGuesser`` shared by all pages are handed to
    ``get_operations``.

    :param link: URL of the first history page, or ``None``
    :return: generator of transaction objects
    """
    # NOTE(review): 45 is a number of days (timedelta's first positional
    # argument); the exact meaning of date_max_bump is defined by
    # LinearDateGuesser -- confirm there.
    guesser = LinearDateGuesser(date_max_bump=timedelta(45))
    num_page = 0

    while link is not None:
        self.location(link)

        current_page = self.page
        if current_page is None:
            return

        for operation in current_page.get_operations(num_page, guesser):
            yield operation

        # self.page is read again on purpose, as the original code did.
        link = self.page.get_next_link()
        num_page += 1

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon # Copyright(C) 2012-2013 Romain Bignon
# #
# This file is part of weboob. # This file is part of weboob.
# #
@ -23,15 +23,15 @@ import re
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.capabilities.bank import Account from weboob.capabilities.bank import Account
from weboob.capabilities import NotAvailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction from weboob.tools.capabilities.bank.transactions import FrenchTransaction
__all__ = ['AccountsListPage'] __all__ = ['AccountsListPage', 'CPTHistoryPage', 'CardHistoryPage']
class AccountsListPage(BasePage): class AccountsListPage(BasePage):
def get_list(self): def get_list(self):
accounts = []
for tr in self.document.getiterator('tr'): for tr in self.document.getiterator('tr'):
tds = tr.findall('td') tds = tr.findall('td')
if len(tds) != 3 or tds[0].find('a') is None or tds[0].find('a').attrib.get('class', '') != 'flecheM': if len(tds) != 3 or tds[0].find('a') is None or tds[0].find('a').attrib.get('class', '') != 'flecheM':
@ -41,21 +41,33 @@ class AccountsListPage(BasePage):
account.id = tds[1].text.strip() account.id = tds[1].text.strip()
a = tds[0].findall('a')[-1] a = tds[0].findall('a')[-1]
account.label = a.text.strip() account.label = unicode(a.text.strip())
account._link_id = a.attrib['href'] account._link_id = a.attrib['href']
m = re.search('(\w+)_IdPrestation', account._link_id)
if not m or m.group(1) != 'CPT':
account._link_id = None
if m:
account.id += '.%s' % m.group(1)
balance = u''.join([txt.strip() for txt in tds[2].itertext()]) balance = u''.join([txt.strip() for txt in tds[2].itertext()])
account.balance = Decimal(FrenchTransaction.clean_amount(balance)) account.balance = Decimal(FrenchTransaction.clean_amount(balance))
account.currency = account.get_currency(tds[1].text)
account.coming = NotAvailable
yield account # check account type
m = re.search('(\w+)_IdPrestation', account._link_id)
account_type = None
if m:
account_type = m.group(1)
if account_type != 'CPT':
account.id += '.%s' % account_type
if account_type == 'CB':
accounts[0]._card_links.append(account._link_id)
if not accounts[0].coming:
accounts[0].coming = Decimal('0.0')
accounts[0].coming += account.balance
continue
account.currency = account.get_currency(tds[1].text)
account._card_links = []
accounts.append(account)
return iter(accounts)
class Transaction(FrenchTransaction): class Transaction(FrenchTransaction):
PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER), PATTERNS = [(re.compile('^VIR(EMENT)? (?P<text>.*)'), FrenchTransaction.TYPE_TRANSFER),
@ -70,7 +82,14 @@ class Transaction(FrenchTransaction):
] ]
class HistoryPage(BasePage):
    """Common base of the history pages.

    Subclasses must implement ``get_operations``; they may override
    ``get_next_link`` when the history spans several pages.
    """

    def get_next_link(self):
        """Return the link of the following page; ``None`` by default."""
        return None

    def get_operations(self, num_page, date_guesser):
        """Yield the operations listed on the page (abstract).

        :param num_page: index of the page in the pagination sequence
        :param date_guesser: helper used to complete partial dates
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()
class CPTHistoryPage(HistoryPage):
def get_operations(self, num_page, date_guesser):
for script in self.document.getiterator('script'): for script in self.document.getiterator('script'):
if script.text is None or script.text.find('\nCL(0') < 0: if script.text is None or script.text.find('\nCL(0') < 0:
continue continue
@ -81,3 +100,45 @@ class HistoryPage(BasePage):
op.set_amount(m.group(5)) op.set_amount(m.group(5))
op._coming = (re.match('\d+/\d+/\d+', m.group(2)) is None) op._coming = (re.match('\d+/\d+/\d+', m.group(2)) is None)
yield op yield op
class CardHistoryPage(HistoryPage):
    """History page of a card (URLs carrying ``CB_IdPrestation``)."""

    def get_next_link(self):
        """Return the URL of the page listed right after the current one.

        The ``a.fleche`` links of the ``FORM_LIB_CARTE`` form are scanned
        in document order.  A link whose ``href`` starts with ``#`` arms
        the search (presumably it marks the page being displayed -- to be
        confirmed against the site); the first other link met afterwards
        is returned.  ``None`` is returned when there is no such link.
        """
        current_seen = False
        for anchor in self.document.xpath('//form[@name="FORM_LIB_CARTE"]/a[@class="fleche"]'):
            if anchor.attrib['href'].startswith('#'):
                current_seen = True
                continue
            if current_seen:
                # add CB_IdPrestation to handle the correct page on browser.
                return anchor.attrib['href'] + '&CB_IdPrestation='
        return None

    def parse_date(self, guesser, string, store=False):
        """Turn a ``day/month`` string into a date through *guesser*.

        :param guesser: object exposing ``guess_date(day, month, store)``
        :param string: text made of two integers separated by ``/``
        :param store: forwarded unchanged to ``guess_date``
        :return: whatever ``guesser.guess_date`` returns
        """
        day, month = [int(part) for part in string.split('/')]
        return guesser.guess_date(day, month, store)

    def get_operations(self, num_page, date_guesser):
        """Yield the card operations listed in the ``tabs-1`` table.

        Rows with a single cell are section headers: when one contains a
        ``dd/mm`` pattern, it becomes the debit date of the operations
        that follow.  Rows with fewer than four cells are ignored.  Every
        other row produces a ``Transaction`` of type ``TYPE_CARD`` whose
        ``_coming`` flag is true only while ``num_page`` is still 0.

        :param num_page: index of the page in the pagination sequence
        :param date_guesser: guesser handed to ``parse_date``
        """
        debit_date = None
        for row in self.document.xpath('//div[@id="tabs-1"]/table//tr'):
            cells = row.findall('td')

            if len(cells) == 1:
                header = self.parser.tocleanstring(cells[0])
                found = re.search(r'(\d+/\d+)', header)
                if found:
                    # if there are several months on the same page, the second
                    # one's operations are already debited.
                    if debit_date is not None:
                        num_page += 1
                    debit_date = self.parse_date(date_guesser, found.group(1), True)
                continue

            if len(cells) < 4:
                continue

            op = Transaction('')
            op.parse(date=debit_date,
                     raw=self.parser.tocleanstring(cells[1]))
            op.rdate = self.parse_date(date_guesser, self.parser.tocleanstring(cells[0]))
            op.type = op.TYPE_CARD
            op._coming = (num_page == 0)
            op.set_amount(self.parser.tocleanstring(cells[-1]),
                          self.parser.tocleanstring(cells[-2]))
            yield op