diff --git a/modules/hsbc/backend.py b/modules/hsbc/backend.py
index 302d0180..0796064e 100644
--- a/modules/hsbc/backend.py
+++ b/modules/hsbc/backend.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
#
# This file is part of weboob.
#
@@ -58,12 +58,13 @@ class HSBCBackend(BaseBackend, ICapBank):
def iter_history(self, account):
with self.browser:
- for tr in self.browser.get_history(account._link_id):
- if not tr._coming:
+ for tr in self.browser.get_history(account):
+ # Drop the aggregated 'FACTURES CB' lines only when the account has deferred cards (their operations are yielded one by one from the card pages); without cards, keep every debited line.
+ if not tr._coming and not (tr.raw.startswith('FACTURES CB') and account._card_links):
yield tr
def iter_coming(self, account):
with self.browser:
- for tr in self.browser.get_history(account._link_id):
+ for tr in self.browser.get_history(account):
if tr._coming:
yield tr
diff --git a/modules/hsbc/browser.py b/modules/hsbc/browser.py
index 7cd93f8b..035ace1b 100644
--- a/modules/hsbc/browser.py
+++ b/modules/hsbc/browser.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
#
# This file is part of weboob.
#
@@ -18,11 +18,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+from datetime import timedelta
import urllib
import re
+from weboob.tools.date import LinearDateGuesser
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BasePage, BrokenPageError
-from .pages.accounts import AccountsListPage, HistoryPage
+from .pages.accounts import AccountsListPage, CPTHistoryPage, CardHistoryPage
__all__ = ['HSBC']
@@ -38,7 +40,8 @@ class HSBC(BaseBrowser):
ENCODING = None # refer to the HTML encoding
PAGES = {'https://client.hsbc.fr/session_absente.html': NotLoggedPage,
'https://client.hsbc.fr/cgi-bin/emcgi\?.*debr=COMPTES_PAN': AccountsListPage,
- 'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': HistoryPage
+ 'https://client.hsbc.fr/cgi-bin/emcgi\?.*CPT_IdPrestation=.*': CPTHistoryPage,
+ 'https://client.hsbc.fr/cgi-bin/emcgi\?.*CB_IdPrestation=.*': CardHistoryPage,
}
_session = None
@@ -91,9 +94,28 @@ class HSBC(BaseBrowser):
return None
- def get_history(self, link):
- if link is None:
- return iter([])
+ def get_history(self, account):
+ if account._link_id is None:
+ return
- self.location(link)
- return self.page.get_operations()
+ for tr in self._get_history(account._link_id):
+ yield tr
+
+ for card in account._card_links:
+ for tr in self._get_history(card):
+ yield tr
+
+ def _get_history(self, link):
+ num_page = 0
+ guesser = LinearDateGuesser(date_max_bump=timedelta(45))
+ while link is not None:
+ self.location(link)
+
+ if self.page is None:
+ return
+
+ for tr in self.page.get_operations(num_page, guesser):
+ yield tr
+
+ link = self.page.get_next_link()
+ num_page += 1
diff --git a/modules/hsbc/pages/accounts.py b/modules/hsbc/pages/accounts.py
index 6fb6cc4c..d40eec53 100644
--- a/modules/hsbc/pages/accounts.py
+++ b/modules/hsbc/pages/accounts.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2012 Romain Bignon
+# Copyright(C) 2012-2013 Romain Bignon
#
# This file is part of weboob.
#
@@ -23,15 +23,15 @@ import re
from weboob.tools.browser import BasePage
from weboob.capabilities.bank import Account
-from weboob.capabilities import NotAvailable
from weboob.tools.capabilities.bank.transactions import FrenchTransaction
-__all__ = ['AccountsListPage']
+__all__ = ['AccountsListPage', 'CPTHistoryPage', 'CardHistoryPage']
class AccountsListPage(BasePage):
def get_list(self):
+ accounts = []
for tr in self.document.getiterator('tr'):
tds = tr.findall('td')
if len(tds) != 3 or tds[0].find('a') is None or tds[0].find('a').attrib.get('class', '') != 'flecheM':
@@ -41,21 +41,33 @@ class AccountsListPage(BasePage):
account.id = tds[1].text.strip()
a = tds[0].findall('a')[-1]
- account.label = a.text.strip()
+ account.label = unicode(a.text.strip())
account._link_id = a.attrib['href']
- m = re.search('(\w+)_IdPrestation', account._link_id)
- if not m or m.group(1) != 'CPT':
- account._link_id = None
- if m:
- account.id += '.%s' % m.group(1)
-
balance = u''.join([txt.strip() for txt in tds[2].itertext()])
account.balance = Decimal(FrenchTransaction.clean_amount(balance))
- account.currency = account.get_currency(tds[1].text)
- account.coming = NotAvailable
- yield account
+ # check account type
+ m = re.search('(\w+)_IdPrestation', account._link_id)
+ account_type = None
+ if m:
+ account_type = m.group(1)
+ if account_type != 'CPT':
+ account.id += '.%s' % account_type
+
+ if account_type == 'CB':
+ accounts[0]._card_links.append(account._link_id)
+ if not accounts[0].coming:
+ accounts[0].coming = Decimal('0.0')
+ accounts[0].coming += account.balance
+ continue
+
+ account.currency = account.get_currency(tds[1].text)
+ account._card_links = []
+
+ accounts.append(account)
+
+ return iter(accounts)
class Transaction(FrenchTransaction):
PATTERNS = [(re.compile('^VIR(EMENT)? (?P.*)'), FrenchTransaction.TYPE_TRANSFER),
@@ -70,7 +82,14 @@ class Transaction(FrenchTransaction):
]
class HistoryPage(BasePage):
- def get_operations(self):
+ def get_next_link(self):
+ return None
+
+ def get_operations(self, num_page, date_guesser):
+ raise NotImplementedError()
+
+class CPTHistoryPage(HistoryPage):
+ def get_operations(self, num_page, date_guesser):
for script in self.document.getiterator('script'):
if script.text is None or script.text.find('\nCL(0') < 0:
continue
@@ -81,3 +100,45 @@ class HistoryPage(BasePage):
op.set_amount(m.group(5))
op._coming = (re.match('\d+/\d+/\d+', m.group(2)) is None)
yield op
+
+class CardHistoryPage(HistoryPage):
+ def get_next_link(self):
+ ok = False
+ for link in self.document.xpath('//form[@name="FORM_LIB_CARTE"]/a[@class="fleche"]'):
+ if link.attrib['href'].startswith('#'):
+ ok = True
+ elif ok:
+ # append an empty CB_IdPrestation parameter so the browser's PAGES pattern routes this URL to CardHistoryPage.
+ return link.attrib['href'] + '&CB_IdPrestation='
+
+ def parse_date(self, guesser, string, store=False):
+ day, month = map(int, string.split('/'))
+ return guesser.guess_date(day, month, store)
+
+ def get_operations(self, num_page, date_guesser):
+ debit_date = None
+ for tr in self.document.xpath('//div[@id="tabs-1"]/table//tr'):
+ cols = tr.findall('td')
+ if len(cols) == 1:
+ text = self.parser.tocleanstring(cols[0])
+ m = re.search('(\d+/\d+)', text)
+ if m:
+ # if there are several months on the same page, the second
+ # one's operations are already debited.
+ if debit_date is not None:
+ num_page += 1
+ debit_date = self.parse_date(date_guesser, m.group(1), True)
+ continue
+
+ if len(cols) < 4:
+ continue
+
+ op = Transaction('')
+ op.parse(date=debit_date,
+ raw=self.parser.tocleanstring(cols[1]))
+ op.rdate = self.parse_date(date_guesser, self.parser.tocleanstring(cols[0]))
+ op.type = op.TYPE_CARD
+ op._coming = (num_page == 0)
+ op.set_amount(self.parser.tocleanstring(cols[-1]),
+ self.parser.tocleanstring(cols[-2]))
+ yield op