add investment to boursorama bank module

2014-10-23 13:30:02 +02:00 · 2014-10-23 13:30:02 +02:00 · 8672a6b443
commit 8672a6b443
parent 56e9d53adb
5 changed files with 164 additions and 12 deletions
--- a/modules/boursorama/browser.py
+++ b/modules/boursorama/browser.py
@ -20,10 +20,14 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


+import re
+from collections import defaultdict
+
 from weboob.deprecated.browser import Browser, BrowserIncorrectPassword
 from weboob.capabilities.bank import Account

-from .pages import LoginPage, AccountsList, AccountHistory, CardHistory, UpdateInfoPage, AuthenticationPage
+from .pages import (LoginPage, AccountsList, AccountHistory, CardHistory, UpdateInfoPage,
+                    AuthenticationPage, AccountInvestment, InvestmentDetail)


 __all__ = ['Boursorama']
@ -36,7 +40,8 @@ class BrowserIncorrectAuthenticationCode(BrowserIncorrectPassword):
 class Boursorama(Browser):
    DOMAIN = 'www.boursorama.com'
    PROTOCOL = 'https'
-    CERTHASH = ['6bdf8b6dd177bd417ddcb1cfb818ede153288e44115eb269f2ddd458c8461039', 'b290ef629c88f0508e9cc6305421c173bd4291175e3ddedbee05ee666b34c20e']
+    CERTHASH = ['6bdf8b6dd177bd417ddcb1cfb818ede153288e44115eb269f2ddd458c8461039',
+                'b290ef629c88f0508e9cc6305421c173bd4291175e3ddedbee05ee666b34c20e']
    ENCODING = None  # refer to the HTML encoding
    PAGES = {
             '.*/connexion/securisation/index.phtml': AuthenticationPage,
@ -46,6 +51,8 @@ class Boursorama(Browser):
             '.*/comptes/banque/cartes/mouvements.phtml.*': CardHistory,
             '.*/comptes/epargne/mouvements.phtml.*': AccountHistory,
             '.*/date_anniversaire.phtml.*':    UpdateInfoPage,
+             '.*/detail.phtml.*': AccountInvestment,
+             '.*/opcvm.phtml.*': InvestmentDetail
            }

    def __init__(self, device="weboob", enable_twofactors=False,
@ -136,5 +143,29 @@ class Boursorama(Browser):

            link = self.page.get_next_url()

+    def get_investment(self, account):
+        """Iterate over the investments held in a market-type account.
+
+        Loads the account detail page and yields every investment that page
+        lists. When an investment carries a detail URL, that page is loaded
+        as well and asked to complete the investment. Investments that still
+        have no id afterwards receive a synthetic one: 'XX' followed by a
+        slug built from their label.
+
+        :param account: account to inspect
+        :raises NotImplementedError: when the account is not of type
+                                     Account.TYPE_MARKET or has no detail URL
+        """
+        # The attribute may never have been set on this account object, so a
+        # missing attribute is treated like an empty one.
+        detail_url = getattr(account, '_detail_url', None)
+        if account.type != Account.TYPE_MARKET or not detail_url:
+            raise NotImplementedError()
+        self.location(detail_url)
+
+        # How many times each normalised, truncated label has been slugified.
+        seen = defaultdict(int)
+
+        def slugify(label):
+            """Build an id fragment from *label*, unique within this call."""
+            label = label.upper().replace('FONDS EN EUROS (', '')[:12]
+            slug = re.sub(r'[^A-Za-z0-9]', ' ', label).strip()
+            slug = re.sub(r'\s+', '-', slug)
+            # Read the counter under the key it is stored with. Reading it
+            # through the slug yielded 0 whenever slug and label differed,
+            # so repeated labels ended up sharing the same id.
+            occurrences = seen[label]
+            if occurrences:
+                counter = str(occurrences)
+                # Overwrite the tail so the slug keeps its length.
+                slug = slug[:-len(counter)] + counter
+            seen[label] = occurrences + 1
+            return slug
+
+        for inv in self.page.get_investment():
+            if inv._detail_url:
+                self.location(inv._detail_url)
+                self.page.get_investment_detail(inv)
+            if not inv.id:
+                inv.id = inv.code = 'XX' + slugify(inv.label)
+            yield inv
+
    def transfer(self, from_id, to_id, amount, reason=None):
        raise NotImplementedError()
--- a/modules/boursorama/module.py
+++ b/modules/boursorama/module.py
@ -70,6 +70,11 @@ class BoursoramaModule(Module, CapBank):
            for history in self.browser.get_history(account):
                yield history

+    def iter_investment(self, account):
+        """Yield each investment the browser reports for *account*.
+
+        The browser is held as a context manager for the whole iteration.
+        """
+        with self.browser:
+            investments = self.browser.get_investment(account)
+            for item in investments:
+                yield item
+
    # TODO
    #def iter_coming(self, account):
    #    with self.browser:
--- a/modules/boursorama/pages/init.py
+++ b/modules/boursorama/pages/init.py
@ -23,8 +23,8 @@ from .account_history import AccountHistory
 from .card_history import CardHistory
 from .accounts_list import AccountsList
 from .login import LoginPage, UpdateInfoPage
-
 from .two_authentication import AuthenticationPage
+from .investment import AccountInvestment, InvestmentDetail


 class AccountPrelevement(AccountsList):
@ -36,4 +36,6 @@ __all__ = ['LoginPage',
           'CardHistory',
           'UpdateInfoPage',
           'AuthenticationPage',
+           'AccountInvestment',
+           'InvestmentDetail',
          ]
--- a/modules/boursorama/pages/accounts_list.py
+++ b/modules/boursorama/pages/accounts_list.py
@ -33,12 +33,16 @@ class AccountsList(Page):
    def get_list(self):
        blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
        for div in blocks:
+            block_title = ''.join(div.xpath('.//span[@class="title"]//text()')).lower()
            for tr in div.getiterator('tr'):
                account = Account()
                account.id = None
                account._link_id = None
+                if 'assurance vie' in block_title:
+                    # Life insurance accounts are investments
+                    account.type = Account.TYPE_MARKET
                for td in tr.getiterator('td'):
-                    if td.attrib.get('class', '') == 'account-cb':
+                    if td.get('class', '') == 'account-cb':
                        try:
                            a = td.xpath('./*/a[@class="gras"]')[0]
                        except IndexError:
@ -47,11 +51,11 @@ class AccountsList(Page):
                        account.type = Account.TYPE_CARD
                        account.label = self.parser.tocleanstring(a)
                        try:
-                            account._link_id = td.xpath('.//a')[0].attrib['href']
+                            account._link_id = td.xpath('.//a')[0].get('href')
                        except KeyError:
                            pass

-                    elif td.attrib.get('class', '') == 'account-name':
+                    elif td.get('class', '') == 'account-name':
                        try:
                            span = td.xpath('./span[@class="label"]')[0]
                        except IndexError:
@ -59,23 +63,24 @@ class AccountsList(Page):
                            break
                        account.label = self.parser.tocleanstring(span)
                        try:
-                            account._link_id = td.xpath('.//a')[0].attrib['href']
+                            account._link_id = td.xpath('.//a')[0].get('href')
+                            account._detail_url = account._link_id
                        except KeyError:
                            pass

-                    elif td.attrib.get('class', '') == 'account-more-actions':
+                    elif td.get('class', '') == 'account-more-actions':
                        for a in td.getiterator('a'):
                            # For normal account, two "account-more-actions"
                            # One for the account, one for the credit card. Take the good one
-                            if "mouvements.phtml" in a.attrib['href'] and "/cartes/" not in a.attrib['href']:
-                                account._link_id = a.attrib['href']
+                            if "mouvements.phtml" in a.get('href') and "/cartes/" not in a.get('href'):
+                                account._link_id = a.get('href')

-                    elif td.attrib.get('class', '') == 'account-number':
+                    elif td.get('class', '') == 'account-number':
                        id = td.text
                        id = id.strip(u' \n\t')
                        account.id = id

-                    elif td.attrib.get('class', '') == 'account-total':
+                    elif td.get('class', '') == 'account-total':
                        span = td.find('span')
                        if span is None:
                            balance = td.text
--- a/modules/boursorama/pages/investment.py
+++ b/modules/boursorama/pages/investment.py
@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014       Ta mère la pute
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import re
+from lxml.etree import XPath
+
+from weboob.deprecated.browser import Page
+from weboob.capabilities.bank import Investment
+from weboob.browser.filters.standard import CleanDecimal
+
+
+# Compiled once at import time: XPath string() value of a node.
+_el_to_string = XPath('string()')
+
+
+def el_to_string(el):
+    """Return the XPath ``string()`` value of *el* as a unicode string."""
+    text = _el_to_string(el)
+    return unicode(text)
+
+
+class IsinMixin(object):
+    """Mixin giving a page a helper to pull an ISIN out of a string.
+
+    The host class must define ``_re_isin``, a compiled regular expression
+    whose first group captures the ISIN.
+    """
+
+    def get_isin(self, s):
+        """Return group 1 of the first ``_re_isin`` match in *s*, or None."""
+        found = self._re_isin.search(s)
+        return found.group(1) if found else None
+
+
+class AccountInvestment(IsinMixin, Page):
+    """Account detail page listing the investments held in the account."""
+
+    # The ISIN is carried as a query parameter of the fund link.
+    _re_isin = re.compile(r'isin=(\w+)')
+    _tr_list = XPath('//div[@id="content-gauche"]//table[@class="list"]/tbody/tr')
+    _td_list = XPath('./td')
+    _link = XPath('./td[1]/a/@href')
+
+    def get_investment(self):
+        """Yield one Investment per row of the investments table.
+
+        The id and code are only set when the row link carries an ISIN;
+        ``_detail_url`` is set to the link when it points to a
+        '/cours.phtml' page and to None otherwise.
+        """
+        # Boursorama table cells
+        # ----------------------
+        #
+        # 0. Fonds
+        # 1. Date de valeur
+        # 2. Valeur de part
+        # 3. Nombre de parts
+        # 4. Contre valeur
+        # 5. Prix revient
+        # 6. +/- value en €*
+        # 7. +/- value en %*
+        #
+        # Investment model
+        # ----------------
+        #
+        # label =       StringField('Label of stocks')
+        # code =        StringField('Short code identifier of the stock')
+        # description = StringField('Description of the stock')
+        # quantity =    IntField('Quantity of stocks')
+        # unitprice =   DecimalField('Buy price of one stock')
+        # unitvalue =   DecimalField('Current value of one stock')
+        # valuation =   DecimalField('Total current valuation of the Investment')
+        # diff =        DecimalField('Difference between the buy cost and the current valuation')
+        Decimal = CleanDecimal(replace_dots=True).filter
+
+        for tr in self._tr_list(self.document):
+            cells = [el_to_string(td) for td in self._td_list(tr)]
+
+            # A row may come without a link in its first cell (presumably
+            # funds that have no quote page -- to be confirmed). Fall back to
+            # an empty link so the row is still reported instead of aborting
+            # the whole listing with an IndexError.
+            hrefs = self._link(tr)
+            link = unicode(hrefs[0]) if hrefs else u''
+
+            inv = Investment()
+            isin = self.get_isin(link)
+
+            if isin:
+                inv.id = inv.code = isin
+            inv.label = cells[0]
+            inv.quantity = Decimal(cells[3])
+            inv.valuation = Decimal(cells[4])
+            inv.unitprice = Decimal(cells[5])
+            inv.unitvalue = Decimal(cells[2])
+            inv.diff = Decimal(cells[6])
+
+            inv._detail_url = link if '/cours.phtml' in link else None
+
+            yield inv
+
+
+class InvestmentDetail(IsinMixin, Page):
+    """Detail page of a single fund, read for its ISIN and description."""
+
+    # First run of word characters found in the ISIN heading. Written as a
+    # raw string so the backslash is never treated as a string escape.
+    _re_isin = re.compile(r'(\w+)')
+    _isin = XPath('//h2[@class and contains(concat(" ", normalize-space(@class), " "), " fv-isin ")]')
+    _description = XPath('//p[@class="taj"]')
+
+    def get_investment_detail(self, inv):
+        """Complete *inv* with the ISIN and description shown on this page.
+
+        Fields are only overwritten when the page actually provides them, so
+        an id already set on *inv* survives a page without an ISIN heading.
+
+        :param inv: investment object to update in place
+        """
+        headings = self._isin(self.document)
+        if headings:
+            isin = self.get_isin(el_to_string(headings[0]))
+            # Do not replace an existing id with None when nothing matches.
+            if isin:
+                inv.id = inv.code = isin
+
+        paragraphs = self._description(self.document)
+        if paragraphs:
+            inv.description = el_to_string(paragraphs[0]).strip()