diff --git a/modules/boursorama/browser.py b/modules/boursorama/browser.py
index 72e462a1..a50435fe 100644
--- a/modules/boursorama/browser.py
+++ b/modules/boursorama/browser.py
@@ -20,10 +20,14 @@
 # along with weboob. If not, see .
 
 
+import re
+from collections import defaultdict
+
 from weboob.deprecated.browser import Browser, BrowserIncorrectPassword
 from weboob.capabilities.bank import Account
 
-from .pages import LoginPage, AccountsList, AccountHistory, CardHistory, UpdateInfoPage, AuthenticationPage
+from .pages import (LoginPage, AccountsList, AccountHistory, CardHistory, UpdateInfoPage,
+                    AuthenticationPage, AccountInvestment, InvestmentDetail)
 
 
 __all__ = ['Boursorama']
@@ -36,7 +40,8 @@ class BrowserIncorrectAuthenticationCode(BrowserIncorrectPassword):
 class Boursorama(Browser):
     DOMAIN = 'www.boursorama.com'
     PROTOCOL = 'https'
-    CERTHASH = ['6bdf8b6dd177bd417ddcb1cfb818ede153288e44115eb269f2ddd458c8461039', 'b290ef629c88f0508e9cc6305421c173bd4291175e3ddedbee05ee666b34c20e']
+    CERTHASH = ['6bdf8b6dd177bd417ddcb1cfb818ede153288e44115eb269f2ddd458c8461039',
+                'b290ef629c88f0508e9cc6305421c173bd4291175e3ddedbee05ee666b34c20e']
     ENCODING = None # refer to the HTML encoding
     PAGES = {
              '.*/connexion/securisation/index.phtml': AuthenticationPage,
@@ -46,6 +51,8 @@ class Boursorama(Browser):
              '.*/comptes/banque/cartes/mouvements.phtml.*': CardHistory,
              '.*/comptes/epargne/mouvements.phtml.*': AccountHistory,
              '.*/date_anniversaire.phtml.*': UpdateInfoPage,
+             '.*/detail.phtml.*': AccountInvestment,
+             '.*/opcvm.phtml.*': InvestmentDetail,
             }
 
     def __init__(self, device="weboob", enable_twofactors=False,
@@ -136,5 +143,42 @@ class Boursorama(Browser):
 
             link = self.page.get_next_url()
 
+    def get_investment(self, account):
+        """Yield the investments held in a market account.
+
+        Investments left without an id get a synthetic one: 'XX' followed
+        by a slug of their label.  This is a generator, so
+        NotImplementedError (account not of TYPE_MARKET, or without a
+        detail URL) is only raised on first iteration.
+        """
+        if account.type != Account.TYPE_MARKET or not getattr(account, '_detail_url', None):
+            raise NotImplementedError()
+        self.location(account._detail_url)
+
+        # Number of times each slug has been handed out so far.
+        seen = defaultdict(int)
+
+        def slugify(label):
+            label = label.upper().replace('FONDS EN EUROS (', '')[:12]
+            slug = re.sub(r'[^A-Za-z0-9]', ' ', label).strip()
+            slug = re.sub(r'\s+', '-', slug)
+            # Test and count under one key, the slug; reading a
+            # defaultdict under a second key would always give 0.
+            count = seen[slug]
+            seen[slug] += 1
+            if count:
+                # Overwrite the tail so that the id keeps its length.
+                suffix = str(count)
+                slug = slug[:-len(suffix)] + suffix
+            return slug
+
+        for inv in self.page.get_investment():
+            if inv._detail_url:
+                self.location(inv._detail_url)
+                self.page.get_investment_detail(inv)
+            if not inv.id:
+                inv.id = inv.code = 'XX' + slugify(inv.label)
+            yield inv
+
     def transfer(self, from_id, to_id, amount, reason=None):
         raise NotImplementedError()
diff --git a/modules/boursorama/module.py b/modules/boursorama/module.py
index a79331e7..9cb12ffb 100644
--- a/modules/boursorama/module.py
+++ b/modules/boursorama/module.py
@@ -70,6 +70,12 @@ class BoursoramaModule(Module, CapBank):
             for history in self.browser.get_history(account):
                 yield history
 
+    def iter_investment(self, account):
+        """Yield the investments the browser reports for *account*."""
+        with self.browser:
+            for investment in self.browser.get_investment(account):
+                yield investment
+
     # TODO
     #def iter_coming(self, account):
     #    with self.browser:
diff --git a/modules/boursorama/pages/__init__.py b/modules/boursorama/pages/__init__.py
index 44da434a..d9195d15 100644
--- a/modules/boursorama/pages/__init__.py
+++ b/modules/boursorama/pages/__init__.py
@@ -23,8 +23,8 @@ from .account_history import AccountHistory
 from .card_history import CardHistory
 from .accounts_list import AccountsList
 from .login import LoginPage, UpdateInfoPage
-
 from .two_authentication import AuthenticationPage
+from .investment import AccountInvestment, InvestmentDetail
 
 
 class AccountPrelevement(AccountsList):
@@ -36,4 +36,6 @@ __all__ = ['LoginPage',
            'CardHistory',
            'UpdateInfoPage',
            'AuthenticationPage',
+           'AccountInvestment',
+           'InvestmentDetail',
           ]
diff --git a/modules/boursorama/pages/accounts_list.py b/modules/boursorama/pages/accounts_list.py
index 04a98bf4..c1515532 100644
--- a/modules/boursorama/pages/accounts_list.py
+++ b/modules/boursorama/pages/accounts_list.py
@@ -33,12 +33,16 @@ class AccountsList(Page):
     def get_list(self):
         blocks = self.document.xpath('//div[@id="synthese-list"]//div[@class="block"]')
         for div in blocks:
+            block_title = ''.join(div.xpath('.//span[@class="title"]//text()')).lower()
             for tr in div.getiterator('tr'):
                 account = Account()
                 account.id = None
                 account._link_id = None
+                if 'assurance vie' in block_title:
+                    # Life insurance accounts are investments
+                    account.type = Account.TYPE_MARKET
                 for td in tr.getiterator('td'):
-                    if td.attrib.get('class', '') == 'account-cb':
+                    if td.get('class', '') == 'account-cb':
                         try:
                             a = td.xpath('./*/a[@class="gras"]')[0]
                         except IndexError:
@@ -47,11 +51,11 @@ class AccountsList(Page):
                         account.type = Account.TYPE_CARD
                         account.label = self.parser.tocleanstring(a)
                         try:
-                            account._link_id = td.xpath('.//a')[0].attrib['href']
+                            account._link_id = td.xpath('.//a')[0].get('href')
                         except KeyError:
                             pass
 
-                    elif td.attrib.get('class', '') == 'account-name':
+                    elif td.get('class', '') == 'account-name':
                         try:
                             span = td.xpath('./span[@class="label"]')[0]
                         except IndexError:
@@ -59,23 +63,26 @@ class AccountsList(Page):
                             break
                         account.label = self.parser.tocleanstring(span)
                         try:
-                            account._link_id = td.xpath('.//a')[0].attrib['href']
+                            account._link_id = td.xpath('.//a')[0].get('href')
+                            account._detail_url = account._link_id
                         except KeyError:
                             pass
 
-                    elif td.attrib.get('class', '') == 'account-more-actions':
+                    elif td.get('class', '') == 'account-more-actions':
                         for a in td.getiterator('a'):
                             # For normal account, two "account-more-actions"
                             # One for the credit card. Take the good one
-                            if "mouvements.phtml" in a.attrib['href'] and "/cartes/" not in a.attrib['href']:
-                                account._link_id = a.attrib['href']
+                            # An anchor without href must not break the `in` tests.
+                            href = a.get('href', '')
+                            if "mouvements.phtml" in href and "/cartes/" not in href:
+                                account._link_id = href
 
-                    elif td.attrib.get('class', '') == 'account-number':
+                    elif td.get('class', '') == 'account-number':
                         id = td.text
                         id = id.strip(u' \n\t')
                         account.id = id
 
-                    elif td.attrib.get('class', '') == 'account-total':
+                    elif td.get('class', '') == 'account-total':
                         span = td.find('span')
                         if span is None:
                             balance = td.text
diff --git a/modules/boursorama/pages/investment.py b/modules/boursorama/pages/investment.py
new file mode 100644
index 00000000..2c6a08b3
--- /dev/null
+++ b/modules/boursorama/pages/investment.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2014 Ta mère la pute
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see .
+
+
+import re
+from lxml.etree import XPath
+
+from weboob.deprecated.browser import Page
+from weboob.capabilities.bank import Investment
+from weboob.browser.filters.standard import CleanDecimal
+
+
+_el_to_string = XPath('string()')
+
+
+def el_to_string(el):
+    """Return the XPath string() value of *el* as unicode."""
+    return unicode(_el_to_string(el))
+
+
+class IsinMixin(object):
+    """Add get_isin() to classes that define a compiled ``_re_isin``."""
+
+    def get_isin(self, s):
+        """Return group 1 of the first ``_re_isin`` match in *s*, else None."""
+        mobj = self._re_isin.search(s)
+        if mobj:
+            return mobj.group(1)
+        return None
+
+
+class AccountInvestment(IsinMixin, Page):
+    """Page whose ``table.list`` rows each describe one investment."""
+
+    _re_isin = re.compile(r'isin=(\w+)')
+    _tr_list = XPath('//div[@id="content-gauche"]//table[@class="list"]/tbody/tr')
+    _td_list = XPath('./td')
+    _link = XPath('./td[1]/a/@href')
+
+    def get_investment(self):
+        """Yield one Investment per table row.
+
+        Boursorama table cells    Investment field
+        ----------------------    ----------------
+        0. Fonds                  label
+        1. Date de valeur         (unused)
+        2. Valeur de part         unitvalue
+        3. Nombre de parts        quantity
+        4. Contre valeur          valuation
+        5. Prix revient           unitprice
+        6. +/- value en €*        diff
+        7. +/- value en %*        (unused)
+
+        id and code are set only when the first-cell link carries an
+        ``isin=`` parameter; _detail_url only for '/cours.phtml' links.
+        """
+        to_decimal = CleanDecimal(replace_dots=True).filter
+
+        for tr in self._tr_list(self.document):
+            cells = [el_to_string(td) for td in self._td_list(tr)]
+            # A row without a link in its first cell has no ISIN either.
+            links = self._link(tr)
+            link = unicode(links[0]) if links else u''
+
+            inv = Investment()
+            isin = self.get_isin(link)
+            if isin:
+                inv.id = inv.code = isin
+            inv.label = cells[0]
+            inv.quantity = to_decimal(cells[3])
+            inv.valuation = to_decimal(cells[4])
+            inv.unitprice = to_decimal(cells[5])
+            inv.unitvalue = to_decimal(cells[2])
+            inv.diff = to_decimal(cells[6])
+            inv._detail_url = link if '/cours.phtml' in link else None
+
+            yield inv
+
+
+class InvestmentDetail(IsinMixin, Page):
+    """Page with an ``h2.fv-isin`` heading and a ``p.taj`` description."""
+
+    _re_isin = re.compile(r'(\w+)')
+    _isin = XPath('//h2[@class and contains(concat(" ", normalize-space(@class), " "), " fv-isin ")]')
+    _description = XPath('//p[@class="taj"]')
+
+    def get_investment_detail(self, inv):
+        """Fill inv.id/inv.code (when found) and inv.description."""
+        subtitle = el_to_string(self._isin(self.document)[0])
+        isin = self.get_isin(subtitle)
+        if isin:  # never clobber an id already set with None
+            inv.id = inv.code = isin
+        inv.description = el_to_string(self._description(self.document)[0]).strip()