From 79d1bcfea6cdf7dc5eac276f342a9e0abe70e292 Mon Sep 17 00:00:00 2001
From: Mathieu Jourdan <mathieu.jourdan@gresille.org>
Date: Wed, 8 May 2013 19:45:09 +0200
Subject: [PATCH] gdfsuez-dolcevita.fr website

Signed-off-by: Mathieu Jourdan <mathieu.jourdan@gresille.org>
Signed-off-by: Romain Bignon <romain@symlink.me>
---
 modules/gdfsuez/__init__.py       |   3 +
 modules/gdfsuez/backend.py        |  95 +++++++++++++
 modules/gdfsuez/browser.py        |  99 ++++++++++++++
 modules/gdfsuez/pages/__init__.py |  23 ++++
 modules/gdfsuez/pages/history.py  | 213 ++++++++++++++++++++++++++++++
 modules/gdfsuez/pages/homepage.py |  68 ++++++++++
 modules/gdfsuez/test.py           |  35 +++++
 7 files changed, 536 insertions(+)
 create mode 100644 modules/gdfsuez/__init__.py
 create mode 100644 modules/gdfsuez/backend.py
 create mode 100644 modules/gdfsuez/browser.py
 create mode 100644 modules/gdfsuez/pages/__init__.py
 create mode 100644 modules/gdfsuez/pages/history.py
 create mode 100644 modules/gdfsuez/pages/homepage.py
 create mode 100644 modules/gdfsuez/test.py

diff --git a/modules/gdfsuez/__init__.py b/modules/gdfsuez/__init__.py
new file mode 100644
index 00000000..392e99f0
--- /dev/null
+++ b/modules/gdfsuez/__init__.py
@@ -0,0 +1,3 @@
+from .backend import GdfSuezBackend
+
+__all__ = ['GdfSuezBackend']
diff --git a/modules/gdfsuez/backend.py b/modules/gdfsuez/backend.py
new file mode 100644
index 00000000..ab57a849
--- /dev/null
+++ b/modules/gdfsuez/backend.py
@@ -0,0 +1,95 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Mathieu Jourdan
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.bill import ICapBill, SubscriptionNotFound,\
+            BillNotFound, Subscription, Bill
+from weboob.tools.backend import BaseBackend, BackendConfig
+from weboob.tools.value import ValueBackendPassword
+from .browser import GdfSuez
+
+__all__ = ['GdfSuezBackend']
+
+class GdfSuezBackend(BaseBackend, ICapBill):
+    """Bill-capability backend for the GdF-Suez DolceVita customer website."""
+    NAME = 'gdfsuez'
+    MAINTAINER = u'Mathieu Jourdan'
+    EMAIL = 'mathieu.jourdan@gresille.org'
+    VERSION = '0.g'
+    LICENSE = 'AGPLv3+'
+    DESCRIPTION = 'GdF-Suez website'
+    CONFIG = BackendConfig(ValueBackendPassword('login',
+                                                label='Account ID (e-mail)',
+                                                masked=False),
+                           ValueBackendPassword('password',
+                                                label='Password',
+                                                masked=True))
+    BROWSER = GdfSuez
+
+    def create_default_browser(self):
+        return self.create_browser(self.config['login'].get(), self.config['password'].get())
+
+    def iter_subscription(self):
+        with self.browser:  # same guard as every other method driving the browser
+            for subscription in self.browser.get_subscription_list():
+                yield subscription
+
+    def get_subscription(self, _id):
+        """Return the subscription whose id is `_id` or raise SubscriptionNotFound."""
+        if not _id.isdigit():
+            raise SubscriptionNotFound()
+        with self.browser:
+            subscription = self.browser.get_subscription(_id)
+        if not subscription:
+            raise SubscriptionNotFound()
+        return subscription
+
+    def iter_bills_history(self, subscription):
+        if not isinstance(subscription, Subscription):
+            subscription = self.get_subscription(subscription)
+        with self.browser:
+            for history in self.browser.get_history(subscription):
+                yield history
+
+    def get_details(self, subscription):
+        if not isinstance(subscription, Subscription):
+            subscription = self.get_subscription(subscription)
+        with self.browser:
+            for detail in self.browser.get_details(subscription):
+                yield detail
+
+    def iter_bills(self, subscription):
+        if not isinstance(subscription, Subscription):
+            subscription = self.get_subscription(subscription)
+        with self.browser:  # the browser's bill list takes no subscription argument
+            for bill in self.browser.iter_bills():
+                yield bill
+
+    def get_bill(self, id):
+        """Return the bill whose id is `id` or raise BillNotFound."""
+        with self.browser:
+            bill = self.browser.get_bill(id)
+        if not bill:
+            raise BillNotFound()
+        return bill
+
+    def download_bill(self, bill):
+        if not isinstance(bill, Bill):
+            bill = self.get_bill(bill)
+        with self.browser:
+            return self.browser.readurl(bill._url)
diff --git a/modules/gdfsuez/browser.py b/modules/gdfsuez/browser.py
new file mode 100644
index 00000000..d000267d
--- /dev/null
+++ b/modules/gdfsuez/browser.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Mathieu Jourdan
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import StringIO
+from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
+from .pages import LoginPage, HomePage, AccountPage, TimeoutPage, HistoryPage, PdfPage
+
+__all__ = ['GdfSuez']
+
+class GdfSuez(BaseBrowser):
+    """Browser for the customer area of www.gdfsuez-dolcevita.fr."""
+    PROTOCOL = 'https'
+    DOMAIN = 'www.gdfsuez-dolcevita.fr'
+    PAGES = {'.*portail/clients.*?_nfpb=true&_pageLabel=page_identification':  LoginPage,
+             '.*portail/clients.*?_nfpb=true&_pageLabel=page_accueil_compte_en_ligne': HomePage,
+             '.*p/visualiser_mes_contrats.*?_nfpb=true': AccountPage,
+             '.*p/page_historique_de_mes_factures': HistoryPage,
+             '.*clients.*?_nfpb=true&_nfls=false&_pageLabel=page_erreur_timeout_session': TimeoutPage
+            }
+
+    loginp = '/portailClients/appmanager/portail/clients'
+    homep = '/portailClients/appmanager/portail/clients?_nfpb=true&_pageLabel=page_accueil_compte_en_ligne'
+    accountp = '/portailClients/client/p/visualiser_mes_contrats?_nfpb=true'
+    historyp = '/portailClients/client/p/page_historique_de_mes_factures'
+
+    def home(self):
+        self.location(self.homep)
+
+    def is_logged(self):
+        # Only the login form and the session-timeout page mean "not logged in".
+        return not (self.is_on_page(LoginPage) or self.is_on_page(TimeoutPage))
+
+    def login(self):
+        """Submit the credentials; raise BrowserIncorrectPassword if refused."""
+        assert isinstance(self.username, basestring)
+        assert isinstance(self.password, basestring)
+        if not self.is_on_page(LoginPage):
+            self.location(self.loginp)
+        self.page.login(self.username, self.password)
+        if self.is_on_page(LoginPage):  # still on the form: login was refused
+            raise BrowserIncorrectPassword()
+
+    def get_subscription_list(self):
+        if not self.is_on_page(AccountPage):
+            self.location(self.accountp)
+        return self.page.get_subscription_list()
+
+    def get_subscription(self, id):
+        """Return the subscription whose id equals `id`, or None."""
+        assert isinstance(id, basestring)
+        for sub in self.get_subscription_list():
+            if sub.id == id:
+                return sub
+
+    def get_history(self, subscription):
+        if not self.is_on_page(HistoryPage):
+            self.location(self.historyp)
+        return self.page.get_history()
+
+    def get_details(self, subscription):
+        """Yield the details of the latest bill, or nothing when there is no bill."""
+        bills = self.iter_bills()
+        if not bills:
+            return
+        # bills[0] is the latest bill: HistoryPage collects it first.
+        url = '%s://%s/' % (self.PROTOCOL, self.DOMAIN) + bills[0]._url
+        response = self.openurl(url)
+        pdf = PdfPage(StringIO.StringIO(response.read()))
+        for detail in pdf.get_details(subscription.label):
+            yield detail
+
+    def iter_bills(self):
+        if not self.is_on_page(HistoryPage):
+            self.location(self.historyp)
+        return self.page.get_bills()
+
+    def get_bill(self, id):
+        """Return the bill whose id equals `id`, or None."""
+        assert isinstance(id, basestring)
+        for b in self.iter_bills():
+            if b.id == id:
+                return b
+
diff --git a/modules/gdfsuez/pages/__init__.py b/modules/gdfsuez/pages/__init__.py
new file mode 100644
index 00000000..f77d2690
--- /dev/null
+++ b/modules/gdfsuez/pages/__init__.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Mathieu Jourdan
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .history import HistoryPage, PdfPage
+from .homepage import LoginPage, HomePage, AccountPage, TimeoutPage
+
+__all__ = ['LoginPage', 'HomePage', 'AccountPage', 'HistoryPage', 'PdfPage', 'TimeoutPage']
diff --git a/modules/gdfsuez/pages/history.py b/modules/gdfsuez/pages/history.py
new file mode 100644
index 00000000..270d0b6a
--- /dev/null
+++ b/modules/gdfsuez/pages/history.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Mathieu Jourdan
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+import os
+import subprocess
+import tempfile
+import shutil
+
+from datetime import datetime, date
+from decimal import Decimal
+
+from weboob.tools.browser import BasePage
+from weboob.capabilities.base import NotAvailable
+from weboob.capabilities.bill import Detail, Bill
+
+__all__ = ['HistoryPage', 'PdfPage']
+
+class HistoryPage(BasePage):
+    """Bills page: collects the latest bill plus the table of previous bills."""
+    def on_loaded(self):
+        self.details = []  # never filled: no history entry is parsed from this page
+        self.bills = []
+
+        # Latest bill
+        div = self.document.xpath('//div[@class="consulter_dernierefacture"]')[0]
+        bdate = div.xpath('p[@class="date"]/span[@class="textetertiaire"]')[0].text
+        bprice = div.xpath('p[@class="montant"]/span[@class="textetertiaire"]')[0].text
+        link = div.xpath('a[@id="display_popin"]')[0].attrib['href']
+        self.bills.append(self._create_bill(bdate, bprice, link))
+
+        # Previous bills ('.//' stays inside this table; '//' searched the whole page)
+        table = self.document.xpath('//table[@class="afficher_factures"]')[0]
+        for tr in table.xpath('.//tbody/tr'):
+            cells = tr.xpath('td')
+            link = cells[3].xpath('a')[0].attrib['href']
+            self.bills.append(self._create_bill(cells[0].text, cells[1].text, link))
+
+    def _create_bill(self, bdate, bprice, link):
+        """Build a Bill from a 'DD/MM/YYYY' date text, a price text and a PDF link."""
+        day = date(*reversed([int(x) for x in bdate.strip().split("/")]))
+        price = Decimal(unicode(bprice).strip(u' € TTC').replace(',', '.'))
+        bill = Bill()
+        bill.id = str(day).replace('-', '')
+        bill.date = day
+        bill._price = price
+        bill._url = link
+        bill.format = u'pdf'
+        bill.label = unicode(price)
+        return bill
+
+    def get_details(self):
+        return self.details
+
+    def get_history(self):  # the accessor GdfSuez.get_history() calls on this page
+        return self.details
+
+    def get_bills(self):
+        return self.bills
+
+class PdfPage():
+    """Text of a PDF bill, split into Detail entries."""
+    def __init__(self, file):
+        self.pdf = file
+
+    def _parse_pdf(self):
+        """Convert self.pdf to text with the external `ebook-convert` command."""
+        pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='wb', suffix='.pdf')
+        # splitext swaps only the extension; replace() rewrote every '.pdf' in the path
+        temptxt = os.path.splitext(pdffile.name)[0] + '.txt'
+        try:
+            shutil.copyfileobj(self.pdf, pdffile)
+            pdffile.flush()
+            with open(os.devnull, 'w') as devnull:  # closed once the converter returns
+                subprocess.call(['ebook-convert', pdffile.name, temptxt], stdout=devnull)
+        finally:
+            pdffile.close()
+        with open(temptxt, 'r') as txtfile:
+            txt = txtfile.read()
+        os.remove(temptxt)
+        return txt
+
+    def _parse_page(self, page):
+        """Turn the text of one energy section into a list of Detail objects."""
+        # Regexp
+        footnote = re.compile(r'\([0-9]\) ')                # (f)
+        ht = re.compile('HT par mois')
+        base = re.compile('la base de')
+        enddate = re.compile(r'\d\d\/\d\d\/\d\d')           # YY/MM/DD
+        endwithdigit = re.compile(r'\d+$')                  # blah blah 42
+        textwithcoma = re.compile(r'([a-z]|\d{4})\,')       # blah 2012, blah blah
+
+        # Parsing
+        details = []
+        for title in [  'Abonnement',
+                        'Consommation',
+                        'Contributions et taxes liées à l\'énergie']:
+            if title not in page:
+                continue  # this bill text has no such section
+            section = page.split(title,1)[1].split('Total ')[0]
+            # When a line holds '(0)', a newline is missing.
+            section = re.sub(footnote,'\n', section)
+            lines = section.split('\n')
+            lines = [x for x in lines if len(x) > 0]  # Remove empty lines
+            detail = None
+
+            for line in lines:
+                if re.match('[A-Za-z]', line[0]):
+                    # Things we want to merge with the one just before
+                    if 'facturées' in line:
+                        # Long lines are sometimes split, so we try to join them
+                        # That is the case for:
+                        # 'Déduction du montant des consommations
+                        # estimées facturées du 00/00/00 au 00/00/00'
+                        if detail is not None and detail.label is not NotAvailable:
+                            detail.label = detail.label + u' ' + unicode(line, encoding='utf-8')
+
+                    # Things for which we want a new detail
+                    else:
+                        # Entering here, we will instantiate a new detail.
+                        # We hadn't so before because of fragmented lines.
+                        if detail is not None and detail.label is not NotAvailable:
+                            # We have a new element, return the other one
+                            details.append(detail)
+                        detail = Detail()
+                        detail.price = Decimal(0)
+
+                        # If the coma is not a decimal separator, this is probably
+                        # a long sentence: keep only what comes before the coma.
+                        line = re.split(textwithcoma, line)[0]
+
+                        # Things we want for sure
+                        if re.findall(enddate, line):
+                            # When a line has been badly split after a date,
+                            # We want the label to end after the date, and maybe
+                            # the second part to be the info
+                            mydate = re.search(enddate, line).group(0)
+                            mylist = line.rpartition(mydate)
+                            label = mylist[0] + mylist[1]
+                            detail.label = unicode(label, encoding='utf-8')
+                        elif re.findall(endwithdigit, line):
+                            # What is this stupid number at the end of the line?
+                            # Line should have been split before the number
+                            detail.label = unicode(re.split(endwithdigit, line)[0], encoding='utf-8')
+                        # Things we don't want for sure
+                        elif ')' in line and '(' not in line:
+                            # First part of the parenthesis should have been drop before
+                            # Avoid to create a new empty detail
+                            detail.label = NotAvailable
+                        elif re.match(base, line):
+                            # This string should come always after a date,
+                            # usually, it will match one of the cases above.
+                            # Sometimes, it appears on a new line we don't need.
+                            detail.label = NotAvailable
+                        elif re.match(ht, line):
+                            # '00,00 € HT par mois' may have been split after HT
+                            # We don't need of the second line
+                            detail.label = NotAvailable
+                        # Things we probably want to keep
+                        else:
+                            # Well, maybe our line is correct, after all.
+                            # Not much to do.
+                            detail.label = unicode(line, encoding='utf-8')
+                        detail.infos = NotAvailable
+                elif ' %' in line:
+                    if isinstance(detail, Detail):
+                        # Sometimes the vat is not on a new line:
+                        # '00,00 00,0 %' instead of '00,0 %'
+                        vat = line.split()[line.count(' ')-1].replace(',', '.')
+                        detail.infos = unicode('TVA: ' + vat)
+                elif ' €' in line:
+                    price = line.replace(',','.')
+                    if isinstance(detail, Detail):
+                        detail.price = Decimal(price.strip(' €'))
+                elif re.match(enddate, line):
+                    # Line holding dates may have been mixed up
+                    if isinstance(detail, Detail) and detail.label is not NotAvailable:
+                        detail.label = detail.label.split(' au ')[0] + u' au ' + unicode(line, encoding='utf-8')
+            if detail is not None and detail.label is not NotAvailable:
+                # Do not append empty details to the list; a section without
+                # any usable line leaves `detail` unset or without a label.
+                details.append(detail)
+        return details
+
+    def get_details(self, label):
+        """Return the Detail list for the bill section matching `label` ([] if none)."""
+        if label == u'Gaz naturel':
+            energy = 'GAZ NATUREL'
+        elif label == u'Electricité':
+            energy = 'ELECTRICITE'
+        else:
+            return []  # no bill section is known for this kind of subscription
+        txt = self._parse_pdf()
+        if energy not in txt:
+            return []
+        return self._parse_page(txt.split(energy)[1].split('TOTAL %s TTC' % energy)[0])
+
diff --git a/modules/gdfsuez/pages/homepage.py b/modules/gdfsuez/pages/homepage.py
new file mode 100644
index 00000000..1f7fecab
--- /dev/null
+++ b/modules/gdfsuez/pages/homepage.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2013 Mathieu Jourdan
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from datetime import datetime, date
+
+from weboob.tools.browser import BasePage
+from weboob.tools.mech import ClientForm
+from weboob.capabilities.bill import Subscription
+
+__all__ = ['LoginPage', 'HomePage', 'AccountPage', 'TimeoutPage']
+
+class LoginPage(BasePage):  # page holding the 'symConnexionForm' login form
+    def login(self, login, password):
+        self.browser.select_form('symConnexionForm')
+        for name, value in (("portlet_login_plein_page_3{pageFlow.mForm.login}", login),
+                            ("portlet_login_plein_page_3{pageFlow.mForm.password}", password)):
+            self.browser[name] = str(value)  # NOTE(review): str() rejects non-ASCII unicode
+        self.browser.submit()
+
+class HomePage(BasePage):
+    """Online-account home page; nothing is parsed from it."""
+    def on_loaded(self):
+        """Do nothing when the page loads."""
+
+class AccountPage(BasePage):
+    """Contracts page listing the subscriptions of the account."""
+    def get_subscription_list(self):
+        tables = self.document.xpath('//table[@id="ensemble_contrat_N0"]')
+        # some clients may have subscriptions to gas and electricity,
+        # but they receive a single bill
+        # to avoid "boobill details" and "boobill bills" returning the same
+        # table twice, we could return only one subscription for both.
+        # We do not, and "boobill details" will take care of parsing only the
+        # relevant section in the bill files.
+        # './/' keeps the row search inside this table ('//' scanned the whole page).
+        for line in (tables[0].xpath('.//tbody/tr') if tables else []):
+            cells = line.xpath('td')
+            snumber = cells[2].attrib['id'].replace('Contrat_', '')
+            slabel = cells[0].xpath('a')[0].text.replace('offre', '').strip()
+            d = unicode(cells[3].xpath('strong')[0].text.strip())
+            sdate = date(*reversed([int(x) for x in d.split("/")]))
+            sub = Subscription(snumber)
+            sub._id = snumber
+            sub.label = slabel
+            sub.subscriber = u''.join(cells[1].itertext()).strip()  # cell text, not repr(element)
+            sub.renewdate = sdate
+            yield sub
+
+class TimeoutPage(BasePage):
+    """Session-timeout error page; the browser treats it as logged out."""
+    def on_loaded(self):
+        """Do nothing when the page loads."""
diff --git a/modules/gdfsuez/test.py b/modules/gdfsuez/test.py
new file mode 100644
index 00000000..31141858
--- /dev/null
+++ b/modules/gdfsuez/test.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+# This is a clone of freemobile/test.py for the gdfsuez module
+from weboob.tools.test import BackendTest
+
+
+__all__ = ['GdfSuezTest']
+
+
+class GdfSuezTest(BackendTest):
+    BACKEND = 'gdfsuez'
+
+    def test_gdfsuez(self):  # walks every subscription: history, bills, bill download
+        for subscription in self.backend.iter_subscription():
+            list(self.backend.iter_bills_history(subscription.id))  # name GdfSuezBackend defines
+            for bill in self.backend.iter_bills(subscription.id):
+                self.backend.download_bill(bill.id)