gdfsuez-dolcevita.fr website

Signed-off-by: Mathieu Jourdan <mathieu.jourdan@gresille.org>
Signed-off-by: Romain Bignon <romain@symlink.me>
This commit is contained in:
Mathieu Jourdan 2013-05-08 19:45:09 +02:00 committed by Romain Bignon
commit 79d1bcfea6
7 changed files with 536 additions and 0 deletions

View file

@ -0,0 +1,3 @@
from .backend import GdfSuezBackend
__all__ = ['GdfSuezBackend']

View file

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Mathieu Jourdan
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.bill import ICapBill, SubscriptionNotFound,\
BillNotFound, Subscription, Bill
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.value import ValueBackendPassword
from .browser import GdfSuez
__all__ = ['GdfSuezBackend']
class GdfSuezBackend(BaseBackend, ICapBill):
    """Bill backend for the GdF-Suez (gdfsuez-dolcevita.fr) customer website.

    Every method delegates to the ``GdfSuez`` browser; this class only
    resolves identifiers into objects and turns "nothing found" results
    into the ``*NotFound`` exceptions of the bill capability.
    """

    NAME = 'gdfsuez'
    MAINTAINER = u'Mathieu Jourdan'
    EMAIL = 'mathieu.jourdan@gresille.org'
    VERSION = '0.g'
    LICENSE = 'AGPLv3+'
    DESCRIPTION = 'GdF-Suez website'
    CONFIG = BackendConfig(ValueBackendPassword('login',
                                                label='Account ID (e-mail)',
                                                masked=False),
                           ValueBackendPassword('password',
                                                label='Password',
                                                masked=True)
                           )
    BROWSER = GdfSuez

    def create_default_browser(self):
        """Build the browser from the configured login and password."""
        return self.create_browser(self.config['login'].get(),
                                   self.config['password'].get())

    def iter_subscription(self):
        """Yield every subscription reported by the browser."""
        for subscription in self.browser.get_subscription_list():
            yield subscription

    def get_subscription(self, _id):
        """Return the subscription whose id is ``_id``.

        :raises SubscriptionNotFound: if ``_id`` is not made of digits only,
            or if the browser finds no subscription with that id.
        """
        if not _id.isdigit():
            # The original raised the misspelled name 'SubscriptionnotFound',
            # which produced a NameError instead of the intended exception.
            raise SubscriptionNotFound()
        with self.browser:
            subscription = self.browser.get_subscription(_id)
        if not subscription:
            raise SubscriptionNotFound()
        return subscription

    def iter_bills_history(self, subscription):
        """Yield the history entries the browser returns for ``subscription``.

        ``subscription`` may be a ``Subscription`` or a subscription id.
        """
        if not isinstance(subscription, Subscription):
            subscription = self.get_subscription(subscription)
        with self.browser:
            for history in self.browser.get_history(subscription):
                yield history

    def get_details(self, subscription):
        """Yield the bill details the browser returns for ``subscription``.

        ``subscription`` may be a ``Subscription`` or a subscription id.
        """
        if not isinstance(subscription, Subscription):
            subscription = self.get_subscription(subscription)
        with self.browser:
            for detail in self.browser.get_details(subscription):
                yield detail

    def iter_bills(self, subscription):
        """Yield the bills listed by the browser.

        ``subscription`` is resolved (so an unknown id still raises
        ``SubscriptionNotFound``) but the browser call takes no argument:
        the same bill list is returned whatever the subscription.
        """
        if not isinstance(subscription, Subscription):
            subscription = self.get_subscription(subscription)
        with self.browser:
            for bill in self.browser.iter_bills():
                yield bill

    def get_bill(self, id):
        """Return the bill whose id is ``id``.

        :raises BillNotFound: if the browser finds no such bill.
        """
        with self.browser:
            bill = self.browser.get_bill(id)
        if not bill:
            raise BillNotFound()
        return bill

    def download_bill(self, bill):
        """Return the content read from the bill's URL.

        ``bill`` may be a ``Bill`` or a bill id.
        """
        if not isinstance(bill, Bill):
            bill = self.get_bill(bill)
        with self.browser:
            return self.browser.readurl(bill._url)

View file

@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Mathieu Jourdan
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import StringIO
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword
from .pages import LoginPage, HomePage, AccountPage, TimeoutPage, HistoryPage, PdfPage
__all__ = ['GdfSuez']
class GdfSuez(BaseBrowser):
    """Browser for the GdF-Suez customer portal.

    ``PAGES`` maps URL patterns to the page classes handling them; the
    ``*p`` attributes are the paths this browser navigates to.
    """

    PROTOCOL = 'https'
    DOMAIN = 'www.gdfsuez-dolcevita.fr'
    PAGES = {'.*portail/clients.*?_nfpb=true&_pageLabel=page_identification': LoginPage,
             '.*portail/clients.*?_nfpb=true&_pageLabel=page_accueil_compte_en_ligne': HomePage,
             '.*p/visualiser_mes_contrats.*?_nfpb=true': AccountPage,
             '.*p/page_historique_de_mes_factures': HistoryPage,
             '.*clients.*?_nfpb=true&_nfls=false&_pageLabel=page_erreur_timeout_session': TimeoutPage
             }

    loginp = '/portailClients/appmanager/portail/clients'
    homep = '/portailClients/appmanager/portail/clients?_nfpb=true&_pageLabel=page_accueil_compte_en_ligne'
    accountp = '/portailClients/client/p/visualiser_mes_contrats?_nfpb=true'
    historyp = '/portailClients/client/p/page_historique_de_mes_factures'

    def __init__(self, *args, **kwargs):
        BaseBrowser.__init__(self, *args, **kwargs)

    def home(self):
        """Go to the account home page."""
        self.location(self.homep)

    def is_logged(self):
        """Return False when the current page is the login or timeout page."""
        return not (self.is_on_page(LoginPage) or self.is_on_page(TimeoutPage))

    def login(self):
        """Submit the credentials through the login page.

        :raises BrowserIncorrectPassword: if the browser is still on the
            login page after the form has been submitted.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)

        if not self.is_on_page(LoginPage):
            self.location(self.loginp)

        self.page.login(self.username, self.password)

        if self.is_on_page(LoginPage):
            raise BrowserIncorrectPassword()

    def get_subscription_list(self):
        """Return the subscriptions parsed from the account page."""
        if not self.is_on_page(AccountPage):
            self.location(self.accountp)
        return self.page.get_subscription_list()

    def get_subscription(self, id):
        """Return the subscription whose id equals ``id``, or None."""
        assert isinstance(id, basestring)
        for sub in self.get_subscription_list():
            if sub.id == id:
                return sub
        return None

    def get_history(self, subscription):
        """Return what the history page reports as history.

        NOTE(review): HistoryPage in pages/history.py defines get_details()
        and get_bills() but no get_history(); this call looks like it would
        raise AttributeError -- confirm which accessor is intended.
        """
        if not self.is_on_page(HistoryPage):
            self.location(self.historyp)
        return self.page.get_history()

    def get_details(self, subscription):
        """Yield the details parsed from the PDF of the first listed bill.

        Nothing is yielded when the history page lists no bill (the
        original indexed the empty list and raised IndexError).
        """
        bills = self.iter_bills()
        if not bills:
            return
        # The first listed bill is used directly; looking it up again by id
        # would return this same object.
        first_bill = bills[0]

        if not self.is_on_page(HistoryPage):
            self.location(self.historyp)

        url = 'https://www.gdfsuez-dolcevita.fr/' + first_bill._url
        response = self.openurl(url)
        pdf = PdfPage(StringIO.StringIO(response.read()))
        # PdfPage only parses the section of the bill matching the label.
        for detail in pdf.get_details(subscription.label):
            yield detail

    def iter_bills(self):
        """Return the bills parsed from the history page."""
        if not self.is_on_page(HistoryPage):
            self.location(self.historyp)
        return self.page.get_bills()

    def get_bill(self, id):
        """Return the bill whose id equals ``id``, or None."""
        assert isinstance(id, basestring)
        for b in self.iter_bills():
            if b.id == id:
                return b
        return None

View file

@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Mathieu Jourdan
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .history import HistoryPage, PdfPage
from .homepage import LoginPage, HomePage, AccountPage, TimeoutPage
__all__ = ['LoginPage', 'HomePage', 'AccountPage', 'HistoryPage', 'PdfPage', 'TimeoutPage']

View file

@ -0,0 +1,213 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Mathieu Jourdan
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import os
import subprocess
import tempfile
import shutil
from datetime import datetime, date
from decimal import Decimal
from weboob.tools.browser import BasePage
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.bill import Detail, Bill
__all__ = ['HistoryPage', 'PdfPage']
class HistoryPage(BasePage):
    """Bill history page: builds the list of bills when the page loads."""

    # Characters removed from both ends of an amount before conversion:
    # whitespace plus the characters of the ' € TTC' suffix.
    _PRICE_PADDING = u' \t\r\n\xa0€TC'

    def on_loaded(self):
        """Parse the latest bill and the table of previous bills."""
        self.details = []
        self.bills = []

        # Latest bill
        div = self.document.xpath('//div[@class="consulter_dernierefacture"]')[0]
        bdate = div.xpath('p[@class="date"]/span[@class="textetertiaire"]')[0].text
        bprice = div.xpath('p[@class="montant"]/span[@class="textetertiaire"]')[0].text
        link = div.xpath('a[@id="display_popin"]')[0].attrib['href']
        self.bills.append(self._create_bill(self._parse_date(bdate),
                                            self._parse_price(bprice),
                                            link))

        # Previous bills
        table = self.document.xpath('//table[@class="afficher_factures"]')[0]
        # NOTE(review): '//tbody/tr' is an absolute path, so it is not limited
        # to this table -- kept as is, confirm whether that is intended.
        for tr in table[0].xpath('//tbody/tr'):
            cells = tr.xpath('td')
            mydate = self._parse_date(unicode(cells[0].text.strip()))
            # The original stripped an empty character set here, so any
            # padding or currency mark around the amount broke Decimal().
            price = self._parse_price(unicode(cells[1].text))
            link = cells[3].xpath('a')[0].attrib['href']
            self.bills.append(self._create_bill(mydate, price, link))

    @staticmethod
    def _parse_date(text):
        """Convert a 'day/month/year' string into a ``date``."""
        return date(*reversed([int(x) for x in text.split("/")]))

    @classmethod
    def _parse_price(cls, text):
        """Convert an amount using a decimal comma into a ``Decimal``."""
        return Decimal(text.strip(cls._PRICE_PADDING).replace(',', '.'))

    def _create_bill(self, date, price, link):
        """Build a ``Bill`` whose id is the date written without dashes."""
        bill = Bill()
        bill.id = date.isoformat().replace('-', '')
        bill.date = date
        bill._price = price
        bill._url = link
        bill.format = u'pdf'
        bill.label = unicode(price)
        return bill

    def get_details(self):
        """Return the details list (left empty by ``on_loaded``)."""
        return self.details

    def get_bills(self):
        """Return the bills collected by ``on_loaded``."""
        return self.bills
class PdfPage(object):
    """Extract bill details from a PDF bill converted to text."""

    # Subscription label -> (marker opening the section in the converted
    # text, marker closing it).
    _SECTIONS = {
        u'Gaz naturel': ('GAZ NATUREL', 'TOTAL GAZ NATUREL TTC'),
        u'Electricité': ('ELECTRICITE', 'TOTAL ELECTRICITE TTC'),
    }

    def __init__(self, file):
        # File-like object holding the PDF content.
        self.pdf = file

    def _parse_pdf(self):
        """Return the text produced by running ``ebook-convert`` on the PDF.

        The PDF is copied to a named temporary file so that the external
        command can read it; the text file it writes is removed afterwards.
        """
        pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='wb', suffix='.pdf')
        # Swap only the trailing suffix, so a '.pdf' elsewhere in the path
        # is left alone.
        temptxt = pdffile.name[:-len('.pdf')] + '.txt'
        try:
            shutil.copyfileobj(self.pdf, pdffile)
            pdffile.flush()
            # The converter's output is discarded; the handle is closed as
            # soon as the command returns.
            with open(os.devnull, 'w') as devnull:
                subprocess.call(['ebook-convert', pdffile.name, temptxt], stdout=devnull)
        finally:
            pdffile.close()

        try:
            with open(temptxt, 'r') as txtfile:
                return txtfile.read()
        finally:
            # When the conversion failed there is no file to remove; do not
            # hide the error raised by open().
            if os.path.exists(temptxt):
                os.remove(temptxt)

    def _parse_page(self, page):
        """Return the list of ``Detail`` found in one energy section.

        ``page`` is the text of the section; the lines following each of
        the three known titles, up to the next 'Total ', are parsed.
        """
        # Regexp
        footnote = re.compile(r'\([0-9]\) ')              # (f)
        ht = re.compile('HT par mois')
        base = re.compile('la base de')
        enddate = re.compile(r'\d\d\/\d\d\/\d\d')          # 00/00/00
        endwithdigit = re.compile(r'\d+$')                # blah blah 42
        textwithcoma = re.compile(r'([a-z]|\d{4})\,')      # blah 2012, blah blah

        # Parsing
        details = []
        for title in ['Abonnement',
                      'Consommation',
                      'Contributions et taxes liées à l\'énergie']:
            section = page.split(title, 1)[1].split('Total ')[0]

            # When a line holds '(0)', a newline is missing.
            section = re.sub(footnote, '\n', section)

            lines = [x for x in section.split('\n') if len(x) > 0]  # Remove empty lines
            detail = None

            for line in lines:
                if re.match('[A-Za-z]', line[0]):

                    # Things we want to merge with the one just before
                    if 'facturées' in line:
                        # Long lines are sometimes split, so we try to join them
                        # That is the case for:
                        # 'Déduction du montant des consommations
                        # estimées facturées du 00/00/00 au 00/00/00'
                        detail.label = detail.label + u' ' + unicode(line, encoding='utf-8')

                    # Things for which we want a new detail
                    else:
                        # Entering here, we will instantiate a new detail.
                        # We had not done so before because of fragmented lines.
                        if detail is not None and detail.label is not NotAvailable:
                            # We have a new element, store the previous one
                            details.append(detail)
                        detail = Detail()
                        detail.price = Decimal(0)

                        # If the comma is not a decimal separator, then
                        # this is probably a long sentence: keep its start.
                        line = re.split(textwithcoma, line)[0]

                        # Things we want for sure
                        if re.findall(enddate, line):
                            # When a line has been badly split after a date,
                            # we want the label to end after the date.
                            mydate = re.search(enddate, line).group(0)
                            mylist = line.rpartition(mydate)
                            label = mylist[0] + mylist[1]
                            detail.label = unicode(label, encoding='utf-8')
                        elif re.findall(endwithdigit, line):
                            # The line should have been split before the
                            # trailing number: drop it from the label.
                            detail.label = unicode(re.split(endwithdigit, line)[0], encoding='utf-8')

                        # Things we don't want for sure
                        elif ')' in line and '(' not in line:
                            # The first part of the parenthesis was dropped
                            # before; avoid creating a new empty detail.
                            detail.label = NotAvailable
                        elif re.match(base, line):
                            # This string should always come after a date;
                            # sometimes it appears on a new line we don't need.
                            detail.label = NotAvailable
                        elif re.match(ht, line):
                            # '00,00 € HT par mois' may have been split after HT
                            # We don't need the second line
                            detail.label = NotAvailable

                        # Things we probably want to keep
                        else:
                            # Well, maybe our line is correct, after all.
                            detail.label = unicode(line, encoding='utf-8')
                        detail.infos = NotAvailable

                elif ' %' in line:
                    if isinstance(detail, Detail):
                        # Sometimes the vat is not on a new line:
                        # '00,00 00,0 %' instead of '00,0 %'
                        vat = line.split()[line.count(' ')-1].replace(',', '.')
                        detail.infos = unicode('TVA: ' + vat)

                # NOTE(review): this test used an empty string, which is
                # contained in every line and made the date branch below
                # unreachable. The euro sign is presumably the character
                # that was lost -- confirm against real converter output.
                elif '€' in line:
                    price = line.replace(',', '.')
                    if isinstance(detail, Detail):
                        detail.price = Decimal(price.strip(' €'))

                elif re.match(enddate, line):
                    # Line holding dates may have been mixed up
                    label = detail.label.split(' au ')[0] + u' au ' + unicode(line, encoding='utf-8')
                    detail.label = label

            # Do not append empty details to the list; a section without
            # any label line leaves 'detail' unset.
            if detail is not None and detail.label is not NotAvailable:
                details.append(detail)
        return details

    def get_details(self, label):
        """Return the details of the bill section matching ``label``.

        ``label`` is a subscription label; an empty list is returned for a
        label with no known section, without converting the PDF (the
        original converted it and then failed on a missing section).
        """
        markers = self._SECTIONS.get(label)
        if markers is None:
            return []
        start, end = markers
        txt = self._parse_pdf()
        page = txt.split(start)[1].split(end)[0]
        return self._parse_page(page)

View file

@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Mathieu Jourdan
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from datetime import datetime, date
from weboob.tools.browser import BasePage
from weboob.tools.mech import ClientForm
from weboob.capabilities.bill import Subscription
__all__ = ['LoginPage', 'HomePage', 'AccountPage', 'TimeoutPage']
class LoginPage(BasePage):
    """Identification page holding the 'symConnexionForm' form."""

    def login(self, login, password):
        """Fill the login form with the given credentials and submit it."""
        form_owner = self.browser
        form_owner.select_form('symConnexionForm')
        credentials = (
            ("portlet_login_plein_page_3{pageFlow.mForm.login}", login),
            ("portlet_login_plein_page_3{pageFlow.mForm.password}", password),
        )
        for field_name, value in credentials:
            form_owner[field_name] = str(value)
        form_owner.submit()
class HomePage(BasePage):
    """Account home page; nothing is extracted from it."""

    def on_loaded(self):
        """Do nothing when the page is loaded."""
        return None
class AccountPage(BasePage):
    """Contracts page listing the subscriptions of the account."""

    def get_subscription_list(self):
        """Yield one ``Subscription`` per row of the contracts table."""
        table = self.document.xpath('//table[@id="ensemble_contrat_N0"]')[0]
        if len(table) > 0:
            # some clients may have subscriptions to gas and electricity,
            # but they receive a single bill
            # to avoid "boobill details" and "boobill bills" returning the same
            # table twice, we could return only one subscription for both.
            # We do not, and "boobill details" will take care of parsing only the
            # relevant section in the bill files.
            for line in table[0].xpath('//tbody/tr'):
                cells = line.xpath('td')
                snumber = cells[2].attrib['id'].replace('Contrat_', '')
                slabel = cells[0].xpath('a')[0].text.replace('offre', '').strip()
                d = unicode(cells[3].xpath('strong')[0].text.strip())
                # 'day/month/year' -> date(year, month, day)
                sdate = date(*reversed([int(x) for x in d.split("/")]))

                sub = Subscription(snumber)
                sub._id = snumber
                sub.label = unicode(slabel)
                # Use the text of the cell: converting the element itself
                # stored its repr instead of the subscriber.
                sub.subscriber = unicode(u''.join(cells[1].itertext()).strip())
                sub.renewdate = sdate
                yield sub
class TimeoutPage(BasePage):
    """Session-timeout error page; nothing is extracted from it."""

    def on_loaded(self):
        """Do nothing when the page is loaded."""
        return None

35
modules/gdfsuez/test.py Normal file
View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
# This is a clone of freemobile/test.py for the gdfsuez module
from weboob.tools.test import BackendTest
__all__ = ['GdfSuezTest']
class GdfSuezTest(BackendTest):
    """Smoke test walking through the bill features of the gdfsuez backend."""

    BACKEND = 'gdfsuez'

    def test_gdfsuez(self):
        """List history and bills of each subscription, downloading every bill."""
        for subscription in self.backend.iter_subscription():
            # The backend names this method iter_bills_history; the original
            # test called iter_history, which the backend does not define.
            list(self.backend.iter_bills_history(subscription.id))
            for bill in self.backend.iter_bills(subscription.id):
                self.backend.download_bill(bill.id)