gdfsuez-dolcevita.fr website
Signed-off-by: Mathieu Jourdan <mathieu.jourdan@gresille.org> Signed-off-by: Romain Bignon <romain@symlink.me>
This commit is contained in:
parent
2c633a43c8
commit
79d1bcfea6
7 changed files with 536 additions and 0 deletions
213
modules/gdfsuez/pages/history.py
Normal file
213
modules/gdfsuez/pages/history.py
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2013 Mathieu Jourdan
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
from datetime import datetime, date
|
||||
from decimal import Decimal
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
from weboob.capabilities.base import NotAvailable
|
||||
from weboob.capabilities.bill import Detail, Bill
|
||||
|
||||
__all__ = ['HistoryPage', 'PdfPage']
|
||||
|
||||
class HistoryPage(BasePage):
|
||||
|
||||
def on_loaded(self):
|
||||
self.details = []
|
||||
self.bills = []
|
||||
|
||||
# Latest bill
|
||||
div = self.document.xpath('//div[@class="consulter_dernierefacture"]')[0]
|
||||
bdate = div.xpath('p[@class="date"]/span[@class="textetertiaire"]')[0].text
|
||||
bprice = div.xpath('p[@class="montant"]/span[@class="textetertiaire"]')[0].text
|
||||
link = div.xpath('a[@id="display_popin"]')[0].attrib['href']
|
||||
mydate = date(*reversed([int(x) for x in bdate.split("/")]))
|
||||
price = Decimal(bprice.strip(u' € TTC').replace(',', '.'))
|
||||
self.bills.append(self._create_bill(mydate, price, link))
|
||||
|
||||
# Previous bills
|
||||
table = self.document.xpath('//table[@class="afficher_factures"]')[0]
|
||||
for tr in table[0].xpath('//tbody/tr'):
|
||||
cells = tr.xpath('td')
|
||||
bdate = unicode(cells[0].text.strip())
|
||||
mydate = date(*reversed([int(x) for x in bdate.split("/")]))
|
||||
bprice = unicode(cells[1].text)
|
||||
price = Decimal(bprice.strip(u' €').replace(',', '.'))
|
||||
link = cells[3].xpath('a')[0].attrib['href']
|
||||
self.bills.append(self._create_bill(mydate, price, link))
|
||||
|
||||
def _create_bill(self, date, price, link):
|
||||
bill = Bill()
|
||||
bill.id = date.__str__().replace('-', '')
|
||||
bill.date = date
|
||||
bill._price = price
|
||||
bill._url = link
|
||||
bill.format = u'pdf'
|
||||
bill.label = unicode(price)
|
||||
return bill
|
||||
|
||||
def get_details(self):
|
||||
return self.details
|
||||
|
||||
def get_bills(self):
|
||||
return self.bills
|
||||
|
||||
class PdfPage():
|
||||
|
||||
def __init__(self, file):
|
||||
self.pdf = file
|
||||
|
||||
def _parse_pdf(self):
|
||||
pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='w', suffix='.pdf')
|
||||
temptxt = pdffile.name.replace('.pdf', '.txt')
|
||||
cmd = "ebook-convert"
|
||||
stdout = open("/dev/null", "w")
|
||||
shutil.copyfileobj(self.pdf, pdffile)
|
||||
pdffile.flush()
|
||||
subprocess.call([cmd, pdffile.name, temptxt], stdout=stdout)
|
||||
pdffile.close()
|
||||
txtfile = open(temptxt, 'r')
|
||||
txt = txtfile.read()
|
||||
txtfile.close()
|
||||
os.remove(temptxt)
|
||||
return txt
|
||||
|
||||
def _parse_page(self, page):
|
||||
|
||||
# Regexp
|
||||
footnote = re.compile(r'\([0-9]\) ') # (f)
|
||||
ht = re.compile('HT par mois')
|
||||
base = re.compile('la base de')
|
||||
begindate = re.compile(' \d\d\/\d\d ') # MM/DD
|
||||
enddate = re.compile('\d\d\/\d\d\/\d\d') # YY/MM/DD
|
||||
endwithdigit = re.compile('\d+$') # blah blah 42
|
||||
endwitheuro = re.compile('€$') # blah 00,00 €
|
||||
textwithcoma = re.compile('([a-z]|\d{4})\,') # blah 2012, blah blah
|
||||
|
||||
# Parsing
|
||||
details = []
|
||||
for title in [ 'Abonnement',
|
||||
'Consommation',
|
||||
'Contributions et taxes liées à l\'énergie']:
|
||||
section = page.split(title,1)[1].split('Total ')[0]
|
||||
|
||||
# When a line holds '(0)', a newline is missing.
|
||||
section = re.sub(footnote,'\n', section)
|
||||
|
||||
lines = section.split('\n')
|
||||
lines = [x for x in lines if len(x) > 0] # Remove empty lines
|
||||
detail = None
|
||||
|
||||
for line in lines:
|
||||
if re.match('[A-Za-z]', line[0]):
|
||||
|
||||
# Things we want to merge with the one just before
|
||||
if 'facturées' in line:
|
||||
# Long lines are sometimes split, so we try to join them
|
||||
# That is the case for:
|
||||
# 'Déduction du montant des consommations
|
||||
# estimées facturées du 00/00/00 au 00/00/00'
|
||||
detail.label = detail.label + u' ' + unicode(line, encoding='utf-8')
|
||||
|
||||
# Things for which we want a new detail
|
||||
else:
|
||||
# Entering here, we will instantiate a new detail.
|
||||
# We hadn't so before because of fragmented lines.
|
||||
if detail is not None and detail.label is not NotAvailable:
|
||||
# We have a new element, return the other one
|
||||
details.append(detail)
|
||||
detail = Detail()
|
||||
detail.price = Decimal(0)
|
||||
|
||||
# If the coma is not a decimal separator, then
|
||||
# this is is probably a loooong sentence.
|
||||
# When it comes to jokes, keep it short and sweet.
|
||||
line = re.split(textwithcoma, line)[0]
|
||||
|
||||
# Things we want for sure
|
||||
if re.findall(enddate, line):
|
||||
# When a line has been badly split after a date,
|
||||
# We want the label to end after the date, and maybe
|
||||
# the second part to be the info
|
||||
mydate = re.search(enddate, line).group(0)
|
||||
mylist = line.rpartition(mydate)
|
||||
label = mylist[0] + mylist[1]
|
||||
detail.label = unicode(label, encoding='utf-8')
|
||||
elif re.findall(endwithdigit, line):
|
||||
# What is this stupid number at the end of the line?
|
||||
# Line should have been split before the number
|
||||
detail.label = unicode(re.split(endwithdigit, line)[0], encoding='utf-8')
|
||||
# Things we don't want for sure
|
||||
elif ')' in line and '(' not in line:
|
||||
# First part of the parenthesis should have been drop before
|
||||
# Avoid to create a new empty detail
|
||||
detail.label = NotAvailable
|
||||
elif re.match(base, line):
|
||||
# This string should come always after a date,
|
||||
# usually, it will match one of the cases above.
|
||||
# Sometimes, it appears on a new line we don't need.
|
||||
detail.label = NotAvailable
|
||||
elif re.match(ht, line):
|
||||
# '00,00 € HT par mois' may have been split after HT
|
||||
# We don't need of the second line
|
||||
detail.label = NotAvailable
|
||||
# Things we probably want to keep
|
||||
else:
|
||||
# Well, maybe our line is correct, after all.
|
||||
# Not much to do.
|
||||
detail.label = unicode(line, encoding='utf-8')
|
||||
detail.infos = NotAvailable
|
||||
elif ' %' in line:
|
||||
if isinstance(detail, Detail):
|
||||
# Sometimes the vat is not on a new line:
|
||||
# '00,00 00,0 %' instead of '00,0 %'
|
||||
vat = line.split()[line.count(' ')-1].replace(',', '.')
|
||||
detail.infos = unicode('TVA: ' + vat)
|
||||
elif ' €' in line:
|
||||
price = line.replace(',','.')
|
||||
if isinstance(detail, Detail):
|
||||
detail.price = Decimal(price.strip(' €'))
|
||||
elif re.match(enddate, line):
|
||||
# Line holding dates may have been mixed up
|
||||
label = detail.label.split(' au ')[0] + u' au ' + unicode(line, encoding='utf-8')
|
||||
detail.label = label
|
||||
if detail.label is not NotAvailable:
|
||||
# Do not append empty details to the list
|
||||
# It seemed easier to create details anyway than dealing
|
||||
# with None objects
|
||||
details.append(detail)
|
||||
return details
|
||||
|
||||
def get_details(self, label):
|
||||
txt = self._parse_pdf()
|
||||
page = None
|
||||
if label == u'Gaz naturel':
|
||||
page = txt.split('GAZ NATUREL')[1].split('TOTAL GAZ NATUREL TTC')[0]
|
||||
elif label == u'Electricité':
|
||||
page = txt.split('ELECTRICITE')[1].split('TOTAL ELECTRICITE TTC')[0]
|
||||
else:
|
||||
pass
|
||||
return self._parse_page(page)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue