Add details command

This commit is contained in:
Florent 2012-08-29 13:59:52 +02:00
commit 8fa430e61f
2 changed files with 44 additions and 10 deletions

View file

@ -112,7 +112,10 @@ class Leclercmobile(BaseBrowser):
def get_details(self):
if not self.is_on_page(HistoryPage):
self.location(self.conso)
return self.page.get_details()
response = self.openurl('/EspaceClient/pgeWERL015_RecupReleveConso.aspx?m=-0')
pdf = PdfPage(StringIO.StringIO(response.read()))
for detail in pdf.get_details():
yield detail
def iter_bills(self, parentid):
if not self.is_on_page(HistoryPage):

View file

@ -17,7 +17,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import os
import subprocess
import tempfile
import shutil
@ -40,14 +40,7 @@ class PdfPage():
def __init__(self, file):
self.pdf = file
# Standard PDF text extractors read the text line by line,
# but the position in the file is not always the "real" position on the page,
# so they produce unsorted and unparsable data.
# Examples of such software: pdfminer and other Python tools.
# This is why we have to use "ebook-convert" from the calibre software:
# it is the only one that "reflows" the text and gives relevant results.
# The bad news is that ebook-convert doesn't support simple use with stdin/stdout.
def get_calls(self):
def _parse_pdf(self):
pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='w', suffix='.pdf')
temptxt = pdffile.name.replace('.pdf', '.txt')
cmd = "ebook-convert"
@ -58,6 +51,44 @@ class PdfPage():
pdffile.close()
txtfile = open(temptxt, 'r')
txt = txtfile.read()
txtfile.close()
os.remove(temptxt)
return txt
def get_details(self):
    """Extract the monthly consumption summary from the PDF bill.

    The text returned by ``self._parse_pdf()`` is cut down to the section
    between the 'CONSOMMATION MENSUELLE' and 'ACTIVITE DETAILLEE' markers.
    The non-empty lines of that section are read as a flattened
    three-column table (label, consumption, price), one cell per line.

    Returns a list of Detail objects with ``label``, ``infos`` and
    ``price`` set.  A price cell that is missing or is not a valid number
    yields ``Decimal(0)``.

    Raises IndexError if the 'CONSOMMATION MENSUELLE' marker is absent
    from the extracted text.
    """
    # Imported locally so the block does not depend on the file-level
    # import list also providing InvalidOperation.
    from decimal import InvalidOperation

    txt = self._parse_pdf()
    page = txt.split('CONSOMMATION MENSUELLE')[1].split('ACTIVITE DETAILLEE')[0]
    lines = [x for x in page.split('\n') if x]  # Remove empty lines

    # Each item has three columns.  The count is taken before the list is
    # adjusted below; '//' keeps it an integer even under true division.
    numitems = (len(lines) + 1) // 3

    # Drop the first line (presumably the rest of the section heading --
    # TODO confirm) and add an empty price column for "Prélèvement mensuel".
    cells = lines[1:] + ['']

    details = []
    for i in range(numitems):
        nature = i * 3
        conso = nature + 1
        price = conso + 1

        detail = Detail()
        detail.label = unicode(cells[nature], encoding='utf-8')
        detail.infos = unicode(cells[conso], encoding='utf-8')
        try:
            # Strip the currency sign and padding so that the number itself
            # is what Decimal gets to parse.
            detail.price = Decimal(cells[price].replace('€', '').strip())
        except (IndexError, InvalidOperation):
            # Missing or non-numeric price cell: best effort, fall back to 0.
            detail.price = Decimal(0)
        details.append(detail)
    return details
# Standard PDF text extractors read the text line by line,
# but the position in the file is not always the "real" position on the page,
# so they produce unsorted and unparsable data.
# Examples of such software: pdfminer and other Python tools.
# This is why we have to use "ebook-convert" from the calibre software:
# it is the only one that "reflows" the text and gives relevant results.
# The bad news is that ebook-convert doesn't support simple use with stdin/stdout.
def get_calls(self):
txt = self._parse_pdf()
pages = txt.split("DEBIT (€)")
pages.pop(0) # remove headers
details = []