From 054279ac0ececdfa213b2b2be1fb75fd1339bbc3 Mon Sep 17 00:00:00 2001 From: Florent Date: Thu, 24 Jan 2013 22:48:25 +0100 Subject: [PATCH] Improve parsing of the pdf --- modules/leclercmobile/pages/history.py | 34 +++++++++++++++----------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/modules/leclercmobile/pages/history.py b/modules/leclercmobile/pages/history.py index 5d0663b8..6b8c5ef3 100644 --- a/modules/leclercmobile/pages/history.py +++ b/modules/leclercmobile/pages/history.py @@ -17,6 +17,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. +import re import os import subprocess import tempfile @@ -25,6 +26,7 @@ import shutil from datetime import datetime, date, time from decimal import Decimal +from weboob.capabilities.base import NotAvailable from weboob.tools.browser import BasePage from weboob.capabilities.bill import Detail, Bill @@ -61,22 +63,26 @@ class PdfPage(): lines = page.split('\n') lines = [x for x in lines if len(x) > 0] # Remove empty lines numitems = ((len(lines) + 1) / 3) - 1 # Each line has three columns, remove one element (pictures) - lines.insert(len(lines) - 1, '') # Add an empty column for "Prélèvement mensuel - lines.pop(0) + lines.pop(0) # "MENSUELLE" + lines.pop(0) # "Votre consommation au " details = [] - for i in range(numitems): - nature = i * 3 - conso = nature + 1 - price = conso + 1 - - detail = Detail() - detail.label = unicode(lines[nature], encoding='utf-8') - detail.infos = unicode(lines[conso], encoding='utf-8') - try: - detail.price = Decimal(lines[price].replace('€', '')) - except: + first = True + for line in lines: + if re.match('[A-Za-z]', line[0]): + # We have a new element, return the other one + if not first: + details.append(detail) + else: + first = False + detail = Detail() detail.price = Decimal(0) - details.append(detail) + detail.infos = NotAvailable + detail.label = unicode(line, encoding='utf-8') + elif '€' in line: +
detail.price = Decimal(line.replace('€', '')) + else: + detail.infos = unicode(line, encoding='utf-8') + details.append(detail) return details def get_balance(self):