Improve parsing of the pdf

This commit is contained in:
Florent 2013-01-24 22:48:25 +01:00
commit 054279ac0e

View file

@ -17,6 +17,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import os import os
import subprocess import subprocess
import tempfile import tempfile
@ -25,6 +26,7 @@ import shutil
from datetime import datetime, date, time from datetime import datetime, date, time
from decimal import Decimal from decimal import Decimal
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.capabilities.bill import Detail, Bill from weboob.capabilities.bill import Detail, Bill
@ -61,21 +63,25 @@ class PdfPage():
lines = page.split('\n') lines = page.split('\n')
lines = [x for x in lines if len(x) > 0] # Remove empty lines lines = [x for x in lines if len(x) > 0] # Remove empty lines
numitems = ((len(lines) + 1) / 3) - 1 # Each line has three columns, remove one element (pictures) numitems = ((len(lines) + 1) / 3) - 1 # Each line has three columns, remove one element (pictures)
lines.insert(len(lines) - 1, '') # Add an empty column for "Prélèvement mensuel lines.pop(0) # "MENSUELLE"
lines.pop(0) lines.pop(0) # "Votre consommation au "
details = [] details = []
for i in range(numitems): first = True
nature = i * 3 for line in lines:
conso = nature + 1 if re.match('[A-Za-z]', line[0]):
price = conso + 1 # We have a new element, return the other one
if not first:
details.append(detail)
else:
first = False
detail = Detail() detail = Detail()
detail.label = unicode(lines[nature], encoding='utf-8')
detail.infos = unicode(lines[conso], encoding='utf-8')
try:
detail.price = Decimal(lines[price].replace('€', ''))
except:
detail.price = Decimal(0) detail.price = Decimal(0)
detail.infos = NotAvailable
detail.label = unicode(line, encoding='utf-8')
elif '€' in line:
detail.price = Decimal(line.replace('€', ''))
else:
detail.infos = unicode(line, encoding='utf-8')
details.append(detail) details.append(detail)
return details return details