Improve parsing of the PDF
This commit is contained in:
parent
fcd8432045
commit
054279ac0e
1 changed file with 20 additions and 14 deletions
|
|
@ -17,6 +17,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import re
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
@ -25,6 +26,7 @@ import shutil
|
||||||
from datetime import datetime, date, time
|
from datetime import datetime, date, time
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
|
||||||
|
from weboob.capabilities.base import NotAvailable
|
||||||
from weboob.tools.browser import BasePage
|
from weboob.tools.browser import BasePage
|
||||||
from weboob.capabilities.bill import Detail, Bill
|
from weboob.capabilities.bill import Detail, Bill
|
||||||
|
|
||||||
|
|
@ -61,21 +63,25 @@ class PdfPage():
|
||||||
lines = page.split('\n')
|
lines = page.split('\n')
|
||||||
lines = [x for x in lines if len(x) > 0] # Remove empty lines
|
lines = [x for x in lines if len(x) > 0] # Remove empty lines
|
||||||
numitems = ((len(lines) + 1) / 3) - 1 # Each line has three columns, remove one element (pictures)
|
numitems = ((len(lines) + 1) / 3) - 1 # Each line has three columns, remove one element (pictures)
|
||||||
lines.insert(len(lines) - 1, '') # Add an empty column for "Prélèvement mensuel
|
lines.pop(0) # "MENSUELLE"
|
||||||
lines.pop(0)
|
lines.pop(0) # "Votre consommation au "
|
||||||
details = []
|
details = []
|
||||||
for i in range(numitems):
|
first = True
|
||||||
nature = i * 3
|
for line in lines:
|
||||||
conso = nature + 1
|
if re.match('[A-Za-z]', line[0]):
|
||||||
price = conso + 1
|
# We have a new element, return the other one
|
||||||
|
if not first:
|
||||||
|
details.append(detail)
|
||||||
|
else:
|
||||||
|
first = False
|
||||||
detail = Detail()
|
detail = Detail()
|
||||||
detail.label = unicode(lines[nature], encoding='utf-8')
|
|
||||||
detail.infos = unicode(lines[conso], encoding='utf-8')
|
|
||||||
try:
|
|
||||||
detail.price = Decimal(lines[price].replace('€', ''))
|
|
||||||
except:
|
|
||||||
detail.price = Decimal(0)
|
detail.price = Decimal(0)
|
||||||
|
detail.infos = NotAvailable
|
||||||
|
detail.label = unicode(line, encoding='utf-8')
|
||||||
|
elif '€' in line:
|
||||||
|
detail.price = Decimal(line.replace('€', ''))
|
||||||
|
else:
|
||||||
|
detail.infos = unicode(line, encoding='utf-8')
|
||||||
details.append(detail)
|
details.append(detail)
|
||||||
return details
|
return details
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue