Add details command
This commit is contained in:
parent
d9ed7c2ed2
commit
8fa430e61f
2 changed files with 44 additions and 10 deletions
|
|
@ -112,7 +112,10 @@ class Leclercmobile(BaseBrowser):
|
||||||
def get_details(self):
|
def get_details(self):
|
||||||
if not self.is_on_page(HistoryPage):
|
if not self.is_on_page(HistoryPage):
|
||||||
self.location(self.conso)
|
self.location(self.conso)
|
||||||
return self.page.get_details()
|
response = self.openurl('/EspaceClient/pgeWERL015_RecupReleveConso.aspx?m=-0')
|
||||||
|
pdf = PdfPage(StringIO.StringIO(response.read()))
|
||||||
|
for detail in pdf.get_details():
|
||||||
|
yield detail
|
||||||
|
|
||||||
def iter_bills(self, parentid):
|
def iter_bills(self, parentid):
|
||||||
if not self.is_on_page(HistoryPage):
|
if not self.is_on_page(HistoryPage):
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
import tempfile
|
import tempfile
|
||||||
import shutil
|
import shutil
|
||||||
|
|
@ -40,14 +40,7 @@ class PdfPage():
|
||||||
def __init__(self, file):
|
def __init__(self, file):
|
||||||
self.pdf = file
|
self.pdf = file
|
||||||
|
|
||||||
# Standard PDF text extractors read text line by line
|
def _parse_pdf(self):
|
||||||
# But the position in the file is not always the "real" position to display...
|
|
||||||
# This produces unsorted and unparsable data
|
|
||||||
# Examples of bad software: pdfminer and other Python tools
|
|
||||||
# This is why we have to use "ebook-convert" from calibre software,
|
|
||||||
# it is the only one to "reflow" text and give some relevant results
|
|
||||||
# The bad news is that ebook-convert doesn't support simple use with stdin/stdout
|
|
||||||
def get_calls(self):
|
|
||||||
pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='w', suffix='.pdf')
|
pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='w', suffix='.pdf')
|
||||||
temptxt = pdffile.name.replace('.pdf', '.txt')
|
temptxt = pdffile.name.replace('.pdf', '.txt')
|
||||||
cmd = "ebook-convert"
|
cmd = "ebook-convert"
|
||||||
|
|
@ -58,6 +51,44 @@ class PdfPage():
|
||||||
pdffile.close()
|
pdffile.close()
|
||||||
txtfile = open(temptxt, 'r')
|
txtfile = open(temptxt, 'r')
|
||||||
txt = txtfile.read()
|
txt = txtfile.read()
|
||||||
|
txtfile.close()
|
||||||
|
os.remove(temptxt)
|
||||||
|
return txt
|
||||||
|
|
||||||
|
|
||||||
|
def get_details(self):
    """Return the rows found between the two section markers of the PDF text.

    The text produced by ``self._parse_pdf()`` is cut between the
    'CONSOMMATION MENSUELLE' and 'ACTIVITE DETAILLEE' markers, and the
    non-empty lines in between are read three at a time as
    (label, infos, price).

    Returns a list of ``Detail`` objects. A price that is missing or that
    cannot be parsed as a decimal number is stored as ``Decimal(0)``.

    Raises IndexError when the 'CONSOMMATION MENSUELLE' marker is absent
    from the extracted text.
    """
    # Local import so the narrowed except clause below does not depend on
    # the module-level import list.
    from decimal import InvalidOperation

    txt = self._parse_pdf()
    page = txt.split('CONSOMMATION MENSUELLE')[1].split('ACTIVITE DETAILLEE')[0]
    # Remove empty lines
    lines = [line for line in page.split('\n') if line]
    # Each row has three columns; the count is taken before the padding
    # and pop below. Explicit floor division keeps this an int for range().
    numitems = (len(lines) + 1) // 3
    # Add an empty column for "Prélèvement mensuel"
    lines.append('')
    # Drop the first remaining line (presumably the table header -- confirm)
    lines.pop(0)

    details = []
    for i in range(numitems):
        nature = i * 3
        conso = nature + 1
        price = conso + 1

        detail = Detail()
        detail.label = unicode(lines[nature], encoding='utf-8')
        detail.infos = unicode(lines[conso], encoding='utf-8')
        try:
            detail.price = Decimal(lines[price].replace('€', ''))
        except (InvalidOperation, IndexError):
            # Empty or non-numeric price cell, or a short last row:
            # fall back to zero as before, without hiding unrelated errors.
            detail.price = Decimal(0)
        details.append(detail)
    return details
|
||||||
|
|
||||||
|
# Standard PDF text extractors read text line by line
|
||||||
|
# But the position in the file is not always the "real" position to display...
|
||||||
|
# This produces unsorted and unparsable data
|
||||||
|
# Examples of bad software: pdfminer and other Python tools
|
||||||
|
# This is why we have to use "ebook-convert" from calibre software,
|
||||||
|
# it is the only one to "reflow" text and give some relevant results
|
||||||
|
# The bad news is that ebook-convert doesn't support simple use with stdin/stdout
|
||||||
|
def get_calls(self):
|
||||||
|
txt = self._parse_pdf()
|
||||||
pages = txt.split("DEBIT (€)")
|
pages = txt.split("DEBIT (€)")
|
||||||
pages.pop(0) # remove headers
|
pages.pop(0) # remove headers
|
||||||
details = []
|
details = []
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue