Add details command

2012-08-29 13:59:52 +02:00 · 2012-08-29 13:59:52 +02:00 · 8fa430e61f
commit 8fa430e61f
parent d9ed7c2ed2
2 changed files with 44 additions and 10 deletions
--- a/modules/leclercmobile/browser.py
+++ b/modules/leclercmobile/browser.py
@ -112,7 +112,10 @@ class Leclercmobile(BaseBrowser):
    def get_details(self):
        if not self.is_on_page(HistoryPage):
            self.location(self.conso)
-        return self.page.get_details()
+        response = self.openurl('/EspaceClient/pgeWERL015_RecupReleveConso.aspx?m=-0')
+        pdf = PdfPage(StringIO.StringIO(response.read()))
+        for detail in pdf.get_details():
+            yield detail

    def iter_bills(self, parentid):
        if not self.is_on_page(HistoryPage):
--- a/modules/leclercmobile/pages/history.py
+++ b/modules/leclercmobile/pages/history.py
@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

-
+import os
 import subprocess
 import tempfile
 import shutil
@ -40,14 +40,7 @@ class PdfPage():
    def __init__(self, file):
        self.pdf = file

-    # Standard pdf text extractor take text line by line
-    # But the position in the file is not always the "real" position to display...
-    # It produce some unsorted and unparsable data
-    # Example of bad software: pdfminer and others python tools
-    # This is why we have to use "ebook-convert" from calibre software,
-    # it is the only one to 'reflow" text and give some relevant results
-    # The bad new is that ebook-convert doesn't support simple use with stdin/stdout
-    def get_calls(self):
+    def _parse_pdf(self):
        pdffile = tempfile.NamedTemporaryFile(bufsize=100000, mode='w', suffix='.pdf')
        temptxt = pdffile.name.replace('.pdf', '.txt')
        cmd = "ebook-convert"
@ -58,6 +51,44 @@ class PdfPage():
        pdffile.close()
        txtfile = open(temptxt, 'r')
        txt = txtfile.read()
+        txtfile.close()
+        os.remove(temptxt)
+        return txt
+
+
+    def get_details(self):
+        txt = self._parse_pdf()
+        page = txt.split('CONSOMMATION MENSUELLE')[1].split('ACTIVITE DETAILLEE')[0]
+        lines = page.split('\n')
+        lines = [x for x in lines if len(x) > 0]  # Remove empty lines
+        numitems = (len(lines) + 1) / 3  # Each line has three columns
+        lines.insert(len(lines), '')  # Add an empty column for "Prélèvement mensuel
+        lines.pop(0)
+        details = []
+        for i in range(numitems):
+            nature = i * 3
+            conso = nature + 1
+            price = conso + 1
+
+            detail = Detail()
+            detail.label = unicode(lines[nature], encoding='utf-8')
+            detail.infos = unicode(lines[conso], encoding='utf-8')
+            try:
+                detail.price = Decimal(lines[price].replace('€', ''))
+            except:
+                detail.price = Decimal(0)
+            details.append(detail)
+        return details
+
+    # Standard pdf text extractor take text line by line
+    # But the position in the file is not always the "real" position to display...
+    # It produce some unsorted and unparsable data
+    # Example of bad software: pdfminer and others python tools
+    # This is why we have to use "ebook-convert" from calibre software,
+    # it is the only one to 'reflow" text and give some relevant results
+    # The bad new is that ebook-convert doesn't support simple use with stdin/stdout
+    def get_calls(self):
+        txt = self._parse_pdf()
        pages = txt.split("DEBIT (€)")
        pages.pop(0)  # remove headers
        details = []