Convert freemobile to browser2

2014-04-03 13:08:59 +02:00 · 2014-04-03 13:08:59 +02:00 · 25725f79cf
commit 25725f79cf
parent d85675918a
7 changed files with 143 additions and 257 deletions
--- a/modules/freemobile/pages/history.py
+++ b/modules/freemobile/pages/history.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2012 Florent Fourcot
+# Copyright(C) 2012-2014 Florent Fourcot
 #
 # This file is part of weboob.
 #
@@ -18,125 +18,115 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-import re
 import calendar
-from datetime import datetime, date, time
+from StringIO import StringIO
+import lxml.html as html
+from datetime import datetime
 from decimal import Decimal

-from weboob.tools.browser import BasePage
+from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, LoggedPage
+from weboob.tools.browser2.filters import Date, CleanText, Attr, Filter, CleanDecimal, Regexp, Field, DateTime, Format
 from weboob.capabilities.bill import Detail, Bill


-__all__ = ['HistoryPage', 'DetailsPage']
+__all__ = ['HistoryPage', 'DetailsPage', 'BadUTF8Page']


-def convert_price(div):
-    try:
-        price = div.find('div[@class="horsForfait"]/p/span').text
-        price = price.encode('utf-8', 'replace').replace('€', '').replace(',', '.')
-        return Decimal(price)
-    except:
-        return Decimal(0)
+class FormatDate(Filter):
+    def filter(self, txt):
+        return datetime.strptime(txt, "%Y%m%d").date()


-class DetailsPage(BasePage):
+class BadUTF8Page(HTMLPage):
+    def __init__(self, browser, response, *args, **kwargs):
+        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
+        parser = html.HTMLParser(encoding='UTF-8')
+        self.doc = html.parse(StringIO(response.content), parser)

-    def on_loaded(self):
+
+class DetailsPage(LoggedPage, BadUTF8Page):
+    def on_load(self):
        self.details = {}
-        self.datebills = {}
-        for div in self.document.xpath('//div[@class="infosLigne pointer"]'):
-            phonenumber = div.text
+        for div in self.doc.xpath('//div[@class="infosLigne pointer"]'):
+            phonenumber = CleanText('.')(div)
            phonenumber = phonenumber.split("-")[-1].strip()
            virtualnumber = div.attrib['onclick'].split('(')[1][1]
            self.details['num' + str(phonenumber)] = virtualnumber

-        for div in self.document.xpath('//div[@class="infosConso"]'):
+        for div in self.doc.xpath('//div[@class="infosConso"]'):
            num = div.attrib['id'].split('_')[1][0]
            self.details[num] = []

            # National parsing
            divnat = div.xpath('div[@class="national"]')[0]
-            self.parse_div(divnat, "National : %s | International : %s", num, False)
+            self._parse_div(divnat, "National : %s | International : %s", num, False)

            # International parsing
            divint = div.xpath('div[@class="international hide"]')[0]
            if divint.xpath('div[@class="detail"]'):
-                self.parse_div(divint, u"Appels émis : %s | Appels reçus : %s", num, True)
+                self._parse_div(divint, u"Appels émis : %s | Appels reçus : %s", num, True)

-        for divbills in self.document.xpath('//div[@id="factContainer"]'):
-            for divbill in divbills.xpath('.//div[@class="factLigne hide "]'):
-                alink = divbill.xpath('.//div[@class="pdf"]/a')[0]
-                localid = re.search('&l=(?P<id>\d*)&id',
-                        alink.attrib.get('href')).group('id')
-                mydate_str = re.search('&date=(?P<date>\d*)$',
-                        alink.attrib.get('href')).group('date')
-                mydate = datetime.strptime(mydate_str, "%Y%m%d").date()
-
-                bill = Bill()
-                bill.label = unicode(mydate_str)
-                bill.id = unicode(mydate_str)
-                bill.date = mydate
-                bill.format = u"pdf"
-                bill._url = alink.attrib.get('href')
-                if "pdfrecap" in alink.attrib.get('href'):
-                    bill.id = "recap-" + bill.id
-                if localid not in self.datebills:
-                    self.datebills[localid] = []
-                self.datebills[localid].append(bill)
-
-    def parse_div(self, divglobal, string, num, inter=False):
+    def _parse_div(self, divglobal, string, num, inter=False):
        divs = divglobal.xpath('div[@class="detail"]')
        # Two informations in one div...
        div = divs.pop(0)
-        voice = self.parse_voice(div, string, num, inter)
+        voice = self._parse_voice(div, string, num, inter)
        self.details[num].append(voice)
-        self.iter_divs(divs, num, inter)
+        self._iter_divs(divs, num, inter)

-    def iter_divs(self, divs, num, inter=False):
+    def _iter_divs(self, divs, num, inter=False):
        for div in divs:
            detail = Detail()
-
-            detail.label = unicode(div.find('div[@class="titre"]/p').text_content())
+            detail.label = CleanText('div[@class="titre"]/p')(div)
            detail.id = "-" + detail.label.split(' ')[1].lower()
            if inter:
                detail.label = detail.label + u" (international)"
                detail.id = detail.id + "-inter"
-            detail.infos = unicode(div.find('div[@class="conso"]/p').text_content().lstrip())
-            detail.price = convert_price(div)
+            detail.infos = CleanText('div[@class="conso"]/p')(div)
+            detail.price = CleanDecimal('div[@class="horsForfait"]/p/span', default=Decimal(0))(div)

            self.details[num].append(detail)

-    def parse_voice(self, div, string, num, inter=False):
+    def _parse_voice(self, div, string, num, inter=False):
+        voicediv = div.xpath('div[@class="conso"]')[0]
        voice = Detail()
        voice.id = "-voice"
-        voicediv = div.xpath('div[@class="conso"]')[0]
-        voice.label = unicode(div.find('div[@class="titre"]/p').text_content())
+        voice.label = CleanText('div[@class="titre"]/p')(div)
        if inter:
            voice.label = voice.label + " (international)"
            voice.id = voice.id + "-inter"
-        voice.price = convert_price(div)
-        voice1 = voicediv.xpath('.//span[@class="actif"]')[0].text
-        voice2 = voicediv.xpath('.//span[@class="actif"]')[1].text
+        voice.price = CleanDecimal('div[@class="horsForfait"]/p/span', default=0)(div)
+        voice1 = CleanText('.//span[@class="actif"][1]')(voicediv)
+        voice2 = CleanText('.//span[@class="actif"][2]')(voicediv)
        voice.infos = unicode(string) % (voice1, voice2)

        return voice

+    # XXX
    def get_details(self, subscription):
        num = self.details['num' + subscription.id]
        for detail in self.details[num]:
            detail.id = subscription.id + detail.id
            yield detail

-    def date_bills(self, subscription):
-        for bill in self.datebills[subscription._login]:
-            bill.id = subscription.id + '.' + bill.id
-            yield bill
+    @method
+    class date_bills(ListElement):
+        item_xpath = '//div[@class="factLigne hide "]'
+
+        class item(ItemElement):
+            klass = Bill
+
+            obj__url = Attr('.//div[@class="pdf"]/a', 'href')
+            obj__localid = Regexp(Field('_url'), '&l=(\d*)&id', u'\\1')
+            obj_label = Regexp(Field('_url'), '&date=(\d*)$', u'\\1')
+            obj_id = Field('label')
+            obj_date = FormatDate(Field('id'))
+            obj_format = u"pdf"
+            obj_price = CleanDecimal('div[@class="montant"]', default=Decimal(0), replace_dots=False)

    def get_renew_date(self, subscription):
-        login = subscription._login
-        div = self.document.xpath('//div[@login="%s"]' % login)[0]
-        mydate = div.xpath('.//span[@class="actif"]')[0].text
-        mydate = date(*reversed([int(x) for x in mydate.split("/")]))
+        div = self.doc.xpath('//div[@login="%s"]' % subscription._login)[0]
+        mydate = Date(CleanText('//div[@class="resumeConso"]/span[@class="actif"][1]'), dayfirst=True)(div)
        if mydate.month == 12:
            mydate = mydate.replace(month=1)
            mydate = mydate.replace(year=mydate.year + 1)
@@ -149,30 +139,19 @@ class DetailsPage(BasePage):
        return mydate


-def _get_date(detail):
-    return detail.datetime
+class HistoryPage(LoggedPage, BadUTF8Page):
+    @method
+    class get_calls(ListElement):
+        item_xpath = '//tr'

+        class item(ItemElement):
+            klass = Detail

-class HistoryPage(BasePage):
+            def condition(self):
+                txt = self.el.xpath('td[1]')[0].text
+                return (txt is not None) and (txt != "Date")

-    def on_loaded(self):
-        self.calls = []
-        for tr in self.document.xpath('//tr'):
-            tds = tr.xpath('td')
-            if tds[0].text is None or tds[0].text == "Date":
-                pass
-            else:
-                detail = Detail()
-                mydate = date(*reversed([int(x) for x in tds[0].text.split(' ')[0].split("/")]))
-                mytime = time(*[int(x) for x in tds[0].text.split(' ')[2].split(":")])
-                detail.datetime = datetime.combine(mydate, mytime)
-                detail.label = u' '.join([unicode(td.text.strip()) for td in tds[1:4] if td.text is not None])
-                try:
-                    detail.price = Decimal(tds[4].text[0:4].replace(',', '.'))
-                except:
-                    detail.price = Decimal(0)
-
-                self.calls.append(detail)
-
-    def get_calls(self):
-        return sorted(self.calls, key=_get_date, reverse=True)
+            obj_datetime = DateTime(CleanText('td[1]'), dayfirst=True)
+            obj_label = Format(u'%s %s %s %s', CleanText('td[2]'), CleanText('td[3]'),
+                               CleanText('td[4]'), CleanText('td[5]'))
+            obj_price = CleanDecimal('td[5]', default=Decimal(0))