Convert freemobile to browser2

2014-04-03 13:08:59 +02:00 · 2014-04-03 13:08:59 +02:00 · 25725f79cf
commit 25725f79cf
parent d85675918a
7 changed files with 143 additions and 257 deletions
--- a/modules/freemobile/pages/history.py
+++ b/modules/freemobile/pages/history.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2012 Florent Fourcot
+# Copyright(C) 2012-2014 Florent Fourcot
 #
 # This file is part of weboob.
 #
@ -18,125 +18,115 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.


-import re
 import calendar
-from datetime import datetime, date, time
+from StringIO import StringIO
+import lxml.html as html
+from datetime import datetime
 from decimal import Decimal

-from weboob.tools.browser import BasePage
+from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, LoggedPage
+from weboob.tools.browser2.filters import Date, CleanText, Attr, Filter, CleanDecimal, Regexp, Field, DateTime, Format
 from weboob.capabilities.bill import Detail, Bill


-__all__ = ['HistoryPage', 'DetailsPage']
+__all__ = ['HistoryPage', 'DetailsPage', 'BadUTF8Page']


-def convert_price(div):
-    try:
-        price = div.find('div[@class="horsForfait"]/p/span').text
-        price = price.encode('utf-8', 'replace').replace('€', '').replace(',', '.')
-        return Decimal(price)
-    except:
-        return Decimal(0)
+class FormatDate(Filter):
+    def filter(self, txt):
+        return datetime.strptime(txt, "%Y%m%d").date()


-class DetailsPage(BasePage):
+class BadUTF8Page(HTMLPage):
+    def __init__(self, browser, response, *args, **kwargs):
+        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
+        parser = html.HTMLParser(encoding='UTF-8')
+        self.doc = html.parse(StringIO(response.content), parser)

-    def on_loaded(self):
+
+class DetailsPage(LoggedPage, BadUTF8Page):
+    def on_load(self):
        self.details = {}
-        self.datebills = {}
-        for div in self.document.xpath('//div[@class="infosLigne pointer"]'):
-            phonenumber = div.text
+        for div in self.doc.xpath('//div[@class="infosLigne pointer"]'):
+            phonenumber = CleanText('.')(div)
            phonenumber = phonenumber.split("-")[-1].strip()
            virtualnumber = div.attrib['onclick'].split('(')[1][1]
            self.details['num' + str(phonenumber)] = virtualnumber

-        for div in self.document.xpath('//div[@class="infosConso"]'):
+        for div in self.doc.xpath('//div[@class="infosConso"]'):
            num = div.attrib['id'].split('_')[1][0]
            self.details[num] = []

            # National parsing
            divnat = div.xpath('div[@class="national"]')[0]
-            self.parse_div(divnat, "National : %s | International : %s", num, False)
+            self._parse_div(divnat, "National : %s | International : %s", num, False)

            # International parsing
            divint = div.xpath('div[@class="international hide"]')[0]
            if divint.xpath('div[@class="detail"]'):
-                self.parse_div(divint, u"Appels émis : %s | Appels reçus : %s", num, True)
+                self._parse_div(divint, u"Appels émis : %s | Appels reçus : %s", num, True)

-        for divbills in self.document.xpath('//div[@id="factContainer"]'):
-            for divbill in divbills.xpath('.//div[@class="factLigne hide "]'):
-                alink = divbill.xpath('.//div[@class="pdf"]/a')[0]
-                localid = re.search('&l=(?P<id>\d*)&id',
-                        alink.attrib.get('href')).group('id')
-                mydate_str = re.search('&date=(?P<date>\d*)$',
-                        alink.attrib.get('href')).group('date')
-                mydate = datetime.strptime(mydate_str, "%Y%m%d").date()
-
-                bill = Bill()
-                bill.label = unicode(mydate_str)
-                bill.id = unicode(mydate_str)
-                bill.date = mydate
-                bill.format = u"pdf"
-                bill._url = alink.attrib.get('href')
-                if "pdfrecap" in alink.attrib.get('href'):
-                    bill.id = "recap-" + bill.id
-                if localid not in self.datebills:
-                    self.datebills[localid] = []
-                self.datebills[localid].append(bill)
-
-    def parse_div(self, divglobal, string, num, inter=False):
+    def _parse_div(self, divglobal, string, num, inter=False):
        divs = divglobal.xpath('div[@class="detail"]')
        # Two pieces of information in one div...
        div = divs.pop(0)
-        voice = self.parse_voice(div, string, num, inter)
+        voice = self._parse_voice(div, string, num, inter)
        self.details[num].append(voice)
-        self.iter_divs(divs, num, inter)
+        self._iter_divs(divs, num, inter)

-    def iter_divs(self, divs, num, inter=False):
+    def _iter_divs(self, divs, num, inter=False):
        for div in divs:
            detail = Detail()
-
-            detail.label = unicode(div.find('div[@class="titre"]/p').text_content())
+            detail.label = CleanText('div[@class="titre"]/p')(div)
            detail.id = "-" + detail.label.split(' ')[1].lower()
            if inter:
                detail.label = detail.label + u" (international)"
                detail.id = detail.id + "-inter"
-            detail.infos = unicode(div.find('div[@class="conso"]/p').text_content().lstrip())
-            detail.price = convert_price(div)
+            detail.infos = CleanText('div[@class="conso"]/p')(div)
+            detail.price = CleanDecimal('div[@class="horsForfait"]/p/span', default=Decimal(0))(div)

            self.details[num].append(detail)

-    def parse_voice(self, div, string, num, inter=False):
+    def _parse_voice(self, div, string, num, inter=False):
+        voicediv = div.xpath('div[@class="conso"]')[0]
        voice = Detail()
        voice.id = "-voice"
-        voicediv = div.xpath('div[@class="conso"]')[0]
-        voice.label = unicode(div.find('div[@class="titre"]/p').text_content())
+        voice.label = CleanText('div[@class="titre"]/p')(div)
        if inter:
            voice.label = voice.label + " (international)"
            voice.id = voice.id + "-inter"
-        voice.price = convert_price(div)
-        voice1 = voicediv.xpath('.//span[@class="actif"]')[0].text
-        voice2 = voicediv.xpath('.//span[@class="actif"]')[1].text
+        voice.price = CleanDecimal('div[@class="horsForfait"]/p/span', default=0)(div)
+        voice1 = CleanText('.//span[@class="actif"][1]')(voicediv)
+        voice2 = CleanText('.//span[@class="actif"][2]')(voicediv)
        voice.infos = unicode(string) % (voice1, voice2)

        return voice

+    # XXX
    def get_details(self, subscription):
        num = self.details['num' + subscription.id]
        for detail in self.details[num]:
            detail.id = subscription.id + detail.id
            yield detail

-    def date_bills(self, subscription):
-        for bill in self.datebills[subscription._login]:
-            bill.id = subscription.id + '.' + bill.id
-            yield bill
+    @method
+    class date_bills(ListElement):
+        item_xpath = '//div[@class="factLigne hide "]'
+
+        class item(ItemElement):
+            klass = Bill
+
+            obj__url = Attr('.//div[@class="pdf"]/a', 'href')
+            obj__localid = Regexp(Field('_url'), '&l=(\d*)&id', u'\\1')
+            obj_label = Regexp(Field('_url'), '&date=(\d*)$', u'\\1')
+            obj_id = Field('label')
+            obj_date = FormatDate(Field('id'))
+            obj_format = u"pdf"
+            obj_price = CleanDecimal('div[@class="montant"]', default=Decimal(0), replace_dots=False)

    def get_renew_date(self, subscription):
-        login = subscription._login
-        div = self.document.xpath('//div[@login="%s"]' % login)[0]
-        mydate = div.xpath('.//span[@class="actif"]')[0].text
-        mydate = date(*reversed([int(x) for x in mydate.split("/")]))
+        div = self.doc.xpath('//div[@login="%s"]' % subscription._login)[0]
+        mydate = Date(CleanText('//div[@class="resumeConso"]/span[@class="actif"][1]'), dayfirst=True)(div)
        if mydate.month == 12:
            mydate = mydate.replace(month=1)
            mydate = mydate.replace(year=mydate.year + 1)
@ -149,30 +139,19 @@ class DetailsPage(BasePage):
        return mydate


-def _get_date(detail):
-    return detail.datetime
+class HistoryPage(LoggedPage, BadUTF8Page):
+    @method
+    class get_calls(ListElement):
+        item_xpath = '//tr'

+        class item(ItemElement):
+            klass = Detail

-class HistoryPage(BasePage):
+            def condition(self):
+                txt = self.el.xpath('td[1]')[0].text
+                return (txt is not None) and (txt != "Date")

-    def on_loaded(self):
-        self.calls = []
-        for tr in self.document.xpath('//tr'):
-            tds = tr.xpath('td')
-            if tds[0].text is None or tds[0].text == "Date":
-                pass
-            else:
-                detail = Detail()
-                mydate = date(*reversed([int(x) for x in tds[0].text.split(' ')[0].split("/")]))
-                mytime = time(*[int(x) for x in tds[0].text.split(' ')[2].split(":")])
-                detail.datetime = datetime.combine(mydate, mytime)
-                detail.label = u' '.join([unicode(td.text.strip()) for td in tds[1:4] if td.text is not None])
-                try:
-                    detail.price = Decimal(tds[4].text[0:4].replace(',', '.'))
-                except:
-                    detail.price = Decimal(0)
-
-                self.calls.append(detail)
-
-    def get_calls(self):
-        return sorted(self.calls, key=_get_date, reverse=True)
+            obj_datetime = DateTime(CleanText('td[1]'), dayfirst=True)
+            obj_label = Format(u'%s %s %s %s', CleanText('td[2]'), CleanText('td[3]'),
+                               CleanText('td[4]'), CleanText('td[5]'))
+            obj_price = CleanDecimal('td[5]', default=Decimal(0))
--- a/modules/freemobile/pages/homepage.py
+++ b/modules/freemobile/pages/homepage.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2012  Florent Fourcot
+# Copyright(C) 2012-2014  Florent Fourcot
 #
 # This file is part of weboob.
 #
@ -17,35 +17,28 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

+from .history import BadUTF8Page
 from weboob.capabilities.bill import Subscription
-from weboob.tools.browser import BasePage
-
+from weboob.tools.browser2.page import method, ListElement, ItemElement
+from weboob.tools.browser2.filters import CleanText, Attr, Field, Format, Filter

 __all__ = ['HomePage']


-class HomePage(BasePage):
-    def on_loaded(self):
-        pass
+class GetID(Filter):
+    def filter(self, txt):
+        return txt.split('=')[-1]

-    def get_list(self):
-        for divglobal in self.document.xpath('//div[@class="abonne"]'):
-            for link in divglobal.xpath('.//div[@class="acceuil_btn"]/a'):
-                login = link.attrib['href'].split('=').pop()
-                if login.isdigit():
-                    break
-            divabo = divglobal.xpath('div[@class="idAbonne pointer"]')[0]
-            owner = unicode(divabo.xpath('p')[0].text.replace(' - ', ''))
-            phone = unicode(divabo.xpath('p/span')[0].text)
-            self.browser.logger.debug('Found ' + login + ' as subscription identifier')
-            self.browser.logger.debug('Found ' + owner + ' as subscriber')
-            self.browser.logger.debug('Found ' + phone + ' as phone number')
-            phoneplan = unicode(self.document.xpath('//div[@class="forfaitChoisi"]')[0].text.lstrip().rstrip())
-            self.browser.logger.debug('Found ' + phoneplan + ' as subscription type')

-            subscription = Subscription(phone)
-            subscription.label = phone + ' - ' + phoneplan
-            subscription.subscriber = owner
-            subscription._login = login
+class HomePage(BadUTF8Page):
+    @method
+    class get_list(ListElement):
+        item_xpath = '//div[@class="abonne"]'

-            yield subscription
+        class item(ItemElement):
+            klass = Subscription
+
+            obj_subscriber = CleanText('div[@class="idAbonne pointer"]/p[1]', symbols='-', childs=False)
+            obj_id = CleanText('div[@class="idAbonne pointer"]/p/span')
+            obj__login = GetID(Attr('.//div[@class="acceuil_btn"]/a', 'href'))
+            obj_label = Format(u'%s - %s', Field('id'), CleanText('//div[@class="forfaitChoisi"]'))
--- a/modules/freemobile/pages/login.py
+++ b/modules/freemobile/pages/login.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-

-# Copyright(C) 2012  Florent Fourcot
+# Copyright(C) 2012-2014  Florent Fourcot
 #
 # This file is part of weboob.
 #
@ -19,19 +19,15 @@


 import time
+from StringIO import StringIO
+from PIL import Image

-try:
-    from PIL import Image
-except ImportError:
-    raise ImportError('Please install python-imaging')
-
-from weboob.tools.browser import BasePage
+from weboob.tools.browser2.page import HTMLPage

 __all__ = ['LoginPage']


 class FreeKeyboard(object):
-    DEBUG = False
    symbols = {'0': '001111111111110011111111111111111111111111111110000000000011110000000000011111111111111111011111111111111001111111111110',
               '1': '001110000000000001110000000000001110000000000011111111111111111111111111111111111111111111000000000000000000000000000000',
               '2': '011110000001111011110000111111111000001111111110000011110011110000111100011111111111000011011111110000011001111000000011',
@ -42,16 +38,16 @@ class FreeKeyboard(object):
               '7': '111000000000000111000000000000111000000011111111000011111111111011111111111111111111000000111111000000000111100000000000',
               '8': '001110001111110011111111111111111111111111111110000110000011110000110000011111111111111111011111111111111001111001111110',
               '9': '001111111000110011111111100111111111111100111110000001100011110000001100011111111111111111011111111111111001111111111110'
-              }
+               }

    def __init__(self, basepage):
        self.basepage = basepage
        self.fingerprints = []
-        for htmlimg in basepage.document.xpath('//img[@class="ident_chiffre_img pointer"]'):
+        for htmlimg in self.basepage.doc.xpath('//img[@class="ident_chiffre_img pointer"]'):
            url = htmlimg.attrib.get("src")
-            fichier = basepage.browser.openurl(url)
-            image = Image.open(fichier)
-            matrix = image.load()
+            imgfile = StringIO(basepage.browser.open(url).content)
+            img = Image.open(imgfile)
+            matrix = img.load()
            s = ""
            # The digit is only displayed in the center of image
            for x in range(15, 23):
@ -64,38 +60,28 @@ class FreeKeyboard(object):
                        s += "0"

            self.fingerprints.append(s)
-            if self.DEBUG:
-                image.save('/tmp/' + s + '.png')

    def get_symbol_code(self, digit):
        fingerprint = self.symbols[digit]
-        i = 0
-        for string in self.fingerprints:
+        for i, string in enumerate(self.fingerprints):
            if string == fingerprint:
                return i
-            i += 1
        # Image contains some noise, and the match is not always perfect
        # (this is why we can't use md5 hashes)
        # But if we can't find the perfect one, we can take the best one
-        i = 0
        best = 0
        result = None
-        for string in self.fingerprints:
-            j = 0
+        for i, string in enumerate(self.fingerprints):
            match = 0
-            for bit in string:
+            for j, bit in enumerate(string):
                if bit == fingerprint[j]:
                    match += 1
-                j += 1
            if match > best:
                best = match
                result = i
-            i += 1
        self.basepage.browser.logger.debug(self.fingerprints[result] + " match " + digit)
        return result

-        # TODO : exception
-
    def get_string_code(self, string):
        code = ''
        for c in string:
@ -107,21 +93,16 @@ class FreeKeyboard(object):
        for c in string:
            time.sleep(0.5)
            url = 'https://mobile.free.fr/moncompte/chiffre.php?pos=' + c + '&small=1'
-            self.basepage.browser.openurl(url)
+            self.basepage.browser.open(url)


-class LoginPage(BasePage):
-    def on_loaded(self):
-        pass
-
+class LoginPage(HTMLPage):
    def login(self, login, password):
        vk = FreeKeyboard(self)
-
-        # Fucking form without name...
-        self.browser.select_form(nr=0)
-        self.browser.set_all_readonly(False)
        code = vk.get_string_code(login)
-        self.browser['login_abo'] = code.encode('utf-8')
-        vk.get_small(code)
-        self.browser['pwd_abo'] = password.encode('utf-8')
-        self.browser.submit(nologin=True)
+        vk.get_small(code)  # If the images are not downloaded, the server does not accept the login
+
+        form = self.get_form(xpath='//form[@id="form_connect"]')
+        form['login_abo'] = code
+        form['pwd_abo'] = password
+        form.submit()