Convert freemobile to browser2

This commit is contained in:
Florent 2014-04-03 13:08:59 +02:00
commit 25725f79cf
7 changed files with 143 additions and 257 deletions

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
# Copyright(C) 2012-2014 Florent Fourcot
#
# This file is part of weboob.
#
@ -18,125 +18,115 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
import calendar
from datetime import datetime, date, time
from StringIO import StringIO
import lxml.html as html
from datetime import datetime
from decimal import Decimal
from weboob.tools.browser import BasePage
from weboob.tools.browser2.page import HTMLPage, method, ItemElement, ListElement, LoggedPage
from weboob.tools.browser2.filters import Date, CleanText, Attr, Filter, CleanDecimal, Regexp, Field, DateTime, Format
from weboob.capabilities.bill import Detail, Bill
__all__ = ['HistoryPage', 'DetailsPage']
__all__ = ['HistoryPage', 'DetailsPage', 'BadUTF8Page']
def convert_price(div):
    """Return the out-of-plan price found in *div* as a ``Decimal``.

    The amount is read from the ``div[@class="horsForfait"]/p/span`` child of
    *div*. The euro sign is removed and the decimal comma is turned into a
    dot before the conversion.

    :param div: element exposing ``find(path)``; the ``text`` of the node it
        returns holds the displayed amount.
    :returns: the parsed amount, or ``Decimal(0)`` when the node is missing,
        has no text, or does not hold a number.
    """
    try:
        price = div.find('div[@class="horsForfait"]/p/span').text
        # Clean the text itself instead of encoded bytes, and spell the euro
        # sign as an escape so it cannot be lost if this file is re-encoded.
        price = price.replace(u'\u20ac', u'').replace(u',', u'.').strip()
        return Decimal(price)
    except (AttributeError, TypeError, ValueError, ArithmeticError):
        # AttributeError: node missing or without text.
        # ArithmeticError: decimal.InvalidOperation on a non-numeric string.
        return Decimal(0)
class FormatDate(Filter):
    # Filter turning a compact ``YYYYMMDD`` string into a ``date`` object.

    def filter(self, txt):
        # Parse with the fixed "%Y%m%d" layout, then drop the time part.
        parsed = datetime.strptime(txt, "%Y%m%d")
        return parsed.date()
class DetailsPage(BasePage):
class BadUTF8Page(HTMLPage):
    # Page whose document is built here from the raw response bytes, with an
    # lxml HTML parser forced to the UTF-8 encoding.

    def __init__(self, browser, response, *args, **kwargs):
        # super(HTMLPage, self) resolves to the class that follows HTMLPage in
        # the MRO, so HTMLPage's own __init__ is not executed.
        super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
        utf8_parser = html.HTMLParser(encoding='UTF-8')
        raw_body = StringIO(response.content)
        self.doc = html.parse(raw_body, utf8_parser)
def on_loaded(self):
class DetailsPage(LoggedPage, BadUTF8Page):
def on_load(self):
self.details = {}
self.datebills = {}
for div in self.document.xpath('//div[@class="infosLigne pointer"]'):
phonenumber = div.text
for div in self.doc.xpath('//div[@class="infosLigne pointer"]'):
phonenumber = CleanText('.')(div)
phonenumber = phonenumber.split("-")[-1].strip()
virtualnumber = div.attrib['onclick'].split('(')[1][1]
self.details['num' + str(phonenumber)] = virtualnumber
for div in self.document.xpath('//div[@class="infosConso"]'):
for div in self.doc.xpath('//div[@class="infosConso"]'):
num = div.attrib['id'].split('_')[1][0]
self.details[num] = []
# National parsing
divnat = div.xpath('div[@class="national"]')[0]
self.parse_div(divnat, "National : %s | International : %s", num, False)
self._parse_div(divnat, "National : %s | International : %s", num, False)
# International parsing
divint = div.xpath('div[@class="international hide"]')[0]
if divint.xpath('div[@class="detail"]'):
self.parse_div(divint, u"Appels émis : %s | Appels reçus : %s", num, True)
self._parse_div(divint, u"Appels émis : %s | Appels reçus : %s", num, True)
for divbills in self.document.xpath('//div[@id="factContainer"]'):
for divbill in divbills.xpath('.//div[@class="factLigne hide "]'):
alink = divbill.xpath('.//div[@class="pdf"]/a')[0]
localid = re.search('&l=(?P<id>\d*)&id',
alink.attrib.get('href')).group('id')
mydate_str = re.search('&date=(?P<date>\d*)$',
alink.attrib.get('href')).group('date')
mydate = datetime.strptime(mydate_str, "%Y%m%d").date()
bill = Bill()
bill.label = unicode(mydate_str)
bill.id = unicode(mydate_str)
bill.date = mydate
bill.format = u"pdf"
bill._url = alink.attrib.get('href')
if "pdfrecap" in alink.attrib.get('href'):
bill.id = "recap-" + bill.id
if localid not in self.datebills:
self.datebills[localid] = []
self.datebills[localid].append(bill)
def parse_div(self, divglobal, string, num, inter=False):
def _parse_div(self, divglobal, string, num, inter=False):
divs = divglobal.xpath('div[@class="detail"]')
# Two pieces of information in one div...
div = divs.pop(0)
voice = self.parse_voice(div, string, num, inter)
voice = self._parse_voice(div, string, num, inter)
self.details[num].append(voice)
self.iter_divs(divs, num, inter)
self._iter_divs(divs, num, inter)
def iter_divs(self, divs, num, inter=False):
def _iter_divs(self, divs, num, inter=False):
for div in divs:
detail = Detail()
detail.label = unicode(div.find('div[@class="titre"]/p').text_content())
detail.label = CleanText('div[@class="titre"]/p')(div)
detail.id = "-" + detail.label.split(' ')[1].lower()
if inter:
detail.label = detail.label + u" (international)"
detail.id = detail.id + "-inter"
detail.infos = unicode(div.find('div[@class="conso"]/p').text_content().lstrip())
detail.price = convert_price(div)
detail.infos = CleanText('div[@class="conso"]/p')(div)
detail.price = CleanDecimal('div[@class="horsForfait"]/p/span', default=Decimal(0))(div)
self.details[num].append(detail)
def parse_voice(self, div, string, num, inter=False):
def _parse_voice(self, div, string, num, inter=False):
voicediv = div.xpath('div[@class="conso"]')[0]
voice = Detail()
voice.id = "-voice"
voicediv = div.xpath('div[@class="conso"]')[0]
voice.label = unicode(div.find('div[@class="titre"]/p').text_content())
voice.label = CleanText('div[@class="titre"]/p')(div)
if inter:
voice.label = voice.label + " (international)"
voice.id = voice.id + "-inter"
voice.price = convert_price(div)
voice1 = voicediv.xpath('.//span[@class="actif"]')[0].text
voice2 = voicediv.xpath('.//span[@class="actif"]')[1].text
voice.price = CleanDecimal('div[@class="horsForfait"]/p/span', default=0)(div)
voice1 = CleanText('.//span[@class="actif"][1]')(voicediv)
voice2 = CleanText('.//span[@class="actif"][2]')(voicediv)
voice.infos = unicode(string) % (voice1, voice2)
return voice
# XXX
def get_details(self, subscription):
    """Yield the consumption details recorded for *subscription*.

    ``self.details`` maps ``'num' + subscription.id`` to an internal line
    key, and that key to the list of details of the line. Every yielded
    detail has its id prefixed with ``subscription.id``.

    :raises KeyError: if no line is recorded for ``subscription.id``.
    """
    line_key = self.details['num' + subscription.id]
    for detail in self.details[line_key]:
        # The stored objects are modified in place, so make sure the prefix
        # is not added a second time when the details are iterated again.
        if not detail.id.startswith(subscription.id):
            detail.id = subscription.id + detail.id
        yield detail
def date_bills(self, subscription):
    """Yield the bills stored for the login of *subscription*.

    Bills are looked up in ``self.datebills`` under ``subscription._login``.
    Every yielded bill has its id prefixed with ``subscription.id`` and a
    dot. A login without any stored bill yields nothing.
    """
    prefix = subscription.id + '.'
    # Only logins owning at least one bill have an entry, so fall back to an
    # empty sequence instead of failing with KeyError.
    for bill in self.datebills.get(subscription._login, ()):
        # Bills are modified in place: never prefix the same id twice.
        if not bill.id.startswith(prefix):
            bill.id = prefix + bill.id
        yield bill
@method
class date_bills(ListElement):
    # One item is built for every div matched by ``item_xpath``.
    item_xpath = '//div[@class="factLigne hide "]'

    class item(ItemElement):
        # Each item produces a Bill described by the href of its PDF link.
        klass = Bill

        # NOTE(review): declaration order is kept as-is because later fields
        # read earlier ones through Field(); confirm before reordering.
        obj__url = Attr('.//div[@class="pdf"]/a', 'href')
        # Raw strings so that ``\d`` reaches the regex engine untouched
        # instead of relying on an unrecognised string escape.
        obj__localid = Regexp(Field('_url'), r'&l=(\d*)&id', u'\\1')
        obj_label = Regexp(Field('_url'), r'&date=(\d*)$', u'\\1')
        obj_id = Field('label')
        # The id is the ``date=`` value of the link, parsed by FormatDate.
        obj_date = FormatDate(Field('id'))
        obj_format = u"pdf"
        obj_price = CleanDecimal('div[@class="montant"]', default=Decimal(0), replace_dots=False)
def get_renew_date(self, subscription):
login = subscription._login
div = self.document.xpath('//div[@login="%s"]' % login)[0]
mydate = div.xpath('.//span[@class="actif"]')[0].text
mydate = date(*reversed([int(x) for x in mydate.split("/")]))
div = self.doc.xpath('//div[@login="%s"]' % subscription._login)[0]
mydate = Date(CleanText('//div[@class="resumeConso"]/span[@class="actif"][1]'), dayfirst=True)(div)
if mydate.month == 12:
mydate = mydate.replace(month=1)
mydate = mydate.replace(year=mydate.year + 1)
@ -149,30 +139,19 @@ class DetailsPage(BasePage):
return mydate
# Key function for ordering call records: returns the ``datetime``
# attribute of *detail*.
def _get_date(detail):
return detail.datetime
class HistoryPage(LoggedPage, BadUTF8Page):
@method
class get_calls(ListElement):
item_xpath = '//tr'
class item(ItemElement):
klass = Detail
class HistoryPage(BasePage):
def condition(self):
txt = self.el.xpath('td[1]')[0].text
return (txt is not None) and (txt != "Date")
def on_loaded(self):
self.calls = []
for tr in self.document.xpath('//tr'):
tds = tr.xpath('td')
if tds[0].text is None or tds[0].text == "Date":
pass
else:
detail = Detail()
mydate = date(*reversed([int(x) for x in tds[0].text.split(' ')[0].split("/")]))
mytime = time(*[int(x) for x in tds[0].text.split(' ')[2].split(":")])
detail.datetime = datetime.combine(mydate, mytime)
detail.label = u' '.join([unicode(td.text.strip()) for td in tds[1:4] if td.text is not None])
try:
detail.price = Decimal(tds[4].text[0:4].replace(',', '.'))
except:
detail.price = Decimal(0)
self.calls.append(detail)
def get_calls(self):
return sorted(self.calls, key=_get_date, reverse=True)
obj_datetime = DateTime(CleanText('td[1]'), dayfirst=True)
obj_label = Format(u'%s %s %s %s', CleanText('td[2]'), CleanText('td[3]'),
CleanText('td[4]'), CleanText('td[5]'))
obj_price = CleanDecimal('td[5]', default=Decimal(0))

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
# Copyright(C) 2012-2014 Florent Fourcot
#
# This file is part of weboob.
#
@ -17,35 +17,28 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .history import BadUTF8Page
from weboob.capabilities.bill import Subscription
from weboob.tools.browser import BasePage
from weboob.tools.browser2.page import method, ListElement, ItemElement
from weboob.tools.browser2.filters import CleanText, Attr, Field, Format, Filter
__all__ = ['HomePage']
class HomePage(BasePage):
def on_loaded(self):
pass
class GetID(Filter):
    # Filter keeping only what follows the last ``=`` of its input.

    def filter(self, txt):
        # rpartition leaves the whole text in the tail when there is no "=".
        _head, _sep, tail = txt.rpartition('=')
        return tail
def get_list(self):
for divglobal in self.document.xpath('//div[@class="abonne"]'):
for link in divglobal.xpath('.//div[@class="acceuil_btn"]/a'):
login = link.attrib['href'].split('=').pop()
if login.isdigit():
break
divabo = divglobal.xpath('div[@class="idAbonne pointer"]')[0]
owner = unicode(divabo.xpath('p')[0].text.replace(' - ', ''))
phone = unicode(divabo.xpath('p/span')[0].text)
self.browser.logger.debug('Found ' + login + ' as subscription identifier')
self.browser.logger.debug('Found ' + owner + ' as subscriber')
self.browser.logger.debug('Found ' + phone + ' as phone number')
phoneplan = unicode(self.document.xpath('//div[@class="forfaitChoisi"]')[0].text.lstrip().rstrip())
self.browser.logger.debug('Found ' + phoneplan + ' as subscription type')
subscription = Subscription(phone)
subscription.label = phone + ' - ' + phoneplan
subscription.subscriber = owner
subscription._login = login
class HomePage(BadUTF8Page):
@method
class get_list(ListElement):
item_xpath = '//div[@class="abonne"]'
yield subscription
class item(ItemElement):
klass = Subscription
obj_subscriber = CleanText('div[@class="idAbonne pointer"]/p[1]', symbols='-', childs=False)
obj_id = CleanText('div[@class="idAbonne pointer"]/p/span')
obj__login = GetID(Attr('.//div[@class="acceuil_btn"]/a', 'href'))
obj_label = Format(u'%s - %s', Field('id'), CleanText('//div[@class="forfaitChoisi"]'))

View file

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
# Copyright(C) 2012-2014 Florent Fourcot
#
# This file is part of weboob.
#
@ -19,19 +19,15 @@
import time
from StringIO import StringIO
from PIL import Image
try:
from PIL import Image
except ImportError:
raise ImportError('Please install python-imaging')
from weboob.tools.browser import BasePage
from weboob.tools.browser2.page import HTMLPage
__all__ = ['LoginPage']
class FreeKeyboard(object):
DEBUG = False
symbols = {'0': '001111111111110011111111111111111111111111111110000000000011110000000000011111111111111111011111111111111001111111111110',
'1': '001110000000000001110000000000001110000000000011111111111111111111111111111111111111111111000000000000000000000000000000',
'2': '011110000001111011110000111111111000001111111110000011110011110000111100011111111111000011011111110000011001111000000011',
@ -42,16 +38,16 @@ class FreeKeyboard(object):
'7': '111000000000000111000000000000111000000011111111000011111111111011111111111111111111000000111111000000000111100000000000',
'8': '001110001111110011111111111111111111111111111110000110000011110000110000011111111111111111011111111111111001111001111110',
'9': '001111111000110011111111100111111111111100111110000001100011110000001100011111111111111111011111111111111001111111111110'
}
}
def __init__(self, basepage):
self.basepage = basepage
self.fingerprints = []
for htmlimg in basepage.document.xpath('//img[@class="ident_chiffre_img pointer"]'):
for htmlimg in self.basepage.doc.xpath('//img[@class="ident_chiffre_img pointer"]'):
url = htmlimg.attrib.get("src")
fichier = basepage.browser.openurl(url)
image = Image.open(fichier)
matrix = image.load()
imgfile = StringIO(basepage.browser.open(url).content)
img = Image.open(imgfile)
matrix = img.load()
s = ""
# The digit is only displayed in the center of image
for x in range(15, 23):
@ -64,38 +60,28 @@ class FreeKeyboard(object):
s += "0"
self.fingerprints.append(s)
if self.DEBUG:
image.save('/tmp/' + s + '.png')
def get_symbol_code(self, digit):
fingerprint = self.symbols[digit]
i = 0
for string in self.fingerprints:
for i, string in enumerate(self.fingerprints):
if string == fingerprint:
return i
i += 1
# The image contains some noise, so the match is not always perfect
# (this is why we can't use md5 hashes).
# If no fingerprint matches exactly, fall back to the closest one.
i = 0
best = 0
result = None
for string in self.fingerprints:
j = 0
for i, string in enumerate(self.fingerprints):
match = 0
for bit in string:
for j, bit in enumerate(string):
if bit == fingerprint[j]:
match += 1
j += 1
if match > best:
best = match
result = i
i += 1
self.basepage.browser.logger.debug(self.fingerprints[result] + " match " + digit)
return result
# TODO : exception
def get_string_code(self, string):
code = ''
for c in string:
@ -107,21 +93,16 @@ class FreeKeyboard(object):
for c in string:
time.sleep(0.5)
url = 'https://mobile.free.fr/moncompte/chiffre.php?pos=' + c + '&small=1'
self.basepage.browser.openurl(url)
self.basepage.browser.open(url)
class LoginPage(BasePage):
def on_loaded(self):
pass
class LoginPage(HTMLPage):
def login(self, login, password):
vk = FreeKeyboard(self)
# The login form has no name attribute, so it is selected by position.
self.browser.select_form(nr=0)
self.browser.set_all_readonly(False)
code = vk.get_string_code(login)
self.browser['login_abo'] = code.encode('utf-8')
vk.get_small(code)
self.browser['pwd_abo'] = password.encode('utf-8')
self.browser.submit(nologin=True)
vk.get_small(code) # If img are not downloaded, the server do not accept the login
form = self.get_form(xpath='//form[@id="form_connect"]')
form['login_abo'] = code
form['pwd_abo'] = password
form.submit()