weboob.tools.browser -> weboob.deprecated.browser weboob.tools.parsers -> weboob.deprecated.browser.parsers weboob.tools.mech -> weboob.deprecated.mech weboob.browser2 -> weboob.browser weboob.core.exceptions -> weboob.exceptions Also, the new tree for browser2 is: weboob.browser: import weboob.browser.browsers.* and weboob.browser.url.* weboob.browser.browsers: all browsers (including PagesBrowser and LoginBrowser) weboob.browser.url: the URL class weboob.browser.profiles: all Profile classes weboob.browser.sessions: WeboobSession and FuturesSession weboob.browser.cookies: that's a cookies thing weboob.browser.pages: all Page and derivated classes, and Form class weboob.browser.exceptions: specific browser exceptions weboob.browser.elements: AbstractElement classes, and 'method' decorator weboob.browser.filters.*: all filters
251 lines
9.6 KiB
Python
251 lines
9.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2013 Vincent A
|
|
#
|
|
# This file is part of weboob.
|
|
#
|
|
# weboob is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# weboob is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
import datetime
|
|
import re
|
|
import urllib
|
|
from urlparse import urlsplit
|
|
from weboob.deprecated.browser import Page
|
|
from weboob.capabilities.messages import Message, Thread
|
|
from weboob.capabilities.contact import Contact, ProfileNode
|
|
from weboob.tools.date import parse_french_date
|
|
|
|
from . import ovsparse
|
|
|
|
|
|
class OvsPage(Page):
|
|
def is_logged(self):
|
|
return ovsparse.is_logged(self.document)
|
|
|
|
def login(self, username, password):
|
|
self.browser.select_form(name='connection')
|
|
self.browser['Pseudo'] = username.encode(self.browser.ENCODING)
|
|
self.browser['Password'] = password.encode(self.browser.ENCODING)
|
|
self.browser['Retenir'] = ['ok']
|
|
self.browser.submit(nologin=True)
|
|
|
|
|
|
class PagePrivateThreadsList(OvsPage):
|
|
def iter_threads_list(self):
|
|
# site is sorted from latest to oldest
|
|
for message_a in reversed(self.document.findAll('a', href=re.compile(r'message_read.php\?'))):
|
|
ovs_id = re.search(r'Id=(\d+)', message_a["href"]).group(1)
|
|
id_ = ovs_id
|
|
|
|
thread = Thread(id_)
|
|
thread.title = ovsparse.all_text_recursive(message_a)
|
|
thread.flags = Thread.IS_DISCUSSION
|
|
|
|
#~ parent_tr = message_a.findParent('tr')
|
|
#~ username = all_text_recursive(parent_tr.find('a', href=re.compile(r'profil_read.php\?.*')))
|
|
#~ notread_self = (parent_tr.get('class') == 'newmails')
|
|
#~ notread_other = (parent_tr.find('span', **{'class': 'new_sortiepartenaire'}) is not None)
|
|
|
|
yield thread
|
|
|
|
|
|
class PagePrivateThread(OvsPage):
|
|
def get_thread(self, _id):
|
|
thread = Thread(_id)
|
|
|
|
thread.title = self.document.find('div', 'PADtitreBlanc_txt').find('center').string
|
|
thread.flags = Thread.IS_DISCUSSION
|
|
root = True
|
|
|
|
for message in self._get_messages(thread):
|
|
if root:
|
|
message.children = []
|
|
thread.root = message
|
|
thread.date = message.date
|
|
message.title = thread.title
|
|
root = False
|
|
else:
|
|
message.title = 'Re: %s' % thread.title
|
|
message.children = []
|
|
message.parent = thread.root
|
|
thread.root.children.append(message)
|
|
|
|
return thread
|
|
|
|
def _get_messages(self, thread):
|
|
thread_div = self.document.find(True, 'PADpost_txt')
|
|
used_ids = set()
|
|
|
|
rcpt = self.document.find('input', attrs={'type': 'hidden', 'name': 'Dest'})['value']
|
|
sender_to_receiver = {rcpt: self.browser.username, self.browser.username: rcpt}
|
|
# site is sorted from latest to oldest message
|
|
for message_table in reversed(thread_div.findAll('table')):
|
|
for td in message_table.findAll('td'):
|
|
profile_a = td.find('a', href=re.compile(r'profil_read.php\?.*'))
|
|
if not profile_a:
|
|
continue
|
|
|
|
first_br = td.find('br')
|
|
assert first_br.nextSibling.name == 'br'
|
|
text_nodes = ovsparse.all_next_siblings(first_br.nextSibling.nextSibling) # TODO
|
|
#~ print text_nodes
|
|
|
|
# date will be used as id
|
|
sitedate = profile_a.findParent('div').find(text=re.compile(',.*')).replace(', ', '')
|
|
sysdate = parse_french_date(sitedate)
|
|
compactdate = datetime.datetime.strftime(sysdate, '%Y%m%dT%H%M%S')
|
|
|
|
# but make it unique
|
|
msg_id = ovsparse.create_unique_id(compactdate, used_ids)
|
|
used_ids.add(msg_id)
|
|
|
|
message = Message(thread, msg_id)
|
|
|
|
message.sender = re.search(r'\?(.+)', profile_a['href']).group(1)
|
|
message.receivers = [sender_to_receiver[message.sender]]
|
|
|
|
message.date = sysdate
|
|
|
|
message.content = ovsparse.html_message_to_text(text_nodes)
|
|
|
|
notread_self = bool(td.find('span', 'ColorSurligne'))
|
|
notread_other = bool(td.find('span', 'new_sortiepartenaire'))
|
|
if notread_other or notread_self:
|
|
message.flags |= Message.IS_NOT_RECEIVED
|
|
else:
|
|
message.flags |= Message.IS_RECEIVED
|
|
|
|
yield message
|
|
|
|
def post_to_thread(self, thread_id, subject, body):
|
|
form = ovsparse.private_message_form_fields(self.document)
|
|
recode_dict(form, self.browser.ENCODING)
|
|
form['Message'] = body.encode(self.browser.ENCODING)
|
|
self.browser.location('/message_action_envoi.php', urllib.urlencode(form))
|
|
|
|
# html code is so broken that mechanize won't parse the forms
|
|
#~ self.browser.select_form('envoimail')
|
|
#~ self.browser['Message'] = body.encode(self.browser.ENCODING)
|
|
#~ self.browser['Pere'] = thread_id.encode(self.browser.ENCODING)
|
|
#~ self.browser['Titre'] = subject.encode(self.browser.ENCODING)
|
|
#~ self.browser.submit()
|
|
|
|
|
|
class PageLogin(Page):
|
|
pass
|
|
|
|
|
|
class PageIndex(OvsPage):
|
|
pass
|
|
|
|
|
|
class DummyPage(Page):
|
|
pass
|
|
|
|
|
|
class PagePostMessage(OvsPage):
|
|
pass
|
|
|
|
|
|
class PageUserProfile(OvsPage):
|
|
def create_thread(self, recipient, subject, body):
|
|
form = ovsparse.private_message_form_fields(self.document)
|
|
recode_dict(form, self.browser.ENCODING)
|
|
form['Message'] = body.encode(self.browser.ENCODING)
|
|
form['Titre'] = subject.encode(self.browser.ENCODING)
|
|
self.browser.location('/message_action_envoi.php', urllib.urlencode(form))
|
|
|
|
#~ self.browser.select_form('envoimail')
|
|
#~ self.browser['Titre'] = subject.encode(self.browser.ENCODING)
|
|
#~ self.browser['Message'] = body.encode(self.browser.ENCODING)
|
|
#~ self.browser.submit()
|
|
|
|
def get_contact(self):
|
|
profile_a = self.document.find('a', href=re.compile(r'profil_read.php\?.*'))
|
|
_id = re.search(r'\?(.*)', profile_a['href']).group(1)
|
|
|
|
# not available in the 'boosted' version
|
|
contact = Contact(_id, _id, Contact.STATUS_OFFLINE)
|
|
contact.url = self.url
|
|
contact.profile = {}
|
|
|
|
thumbnail_url = 'http://photos.onvasortir.com/%s/photos/%s_resize.png' % (self.browser.city, _id)
|
|
if self.document.find('img', attrs={'src': thumbnail_url}):
|
|
photo_url = thumbnail_url.replace('_resize', '')
|
|
contact.set_photo('main', thumbnail_url=thumbnail_url, url=photo_url, hidden=False)
|
|
|
|
location_a = self.document.find('a', href=re.compile(r'vue_profil_carte\.php\?'))
|
|
if location_a:
|
|
lat = float(re.search(r'Lat=([\d.]+)', location_a['href']).group(1))
|
|
self._set_profile(contact, 'latitude', lat)
|
|
lng = float(re.search(r'Lng=([\d.]+)', location_a['href']).group(1))
|
|
self._set_profile(contact, 'longitude', lng)
|
|
|
|
div = self.document.find('div', attrs={'class': 'PADtitreBlanc_txt'}, text=re.compile('Personal Info'))
|
|
td = div.findParent('tr').findNextSibling('tr').td
|
|
infos_text = td.getText(separator='\n').strip()
|
|
it = iter(infos_text.split('\n'))
|
|
infos = dict(zip(it, it))
|
|
if infos['Sex :'] == 'Man':
|
|
self._set_profile(contact, 'sex', 'M')
|
|
elif infos['Sex :'] == 'Woman':
|
|
self._set_profile(contact, 'sex', 'F')
|
|
if infos['Birthday :'] != 'Unknown':
|
|
self._set_profile(contact, 'birthday', parse_french_date(re.search(r'(\d+ \w+ \d+)', infos['Birthday :']).group(1)))
|
|
self._try_attr(contact, infos, 'First Name :', 'first_name')
|
|
self._try_attr(contact, infos, 'Status :', 'marriage')
|
|
self._try_attr(contact, infos, 'Area :', 'area')
|
|
|
|
div = self.document.find('div', attrs={'class': 'PADtitreBlanc_txt'}, text=re.compile('A few words'))
|
|
td = div.findParent('tr').findNextSibling('tr').td
|
|
summary = td.getText(separator='\n').strip()
|
|
if summary == 'Unknown':
|
|
contact.summary = u''
|
|
else:
|
|
contact.summary = summary
|
|
|
|
div = self.document.find('div', style=re.compile('dashed'))
|
|
if div:
|
|
# TODO handle html, links and smileys
|
|
contact.status_msg = div.getText()
|
|
else:
|
|
contact.status_msg = u''
|
|
|
|
return contact
|
|
|
|
def _set_profile(self, contact, key, value):
|
|
contact.profile[key] = ProfileNode(key, key.capitalize(), value)
|
|
|
|
def _try_attr(self, contact, infos, html_attr, obj_attr):
|
|
if infos[html_attr] != 'Unknown':
|
|
self._set_profile(contact, obj_attr, infos[html_attr].strip())
|
|
|
|
|
|
class PageCityList(DummyPage):
|
|
def get_cities(self, master_domain='onvasortir.com'):
|
|
cities = {}
|
|
for home_a in self.document.findAll('a', href=re.compile(r'http://(.*)\.%s/?' % master_domain)):
|
|
hostname = urlsplit(home_a['href']).hostname
|
|
code = hostname.split('.')[0]
|
|
if code == 'www':
|
|
continue
|
|
name = home_a.text
|
|
cities[name] = {'code': code, 'hostname': hostname}
|
|
return cities
|
|
|
|
|
|
def recode_dict(dict_, encoding):
|
|
for k in dict_:
|
|
dict_[k] = dict_[k].encode(encoding)
|