[aum] new 'profiles walker' optimization feature

This commit is contained in:
Romain Bignon 2010-04-11 18:07:10 +02:00
commit b3a6596d25
5 changed files with 126 additions and 53 deletions

View file

@ -20,10 +20,13 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.backend import Backend from weboob.backend import Backend
from weboob.capabilities.messages import ICapMessages, ICapMessagesReply from weboob.capabilities.messages import ICapMessages, ICapMessagesReply
from weboob.capabilities.dating import ICapDating
from weboob.tools.browser import BrowserUnavailable
from .adopte import AdopteUnMec from .adopte import AdopteUnMec
from .optim.profiles_walker import ProfilesWalker
class AuMBackend(Backend, ICapMessages, ICapMessagesReply): class AuMBackend(Backend, ICapMessages, ICapMessagesReply, ICapDating):
NAME = 'aum' NAME = 'aum'
MAINTAINER = 'Romain Bignon' MAINTAINER = 'Romain Bignon'
EMAIL = 'romain@peerfuse.org' EMAIL = 'romain@peerfuse.org'
@ -33,7 +36,11 @@ class AuMBackend(Backend, ICapMessages, ICapMessagesReply):
CONFIG = {'username': Backend.ConfigField(description='Username on website'), CONFIG = {'username': Backend.ConfigField(description='Username on website'),
'password': Backend.ConfigField(description='Password of account', is_masked=True), 'password': Backend.ConfigField(description='Password of account', is_masked=True),
} }
STORAGE = {'profiles_walker': {'viewed': []} }
# Private
_browser = None _browser = None
_profiles_walker = None
def __getattr__(self, name): def __getattr__(self, name):
if name == 'browser': if name == 'browser':
@ -60,28 +67,44 @@ class AuMBackend(Backend, ICapMessages, ICapMessagesReply):
yield message yield message
def _iter_messages(self, thread, only_new): def _iter_messages(self, thread, only_new):
if not only_new or self.browser.nb_new_mails(): try:
my_name = self.browser.get_my_name() if only_new and not self.browser.nb_new_mails():
contacts = self.browser.get_contact_list() my_name = self.browser.get_my_name()
contacts.reverse() contacts = self.browser.get_contact_list()
contacts.reverse()
for contact in contacts: for contact in contacts:
if only_new and not contact.is_new() or thread and int(thread) != contact.get_id(): if only_new and not contact.is_new() or thread and int(thread) != contact.get_id():
continue continue
mails = self.browser.get_thread_mails(contact.get_id()) mails = self.browser.get_thread_mails(contact.get_id())
profile = None profile = None
for i in xrange(len(mails)): for i in xrange(len(mails)):
mail = mails[i] mail = mails[i]
if only_new and mail.get_from() == my_name: if only_new and mail.get_from() == my_name:
break break
if not profile: if not profile:
profile = self.browser.get_profile(contact.get_id()) profile = self.browser.get_profile(contact.get_id())
mail.signature += u'\n%s' % profile.get_profile_text() mail.signature += u'\n%s' % profile.get_profile_text()
yield mail yield mail
except BrowserUnavailable:
pass
def post_reply(self, thread_id, reply_id, title, message):
    """Queue the new mails of a thread, then post ``message`` to it.

    :param thread_id: thread to reply to
    :param reply_id: not used by this implementation
    :param title: not used by this implementation
    :param message: reply text handed to ``self.browser.post``
    :return: whatever ``self.browser.post`` returns
    """
    # The loop name must differ from the ``message`` parameter: reusing it
    # would replace the reply text with the last queued mail before posting.
    for new_message in self._iter_messages(thread_id, True):
        self.queue_messages.append(new_message)
    return self.browser.post(thread_id, message)
def get_profile(self, _id):
    """Fetch the profile ``_id`` through the browser.

    :return: the object returned by ``self.browser.get_profile``, or None
             when the browser raises BrowserUnavailable
    """
    try:
        profile = self.browser.get_profile(_id)
    except BrowserUnavailable:
        profile = None
    return profile
def start_profiles_walker(self):
    """Create a ProfilesWalker and keep it in ``self._profiles_walker``.

    The walker is built from the weboob scheduler, this backend's storage
    and its browser.
    """
    # The attribute name has to be ``_profiles_walker`` (plural): that is
    # the one stop_profiles_walker() reads to stop the walker.
    self._profiles_walker = ProfilesWalker(self.weboob.scheduler, self.storage, self.browser)
def stop_profiles_walker(self):
    """Stop the profiles walker, if any, and forget it.

    Does nothing when no walker is attached, instead of failing on
    ``None.stop()``.
    """
    walker = self._profiles_walker
    if walker is None:
        return
    walker.stop()
    self._profiles_walker = None

View file

View file

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
"""
Copyright(C) 2010 Romain Bignon
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
from logging import debug
from random import randint
from weboob.tools.browser import BrowserUnavailable
class ProfilesWalker(object):
    """Collect profile ids from searches and visit them one at a time.

    Two jobs are registered on the scheduler at construction: ``walk``,
    repeated with an interval of 60, and ``view_profile``, scheduled once
    with a random delay between 10 and 40 and re-scheduled by itself after
    every run. Visited ids are persisted under
    ``('profiles_walker', 'viewed')`` in the storage.
    """

    def __init__(self, sched, storage, browser):
        """Load the visited ids from ``storage`` and register both jobs.

        :param sched: scheduler providing ``repeat`` and ``schedule``
        :param storage: storage providing ``get``, ``set`` and ``save``
        :param browser: browser providing ``search_profiles`` and
                        ``get_profile``
        """
        self.sched = sched
        self.storage = storage
        self.browser = browser

        self.visited_profiles = set(storage.get('profiles_walker', 'viewed'))
        self.profiles_queue = set()
        self.walk_cron = sched.repeat(60, self.walk)
        self.view_cron = sched.schedule(randint(10, 40), self.view_profile)

    def save(self):
        """Write the visited ids to the storage and save it."""
        # Stored as a list so the value has the same type as the one
        # loaded in __init__ through set(...) and is not a bare set.
        self.storage.set('profiles_walker', 'viewed', list(self.visited_profiles))
        self.storage.save()

    def stop(self):
        """Cancel both scheduled jobs."""
        # NOTE(review): assumes the scheduler exposes cancel(event) for the
        # handles returned by repeat()/schedule() -- confirm against the
        # scheduler implementation. The previous code used ``self.event``,
        # an attribute that is never assigned.
        for event in (self.walk_cron, self.view_cron):
            if event is not None:
                self.sched.cancel(event)
        self.walk_cron = None
        self.view_cron = None

    def walk(self):
        """Add the ids found by a profile search to the queue.

        Ids already visited are removed from the queue.
        """
        found = self.browser.search_profiles()
        self.profiles_queue = self.profiles_queue.union(found).difference(self.visited_profiles)
        self.save()

    def view_profile(self):
        """Visit one queued profile, then re-schedule this method.

        An id whose visit fails with BrowserUnavailable is put back in the
        queue. Any other exception is printed and the id is dropped.
        """
        try:
            try:
                profile_id = self.profiles_queue.pop()
            except KeyError:
                return  # empty queue

            try:
                profile = self.browser.get_profile(profile_id)
                debug(u'Visited %s (%s)' % (profile.get_name(), profile_id))

                # TODO: get a score for the profile from the aum_score
                # module once it is available.

                # Remember that this profile was visited, to avoid
                # visiting it again.
                self.visited_profiles.add(profile_id)
                self.save()
            except BrowserUnavailable:
                # This profile has not been [correctly] analysed: retry later.
                self.profiles_queue.add(profile_id)
                return
            except Exception as e:
                print(e)
        finally:
            # Keep the new handle so that stop() can cancel the pending job.
            self.view_cron = self.sched.schedule(randint(10, 40), self.view_profile)

View file

@ -50,9 +50,10 @@ class RegisterPage(PageBase):
if isinstance(nickname, unicode): if isinstance(nickname, unicode):
nickname = nickname.encode('iso-8859-15', 'ignore') nickname = nickname.encode('iso-8859-15', 'ignore')
self.browser['pseudo'] = nickname self.browser['pseudo'] = nickname
self.browser['email'] = self.browser.login self.browser['email'] = self.browser.username
self.browser['pass'] = password self.browser['pass'] = password
self.browser['sex'] = [str(sex)] self.browser['sex0'] = [str(sex)]
self.browser['sex'] = str(sex)
self.browser['birthday0'] = [str(birthday_d)] self.browser['birthday0'] = [str(birthday_d)]
self.browser['birthday1'] = [str(birthday_m)] self.browser['birthday1'] = [str(birthday_m)]
self.browser['birthday2'] = [str(birthday_y)] self.browser['birthday2'] = [str(birthday_y)]

View file

@ -19,6 +19,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from weboob.backends.aum.pages.base import PageBase from weboob.backends.aum.pages.base import PageBase
from weboob.capabilities.dating import Profile
from copy import deepcopy from copy import deepcopy
from logging import warning from logging import warning
import re import re
@ -124,8 +126,7 @@ class FieldParticularSignes(FieldBase):
elif s.find('rousseur') >= 0: elif s.find('rousseur') >= 0:
d['freckle'] = True d['freckle'] = True
class ProfilePage(PageBase): class ProfilePage(PageBase, Profile):
empty_table = {'details': {'old': 0, empty_table = {'details': {'old': 0,
'birthday': (0,0,0), 'birthday': (0,0,0),
'zipcode': 0, 'zipcode': 0,
@ -229,7 +230,7 @@ class ProfilePage(PageBase):
def __repr__(self): def __repr__(self):
if isinstance(self.name, unicode): if isinstance(self.name, unicode):
name = self.name.encode('ascii', 'backslashreplace') name = self.name.encode('utf-8', 'backslashreplace')
else: else:
name = self.name name = self.name
return '<Profile name="%s">' % name return '<Profile name="%s">' % name
@ -312,7 +313,6 @@ class ProfilePage(PageBase):
self.description = description self.description = description
def parse_table(self, div): def parse_table(self, div):
d = self.table[self.tables[div.getAttribute('id')]] d = self.table[self.tables[div.getAttribute('id')]]
fields = self.fields[self.tables[div.getAttribute('id')]] fields = self.fields[self.tables[div.getAttribute('id')]]
table = div.getElementsByTagName('table')[1] table = div.getElementsByTagName('table')[1]
@ -391,29 +391,3 @@ class ProfilePage(PageBase):
def get_stats(self): def get_stats(self):
return self.stats return self.stats
def get_profile_text(self):
    """Return a multi-line text summary of the profile.

    The text lists, in order: the status, the photos (only when there are
    any), the stats, every section of the information table, and the
    description.
    """
    # Every piece is formatted with a unicode format string, whose ``%s``
    # conversion renders its argument as unicode text; one-element tuples
    # keep a tuple-valued argument from being unpacked by ``%``.
    chunks = [u'Status: %s' % (self.status,)]

    if self.photos:
        chunks.append(u'\nPhotos:')
        for photo in self.photos:
            chunks.append(u'\n\t\t%s' % (photo,))

    chunks.append(u'\nStats:')
    for label, value in self.get_stats().items():
        chunks.append(u'\n\t\t%-15s %s' % (label + ':', value))

    chunks.append(u'\n\nInformations:')
    for section, fields in self.get_table().items():
        chunks.append(u'\n\t%s\n' % section)
        for key, value in fields.items():
            key = '%s:' % key
            if isinstance(value, list):
                joined = u', '.join([u'%s' % (item,) for item in value])
                line = u'\t\t%-15s %s\n' % (key, joined)
            elif isinstance(value, float):
                line = u'\t\t%-15s %.2f\n' % (key, value)
            else:
                line = u'\t\t%-15s %s\n' % (key, value)
            chunks.append(line)

    chunks.append(u'\n\nDescription:\n%s' % (self.get_description(),))
    return u''.join(chunks)