remove useless optimizations and fix code

This commit is contained in:
Romain Bignon 2014-05-17 18:02:24 +02:00
commit cbe5b3a47d
7 changed files with 26 additions and 391 deletions

View file

@ -34,8 +34,6 @@ from weboob.tools.value import Value, ValueBackendPassword
from weboob.tools.misc import local2utc
from .browser import OkCBrowser
from .optim.visibility import Visibility
from .optim.queries_queue import QueriesQueue
from .optim.profiles_walker import ProfilesWalker
@ -88,8 +86,6 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD
# ---- ICapDating methods ---------------------
def init_optimizations(self):
self.add_optimization('VISIBILITY', Visibility(self.weboob.scheduler, self.browser))
self.add_optimization('QUERIES_QUEUE', QueriesQueue(self.weboob.scheduler, self.storage, self.browser))
self.add_optimization('PROFILE_WALKER', ProfilesWalker(self.weboob.scheduler, self.storage, self.browser))
def iter_events(self):
@ -261,7 +257,7 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD
# Check whether we already have a thread with this user
threads = self.browser.get_threads_list()
for thread in threads:
if thread['username'] == message.thread.id:
if thread['id'] == message.thread.id:
self.browser.post_reply(thread['id'], content)
break
else:

View file

@ -31,6 +31,14 @@ class OkCException(Exception):
pass
def check_login(func):
    """Decorator logging the browser in before calling the wrapped method.

    The wrapped callable must be a method of an object exposing a
    ``logged_in`` attribute and a ``login()`` method. When ``logged_in`` is
    false, ``login()`` is called first; the method is then invoked with the
    arguments it was given and its return value is passed through.
    """
    # Local import so that the decorator does not depend on the file's
    # import block.
    import functools

    # functools.wraps keeps the name and docstring of the decorated method,
    # which would otherwise all be reported as "inner".
    @functools.wraps(func)
    def inner(self, *args, **kwargs):
        if not self.logged_in:
            self.login()
        return func(self, *args, **kwargs)
    return inner
class OkCBrowser(BaseBrowser):
DOMAIN = 'm.okcupid.com'
PROTOCOL = 'https'
@ -60,13 +68,6 @@ class OkCBrowser(BaseBrowser):
def is_logged(self):
return self.logged_in
def check_login(func):
    """Decorator logging the browser in before calling the wrapped method.

    When the instance's ``logged_in`` attribute is false, its ``login()``
    method is called first; the wrapped method is then invoked with the
    arguments it was given and its return value is passed through.
    """
    # Local import so that the decorator does not depend on the file's
    # import block.
    import functools

    # functools.wraps keeps the name and docstring of the decorated method.
    @functools.wraps(func)
    def inner(self, *args, **kwargs):
        if not self.logged_in:
            self.login()
        return func(self, *args, **kwargs)
    return inner
def get_consts(self):
return { 'conts' : 'blah' }
# if self.consts is not None:
@ -137,13 +138,7 @@ class OkCBrowser(BaseBrowser):
id = int(id)
self.location(self.absurl('/messages?readmsg=true&threadid=%i&folder=1' % id))
# Find the peer username
mails = self.page.get_thread_mails(count)
for mail in mails['messages']:
if mail['id_from'] != self.get_my_name():
mails['member']['pseudo'] = mail['id_from']
break
return mails
return self.page.get_thread_mails(count)
@check_login
def post_mail(self, id, content):
@ -184,10 +179,10 @@ class OkCBrowser(BaseBrowser):
# return True
@check_login
def search_profiles(self, **kwargs):
self.location(self.absurl('/quickmatch'))
user_id = self.page.get_id()
return set([user_id])
def find_match_profile(self, **kwargs):
self.location(self.absurl('/quickmatch'))
user_id = self.page.get_id()
return user_id
@check_login
def get_profile(self, id):

View file

@ -1,185 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import random
from weboob.tools.browser import BrowserUnavailable, BrowserIncorrectPassword
from weboob.capabilities.dating import Optimization
from weboob.capabilities.account import AccountRegisterError
from weboob.tools.log import getLogger
from weboob.tools.value import Value, ValuesDict, ValueInt
from aum.captcha import CaptchaError
from aum.exceptions import AdopteWait, AdopteBanned
from aum.browser import AuMBrowser
__all__ = ['PriorityConnection']
class PriorityConnection(Optimization):
    """Optimization maintaining a minimal number of "godchild" accounts.

    Two periodic jobs are scheduled while the optimization runs:

    * ``check_godchilds`` (every ``config['interval']`` seconds) registers
      new accounts, with the main account as godfather, until
      ``config['minimal']`` godchilds exist;
    * ``activity_fakes`` (every 600 seconds) logs in with one of the stored
      accounts and rates/deblocks one profile returned by a search.

    Configuration and created accounts are persisted in the
    ``priority_connection`` section of the storage.
    """

    CONFIG = ValuesDict(ValueInt('minimal', label='Minimal of godchilds', default=5),
                        Value('domain', label='Domain to use for fake accounts emails', default='aum.example.com'),
                        ValueInt('interval', label='Interval of checks (seconds)', default=3600)
                        )

    def __init__(self, sched, storage, browser):
        """Store collaborators and load the saved configuration, if any."""
        self.sched = sched
        self.storage = storage
        self.browser = browser
        self.logger = getLogger('priorityconn', browser.logger)

        self.config = storage.get('priority_connection', 'config', default=None)
        # An empty mapping is treated the same as "not configured".
        if self.config == {}:
            self.config = None

        self.check_cron = None
        self.activity_cron = None

    def start(self):
        """Schedule both periodic jobs.

        Returns False without scheduling anything when no configuration is
        set, True otherwise.
        """
        if self.config is None:
            return False

        self.check_cron = self.sched.repeat(int(self.config['interval']), self.check_godchilds)
        self.activity_cron = self.sched.repeat(600, self.activity_fakes)
        return True

    def stop(self):
        """Cancel both periodic jobs and forget their handles."""
        self.sched.cancel(self.check_cron)
        self.check_cron = None
        self.sched.cancel(self.activity_cron)
        self.activity_cron = None
        return True

    def is_running(self):
        """Tell whether the periodic check is currently scheduled."""
        return self.check_cron is not None

    def set_config(self, params):
        """Replace the configuration and persist it in the storage."""
        self.config = params
        self.storage.set('priority_connection', 'config', self.config)
        self.storage.save()

    def get_config(self):
        """Return the current configuration (None when not configured)."""
        return self.config

    def generate_name(self):
        """Return a random 8-letter login not already present in storage.

        Consonants and vowels alternate, starting with a consonant.
        Underscores are appended while the login collides with a stored
        account.
        """
        login = u''
        for x in xrange(8):
            if x % 2:
                login += random.choice(u'aeiou')
            else:
                login += random.choice(u'bcdfghjklmnprstv')

        # No account may have been stored yet: fall back to an empty mapping
        # instead of failing on None.
        fakes = self.storage.get('priority_connection', 'fakes', default=None) or {}
        domain = self.config['domain']
        # Accounts are stored under their bare name (see check_godchilds);
        # the full address is also checked to stay compatible with the
        # previous lookup.
        while login in fakes or ('%s@%s' % (login, domain)) in fakes:
            login += '_'
        return login

    def generate_password(self):
        """Return a random password made of 8 hexadecimal digits."""
        return '%08x' % random.randint(1, int('ffffffff', 16))

    def check_godchilds(self):
        """Register new accounts until the configured minimum is reached.

        Returns early when the browser is unavailable, when the id of the
        main account could not be fetched, or when no godchild is missing.
        """
        my_id = None
        with self.browser:
            try:
                my_id = self.browser.get_my_id()
                nb_godchilds = self.browser.nb_godchilds()
            except AdopteWait:
                nb_godchilds = 0
            except BrowserUnavailable:
                # We'll check later
                return

        if my_id is None:
            # AdopteWait was raised before the id could be fetched; nothing
            # can be registered without a godfather id.
            self.logger.warning('Unable to get the godfather id, godchilds will be checked later')
            return

        missing_godchilds = int(self.config['minimal']) - nb_godchilds

        self.logger.info('Missing godchilds: %s' % missing_godchilds)

        if missing_godchilds <= 0:
            return

        for _ in xrange(missing_godchilds):
            registered = False
            while not registered:
                name = self.generate_name()
                password = self.generate_password()

                browser = AuMBrowser('%s@%s' % (name, self.config['domain']), proxy=self.browser.proxy)
                try:
                    # sex=1: value kept from the original code; its meaning
                    # is defined by the registration API -- TODO confirm.
                    browser.register(password=password,
                                     sex=1,
                                     birthday_d=random.randint(1, 28),
                                     birthday_m=random.randint(1, 12),
                                     birthday_y=random.randint(1975, 1990),
                                     zipcode=75001,
                                     country='fr',
                                     godfather=my_id)
                except AccountRegisterError as e:
                    self.logger.warning('Unable to register account: %s' % e)
                except CaptchaError:
                    self.logger.warning('Unable to solve captcha... Retrying')
                else:
                    registered = True

                    # set nickname
                    browser.set_nickname(name.strip('_').capitalize())

                    # rate my own profile with good score
                    for idx in xrange(4):
                        browser.rate(my_id, idx, 5.0)

                    # save fake in storage
                    fake = {'username': browser.username,
                            'password': password}
                    self.storage.set('priority_connection', 'fakes', name, fake)
                    self.storage.save()
                    self.logger.info('Fake account "%s" created (godfather=%s)' % (name, my_id))

    def activity_fakes(self):
        """Run one round of activity with one of the stored accounts.

        Stored accounts are tried in random order, each at most once, until
        one of them logs in and gets a non-empty search result. A
        BrowserUnavailable error silently ends the round.
        """
        fakes = self.storage.get('priority_connection', 'fakes', default={})
        if not fakes:
            return

        # Try every account at most once, so that the round terminates even
        # when none of them can log in or find a profile.
        names = list(fakes.keys())
        random.shuffle(names)

        try:
            for name in names:
                fake = fakes[name]
                try:
                    browser = AuMBrowser(fake['username'], fake['password'], proxy=self.browser.proxy)
                except (AdopteBanned, BrowserIncorrectPassword) as e:
                    self.logger.warning('Fake %s can\'t login: %s' % (name, e))
                    continue

                profiles = browser.search_profiles(country="fr",
                                                   dist='10',
                                                   save=True)
                if not profiles:
                    continue

                profile = browser.get_profile(profiles.pop())
                target_id = profile.get_id()

                # bad rate
                for idx in xrange(4):
                    browser.rate(target_id, idx, 0.6)

                # deblock
                browser.deblock(target_id)
                return
        except BrowserUnavailable:
            # don't care
            pass

View file

@ -37,7 +37,6 @@ class ProfilesWalker(Optimization):
self.browser = browser
self.logger = getLogger('walker', browser.logger)
self.walk_cron = None
self.view_cron = None
self.visited_profiles = set(storage.get('profiles_walker', 'viewed'))
self.logger.info(u'Loaded %d already visited profiles from storage.' % len(self.visited_profiles))
@ -48,36 +47,23 @@ class ProfilesWalker(Optimization):
self.storage.save()
def start(self):
self.walk_cron = self.sched.repeat(60, self.enqueue_profiles)
self.view_cron = self.sched.schedule(randint(5, 10), self.view_profile)
return True
def stop(self):
self.sched.cancel(self.walk_cron)
self.sched.cancel(self.view_cron)
self.walk_cron = None
self.view_cron = None
return True
def is_running(self):
return self.walk_cron is not None
def enqueue_profiles(self):
try:
with self.browser:
profiles_to_visit = self.browser.search_profiles().difference(self.visited_profiles)
self.logger.info(u'Enqueuing profiles to visit: %s' % profiles_to_visit)
self.profiles_queue = set(profiles_to_visit)
self.save()
except BrowserUnavailable:
return
return self.view_cron is not None
def view_profile(self):
try:
try:
id = self.profiles_queue.pop()
except KeyError:
return # empty queue
id = self.browser.find_match_profile()
if id in self.visited_profiles:
return
try:
with self.browser:
# profile = self.browser.get_profile(id)

View file

@ -1,107 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BrowserUnavailable
from weboob.capabilities.dating import Optimization
from weboob.capabilities.contact import QueryError
from weboob.tools.log import getLogger
__all__ = ['QueriesQueue']
class QueriesQueue(Optimization):
    """Persistent queue of profile ids to visit.

    Entries are ``(priority, id)`` pairs kept in the ``queries_queue``
    section of the storage. While the optimization runs, the queue is
    flushed every 3600 seconds.
    """

    def __init__(self, sched, storage, browser):
        """Store collaborators and load the saved queue (empty by default)."""
        self.sched = sched
        self.storage = storage
        self.browser = browser
        self.logger = getLogger('queriesqueue', browser.logger)

        self.queue = storage.get('queries_queue', 'queue', default=[])

        self.check_cron = None

    def save(self):
        """Write the current queue to the storage."""
        self.storage.set('queries_queue', 'queue', self.queue)
        self.storage.save()

    def start(self):
        """Schedule the hourly flush of the queue."""
        self.check_cron = self.sched.repeat(3600, self.flush_queue)
        return True

    def stop(self):
        """Cancel the scheduled flush."""
        self.sched.cancel(self.check_cron)
        self.check_cron = None
        return True

    def is_running(self):
        """Tell whether the flush is currently scheduled."""
        return self.check_cron is not None

    def enqueue_query(self, id, priority=999):
        """Queue ``id`` and immediately try to flush the queue.

        Raises QueryError when ``id`` is already queued. Returns True when
        ``id`` is no longer in the queue after the flush attempt, False when
        it is still waiting.
        """
        if id in (entry[1] for entry in self.queue):
            raise QueryError('This id is already queued')

        self.queue.append((int(priority), id))
        self.save()

        # Try to flush queue to send it now.
        self.flush_queue()

        # The query has been sent only if it is not in the queue anymore.
        still_queued = any(queued_id == id for _prio, queued_id in self.queue)
        return not still_queued

    def flush_queue(self):
        """Visit queued profiles until the queue is empty or a visit fails.

        The queue is sorted and entries are taken from its end. An entry
        whose visit fails, or during which BrowserUnavailable is raised, is
        put back in the queue. The queue is saved in every case.
        """
        self.queue.sort()

        current_priority = 0
        current_id = None

        try:
            while len(self.queue) > 0:
                current_priority, current_id = self.queue.pop()

                if not current_id:
                    continue

                with self.browser:
                    visited = self.browser.visit_profile(current_id)
                    if visited:
                        self.logger.info('Profile of %s visited' % current_id)
                    else:
                        self.queue.append((current_priority, current_id))
                        self.logger.info("Could not visit profile of %s visited" % current_id)
                        break

                # The visit succeeded without raising: forget this entry so
                # that a later failure does not put it back in the queue.
                current_id = None
                current_priority = 0
        except BrowserUnavailable:
            # This profile has not been [correctly] processed: queue it again.
            if current_id is not None:
                self.queue.append((current_priority, current_id))
        finally:
            self.save()

View file

@ -1,52 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BrowserUnavailable
from weboob.capabilities.dating import Optimization
__all__ = ['Visibility']
class Visibility(Optimization):
    """Optimization calling the browser's ``login()`` every five minutes."""

    def __init__(self, sched, browser):
        """Keep the scheduler and the browser; nothing is scheduled yet."""
        self.sched, self.browser = sched, browser
        self.cron = None

    def start(self):
        """Schedule ``reconnect`` to run every 300 seconds."""
        period = 60 * 5
        self.cron = self.sched.repeat(period, self.reconnect)
        return True

    def stop(self):
        """Cancel the scheduled reconnection."""
        self.sched.cancel(self.cron)
        self.cron = None
        return True

    def is_running(self):
        """Tell whether the reconnection is currently scheduled."""
        return self.cron is not None

    def reconnect(self):
        """Log in again, printing the error if the browser is unavailable."""
        try:
            with self.browser:
                self.browser.login()
        except BrowserUnavailable as e:
            # Parenthesised form prints the same text as the print statement.
            print(str(e))

View file

@ -41,7 +41,7 @@ class ThreadPage(BasePage):
_class = elem.get('class', '')
if 'clearfix' in _class.split():
threads.append({
u'username' : unicode(elem.getchildren()[0].get('href').split('/')[-1]),
u'username' : unicode(elem.getchildren()[0].get('href').split('/')[-1].split('?')[0]),
u'id' : unicode(elem.get('id', '').split('_')[1]),
})
@ -57,11 +57,13 @@ class MessagesPage(BasePage):
'messages' : [],
}
for li_msg in ul_item.getchildren():
mails['member']['pseudo'] = self.document.xpath('//li[starts-with(@id, "usr_")]')[0].attrib['id'].split('_', 1)[-1]
for li_msg in reversed(ul_item.getchildren()):
div = li_msg.getchildren()[1]
txt = self.parser.tostring(div.getchildren()[1])
date = div.getchildren()[2].text
id_from = li_msg.getchildren()[0].get('href').split('/')[-1]
id_from = li_msg.getchildren()[0].get('href').split('/')[-1].split('?')[0]
if date is not None:
date = unicode(date)
@ -210,7 +212,7 @@ class QuickMatchPage(BasePage):
element = self.parser.select(self.document.getroot(), '//*[@id="sn"]', method='xpath')[0]
visitor_id = unicode(element.get('value'))
return visitor_id
def get_rating_params(self):
# initialization
userid = None
@ -255,4 +257,4 @@ class QuickMatchPage(BasePage):
# VoteHandler.process('vote', 'personality', stars, tuid, pass.succeed, pass.failure);
# var params = {voterid: CURRENTUSERID,target_userid: tuid,target_objectid: 0,type: vote_or_note,vote_type: vote_type,score: rating}
# var params = {voterid: CURRENTUSERID,target_userid: tuid,target_objectid: 0,type: vote_or_note,vote_type: vote_type,score: rating}