From cbe5b3a47da51bef6558e59a634d280306250001 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sat, 17 May 2014 18:02:24 +0200 Subject: [PATCH] remove useless optimizations and fix code --- modules/okc/backend.py | 6 +- modules/okc/browser.py | 31 ++-- modules/okc/optim/priority_connection.py | 185 ----------------------- modules/okc/optim/profiles_walker.py | 24 +-- modules/okc/optim/queries_queue.py | 107 ------------- modules/okc/optim/visibility.py | 52 ------- modules/okc/pages.py | 12 +- 7 files changed, 26 insertions(+), 391 deletions(-) delete mode 100644 modules/okc/optim/priority_connection.py delete mode 100644 modules/okc/optim/queries_queue.py delete mode 100644 modules/okc/optim/visibility.py diff --git a/modules/okc/backend.py b/modules/okc/backend.py index a23a2e35..c6f87703 100644 --- a/modules/okc/backend.py +++ b/modules/okc/backend.py @@ -34,8 +34,6 @@ from weboob.tools.value import Value, ValueBackendPassword from weboob.tools.misc import local2utc from .browser import OkCBrowser -from .optim.visibility import Visibility -from .optim.queries_queue import QueriesQueue from .optim.profiles_walker import ProfilesWalker @@ -88,8 +86,6 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD # ---- ICapDating methods --------------------- def init_optimizations(self): - self.add_optimization('VISIBILITY', Visibility(self.weboob.scheduler, self.browser)) - self.add_optimization('QUERIES_QUEUE', QueriesQueue(self.weboob.scheduler, self.storage, self.browser)) self.add_optimization('PROFILE_WALKER', ProfilesWalker(self.weboob.scheduler, self.storage, self.browser)) def iter_events(self): @@ -261,7 +257,7 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD # Check wether we already have a thread with this user threads = self.browser.get_threads_list() for thread in threads: - if thread['username'] == message.thread.id: + if thread['id'] == message.thread.id: self.browser.post_reply(thread['id'], content) break else: diff --git a/modules/okc/browser.py b/modules/okc/browser.py index 04c182e3..dd1f8497 100644 --- a/modules/okc/browser.py +++ b/modules/okc/browser.py @@ -31,6 +31,14 @@ class OkCException(Exception): pass +def check_login(func): + def inner(self, *args, **kwargs): + if not self.logged_in: + self.login() + return func(self, *args, **kwargs) + return inner + + class OkCBrowser(BaseBrowser): DOMAIN = 'm.okcupid.com' PROTOCOL = 'https' @@ -60,13 +68,6 @@ class OkCBrowser(BaseBrowser): def is_logged(self): return self.logged_in - def check_login(func): - def inner(self, *args, **kwargs): - if not self.logged_in: - self.login() - return func(self, *args, **kwargs) - return inner - def get_consts(self): return { 'conts' : 'blah' } # if self.consts is not None: @@ -137,13 +138,7 @@ class OkCBrowser(BaseBrowser): id = int(id) self.location(self.absurl('/messages?readmsg=true&threadid=%i&folder=1' % id)) - # Find the peer username - mails = self.page.get_thread_mails(count) - for mail in mails['messages']: - if mail['id_from'] != self.get_my_name(): - mails['member']['pseudo'] = mail['id_from'] - break - return mails + return self.page.get_thread_mails(count) @check_login def post_mail(self, id, content): @@ -184,10 +179,10 @@ class OkCBrowser(BaseBrowser): # return True @check_login - def search_profiles(self, **kwargs): - self.location(self.absurl('/quickmatch')) - user_id = self.page.get_id() - return set([user_id]) + def find_match_profile(self, **kwargs): + self.location(self.absurl('/quickmatch')) + user_id = self.page.get_id() + return user_id @check_login def get_profile(self, id): diff --git a/modules/okc/optim/priority_connection.py b/modules/okc/optim/priority_connection.py deleted file mode 100644 index 64b0f711..00000000 --- a/modules/okc/optim/priority_connection.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2010-2011 Romain Bignon -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - - - -import random - -from weboob.tools.browser import BrowserUnavailable, BrowserIncorrectPassword -from weboob.capabilities.dating import Optimization -from weboob.capabilities.account import AccountRegisterError -from weboob.tools.log import getLogger -from weboob.tools.value import Value, ValuesDict, ValueInt - -from aum.captcha import CaptchaError -from aum.exceptions import AdopteWait, AdopteBanned -from aum.browser import AuMBrowser - - -__all__ = ['PriorityConnection'] - - -class PriorityConnection(Optimization): - CONFIG = ValuesDict(ValueInt('minimal', label='Minimal of godchilds', default=5), - Value('domain', label='Domain to use for fake accounts emails', default='aum.example.com'), - ValueInt('interval', label='Interval of checks (seconds)', default=3600) - ) - - def __init__(self, sched, storage, browser): - self.sched = sched - self.storage = storage - self.browser = browser - self.logger = getLogger('priorityconn', browser.logger) - - self.config = storage.get('priority_connection', 'config', default=None) - if self.config == {}: - self.config = None - - self.check_cron = None - self.activity_cron = None - - def start(self): - if self.config is None: - return False - - self.check_cron = self.sched.repeat(int(self.config['interval']), self.check_godchilds) - self.activity_cron = self.sched.repeat(600, self.activity_fakes) - return True - - def stop(self): - self.sched.cancel(self.check_cron) - self.check_cron = None - self.sched.cancel(self.activity_cron) - self.activity_cron = None - return True - - def is_running(self): - return self.check_cron is not None - - def set_config(self, params): - self.config = params - self.storage.set('priority_connection', 'config', self.config) - self.storage.save() - - def get_config(self): - return self.config - - def generate_name(self): - login = u'' - for x in xrange(8): - if x % 2: - login += random.choice(u'aeiou') - else: - login += random.choice(u'bcdfghjklmnprstv') - - fakes = self.storage.get('priority_connection', 'fakes') - while ('%s@%s' % (login, self.config['domain'])) in fakes.iterkeys(): - login += '_' - return login - - def generate_password(self): - return '%08x' % random.randint(1, int('ffffffff', 16)) - - def check_godchilds(self): - with self.browser: - try: - my_id = self.browser.get_my_id() - nb_godchilds = self.browser.nb_godchilds() - except AdopteWait: - nb_godchilds = 0 - except BrowserUnavailable: - # We'll check later - return - - missing_godchilds = int(self.config['minimal']) - nb_godchilds - - self.logger.info('Missing godchilds: %s' % missing_godchilds) - - if missing_godchilds <= 0: - return - - for i in xrange(missing_godchilds): - registered = False - while not registered: - name = self.generate_name() - password = self.generate_password() - - browser = AuMBrowser('%s@%s' % (name, self.config['domain']), proxy=self.browser.proxy) - try: - browser.register(password= password, - sex= 1, # slut - birthday_d= random.randint(1, 28), - birthday_m= random.randint(1, 12), - birthday_y= random.randint(1975, 1990), - zipcode= 75001, - country= 'fr', - godfather= my_id) - except AccountRegisterError as e: - self.logger.warning('Unable to register account: %s' % e) - except CaptchaError: - self.logger.warning('Unable to solve captcha... Retrying') - else: - registered = True - - # set nickname - browser.set_nickname(name.strip('_').capitalize()) - # rate my own profile with good score - for i in xrange(4): - browser.rate(my_id, i, 5.0) - - # save fake in storage - fake = {'username': browser.username, - 'password': password} - self.storage.set('priority_connection', 'fakes', name, fake) - self.storage.save() - self.logger.info('Fake account "%s" created (godfather=%s)' % (name, my_id)) - - def activity_fakes(self): - try: - fakes = self.storage.get('priority_connection', 'fakes', default={}) - if len(fakes) == 0: - return - while True: - name = random.choice(fakes.keys()) - fake = fakes[name] - try: - browser = AuMBrowser(fake['username'], fake['password'], proxy=self.browser.proxy) - except (AdopteBanned,BrowserIncorrectPassword) as e: - self.logger.warning('Fake %s can\'t login: %s' % (name, e)) - continue - - profiles = browser.search_profiles(country="fr", - dist='10', - save=True) - - if not profiles: - continue - - id = profiles.pop() - profile = browser.get_profile(id) - # bad rate - for i in xrange(4): - browser.rate(profile.get_id(), i, 0.6) - # deblock - browser.deblock(profile.get_id()) - return - except BrowserUnavailable: - # don't care - pass diff --git a/modules/okc/optim/profiles_walker.py b/modules/okc/optim/profiles_walker.py index f27982cc..21584a22 100644 --- a/modules/okc/optim/profiles_walker.py +++ b/modules/okc/optim/profiles_walker.py @@ -37,7 +37,6 @@ class ProfilesWalker(Optimization): self.browser = browser self.logger = getLogger('walker', browser.logger) - self.walk_cron = None self.view_cron = None self.visited_profiles = set(storage.get('profiles_walker', 'viewed')) self.logger.info(u'Loaded %d already visited profiles from storage.' % len(self.visited_profiles)) @@ -48,36 +47,23 @@ class ProfilesWalker(Optimization): self.storage.save() def start(self): - self.walk_cron = self.sched.repeat(60, self.enqueue_profiles) self.view_cron = self.sched.schedule(randint(5, 10), self.view_profile) return True def stop(self): - self.sched.cancel(self.walk_cron) self.sched.cancel(self.view_cron) - self.walk_cron = None self.view_cron = None return True def is_running(self): - return self.walk_cron is not None - - def enqueue_profiles(self): - try: - with self.browser: - profiles_to_visit = self.browser.search_profiles().difference(self.visited_profiles) - self.logger.info(u'Enqueuing profiles to visit: %s' % profiles_to_visit) - self.profiles_queue = set(profiles_to_visit) - self.save() - except BrowserUnavailable: - return + return self.view_cron is not None def view_profile(self): try: - try: - id = self.profiles_queue.pop() - except KeyError: - return # empty queue + id = self.browser.find_match_profile() + if id in self.visited_profiles: + return + try: with self.browser: # profile = self.browser.get_profile(id) diff --git a/modules/okc/optim/queries_queue.py b/modules/okc/optim/queries_queue.py deleted file mode 100644 index f79ea246..00000000 --- a/modules/okc/optim/queries_queue.py +++ /dev/null @@ -1,107 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2010-2011 Romain Bignon -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - - - -from weboob.tools.browser import BrowserUnavailable -from weboob.capabilities.dating import Optimization -from weboob.capabilities.contact import QueryError -from weboob.tools.log import getLogger - - -__all__ = ['QueriesQueue'] - - -class QueriesQueue(Optimization): - def __init__(self, sched, storage, browser): - self.sched = sched - self.storage = storage - self.browser = browser - self.logger = getLogger('queriesqueue', browser.logger) - - self.queue = storage.get('queries_queue', 'queue', default=[]) - - self.check_cron = None - - def save(self): - self.storage.set('queries_queue', 'queue', self.queue) - self.storage.save() - - def start(self): - self.check_cron = self.sched.repeat(3600, self.flush_queue) - return True - - def stop(self): - self.sched.cancel(self.check_cron) - self.check_cron = None - return True - - def is_running(self): - return self.check_cron is not None - - def enqueue_query(self, id, priority=999): - id_queue = [_id[1] for _id in self.queue] - if id in id_queue: - raise QueryError('This id is already queued') - self.queue.append((int(priority), id)) - self.save() - # Try to flush queue to send it now. - self.flush_queue() - - # Check if the enqueued query has been sent - for p, i in self.queue: - if i == id: - return False - return True - - def flush_queue(self): - self.queue.sort() - - priority = 0 - id = None - - try: - try: - while len(self.queue) > 0: - priority, id = self.queue.pop() - - if not id: - continue - - with self.browser: - if self.browser.visit_profile(id): - self.logger.info('Profile of %s visited' % id) - else: - self.queue.append((priority, id)) - self.logger.info("Could not visit profile of %s visited" % id) - break - - # As the charm has been correctly sent (no exception raised), - # we don't store anymore ID, because if nbAvailableCharms() - # fails, we don't want to re-queue this ID. - id = None - priority = 0 - - except BrowserUnavailable: - # We consider this profil hasn't been [correctly] analysed - if not id is None: - self.queue.append((priority, id)) - finally: - self.save() diff --git a/modules/okc/optim/visibility.py b/modules/okc/optim/visibility.py deleted file mode 100644 index e9a52251..00000000 --- a/modules/okc/optim/visibility.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2010-2011 Romain Bignon -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from weboob.tools.browser import BrowserUnavailable -from weboob.capabilities.dating import Optimization - - -__all__ = ['Visibility'] - - -class Visibility(Optimization): - def __init__(self, sched, browser): - self.sched = sched - self.browser = browser - self.cron = None - - def start(self): - self.cron = self.sched.repeat(60*5, self.reconnect) - return True - - def stop(self): - self.sched.cancel(self.cron) - self.cron = None - return True - - def is_running(self): - return self.cron is not None - - def reconnect(self): - try: - with self.browser: - self.browser.login() - except BrowserUnavailable as e: - print str(e) - pass diff --git a/modules/okc/pages.py b/modules/okc/pages.py index 4dcbffa3..e638008d 100644 --- a/modules/okc/pages.py +++ b/modules/okc/pages.py @@ -41,7 +41,7 @@ class ThreadPage(BasePage): _class = elem.get('class', '') if 'clearfix' in _class.split(): threads.append({ - u'username' : unicode(elem.getchildren()[0].get('href').split('/')[-1]), + u'username' : unicode(elem.getchildren()[0].get('href').split('/')[-1].split('?')[0]), u'id' : unicode(elem.get('id', '').split('_')[1]), }) @@ -57,11 +57,13 @@ class MessagesPage(BasePage): 'messages' : [], } - for li_msg in ul_item.getchildren(): + mails['member']['pseudo'] = self.document.xpath('//li[starts-with(@id, "usr_")]')[0].attrib['id'].split('_', 1)[-1] + + for li_msg in reversed(ul_item.getchildren()): div = li_msg.getchildren()[1] txt = self.parser.tostring(div.getchildren()[1]) date = div.getchildren()[2].text - id_from = li_msg.getchildren()[0].get('href').split('/')[-1] + id_from = li_msg.getchildren()[0].get('href').split('/')[-1].split('?')[0] if date is not None: date = unicode(date) @@ -210,7 +212,7 @@ class QuickMatchPage(BasePage): element = self.parser.select(self.document.getroot(), '//*[@id="sn"]', method='xpath')[0] visitor_id = unicode(element.get('value')) return visitor_id - + def get_rating_params(self): # initialization userid = None @@ -255,4 +257,4 @@ class QuickMatchPage(BasePage): # VoteHandler.process('vote', 'personality', stars, tuid, pass.succeed, pass.failure); - # var params = {voterid: CURRENTUSERID,target_userid: tuid,target_objectid: 0,type: vote_or_note,vote_type: vote_type,score: rating} \ No newline at end of file + # var params = {voterid: CURRENTUSERID,target_userid: tuid,target_objectid: 0,type: vote_or_note,vote_type: vote_type,score: rating}