Starting work on the okc module: adding optimizations
This commit is contained in:
parent
0349e85360
commit
296d6b7c9f
8 changed files with 501 additions and 8 deletions
|
|
@ -27,13 +27,15 @@ from dateutil.parser import parse as _parse_dt
|
|||
|
||||
from weboob.capabilities.base import NotLoaded
|
||||
from weboob.capabilities.messages import ICapMessages, ICapMessagesPost, Message, Thread
|
||||
#from weboob.capabilities.dating import ICapDating, OptimizationNotFound, Event
|
||||
from weboob.capabilities.dating import ICapDating, OptimizationNotFound, Event
|
||||
from weboob.capabilities.contact import ICapContact, ContactPhoto, Contact
|
||||
from weboob.tools.backend import BaseBackend, BackendConfig
|
||||
from weboob.tools.value import Value, ValueBackendPassword
|
||||
from weboob.tools.misc import local2utc
|
||||
|
||||
from .browser import OkCBrowser
|
||||
from .optim.visibility import Visibility
|
||||
from .optim.queries_queue import QueriesQueue
|
||||
|
||||
|
||||
__all__ = ['OkCBackend']
|
||||
|
|
@ -64,7 +66,7 @@ def parse_dt(s):
|
|||
return local2utc(d)
|
||||
|
||||
|
||||
class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost):
|
||||
class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapDating):
|
||||
NAME = 'okc'
|
||||
MAINTAINER = u'Roger Philibert'
|
||||
EMAIL = 'roger.philibert@gmail.com'
|
||||
|
|
@ -73,7 +75,7 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost):
|
|||
DESCRIPTION = u'OkCupid dating website'
|
||||
CONFIG = BackendConfig(Value('username', label='Username'),
|
||||
ValueBackendPassword('password', label='Password'))
|
||||
STORAGE = {
|
||||
STORAGE = {'queries_queue': {'queue': []},
|
||||
'sluts': {},
|
||||
#'notes': {},
|
||||
}
|
||||
|
|
@ -82,6 +84,32 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost):
|
|||
def create_default_browser(self):
|
||||
return self.create_browser(self.config['username'].get(), self.config['password'].get())
|
||||
|
||||
# ---- ICapDating methods ---------------------
|
||||
def init_optimizations(self):
    """Register the optimizations this backend offers.

    Two optimizations are added under the names ``VISIBILITY`` and
    ``QUERIES_QUEUE``; both share the weboob scheduler and the backend's
    browser, and the queries queue additionally gets the backend storage.
    """
    scheduler = self.weboob.scheduler

    visibility = Visibility(scheduler, self.browser)
    self.add_optimization('VISIBILITY', visibility)

    queries_queue = QueriesQueue(scheduler, self.storage, self.browser)
    self.add_optimization('QUERIES_QUEUE', queries_queue)
|
||||
|
||||
def iter_events(self):
    """Yield an ``Event`` for every visit reported by the browser.

    Each event is built from a raw dict carrying ``who.id`` and ``date``:
    the event id is the visitor id, ``date`` is converted with
    ``parse_dt`` and ``type`` is the key of the event source
    (currently only ``u'visits'``).
    """
    # Event type -> (callable returning raw events, message template).
    # Kept as a mapping so further event sources can be added later.
    sources = {
        u'visits': (self.browser.get_visits, 'Visited by %s'),
    }

    # NOTE(review): the browser is assumed to stay locked while the raw
    # events are fetched -- confirm against the intended nesting.
    with self.browser:
        for event_type, (fetch_events, _message) in sources.items():
            for raw_event in fetch_events():
                e = Event(raw_event['who']['id'])

                e.date = parse_dt(raw_event['date'])
                e.type = event_type

                # TODO: attach the contact (self._get_partial_contact) and
                # set ``e.message = message % e.contact.name`` once partial
                # contacts are available; events without a contact should
                # then be skipped.
                yield e
|
||||
|
||||
# ---- ICapMessages methods ---------------------
|
||||
|
||||
def fill_thread(self, thread, fields):
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import urllib
|
|||
from weboob.tools.browser import BaseBrowser, BasePage
|
||||
from weboob.tools.ordereddict import OrderedDict
|
||||
|
||||
from .pages import LoginPage, ThreadPage, MessagesPage, PostMessagePage, ProfilePage, PhotosPage
|
||||
from .pages import LoginPage, ThreadPage, MessagesPage, PostMessagePage, ProfilePage, PhotosPage, VisitsPage
|
||||
|
||||
__all__ = ['OkCBrowser']
|
||||
|
||||
|
|
@ -43,6 +43,7 @@ class OkCBrowser(BaseBrowser):
|
|||
('http://%s/messages\?.*' % DOMAIN, MessagesPage),
|
||||
('http://%s/profile/.*/photos' % DOMAIN, PhotosPage),
|
||||
('http://%s/profile/[^/]*' % DOMAIN, ProfilePage),
|
||||
('http://%s/visitors' % DOMAIN, VisitsPage)
|
||||
))
|
||||
|
||||
logged_in = False
|
||||
|
|
@ -120,10 +121,10 @@ class OkCBrowser(BaseBrowser):
|
|||
# r = self.api_request('me', 'flashs')
|
||||
# return r['result']['all']
|
||||
|
||||
#@check_login
|
||||
#def get_visits(self):
|
||||
# r = self.api_request('me', 'visits')
|
||||
# return r['result']['news'] + r['result']['olds']
|
||||
@check_login
def get_visits(self):
    """Open the visitors page and return what its ``get_visits()`` yields."""
    # NOTE(review): the host is hard-coded here while the page pattern is
    # built from DOMAIN -- presumably both are 'm.okcupid.com'; confirm.
    self.location('http://m.okcupid.com/visitors')
    visits_page = self.page
    return visits_page.get_visits()
|
||||
|
||||
@check_login
|
||||
def get_threads_list(self, count=30):
|
||||
|
|
|
|||
0
modules/okc/optim/__init__.py
Normal file
0
modules/okc/optim/__init__.py
Normal file
185
modules/okc/optim/priority_connection.py
Normal file
185
modules/okc/optim/priority_connection.py
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
|
||||
|
||||
import random
|
||||
|
||||
from weboob.tools.browser import BrowserUnavailable, BrowserIncorrectPassword
|
||||
from weboob.capabilities.dating import Optimization
|
||||
from weboob.capabilities.account import AccountRegisterError
|
||||
from weboob.tools.log import getLogger
|
||||
from weboob.tools.value import Value, ValuesDict, ValueInt
|
||||
|
||||
from aum.captcha import CaptchaError
|
||||
from aum.exceptions import AdopteWait, AdopteBanned
|
||||
from aum.browser import AuMBrowser
|
||||
|
||||
|
||||
__all__ = ['PriorityConnection']
|
||||
|
||||
|
||||
class PriorityConnection(Optimization):
    """Keep a minimal number of sponsored ("godchild") fake accounts alive.

    Two periodic jobs are scheduled while the optimization runs: one checks
    how many godchilds the main account has and registers the missing ones,
    the other makes a fake account perform some activity.

    NOTE(review): this file lives under ``modules/okc`` but still drives
    ``AuMBrowser`` and the ``aum`` exceptions; it looks like a copy of the
    AuM optimization that presumably still has to be ported -- confirm.
    """

    CONFIG = ValuesDict(ValueInt('minimal', label='Minimal of godchilds', default=5),
                        Value('domain', label='Domain to use for fake accounts emails', default='aum.example.com'),
                        ValueInt('interval', label='Interval of checks (seconds)', default=3600)
                       )

    def __init__(self, sched, storage, browser):
        """Store collaborators and load the saved configuration.

        :param sched: scheduler used to plan the periodic jobs
        :param storage: backend storage holding the config and the fakes
        :param browser: browser of the main account
        """
        self.sched = sched
        self.storage = storage
        self.browser = browser
        self.logger = getLogger('priorityconn', browser.logger)

        self.config = storage.get('priority_connection', 'config', default=None)
        # An empty dict means "never configured": normalise it to None so
        # start() refuses to run.
        if self.config == {}:
            self.config = None

        self.check_cron = None
        self.activity_cron = None

    def start(self):
        """Schedule both periodic jobs; return False when not configured."""
        if self.config is None:
            return False

        self.check_cron = self.sched.repeat(int(self.config['interval']), self.check_godchilds)
        self.activity_cron = self.sched.repeat(600, self.activity_fakes)
        return True

    def stop(self):
        """Cancel both periodic jobs and return True."""
        self.sched.cancel(self.check_cron)
        self.check_cron = None
        self.sched.cancel(self.activity_cron)
        self.activity_cron = None
        return True

    def is_running(self):
        """Tell whether the godchilds check is currently scheduled."""
        return self.check_cron is not None

    def set_config(self, params):
        """Replace the configuration and persist it in storage."""
        self.config = params
        self.storage.set('priority_connection', 'config', self.config)
        self.storage.save()

    def get_config(self):
        """Return the current configuration (None when unconfigured)."""
        return self.config

    def generate_name(self):
        """Return a random 8-letter login not already used by a fake.

        Letters alternate consonant/vowel; underscores are appended until
        the login collides with no stored fake.
        """
        login = u''
        for position in range(8):
            if position % 2:
                login += random.choice(u'aeiou')
            else:
                login += random.choice(u'bcdfghjklmnprstv')

        # Default to an empty mapping: nothing is stored before the first
        # fake is registered.
        fakes = self.storage.get('priority_connection', 'fakes', default={})
        email_domain = self.config['domain']
        # Fakes are stored under their bare name (see check_godchilds); the
        # e-mail form is also checked in case older entries used it as key.
        while login in fakes or ('%s@%s' % (login, email_domain)) in fakes:
            login += '_'
        return login

    def generate_password(self):
        """Return a random password made of 8 hexadecimal digits."""
        return '%08x' % random.randint(1, 0xffffffff)

    def check_godchilds(self):
        """Register as many fake godchilds as needed to reach the minimum."""
        with self.browser:
            try:
                # NOTE(review): if get_my_id() itself raises AdopteWait,
                # my_id stays unbound for the registrations below.
                my_id = self.browser.get_my_id()
                nb_godchilds = self.browser.nb_godchilds()
            except AdopteWait:
                nb_godchilds = 0
            except BrowserUnavailable:
                # We'll check later
                return

        missing_godchilds = int(self.config['minimal']) - nb_godchilds

        self.logger.info('Missing godchilds: %s' % missing_godchilds)

        if missing_godchilds <= 0:
            return

        for _ in range(missing_godchilds):
            registered = False
            # Retry with a fresh name/password until a registration succeeds.
            while not registered:
                name = self.generate_name()
                password = self.generate_password()

                browser = AuMBrowser('%s@%s' % (name, self.config['domain']), proxy=self.browser.proxy)
                try:
                    browser.register(password=password,
                                     sex=1,  # meaning defined by AuMBrowser.register -- TODO confirm
                                     birthday_d=random.randint(1, 28),
                                     birthday_m=random.randint(1, 12),
                                     birthday_y=random.randint(1975, 1990),
                                     zipcode=75001,
                                     country='fr',
                                     godfather=my_id)
                except AccountRegisterError as e:
                    self.logger.warning('Unable to register account: %s' % e)
                except CaptchaError:
                    self.logger.warning('Unable to solve captcha... Retrying')
                else:
                    registered = True

                    # set nickname
                    browser.set_nickname(name.strip('_').capitalize())
                    # rate my own profile with good score
                    for rate_index in range(4):
                        browser.rate(my_id, rate_index, 5.0)

                    # save fake in storage
                    fake = {'username': browser.username,
                            'password': password}
                    self.storage.set('priority_connection', 'fakes', name, fake)
                    self.storage.save()
                    self.logger.info('Fake account "%s" created (godfather=%s)' % (name, my_id))

    def activity_fakes(self):
        """Make one fake account rate and deblock a searched profile.

        Fakes are tried in random order, each at most once per call, so a
        set of fakes that all fail to log in (or find no profile) cannot
        keep this job spinning forever.
        """
        try:
            fakes = self.storage.get('priority_connection', 'fakes', default={})
            if not fakes:
                return

            names = list(fakes)
            random.shuffle(names)
            for name in names:
                fake = fakes[name]
                try:
                    browser = AuMBrowser(fake['username'], fake['password'], proxy=self.browser.proxy)
                except (AdopteBanned, BrowserIncorrectPassword) as e:
                    self.logger.warning('Fake %s can\'t login: %s' % (name, e))
                    continue

                profiles = browser.search_profiles(country="fr",
                                                   dist='10',
                                                   save=True)

                if not profiles:
                    continue

                profile = browser.get_profile(profiles.pop())
                # bad rate
                for rate_index in range(4):
                    browser.rate(profile.get_id(), rate_index, 0.6)
                # deblock
                browser.deblock(profile.get_id())
                return
        except BrowserUnavailable:
            # Best effort: the job will simply run again at the next tick.
            pass
|
||||
104
modules/okc/optim/profiles_walker.py
Normal file
104
modules/okc/optim/profiles_walker.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon, Christophe Benz
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
|
||||
|
||||
from random import randint
|
||||
|
||||
from weboob.tools.browser import BrowserUnavailable
|
||||
from weboob.capabilities.dating import Optimization
|
||||
from weboob.tools.log import getLogger
|
||||
|
||||
|
||||
__all__ = ['ProfilesWalker']
|
||||
|
||||
|
||||
class ProfilesWalker(Optimization):
    """Periodically search for profiles and visit the ones not seen yet.

    The ids of visited profiles are persisted in storage under
    ``profiles_walker/viewed`` so they are not visited again.
    """

    def __init__(self, sched, storage, browser):
        """Store collaborators and load the already visited profile ids.

        :param sched: scheduler used to plan the jobs
        :param storage: backend storage holding the visited ids
        :param browser: browser used to search and open profiles
        """
        self.sched = sched
        self.storage = storage
        self.browser = browser
        self.logger = getLogger('walker', browser.logger)

        self.walk_cron = None
        self.view_cron = None
        # Default to an empty list: nothing is stored before the first save.
        self.visited_profiles = set(storage.get('profiles_walker', 'viewed', default=[]))
        self.logger.info(u'Loaded %d already visited profiles from storage.' % len(self.visited_profiles))
        self.profiles_queue = set()

    def save(self):
        """Persist the set of visited profile ids."""
        self.storage.set('profiles_walker', 'viewed', list(self.visited_profiles))
        self.storage.save()

    def start(self):
        """Schedule the search job and the first profile visit."""
        self.walk_cron = self.sched.repeat(60, self.enqueue_profiles)
        self.view_cron = self.sched.schedule(randint(5, 10), self.view_profile)
        return True

    def stop(self):
        """Cancel both jobs and return True."""
        self.sched.cancel(self.walk_cron)
        self.sched.cancel(self.view_cron)
        self.walk_cron = None
        self.view_cron = None
        return True

    def is_running(self):
        """Tell whether the search job is currently scheduled."""
        return self.walk_cron is not None

    def enqueue_profiles(self):
        """Replace the queue with the searched profiles not visited yet."""
        try:
            with self.browser:
                profiles_to_visit = self.browser.search_profiles().difference(self.visited_profiles)
            self.logger.info(u'Enqueuing profiles to visit: %s' % profiles_to_visit)
            self.profiles_queue = set(profiles_to_visit)
            self.save()
        except BrowserUnavailable:
            return

    def view_profile(self):
        """Visit one queued profile, then reschedule itself.

        The next visit is planned in the ``finally`` clause, so it happens
        whatever the outcome, as long as the optimization was not stopped.
        """
        try:
            try:
                profile_id = self.profiles_queue.pop()
            except KeyError:
                return  # empty queue

            try:
                with self.browser:
                    profile = self.browser.get_profile(profile_id)
                # NOTE(review): the 'pseudo' key comes from the AuM profile
                # format -- confirm the OkC profile exposes it.
                self.logger.info(u'Visited profile %s (%s)' % (profile['pseudo'], profile_id))

                # do not forget that we visited this profile, to avoid re-visiting it.
                self.visited_profiles.add(profile_id)
                self.save()

            except BrowserUnavailable:
                # The profile was not (correctly) analysed: queue it again.
                self.profiles_queue.add(profile_id)
                return
            except Exception as e:
                # Scheduler callback boundary: report the failure through the
                # logger instead of printing it, and keep the walker alive.
                self.logger.warning(u'Unable to visit profile %s: %s' % (profile_id, e))
        finally:
            if self.view_cron is not None:
                self.view_cron = self.sched.schedule(randint(5, 10), self.view_profile)
|
||||
107
modules/okc/optim/queries_queue.py
Normal file
107
modules/okc/optim/queries_queue.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
|
||||
|
||||
from weboob.tools.browser import BrowserUnavailable
|
||||
from weboob.capabilities.dating import Optimization
|
||||
from weboob.capabilities.contact import QueryError
|
||||
from weboob.tools.log import getLogger
|
||||
|
||||
|
||||
__all__ = ['QueriesQueue']
|
||||
|
||||
|
||||
class QueriesQueue(Optimization):
    """Persistent queue of ``(priority, id)`` queries flushed periodically.

    The queue is loaded from, and saved to, storage under
    ``queries_queue/queue``; flushing sends a charm to each queued id.
    """

    def __init__(self, sched, storage, browser):
        """Store collaborators and load the saved queue.

        :param sched: scheduler used to plan the periodic flush
        :param storage: backend storage holding the queue
        :param browser: browser used to send the charms
        """
        self.sched = sched
        self.storage = storage
        self.browser = browser
        self.logger = getLogger('queriesqueue', browser.logger)

        self.queue = storage.get('queries_queue', 'queue', default=[])

        self.check_cron = None

    def save(self):
        """Write the current queue back to storage."""
        self.storage.set('queries_queue', 'queue', self.queue)
        self.storage.save()

    def start(self):
        """Schedule a flush of the queue every hour."""
        one_hour = 3600
        self.check_cron = self.sched.repeat(one_hour, self.flush_queue)
        return True

    def stop(self):
        """Cancel the periodic flush and return True."""
        self.sched.cancel(self.check_cron)
        self.check_cron = None
        return True

    def is_running(self):
        """Tell whether the periodic flush is currently scheduled."""
        return self.check_cron is not None

    def enqueue_query(self, id, priority=999):
        """Queue a query for ``id`` and try to send it right away.

        :param id: target id, converted with ``int()``
        :param priority: priority stored with the entry, converted with ``int()``
        :returns: True when the query is no longer queued after the flush
                  attempt, False when it is still waiting
        :raises QueryError: when the id is already in the queue

        NOTE(review): ``int(id)`` presumes numeric ids -- confirm this
        holds for OkCupid.
        """
        wanted = int(id)
        if any(entry[1] == wanted for entry in self.queue):
            raise QueryError('This id is already queued')

        self.queue.append((int(priority), wanted))
        self.save()

        # Try to flush queue to send it now.
        self.flush_queue()

        # The query was sent if and only if it left the queue.
        return all(queued_id != wanted for _, queued_id in self.queue)

    def flush_queue(self):
        """Send the queued charms until the queue is empty or one fails.

        An entry that could not be sent (refused, or browser unavailable)
        is put back in the queue; the queue is saved in every case.
        """
        # NOTE(review): after an ascending sort, pop() takes the entry with
        # the largest priority number first -- confirm this is intended.
        self.queue.sort()

        current_priority, current_id = 0, None

        try:
            try:
                while self.queue:
                    current_priority, current_id = self.queue.pop()

                    if not current_id:
                        continue

                    with self.browser:
                        sent = self.browser.send_charm(current_id)

                    if not sent:
                        self.queue.append((current_priority, current_id))
                        self.logger.info("Charm can't be send to %s" % current_id)
                        break

                    self.logger.info('Charm sent to %s' % current_id)

                    # The charm went through: forget the entry so a later
                    # failure does not put it back in the queue.
                    current_priority, current_id = 0, None

            except BrowserUnavailable:
                # The entry being processed was not sent: keep it for later.
                if current_id is not None:
                    self.queue.append((current_priority, current_id))
        finally:
            self.save()
|
||||
52
modules/okc/optim/visibility.py
Normal file
52
modules/okc/optim/visibility.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BrowserUnavailable
|
||||
from weboob.capabilities.dating import Optimization
|
||||
|
||||
|
||||
__all__ = ['Visibility']
|
||||
|
||||
|
||||
class Visibility(Optimization):
    """Optimization that logs the browser in again every five minutes."""

    def __init__(self, sched, browser):
        """Keep the scheduler and the browser; nothing is scheduled yet."""
        self.sched = sched
        self.browser = browser
        self.cron = None

    def start(self):
        """Schedule the periodic reconnection and return True."""
        five_minutes = 60*5
        self.cron = self.sched.repeat(five_minutes, self.reconnect)
        return True

    def stop(self):
        """Cancel the periodic reconnection and return True."""
        self.sched.cancel(self.cron)
        self.cron = None
        return True

    def is_running(self):
        """Tell whether the reconnection job is currently scheduled."""
        return self.cron is not None

    def reconnect(self):
        """Log in again; an unavailable browser is reported, not raised."""
        try:
            with self.browser:
                self.browser.login()
        except BrowserUnavailable as e:
            # Best effort: print the reason and wait for the next tick.
            print(str(e))
|
||||
|
|
@ -174,3 +174,19 @@ class PostMessagePage(BasePage):
|
|||
self.browser['r1'] = id
|
||||
self.browser['body'] = content
|
||||
self.browser.submit()
|
||||
|
||||
class VisitsPage(BasePage):
    """Page listing the visits received by the logged-in profile."""

    def get_visits(self):
        """Return the visits found on the page.

        :returns: a list of dicts shaped like
                  ``{'who': {'id': <unicode>}, 'date': <unicode>}``; the
                  date is the raw text found on the page. The list is empty
                  when the visitors list is not found, and entries lacking
                  an id or a date text are skipped.
        """
        visitor_lists = self.parser.select(self.document.getroot(), '//*[@id="page_content"]/ul[3]', method='xpath')
        if not visitor_lists:
            # No visitors list on the page: nothing to report.
            return []

        visitors = []
        for li in visitor_lists[0]:
            raw_id = li.get('id')
            date_spans = self.parser.select(li, './/div/span', method='xpath')
            # Skip children that are not well-formed visitor entries rather
            # than failing on a missing attribute or empty text.
            if not raw_id or not date_spans or date_spans[0].text is None:
                continue

            # The first four characters of the element id are dropped
            # (presumably a fixed prefix before the visitor id -- confirm).
            visitor_id = unicode(raw_id[4:])
            visitor_timestamp = unicode(date_spans[0].text.strip())
            visitors.append({
                'who': {
                    'id': visitor_id
                },
                'date': visitor_timestamp
            })
        return visitors
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue