diff --git a/modules/okc/backend.py b/modules/okc/backend.py
index 1216688d..2d7b8ace 100644
--- a/modules/okc/backend.py
+++ b/modules/okc/backend.py
@@ -27,13 +27,15 @@ from dateutil.parser import parse as _parse_dt
from weboob.capabilities.base import NotLoaded
from weboob.capabilities.messages import ICapMessages, ICapMessagesPost, Message, Thread
-#from weboob.capabilities.dating import ICapDating, OptimizationNotFound, Event
+from weboob.capabilities.dating import ICapDating, OptimizationNotFound, Event
from weboob.capabilities.contact import ICapContact, ContactPhoto, Contact
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.value import Value, ValueBackendPassword
from weboob.tools.misc import local2utc
from .browser import OkCBrowser
+from .optim.visibility import Visibility
+from .optim.queries_queue import QueriesQueue
__all__ = ['OkCBackend']
@@ -64,7 +66,7 @@ def parse_dt(s):
return local2utc(d)
-class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost):
+class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapDating):
NAME = 'okc'
MAINTAINER = u'Roger Philibert'
EMAIL = 'roger.philibert@gmail.com'
@@ -73,7 +75,7 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost):
DESCRIPTION = u'OkCupid dating website'
CONFIG = BackendConfig(Value('username', label='Username'),
ValueBackendPassword('password', label='Password'))
- STORAGE = {
+ STORAGE = {'queries_queue': {'queue': []},
'sluts': {},
#'notes': {},
}
@@ -82,6 +84,32 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost):
def create_default_browser(self):
return self.create_browser(self.config['username'].get(), self.config['password'].get())
+ # ---- ICapDating methods ---------------------
+    def init_optimizations(self):  # declares the optimizations offered by this backend
+        self.add_optimization('VISIBILITY', Visibility(self.weboob.scheduler, self.browser))  # logs in again every 5 minutes
+        self.add_optimization('QUERIES_QUEUE', QueriesQueue(self.weboob.scheduler, self.storage, self.browser))  # queue kept in the 'queries_queue' storage
+
+    def iter_events(self):
+        """
+        Iterate over the events of the account (only profile visits for now).
+
+        Each yielded Event carries the visitor id, the parsed visit date and
+        the ``visits`` type.
+        """
+        # Call the browser inside its context, like the optimizations of this
+        # module do, and build the list before yielding anything.
+        with self.browser:
+            visits = list(self.browser.get_visits())
+
+        for visit in visits:
+            event = Event(visit['who']['id'])
+            event.date = parse_dt(visit['date'])
+            event.type = u'visits'
+            # TODO: set event.contact and event.message ('Visited by %s') once
+            # a partial contact can be built from a visitor entry; for now a
+            # visit only provides the visitor id and a date.
+            yield event
+
# ---- ICapMessages methods ---------------------
def fill_thread(self, thread, fields):
diff --git a/modules/okc/browser.py b/modules/okc/browser.py
index 57b13247..177a6deb 100644
--- a/modules/okc/browser.py
+++ b/modules/okc/browser.py
@@ -22,7 +22,7 @@ import urllib
from weboob.tools.browser import BaseBrowser, BasePage
from weboob.tools.ordereddict import OrderedDict
-from .pages import LoginPage, ThreadPage, MessagesPage, PostMessagePage, ProfilePage, PhotosPage
+from .pages import LoginPage, ThreadPage, MessagesPage, PostMessagePage, ProfilePage, PhotosPage, VisitsPage
__all__ = ['OkCBrowser']
@@ -43,6 +43,7 @@ class OkCBrowser(BaseBrowser):
('http://%s/messages\?.*' % DOMAIN, MessagesPage),
('http://%s/profile/.*/photos' % DOMAIN, PhotosPage),
('http://%s/profile/[^/]*' % DOMAIN, ProfilePage),
+ ('http://%s/visitors' % DOMAIN, VisitsPage)
))
logged_in = False
@@ -120,10 +121,10 @@ class OkCBrowser(BaseBrowser):
# r = self.api_request('me', 'flashs')
# return r['result']['all']
- #@check_login
- #def get_visits(self):
- # r = self.api_request('me', 'visits')
- # return r['result']['news'] + r['result']['olds']
+    @check_login
+    def get_visits(self):
+        self.location('http://m.okcupid.com/visitors')  # NOTE(review): hard-coded host; must agree with the DOMAIN-based '/visitors' route -- confirm
+        return self.page.get_visits()  # presumably self.page is a VisitsPage here; it returns a list of visit dicts
@check_login
def get_threads_list(self, count=30):
diff --git a/modules/okc/optim/__init__.py b/modules/okc/optim/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/modules/okc/optim/priority_connection.py b/modules/okc/optim/priority_connection.py
new file mode 100644
index 00000000..64b0f711
--- /dev/null
+++ b/modules/okc/optim/priority_connection.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+
+import random
+
+from weboob.tools.browser import BrowserUnavailable, BrowserIncorrectPassword
+from weboob.capabilities.dating import Optimization
+from weboob.capabilities.account import AccountRegisterError
+from weboob.tools.log import getLogger
+from weboob.tools.value import Value, ValuesDict, ValueInt
+
+from aum.captcha import CaptchaError
+from aum.exceptions import AdopteWait, AdopteBanned
+from aum.browser import AuMBrowser
+
+
+__all__ = ['PriorityConnection']
+
+
+class PriorityConnection(Optimization):
+    """Keeps a minimal number of godchild accounts. NOTE(review): relies on the aum package, not on OkCBrowser."""
+    CONFIG = ValuesDict(ValueInt('minimal', label='Minimal of godchilds', default=5),
+                        Value('domain', label='Domain to use for fake accounts emails', default='aum.example.com'),
+                        ValueInt('interval', label='Interval of checks (seconds)', default=3600)
+                       )
+
+    def __init__(self, sched, storage, browser):
+        self.sched = sched
+        self.storage = storage
+        self.browser = browser
+        self.logger = getLogger('priorityconn', browser.logger)
+
+        # An empty stored configuration is handled like a missing one.
+        self.config = storage.get('priority_connection', 'config', default=None)
+        if self.config == {}:
+            self.config = None
+
+        self.check_cron = None
+        self.activity_cron = None
+
+    def start(self):
+        """Schedule both periodic jobs; return False when not configured."""
+        if self.config is None:
+            return False
+
+        self.check_cron = self.sched.repeat(int(self.config['interval']), self.check_godchilds)
+        self.activity_cron = self.sched.repeat(600, self.activity_fakes)
+        return True
+
+    def stop(self):
+        self.sched.cancel(self.check_cron)
+        self.check_cron = None
+        self.sched.cancel(self.activity_cron)
+        self.activity_cron = None
+        return True
+
+    def is_running(self):
+        return self.check_cron is not None
+
+    def set_config(self, params):
+        self.config = params
+        self.storage.set('priority_connection', 'config', self.config)
+        self.storage.save()
+
+    def get_config(self):
+        return self.config
+
+    def generate_name(self):
+        """Return a random 8-letter login which is not the key of a stored fake."""
+        login = u''
+        for x in xrange(8):
+            if x % 2:
+                login += random.choice(u'aeiou')
+            else:
+                login += random.choice(u'bcdfghjklmnprstv')
+
+        # Fakes are stored under their bare login (see check_godchilds), so
+        # the collision test uses the login and not the e-mail address.
+        fakes = self.storage.get('priority_connection', 'fakes', default={})
+        while login in fakes:
+            login += '_'
+        return login
+
+    def generate_password(self):
+        return '%08x' % random.randint(1, int('ffffffff', 16))
+
+    def check_godchilds(self):
+        """Register fake accounts until the configured minimum of godchilds is reached."""
+        my_id = None
+        with self.browser:
+            try:
+                my_id = self.browser.get_my_id()
+                nb_godchilds = self.browser.nb_godchilds()
+            except AdopteWait:
+                nb_godchilds = 0
+            except BrowserUnavailable:
+                # We'll check later
+                return
+        if my_id is None:
+            # get_my_id() itself failed: there is no godfather id to use.
+            return
+
+        missing_godchilds = int(self.config['minimal']) - nb_godchilds
+        self.logger.info('Missing godchilds: %s' % missing_godchilds)
+        if missing_godchilds <= 0:
+            return
+
+        for _ in xrange(missing_godchilds):
+            registered = False
+            while not registered:
+                name = self.generate_name()
+                password = self.generate_password()
+                browser = AuMBrowser('%s@%s' % (name, self.config['domain']), proxy=self.browser.proxy)
+                try:
+                    browser.register(password=password,
+                                     sex=1,
+                                     birthday_d=random.randint(1, 28),
+                                     birthday_m=random.randint(1, 12),
+                                     birthday_y=random.randint(1975, 1990),
+                                     zipcode=75001,
+                                     country='fr',
+                                     godfather=my_id)
+                except AccountRegisterError as e:
+                    self.logger.warning('Unable to register account: %s' % e)
+                except CaptchaError:
+                    self.logger.warning('Unable to solve captcha... Retrying')
+                else:
+                    registered = True
+                    # set nickname
+                    browser.set_nickname(name.strip('_').capitalize())
+                    # rate my own profile with good score
+                    for i in xrange(4):
+                        browser.rate(my_id, i, 5.0)
+                    # save fake in storage
+                    fake = {'username': browser.username,
+                            'password': password}
+                    self.storage.set('priority_connection', 'fakes', name, fake)
+                    self.storage.save()
+                    self.logger.info('Fake account "%s" created (godfather=%s)' % (name, my_id))
+
+    def activity_fakes(self):
+        """Make one stored fake account rate and deblock a profile found by its search."""
+        try:
+            fakes = self.storage.get('priority_connection', 'fakes', default={})
+            # Try every fake at most once, in random order: looping until one
+            # works would never end when none can log in or find a profile.
+            names = list(fakes.keys())
+            random.shuffle(names)
+            for name in names:
+                fake = fakes[name]
+                try:
+                    browser = AuMBrowser(fake['username'], fake['password'], proxy=self.browser.proxy)
+                except (AdopteBanned,BrowserIncorrectPassword) as e:
+                    self.logger.warning('Fake %s can\'t login: %s' % (name, e))
+                    continue
+                profiles = browser.search_profiles(country="fr", dist='10', save=True)
+                if not profiles:
+                    continue
+                profile = browser.get_profile(profiles.pop())
+                # bad rate
+                for i in xrange(4):
+                    browser.rate(profile.get_id(), i, 0.6)
+                browser.deblock(profile.get_id())
+                return
+        except BrowserUnavailable:
+            pass  # don't care
diff --git a/modules/okc/optim/profiles_walker.py b/modules/okc/optim/profiles_walker.py
new file mode 100644
index 00000000..88acfb79
--- /dev/null
+++ b/modules/okc/optim/profiles_walker.py
@@ -0,0 +1,104 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon, Christophe Benz
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+
+from random import randint
+
+from weboob.tools.browser import BrowserUnavailable
+from weboob.capabilities.dating import Optimization
+from weboob.tools.log import getLogger
+
+
+__all__ = ['ProfilesWalker']
+
+
+class ProfilesWalker(Optimization):
+    """Optimization which regularly searches profiles and visits them one by one."""
+    def __init__(self, sched, storage, browser):
+        self.sched = sched
+        self.storage = storage
+        self.browser = browser
+        self.logger = getLogger('walker', browser.logger)
+        # Scheduler handles; None while the optimization is stopped.
+        self.walk_cron = None
+        self.view_cron = None
+        # default=[]: without it a missing entry presumably yields None, which set() rejects.
+        self.visited_profiles = set(storage.get('profiles_walker', 'viewed', default=[]))
+        self.logger.info(u'Loaded %d already visited profiles from storage.' % len(self.visited_profiles))
+        self.profiles_queue = set()
+
+    def save(self):
+        """Store the ids of the visited profiles."""
+        self.storage.set('profiles_walker', 'viewed', list(self.visited_profiles))
+        self.storage.save()
+
+    def start(self):
+        """Search profiles every 60 seconds and schedule the first visit."""
+        self.walk_cron = self.sched.repeat(60, self.enqueue_profiles)
+        self.view_cron = self.sched.schedule(randint(5, 10), self.view_profile)
+        return True
+
+    def stop(self):
+        """Cancel both jobs, which also stops view_profile() rescheduling itself."""
+        self.sched.cancel(self.walk_cron)
+        self.sched.cancel(self.view_cron)
+        self.walk_cron = None
+        self.view_cron = None
+        return True
+
+    def is_running(self):
+        return self.walk_cron is not None
+
+    def enqueue_profiles(self):
+        """Replace the queue with the search results which were not visited yet."""
+        try:
+            with self.browser:
+                profiles_to_visit = self.browser.search_profiles().difference(self.visited_profiles)
+                self.logger.info(u'Enqueuing profiles to visit: %s' % profiles_to_visit)
+                self.profiles_queue = set(profiles_to_visit)
+            self.save()
+        except BrowserUnavailable:
+            return  # best effort: the next run searches again
+
+    def view_profile(self):
+        """Visit one queued profile, then schedule the next visit."""
+        try:
+            try:
+                profile_id = self.profiles_queue.pop()
+            except KeyError:
+                return  # empty queue
+            try:
+                with self.browser:
+                    profile = self.browser.get_profile(profile_id)
+                self.logger.info(u'Visited profile %s (%s)' % (profile['pseudo'], profile_id))
+                # Remember the visit so that this profile is not visited again.
+                self.visited_profiles.add(profile_id)
+                self.save()
+            except BrowserUnavailable:
+                # The visit did not complete: put the profile back in the queue.
+                self.profiles_queue.add(profile_id)
+                return
+        except Exception as e:
+            # Keep the walker alive, but leave a trace in its log.
+            self.logger.warning('Unable to visit a profile: %s' % e)
+        finally:
+            if self.view_cron is not None:
+                self.view_cron = self.sched.schedule(randint(5, 10), self.view_profile)
diff --git a/modules/okc/optim/queries_queue.py b/modules/okc/optim/queries_queue.py
new file mode 100644
index 00000000..ece864f2
--- /dev/null
+++ b/modules/okc/optim/queries_queue.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+
+from weboob.tools.browser import BrowserUnavailable
+from weboob.capabilities.dating import Optimization
+from weboob.capabilities.contact import QueryError
+from weboob.tools.log import getLogger
+
+
+__all__ = ['QueriesQueue']
+
+
+class QueriesQueue(Optimization):
+    """Optimization storing a queue of charms to send and flushing it every hour."""
+    def __init__(self, sched, storage, browser):
+        self.sched, self.storage, self.browser = sched, storage, browser
+        self.logger = getLogger('queriesqueue', browser.logger)
+        # (priority, id) entries still to send, as found in the storage.
+        self.queue = storage.get('queries_queue', 'queue', default=[])
+        # Handle of the periodic flush job; None while not running.
+        self.check_cron = None
+
+    def save(self):
+        """Write the current queue to the storage."""
+        self.storage.set('queries_queue', 'queue', self.queue)
+        self.storage.save()
+
+    def start(self):
+        """Schedule a flush of the queue every 3600 seconds."""
+        self.check_cron = self.sched.repeat(3600, self.flush_queue)
+        return True
+
+    def stop(self):
+        self.sched.cancel(self.check_cron)
+        self.check_cron = None
+        return True
+
+    def is_running(self):
+        return self.check_cron is not None
+
+    def enqueue_query(self, id, priority=999):
+        """
+        Queue a charm for ``id`` and try to send it right away.
+
+        Return True if it has been sent, False if it is still queued.
+        Raise QueryError if ``id`` is already in the queue.
+        """
+        # NOTE(review): ids are coerced with int(); confirm OkCupid ids are numeric.
+        queued_ids = [entry[1] for entry in self.queue]
+        wanted = int(id)
+        if wanted in queued_ids:
+            raise QueryError('This id is already queued')
+        self.queue.append((int(priority), wanted))
+        self.save()
+        self.flush_queue()
+        # Still present after the flush means it could not be sent.
+        return not any(queued == wanted for _, queued in self.queue)
+
+    def flush_queue(self):
+        """
+        Send the queued charms, taking them from the end of the sorted queue.
+
+        Stop at the first charm which can't be sent (it is queued again).
+        """
+        self.queue.sort()
+        # Entry being processed; (0, None) means there is nothing to queue again.
+        priority, charm_id = 0, None
+        try:
+            try:
+                while self.queue:
+                    priority, charm_id = self.queue.pop()
+                    if not charm_id:
+                        continue
+                    with self.browser:
+                        if not self.browser.send_charm(charm_id):
+                            self.queue.append((priority, charm_id))
+                            self.logger.info("Charm can't be send to %s" % charm_id)
+                            break
+                        self.logger.info('Charm sent to %s' % charm_id)
+                    # Sent without exception: forget the entry so that a later
+                    # failure does not queue it again.
+                    priority, charm_id = 0, None
+            except BrowserUnavailable:
+                # The browser failed while handling this entry: queue it again.
+                if charm_id is not None:
+                    self.queue.append((priority, charm_id))
+        finally:
+            self.save()
diff --git a/modules/okc/optim/visibility.py b/modules/okc/optim/visibility.py
new file mode 100644
index 00000000..e9a52251
--- /dev/null
+++ b/modules/okc/optim/visibility.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BrowserUnavailable
+from weboob.capabilities.dating import Optimization
+
+
+__all__ = ['Visibility']
+
+
+class Visibility(Optimization):
+    """Optimization which logs in again every five minutes."""
+    def __init__(self, sched, browser):
+        self.sched = sched
+        self.browser = browser
+        self.cron = None  # handle of the scheduled job, None while stopped
+
+    def start(self):
+        self.cron = self.sched.repeat(60*5, self.reconnect)
+        return True
+
+    def stop(self):
+        self.sched.cancel(self.cron)
+        self.cron = None
+        return True
+
+    def is_running(self):
+        return self.cron is not None
+
+    def reconnect(self):
+        try:
+            with self.browser:
+                self.browser.login()
+        except BrowserUnavailable as e:
+            self.browser.logger.warning('Unable to reconnect: %s' % e)
diff --git a/modules/okc/pages.py b/modules/okc/pages.py
index 15104e48..aa24fb59 100644
--- a/modules/okc/pages.py
+++ b/modules/okc/pages.py
@@ -174,3 +174,19 @@ class PostMessagePage(BasePage):
self.browser['r1'] = id
self.browser['body'] = content
self.browser.submit()
+
+class VisitsPage(BasePage):
+    """Page of the visitors list (the browser routes the '/visitors' URL here)."""
+    def get_visits(self):
+        """Return one ``{'who': {'id': ...}, 'date': ...}`` dict per listed visitor."""
+        select = self.parser.select
+        # The visitors are the children of the third <ul> under #page_content.
+        entries = select(self.document.getroot(), '//*[@id="page_content"]/ul[3]', method='xpath')[0]
+        visits = []
+        for entry in entries:
+            # The element id is a 4-character prefix followed by the visitor id.
+            who = {'id': unicode(entry.get('id')[4:])}
+            date = unicode(select(entry, './/div/span', method='xpath')[0].text.strip())
+            visits.append({'who': who, 'date': date})
+        return visits
+
\ No newline at end of file