diff --git a/modules/okc/__init__.py b/modules/okc/__init__.py new file mode 100644 index 00000000..0ac88f3c --- /dev/null +++ b/modules/okc/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + + +from .browser import OkCBrowser +from .backend import OkCBackend + +__all__ = ['OkCBrowser', 'OkCBackend'] diff --git a/modules/okc/backend.py b/modules/okc/backend.py new file mode 100644 index 00000000..6f18fde6 --- /dev/null +++ b/modules/okc/backend.py @@ -0,0 +1,355 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Roger Philibert +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
+ +from __future__ import with_statement + +import email +import time +import re +import datetime +from html2text import unescape +from dateutil import tz +from dateutil.parser import parse as _parse_dt + +from weboob.capabilities.base import NotLoaded +from weboob.capabilities.messages import ICapMessages, ICapMessagesPost, Message, Thread +#from weboob.capabilities.dating import ICapDating, OptimizationNotFound, Event +#from weboob.capabilities.contact import ICapContact, ContactPhoto, Query, QueryError +from weboob.tools.backend import BaseBackend, BackendConfig +from weboob.tools.browser import BrowserUnavailable +from weboob.tools.value import Value, ValuesDict, ValueBool, ValueBackendPassword +from weboob.tools.log import getLogger +from weboob.tools.misc import local2utc + +#from .contact import Contact +from .browser import OkCBrowser + + +__all__ = ['OkCBackend'] + + +def parse_dt(s): + now = datetime.datetime.now() + if u'–' in s: + # Date in form : "Yesterday – 20:45" + day, hour = s.split(u'–') + day = day.strip() + hour = hour.strip() + if day == 'Yesterday': + d = now - datetime.timedelta(days=1) + elif day == 'Today': + d = now + hour = _parse_dt(hour) + d = datetime.datetime(d.year, d.month, d.day, hour.hour, hour.minute) + else: + #if ',' in s: + # Date in form : "Dec 28, 2011") + d = _parse_dt(s) + return local2utc(d) + +class OkCBackend(BaseBackend, ICapMessages): + #, ICapMessagesPost, ICapContact): + NAME = 'okc' + MAINTAINER = 'Roger Philibert' + EMAIL = 'roger.philibert@gmail.com' + VERSION = '0.c' + LICENSE = 'AGPLv3+' + DESCRIPTION = u'OkCupid dating website' + CONFIG = BackendConfig(Value('username', label='Username'), + ValueBackendPassword('password', label='Password')) + STORAGE = { + 'sluts': {}, + #'notes': {}, + } + BROWSER = OkCBrowser + + def create_default_browser(self): + return self.create_browser(self.config['username'].get(), self.config['password'].get()) + + # ---- ICapMessages methods --------------------- + + def 
fill_thread(self, thread, fields): + return self.get_thread(thread) + + def iter_threads(self): + with self.browser: + threads = self.browser.get_threads_list() + + for thread in threads: + # Remove messages from user that quit + #if thread['member'].get('isBan', thread['member'].get('dead', False)): + # with self.browser: + # self.browser.delete_thread(thread['member']['id']) + # continue + t = Thread(int(thread['id'])) + t.flags = Thread.IS_DISCUSSION + t.title = 'Discussion with %s' % thread['username'] + yield t + + def get_thread(self, id, contacts=None, get_profiles=False): + """ + Get a thread and its messages. + + The 'contacts' parameters is only used for internal calls. + """ + thread = None + if isinstance(id, Thread): + thread = id + id = thread.id + + if not thread: + thread = Thread(int(id)) + thread.flags = Thread.IS_DISCUSSION + full = False + else: + full = True + + with self.browser: + mails = self.browser.get_thread_mails(id, 100) + my_name = self.browser.get_my_name() + + child = None + msg = None + slut = self._get_slut(mails['member']['pseudo']) + if contacts is None: + contacts = {} + + if not thread.title: + thread.title = u'Discussion with %s' % mails['member']['pseudo'] + + #self.storage.set('sluts', thread.id, 'status', mails['status']) + #self.storage.save() + + for mail in mails['messages']: + flags = Message.IS_HTML + if parse_dt(mail['date']) > slut['lastmsg'] and mail['id_from'] != self.browser.get_my_name(): + flags |= Message.IS_UNREAD + + if get_profiles: + if not mail['id_from'] in contacts: + with self.browser: + contacts[mail['id_from']] = self.get_contact(mail['id_from']) + + signature = u'' + if mail.get('src', None): + signature += u'Sent from my %s\n\n' % mail['src'] + if mail['id_from'] in contacts: + signature += contacts[mail['id_from']].get_text() + + msg = Message(thread=thread, + id=int(time.strftime('%Y%m%d%H%M%S', parse_dt(mail['date']).timetuple())), + title=thread.title, + sender=mail['id_from'], + 
receivers=[my_name if mail['id_from'] != my_name else mails['member']['pseudo']], + date=parse_dt(mail['date']), + content=unescape(mail['message']).strip(), + signature=signature, + children=[], + flags=flags) + if child: + msg.children.append(child) + child.parent = msg + + child = msg + + if full and msg: + # If we have get all the messages, replace NotLoaded with None as + # parent. + msg.parent = None + if not full and not msg: + # Perhaps there are hidden messages + msg = NotLoaded + + thread.root = msg + + return thread + + #def iter_unread_messages(self, thread=None): + # try: + # contacts = {} + # with self.browser: + # threads = self.browser.get_threads_list() + # for thread in threads: + # if thread['member'].get('isBan', thread['member'].get('dead', False)): + # with self.browser: + # self.browser.delete_thread(int(thread['member']['id'])) + # continue + # if self.antispam and not self.antispam.check_thread(thread): + # self.logger.info('Skipped a spam-unread-thread from %s' % thread['member']['pseudo']) + # self.report_spam(thread['member']['pseudo']) + # continue + # slut = self._get_slut(thread['member']['pseudo']) + # if parse_dt(thread['date']) > slut['lastmsg']: + # t = self.get_thread(thread['member']['pseudo'], contacts, get_profiles=True) + # for m in t.iter_all_messages(): + # if m.flags & m.IS_UNREAD: + # yield m + + # except BrowserUnavailable, e: + # self.logger.debug('No messages, browser is unavailable: %s' % e) + # pass # don't care about waiting + + def set_message_read(self, message): + if message.sender == self.browser.get_my_name(): + return + + slut = self._get_slut(message.sender) + if slut['lastmsg'] < message.date: + slut['lastmsg'] = message.date + self.storage.set('sluts', message.sender, slut) + self.storage.save() + + def _get_slut(self, id): + sluts = self.storage.get('sluts') + if not sluts or not id in sluts: + slut = {'lastmsg': datetime.datetime(1970,1,1)} + else: + slut = self.storage.get('sluts', id) + + 
slut['lastmsg'] = slut.get('lastmsg', datetime.datetime(1970,1,1)).replace(tzinfo=tz.tzutc()) + return slut + + # ---- ICapMessagesPost methods --------------------- + + #def post_message(self, message): + # with self.browser: + # self.browser.post_mail(message.thread.id, message.content) + + # ---- ICapContact methods --------------------- + + #def fill_contact(self, contact, fields): + # if 'profile' in fields: + # contact = self.get_contact(contact) + # if contact and 'photos' in fields: + # for name, photo in contact.photos.iteritems(): + # with self.browser: + # if photo.url and not photo.data: + # data = self.browser.openurl(photo.url).read() + # contact.set_photo(name, data=data) + # if photo.thumbnail_url and not photo.thumbnail_data: + # data = self.browser.openurl(photo.thumbnail_url).read() + # contact.set_photo(name, thumbnail_data=data) + + #def fill_photo(self, photo, fields): + # with self.browser: + # if 'data' in fields and photo.url and not photo.data: + # photo.data = self.browser.readurl(photo.url) + # if 'thumbnail_data' in fields and photo.thumbnail_url and not photo.thumbnail_data: + # photo.thumbnail_data = self.browser.readurl(photo.thumbnail_url) + # return photo + + #def get_contact(self, contact): + # with self.browser: + # if isinstance(contact, Contact): + # _id = contact.id + # elif isinstance(contact, (int,long,basestring)): + # _id = contact + # else: + # raise TypeError("The parameter 'contact' isn't a contact nor a int/long/str/unicode: %s" % contact) + + # profile = self.browser.get_profile(_id) + # if not profile: + # return None + + # _id = profile['id'] + + # if isinstance(contact, Contact): + # contact.id = _id + # contact.name = profile['pseudo'] + # else: + # contact = Contact(_id, profile['pseudo'], Contact.STATUS_ONLINE) + # contact.url = self.browser.id2url(_id) + # contact.parse_profile(profile, self.browser.get_consts()) + # return contact + + #def _get_partial_contact(self, contact): + # if contact.get('isBan', 
contact.get('dead', False)): + # with self.browser: + # self.browser.delete_thread(int(contact['id'])) + # return None + + # s = 0 + # if contact.get('isOnline', False): + # s = Contact.STATUS_ONLINE + # else: + # s = Contact.STATUS_OFFLINE + + # c = Contact(contact['id'], contact['pseudo'], s) + # c.url = self.browser.id2url(contact['id']) + # if 'birthday' in contact: + # birthday = _parse_dt(contact['birthday']) + # age = int((datetime.datetime.now() - birthday).days / 365.25) + # c.status_msg = u'%s old, %s' % (age, contact['city']) + # if contact['cover'].isdigit() and int(contact['cover']) > 0: + # url = 'http://s%s.adopteunmec.com/%s%%(type)s%s.jpg' % (contact['shard'], contact['path'], contact['cover']) + # else: + # url = 'http://s.adopteunmec.com/www/img/thumb0.gif' + + # c.set_photo('image%s' % contact['cover'], + # url=url % {'type': 'image'}, + # thumbnail_url=url % {'type': 'thumb0_'}) + # return c + + #def iter_contacts(self, status=Contact.STATUS_ALL, ids=None): + # with self.browser: + # threads = self.browser.get_threads_list(count=100) + + # for thread in threads: + # c = self._get_partial_contact(thread['member']) + # if c and (c.status & status) and (not ids or c.id in ids): + # yield c + + #def send_query(self, id): + # if isinstance(id, Contact): + # id = id.id + + # queries_queue = None + # try: + # queries_queue = self.get_optimization('QUERIES_QUEUE') + # except OptimizationNotFound: + # pass + + # if queries_queue and queries_queue.is_running(): + # if queries_queue.enqueue_query(id): + # return Query(id, 'A charm has been sent') + # else: + # return Query(id, 'Unable to send charm: it has been enqueued') + # else: + # with self.browser: + # if not self.browser.send_charm(id): + # raise QueryError('No enough charms available') + # return Query(id, 'A charm has been sent') + + #def get_notes(self, id): + # if isinstance(id, Contact): + # id = id.id + + # return self.storage.get('notes', id) + + #def save_notes(self, id, notes): + # if 
isinstance(id, Contact): + # id = id.id + + # self.storage.set('notes', id, notes) + # self.storage.save() + + OBJECTS = {Thread: fill_thread, + #Contact: fill_contact, + #ContactPhoto: fill_photo + } diff --git a/modules/okc/browser.py b/modules/okc/browser.py new file mode 100644 index 00000000..3b0e6d5c --- /dev/null +++ b/modules/okc/browser.py @@ -0,0 +1,276 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Roger Philibert +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . 
import math
import re
import datetime
import random
import urllib
from functools import wraps
from htmlentitydefs import codepoint2name
try:
    import json
except ImportError:
    import simplejson as json

from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BrowserUnavailable
from weboob.tools.ordereddict import OrderedDict

from .pages import LoginPage, ThreadPage, MessagesPage

__all__ = ['OkCBrowser']


class OkCException(Exception):
    """Base exception of this module."""
    pass


class OkCBrowser(BaseBrowser):
    """
    Browser for the mobile version of OkCupid.

    Only listing the threads and reading their messages is implemented.
    """
    # TODO: posting messages, profiles and contacts are not implemented. The
    # commented-out code copied from the AuM module was removed: it targeted
    # another website and used names that do not exist here (api_request,
    # url2id, AuMException, ...).
    DOMAIN = 'm.okcupid.com'
    PROTOCOL = 'https'
    ENCODING = 'UTF-8'
    # NOTE(review): the login page is matched on https while the messages
    # pages are matched on http; kept as is, confirm against the website.
    PAGES = OrderedDict((
        ('https://%s/login.*' % DOMAIN, LoginPage),
        ('http://%s/messages' % DOMAIN, ThreadPage),
        # Raw string: '\?' is a regexp escape, not a string escape.
        (r'http://%s/messages\?.*' % DOMAIN, MessagesPage),
    ))

    logged_in = False

    def home(self):
        """Go to the home page."""
        self.location(self.absurl('/home'))

    def login(self):
        """Submit the login form and flag the browser as logged in."""
        self.location(self.absurl('/login'), no_login=True)
        self.page.login(self.username, self.password)
        # NOTE(review): the result of the submission is not checked, so wrong
        # credentials are not detected here (BrowserIncorrectPassword is
        # imported but never raised).
        self.logged_in = True

    def is_logged(self):
        """Tell whether login() has already been performed."""
        return self.logged_in

    def check_login(func):
        """Decorator for methods which need login() to be done first."""
        @wraps(func)  # keep the name and docstring of the decorated method
        def inner(self, *args, **kwargs):
            if not self.logged_in:
                self.login()
            return func(self, *args, **kwargs)
        return inner

    def get_my_name(self):
        """Return the username of the account in use."""
        return self.username

    @check_login
    def get_threads_list(self, count=30):
        """
        Get the threads displayed on the messages page.

        'count' is accepted but currently unused.

        :returns: what ThreadPage.get_threads() returns
        """
        self.location('http://%s/messages' % self.DOMAIN)
        return self.page.get_threads()

    @check_login
    def get_thread_mails(self, id, count=30):
        """
        Get the messages of a thread.

        :param id: thread ID (anything accepted by int())
        :param count: given as is to MessagesPage.get_thread_mails()
        :returns: the dict built by the page; mails['member']['pseudo'] is
                  set to the sender of the first message which is not from
                  the logged-in user, and is left unset if there is none
        """
        id = int(id)
        self.location(self.absurl('/messages?readmsg=true&threadid=%i&folder=1' % id))

        mails = self.page.get_thread_mails(count)

        # Find the peer username
        my_name = self.get_my_name()
        for mail in mails['messages']:
            if mail['id_from'] != my_name:
                mails['member']['pseudo'] = mail['id_from']
                break
        return mails


# ---------------------------------------------------------------------------
# Next file of this patch:
#
# diff --git a/modules/okc/pages.py b/modules/okc/pages.py
# new file mode 100644
# index 00000000..2a7eca68
# --- /dev/null
# +++ b/modules/okc/pages.py
# @@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
#
# Copyright(C) 2012 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.tools.browser import BasePage, BrokenPageError


class LoginPage(BasePage):
    """Page containing the login form."""

    def login(self, username, password):
        """Fill the 'loginf' form with the credentials and submit it."""
        self.browser.select_form(name='loginf')
        self.browser['username'] = username.encode(self.browser.ENCODING)
        self.browser['password'] = password.encode(self.browser.ENCODING)
        self.browser.submit(id='login_btn', nologin=True)


class ThreadPage(BasePage):
    """Page listing the threads."""

    def get_threads(self):
        """
        Get the threads listed on the page.

        Only the <li> elements having the 'clearfix' class are considered.

        :returns: list of dicts with the keys 'username' (last component of
                  the href of the first child) and 'id' (second '_'-separated
                  component of the 'id' attribute)
        :raises BrokenPageError: if an entry has not the expected structure
        """
        li_elems = self.parser.select(self.document.getroot(),
                                      "//div[@id='page_content']//li",
                                      method='xpath')

        threads = []
        for elem in li_elems:
            if 'clearfix' not in elem.get('class', '').split():
                continue

            id_parts = elem.get('id', '').split('_')
            href = elem[0].get('href') if len(elem) > 0 else None
            # Unexpected markup used to end in a bare IndexError or
            # AttributeError.
            if len(id_parts) < 2 or href is None:
                raise BrokenPageError('Unable to parse thread entry (id=%r)' % elem.get('id'))

            threads.append({
                u'username': href.split('/')[-1],
                u'id': id_parts[1],
            })

        return threads


class MessagesPage(BasePage):
    """Page displaying the messages of one thread."""

    def get_thread_mails(self, count):
        """
        Get the messages displayed on the page.

        :param count: currently unused, every displayed message is returned
        :returns: dict with an empty 'member' dict (to be filled by the
                  caller) and 'messages', a list of dicts with the keys
                  'date', 'message' and 'id_from', in page order
        :raises BrokenPageError: if the page has not the expected structure
        """
        found = self.parser.select(self.document.getroot(), "//ul[@id='rows']", method='xpath')
        if not found:
            raise BrokenPageError('Unable to find the list of messages')

        mails = {
            'member': {},
            'messages': [],
        }

        for li_msg in found[0]:
            try:
                # First child: link to the sender's profile; second child:
                # block whose 2nd and 3rd children are the body and the date.
                link, div = li_msg[0], li_msg[1]
                body, date = div[1], div[2]
                id_from = link.get('href').split('/')[-1]
            except (IndexError, AttributeError):
                raise BrokenPageError('Unable to parse a message of the thread')

            mails['messages'].append({
                'date': date.text,
                'message': self.parser.tostring(body),
                'id_from': id_from,
            })

        return mails