[okc] Message reading

This commit is contained in:
Roger Philibert 2012-03-16 23:42:12 +01:00 committed by Romain Bignon
commit c44a4cdebc
4 changed files with 721 additions and 0 deletions

24
modules/okc/__init__.py Normal file
View file

@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .browser import OkCBrowser
from .backend import OkCBackend
__all__ = ['OkCBrowser', 'OkCBackend']

355
modules/okc/backend.py Normal file
View file

@ -0,0 +1,355 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
import email
import time
import re
import datetime
from html2text import unescape
from dateutil import tz
from dateutil.parser import parse as _parse_dt
from weboob.capabilities.base import NotLoaded
from weboob.capabilities.messages import ICapMessages, ICapMessagesPost, Message, Thread
#from weboob.capabilities.dating import ICapDating, OptimizationNotFound, Event
#from weboob.capabilities.contact import ICapContact, ContactPhoto, Query, QueryError
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.browser import BrowserUnavailable
from weboob.tools.value import Value, ValuesDict, ValueBool, ValueBackendPassword
from weboob.tools.log import getLogger
from weboob.tools.misc import local2utc
#from .contact import Contact
from .browser import OkCBrowser
__all__ = ['OkCBackend']
def parse_dt(s):
    """
    Convert a date string scraped from the site into a datetime.

    Two forms are understood:

    * relative: the word "Yesterday" or "Today", a separator and a time of
      day (e.g. "Yesterday 20:45");
    * anything else is handed unchanged to dateutil's parser
      (e.g. "Dec 28, 2011").

    :param s: date string as displayed on the page
    :returns: the result of local2utc() applied to the parsed datetime
    """
    # Any run of non-alphanumeric characters (spaces, dashes, ...) is accepted
    # between the day word and the time, so the exact punctuation used by the
    # site does not matter.
    match = re.match(r'\s*(Yesterday|Today)\W+(.+?)\s*$', s)
    if match is None:
        # Absolute date, or a form we do not know: let dateutil decide.
        return local2utc(_parse_dt(s))

    day, hour = match.groups()
    d = datetime.datetime.now()
    if day == 'Yesterday':
        d -= datetime.timedelta(days=1)

    # Only the hour and minute of the parsed time are kept; the date part
    # comes from the day word.
    hour = _parse_dt(hour)
    d = datetime.datetime(d.year, d.month, d.day, hour.hour, hour.minute)
    return local2utc(d)
class OkCBackend(BaseBackend, ICapMessages):
    """
    Backend for the OkCupid dating website.

    Only message reading (ICapMessages) is implemented.  The commented-out
    methods are a scaffold for capabilities that are not ported yet
    (ICapMessagesPost, ICapContact).
    """
    #, ICapMessagesPost, ICapContact):
    NAME = 'okc'
    MAINTAINER = 'Roger Philibert'
    EMAIL = 'roger.philibert@gmail.com'
    VERSION = '0.c'
    LICENSE = 'AGPLv3+'
    DESCRIPTION = u'OkCupid dating website'
    CONFIG = BackendConfig(Value('username', label='Username'),
                           ValueBackendPassword('password', label='Password'))
    # Per-peer state; each entry holds the date of the last message marked
    # as read ('lastmsg').
    STORAGE = {
        'sluts': {},
        #'notes': {},
    }
    BROWSER = OkCBrowser

    def create_default_browser(self):
        """Create the browser from the configured username and password."""
        return self.create_browser(self.config['username'].get(),
                                   self.config['password'].get())

    # ---- ICapMessages methods ---------------------

    def fill_thread(self, thread, fields):
        """Fill a Thread object; 'fields' is ignored, the whole thread is loaded."""
        return self.get_thread(thread)

    def iter_threads(self):
        """Yield one Thread (without its messages) per conversation listed on the site."""
        with self.browser:
            threads = self.browser.get_threads_list()

        for thread in threads:
            # Remove messages from user that quit
            #if thread['member'].get('isBan', thread['member'].get('dead', False)):
            #    with self.browser:
            #        self.browser.delete_thread(thread['member']['id'])
            #    continue
            t = Thread(int(thread['id']))
            t.flags = Thread.IS_DISCUSSION
            t.title = 'Discussion with %s' % thread['username']
            yield t

    def get_thread(self, id, contacts=None, get_profiles=False):
        """
        Get a thread and its messages.

        :param id: thread identifier, or a Thread object to fill in place
        :param contacts: dict mapping usernames to contact objects, used as a
                         cache; only used for internal calls
        :param get_profiles: if true, fetch the profile of every sender to
                             build message signatures.  NOTE: this relies on
                             get_contact(), which is still commented out in
                             this class.
        :returns: the Thread, with its 'root' attribute set to the last
                  message built (None-parented when a Thread object was given)
        """
        thread = None
        if isinstance(id, Thread):
            thread = id
            id = thread.id

        if not thread:
            thread = Thread(int(id))
            thread.flags = Thread.IS_DISCUSSION
            full = False
        else:
            full = True

        with self.browser:
            mails = self.browser.get_thread_mails(id, 100)
            my_name = self.browser.get_my_name()

        # The browser only sets 'pseudo' when the thread contains at least one
        # message written by someone else; a thread holding only our own
        # messages must not raise KeyError here.
        peer = mails['member'].get('pseudo')

        child = None
        msg = None
        last_read = self._get_slut(peer)['lastmsg']
        if contacts is None:
            contacts = {}

        if not thread.title and peer is not None:
            thread.title = u'Discussion with %s' % peer

        #self.storage.set('sluts', thread.id, 'status', mails['status'])
        #self.storage.save()

        for mail in mails['messages']:
            sender = mail['id_from']
            # Parse the date once: parse_dt() depends on the current time for
            # relative dates, so repeated calls could disagree.
            date = parse_dt(mail['date'])

            flags = Message.IS_HTML
            if date > last_read and sender != my_name:
                flags |= Message.IS_UNREAD

            if get_profiles and sender not in contacts:
                with self.browser:
                    contacts[sender] = self.get_contact(sender)

            signature = u''
            if mail.get('src', None):
                signature += u'Sent from my %s\n\n' % mail['src']
            if sender in contacts:
                signature += contacts[sender].get_text()

            if sender != my_name:
                receivers = [my_name]
            elif peer is not None:
                receivers = [peer]
            else:
                # We wrote the message and the peer's name is unknown.
                receivers = []

            msg = Message(thread=thread,
                          id=int(time.strftime('%Y%m%d%H%M%S', date.timetuple())),
                          title=thread.title,
                          sender=sender,
                          receivers=receivers,
                          date=date,
                          content=unescape(mail['message']).strip(),
                          signature=signature,
                          children=[],
                          flags=flags)
            # Chain the messages: each new message becomes the parent of the
            # previous one.
            if child:
                msg.children.append(child)
                child.parent = msg

            child = msg

        if full and msg:
            # If we have get all the messages, replace NotLoaded with None as
            # parent.
            msg.parent = None
        if not full and not msg:
            # Perhaps there are hidden messages
            msg = NotLoaded

        thread.root = msg

        return thread

    #def iter_unread_messages(self, thread=None):
    #    try:
    #        contacts = {}
    #        with self.browser:
    #            threads = self.browser.get_threads_list()
    #        for thread in threads:
    #            if thread['member'].get('isBan', thread['member'].get('dead', False)):
    #                with self.browser:
    #                    self.browser.delete_thread(int(thread['member']['id']))
    #                continue
    #            if self.antispam and not self.antispam.check_thread(thread):
    #                self.logger.info('Skipped a spam-unread-thread from %s' % thread['member']['pseudo'])
    #                self.report_spam(thread['member']['pseudo'])
    #                continue
    #            slut = self._get_slut(thread['member']['pseudo'])
    #            if parse_dt(thread['date']) > slut['lastmsg']:
    #                t = self.get_thread(thread['member']['pseudo'], contacts, get_profiles=True)
    #                for m in t.iter_all_messages():
    #                    if m.flags & m.IS_UNREAD:
    #                        yield m
    #    except BrowserUnavailable, e:
    #        self.logger.debug('No messages, browser is unavailable: %s' % e)
    #        pass # don't care about waiting

    def set_message_read(self, message):
        """
        Remember 'message' as read by storing its date as the sender's
        'lastmsg', unless a later date is already stored.  Our own messages
        are ignored.
        """
        if message.sender == self.browser.get_my_name():
            return

        state = self._get_slut(message.sender)
        if state['lastmsg'] < message.date:
            state['lastmsg'] = message.date
            self.storage.set('sluts', message.sender, state)
            self.storage.save()

    def _get_slut(self, id):
        """
        Return the stored state dict of the peer 'id'.

        When nothing is stored for this peer, 'lastmsg' defaults to
        1970-01-01.  The returned 'lastmsg' always carries the UTC tzinfo.
        """
        epoch = datetime.datetime(1970, 1, 1)
        sluts = self.storage.get('sluts')
        if not sluts or id not in sluts:
            slut = {'lastmsg': epoch}
        else:
            slut = self.storage.get('sluts', id)

        slut['lastmsg'] = slut.get('lastmsg', epoch).replace(tzinfo=tz.tzutc())
        return slut

    # ---- ICapMessagesPost methods ---------------------

    #def post_message(self, message):
    #    with self.browser:
    #        self.browser.post_mail(message.thread.id, message.content)

    # ---- ICapContact methods ---------------------

    #def fill_contact(self, contact, fields):
    #    if 'profile' in fields:
    #        contact = self.get_contact(contact)
    #    if contact and 'photos' in fields:
    #        for name, photo in contact.photos.iteritems():
    #            with self.browser:
    #                if photo.url and not photo.data:
    #                    data = self.browser.openurl(photo.url).read()
    #                    contact.set_photo(name, data=data)
    #                if photo.thumbnail_url and not photo.thumbnail_data:
    #                    data = self.browser.openurl(photo.thumbnail_url).read()
    #                    contact.set_photo(name, thumbnail_data=data)

    #def fill_photo(self, photo, fields):
    #    with self.browser:
    #        if 'data' in fields and photo.url and not photo.data:
    #            photo.data = self.browser.readurl(photo.url)
    #        if 'thumbnail_data' in fields and photo.thumbnail_url and not photo.thumbnail_data:
    #            photo.thumbnail_data = self.browser.readurl(photo.thumbnail_url)
    #    return photo

    #def get_contact(self, contact):
    #    with self.browser:
    #        if isinstance(contact, Contact):
    #            _id = contact.id
    #        elif isinstance(contact, (int,long,basestring)):
    #            _id = contact
    #        else:
    #            raise TypeError("The parameter 'contact' isn't a contact nor a int/long/str/unicode: %s" % contact)
    #        profile = self.browser.get_profile(_id)
    #        if not profile:
    #            return None
    #        _id = profile['id']
    #        if isinstance(contact, Contact):
    #            contact.id = _id
    #            contact.name = profile['pseudo']
    #        else:
    #            contact = Contact(_id, profile['pseudo'], Contact.STATUS_ONLINE)
    #        contact.url = self.browser.id2url(_id)
    #        contact.parse_profile(profile, self.browser.get_consts())
    #        return contact

    #def _get_partial_contact(self, contact):
    #    if contact.get('isBan', contact.get('dead', False)):
    #        with self.browser:
    #            self.browser.delete_thread(int(contact['id']))
    #        return None
    #    s = 0
    #    if contact.get('isOnline', False):
    #        s = Contact.STATUS_ONLINE
    #    else:
    #        s = Contact.STATUS_OFFLINE
    #    c = Contact(contact['id'], contact['pseudo'], s)
    #    c.url = self.browser.id2url(contact['id'])
    #    if 'birthday' in contact:
    #        birthday = _parse_dt(contact['birthday'])
    #        age = int((datetime.datetime.now() - birthday).days / 365.25)
    #        c.status_msg = u'%s old, %s' % (age, contact['city'])
    #    if contact['cover'].isdigit() and int(contact['cover']) > 0:
    #        url = 'http://s%s.adopteunmec.com/%s%%(type)s%s.jpg' % (contact['shard'], contact['path'], contact['cover'])
    #    else:
    #        url = 'http://s.adopteunmec.com/www/img/thumb0.gif'
    #    c.set_photo('image%s' % contact['cover'],
    #                url=url % {'type': 'image'},
    #                thumbnail_url=url % {'type': 'thumb0_'})
    #    return c

    #def iter_contacts(self, status=Contact.STATUS_ALL, ids=None):
    #    with self.browser:
    #        threads = self.browser.get_threads_list(count=100)
    #    for thread in threads:
    #        c = self._get_partial_contact(thread['member'])
    #        if c and (c.status & status) and (not ids or c.id in ids):
    #            yield c

    #def send_query(self, id):
    #    if isinstance(id, Contact):
    #        id = id.id
    #    queries_queue = None
    #    try:
    #        queries_queue = self.get_optimization('QUERIES_QUEUE')
    #    except OptimizationNotFound:
    #        pass
    #    if queries_queue and queries_queue.is_running():
    #        if queries_queue.enqueue_query(id):
    #            return Query(id, 'A charm has been sent')
    #        else:
    #            return Query(id, 'Unable to send charm: it has been enqueued')
    #    else:
    #        with self.browser:
    #            if not self.browser.send_charm(id):
    #                raise QueryError('No enough charms available')
    #            return Query(id, 'A charm has been sent')

    #def get_notes(self, id):
    #    if isinstance(id, Contact):
    #        id = id.id
    #    return self.storage.get('notes', id)

    #def save_notes(self, id, notes):
    #    if isinstance(id, Contact):
    #        id = id.id
    #    self.storage.set('notes', id, notes)
    #    self.storage.save()

    # Maps object types to the method able to fill them.
    OBJECTS = {Thread: fill_thread,
               #Contact: fill_contact,
               #ContactPhoto: fill_photo
              }

276
modules/okc/browser.py Normal file
View file

@ -0,0 +1,276 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import math
import re
import datetime
import random
import urllib
from htmlentitydefs import codepoint2name
try:
import json
except ImportError:
import simplejson as json
from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword, BrowserUnavailable
from weboob.tools.ordereddict import OrderedDict
from .pages import LoginPage, ThreadPage, MessagesPage
__all__ = ['OkCBrowser']
class OkCException(Exception):
    """Error specific to the OkCupid browser."""
class OkCBrowser(BaseBrowser):
    """
    Browser for the mobile OkCupid website (m.okcupid.com).

    Only logging in and reading messages are implemented; the commented-out
    methods are a scaffold that has not been ported yet.
    """
    DOMAIN = 'm.okcupid.com'
    PROTOCOL = 'https'
    ENCODING = 'UTF-8'
    # URL patterns mapped to the page class handling them.  Both schemes are
    # accepted for the message pages: get_thread_mails() builds its URL with
    # absurl() whereas get_threads_list() uses a hard-coded http:// URL.
    PAGES = OrderedDict((
        (r'https://%s/login.*' % DOMAIN, LoginPage),
        (r'https?://%s/messages' % DOMAIN, ThreadPage),
        (r'https?://%s/messages\?.*' % DOMAIN, MessagesPage),
    ))

    logged_in = False

    def home(self):
        """Go to the home page."""
        self.location(self.absurl('/home'))

    def login(self):
        """
        Open the login page and submit the credentials.

        NOTE(review): the outcome of the submission is not checked;
        'logged_in' is set unconditionally.
        """
        self.location(self.absurl('/login'), no_login=True)
        self.page.login(self.username, self.password)
        self.logged_in = True

    def is_logged(self):
        """Tell whether login() has already been performed."""
        return self.logged_in

    def check_login(func):
        """Decorator: log in first if needed, then call the wrapped method."""
        from functools import wraps

        @wraps(func)  # keep the wrapped method's name and docstring
        def inner(self, *args, **kwargs):
            if not self.logged_in:
                self.login()
            return func(self, *args, **kwargs)
        return inner

    #def get_consts(self):
    #    if self.consts is not None:
    #        return self.consts
    #    self.consts = []
    #    for i in xrange(2):
    #        r = self.api_request('me', 'all_values', data={'sex': i})
    #        self.consts.append(r['result']['values'])
    #    return self.consts

    #@check_login
    #def score(self):
    #    #r = self.api_request('member', 'view', data={'id': self.my_id})
    #    return int(r['result']['member']['popu']['popu'])

    def get_my_name(self):
        """Return our own username (the one used to log in)."""
        return self.username

    #@check_login
    #def nb_new_mails(self):
    #    r = self.api_request('me', '[default]')
    #    return r['result']['news']['newMails']

    #@check_login
    #def nb_new_baskets(self):
    #    r = self.api_request('me', '[default]')
    #    return r['result']['news']['newBaskets']

    #@check_login
    #def nb_new_visites(self):
    #    r = self.api_request('me', '[default]')
    #    return r['result']['news']['newVisits']

    #@check_login
    #def nb_available_charms(self):
    #    r = self.login()
    #    return r['result']['flashs']

    #@check_login
    #def nb_godchilds(self):
    #    r = self.api_request('member', 'view', data={'id': self.my_id})
    #    return int(r['result']['member']['popu']['invits'])

    #@check_login
    #def get_baskets(self):
    #    r = self.api_request('me', 'basket')
    #    return r['result']['basket']

    #@check_login
    #def get_flashs(self):
    #    r = self.api_request('me', 'flashs')
    #    return r['result']['all']

    #@check_login
    #def get_visits(self):
    #    r = self.api_request('me', 'visits')
    #    return r['result']['news'] + r['result']['olds']

    @check_login
    def get_threads_list(self, count=30):
        """
        Return the threads listed on the messages page.

        :param count: currently unused
        :returns: whatever the page's get_threads() returns
        """
        self.location('http://m.okcupid.com/messages')
        return self.page.get_threads()

    @check_login
    def get_thread_mails(self, id, count=30):
        """
        Return the messages of a thread.

        :param id: thread identifier (converted to int)
        :param count: forwarded to the page's get_thread_mails()
        :returns: the dict built by the page; mails['member']['pseudo'] is
                  additionally set to the peer's username when the thread
                  contains at least one message not written by us
        """
        id = int(id)
        self.location(self.absurl('/messages?readmsg=true&threadid=%i&folder=1' % id))

        mails = self.page.get_thread_mails(count)

        # Find the peer username: the author of the first message that is
        # not ours.
        my_name = self.get_my_name()
        for mail in mails['messages']:
            if mail['id_from'] != my_name:
                mails['member']['pseudo'] = mail['id_from']
                break

        return mails

    #@check_login
    #@url2id
    #def post_mail(self, id, content):
    #    new_content = u''
    #    for c in content:
    #        try:
    #            new_content += '&%s;' % codepoint2name[ord(c)]
    #        except KeyError:
    #            new_content += c
    #    content = new_content.replace('\n', '\r\n').encode('Windows-1252', 'replace')
    #    try:
    #        self.api_request('message', 'new', data={'memberId': id, 'message': content})
    #    except AuMException, e:
    #        raise CantSendMessage(unicode(e))

    #@check_login
    #@url2id
    #def delete_thread(self, id):
    #    r = self.api_request('message', 'delete', data={'id_user': id})
    #    self.logger.debug('Thread deleted: %r' % r)

    #@check_login
    #@url2id
    #def send_charm(self, id):
    #    try:
    #        self.api_request('member', 'addBasket', data={'id': id})
    #    except AuMException:
    #        return False
    #    else:
    #        return True

    #def search_profiles(self, **kwargs):
    #    if self.search_query is None:
    #        r = self.api_request('searchs', '[default]')
    #        self.search_query = r['result']['search']['query']
    #    params = {}
    #    for key, value in json.loads(self.search_query).iteritems():
    #        if isinstance(value, dict):
    #            for k, v in value.iteritems():
    #                params['%s%s' % (key, k.capitalize())] = v
    #        else:
    #            params[key] = value or ''
    #    r = self.api_request('searchs', 'advanced', '30,0', params)
    #    ids = [s['id'] for s in r['result']['search']]
    #    return set(ids)

    #@url2id
    #def get_profile(self, id, with_pics=True):
    #    r = self.api_request('member', 'view', data={'id': id})
    #    if not 'result' in r:
    #        print r
    #    profile = r['result']['member']
    #    # Calculate distance in km.
    #    profile['dist'] = 0.0
    #    if 'lat' in profile and 'lng' in profile:
    #        coords = (float(profile['lat']), float(profile['lng']))
    #        R = 6371
    #        lat1 = math.radians(self.my_coords[0])
    #        lat2 = math.radians(coords[0])
    #        lon1 = math.radians(self.my_coords[1])
    #        lon2 = math.radians(coords[1])
    #        dLat = lat2 - lat1
    #        dLong = lon2 - lon1
    #        a= pow(math.sin(dLat/2), 2) + math.cos(lat1) * math.cos(lat2) * pow(math.sin(dLong/2), 2)
    #        c= 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    #        profile['dist'] = R * c
    #    if with_pics:
    #        r = self.api_request('member', 'pictures', data={'id': id})
    #        profile['pictures'] = []
    #        for pic in r['result']['pictures']:
    #            d = {'hidden': False}
    #            d.update(pic)
    #            profile['pictures'].append(d)
    #    return profile

    #def _get_chat_infos(self):
    #    try:
    #        data = json.load(self.openurl('http://www.adopteunmec.com/1.1_cht_get.php?anticache=%f' % random.random()))
    #    except ValueError:
    #        raise BrowserUnavailable()
    #    if data['error']:
    #        raise ChatException(u'Error while getting chat infos. json:\n%s' % data)
    #    return data

    #def iter_contacts(self):
    #    def iter_dedupe(contacts):
    #        yielded_ids = set()
    #        for contact in contacts:
    #            if contact['id'] not in yielded_ids:
    #                yield contact
    #            yielded_ids.add(contact['id'])
    #    data = self._get_chat_infos()
    #    return iter_dedupe(data['contacts'])

    #def iter_chat_messages(self, _id=None):
    #    data = self._get_chat_infos()
    #    if data['messages'] is not None:
    #        for message in data['messages']:
    #            yield ChatMessage(id_from=message['id_from'], id_to=message['id_to'], message=message['message'], date=message['date'])

    #def send_chat_message(self, _id, message):
    #    url = 'http://www.adopteunmec.com/1.1_cht_send.php?anticache=%f' % random.random()
    #    data = dict(id=_id, message=message)
    #    headers = {
    #        'Content-type': 'application/x-www-form-urlencoded',
    #        'Accept': 'text/plain',
    #        'Referer': 'http://www.adopteunmec.com/chat.php',
    #        'Origin': 'http://www.adopteunmec.com',
    #    }
    #    request = self.request_class(url, urllib.urlencode(data), headers)
    #    response = self.openurl(request).read()
    #    try:
    #        datetime.datetime.strptime(response, '%Y-%m-%d %H:%M:%S')
    #        return True
    #    except ValueError:
    #        return False

66
modules/okc/pages.py Normal file
View file

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage, BrokenPageError
class LoginPage(BasePage):
    """Page holding the login form."""

    def login(self, username, password):
        """
        Fill the 'loginf' form with the credentials, encoded with the
        browser's ENCODING, and submit it through the 'login_btn' button.
        """
        browser = self.browser
        browser.select_form(name='loginf')
        for field, value in (('username', username), ('password', password)):
            browser[field] = value.encode(browser.ENCODING)
        browser.submit(id='login_btn', nologin=True)
class ThreadPage(BasePage):
    """Page listing the message threads."""

    def get_threads(self):
        """
        Return one dict per thread found on the page.

        Only the <li> elements under div#page_content whose class list
        contains 'clearfix' are considered.  Each dict has two keys:
        'username', the last path component of the href of the element's
        first child, and 'id', the part of the element's id following the
        first underscore.
        """
        root = self.document.getroot()
        items = self.parser.select(root, "//div[@id='page_content']//li", method='xpath')
        return [
            {
                u'username': item.getchildren()[0].get('href').split('/')[-1],
                u'id': item.get('id', '').split('_')[1],
            }
            for item in items
            if 'clearfix' in item.get('class', '').split()
        ]
class MessagesPage(BasePage):
    """Page showing the messages of a single thread."""

    def get_thread_mails(self, count):
        """
        Extract the messages listed under ul#rows.

        :param count: not used by this method
        :returns: a dict with an empty 'member' dict and a 'messages' list;
                  each message is a dict with the keys 'date' (text of the
                  date element), 'message' (serialized markup of the body
                  element) and 'id_from' (last path component of the
                  sender link)
        """
        rows = self.parser.select(self.document.getroot(), "//ul[@id='rows']", method='xpath')[0]

        messages = []
        for row in rows.getchildren():
            cells = row.getchildren()
            # Second child of the row holds the body and the date.
            parts = cells[1].getchildren()
            text = self.parser.tostring(parts[1])
            date = parts[2].text
            # First child of the row links to the sender's profile.
            sender = cells[0].get('href').split('/')[-1]
            messages.append({
                'date': date,
                'message': text,
                'id_from': sender,
            })

        return {
            'member': {},
            'messages': messages,
        }