diff --git a/modules/okc/backend.py b/modules/okc/backend.py index 252b106b..cc96b998 100644 --- a/modules/okc/backend.py +++ b/modules/okc/backend.py @@ -21,7 +21,6 @@ import time import datetime -from html2text import unescape from dateutil import tz from dateutil.parser import parse as _parse_dt @@ -152,7 +151,7 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD thread.flags = Thread.IS_DISCUSSION with self.browser: - mails = self.browser.get_thread_mails(id, 100) + mails = self.browser.get_thread_mails(id) my_name = self.browser.get_my_name() child = None @@ -165,8 +164,8 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD thread.title = u'Discussion with %s' % mails['member']['pseudo'] for mail in mails['messages']: - flags = Message.IS_HTML - if parse_dt(mail['date']) > slut['lastmsg']: + flags = 0 + if mail['date'] > slut['lastmsg']: flags |= Message.IS_UNREAD if get_profiles: @@ -181,13 +180,13 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD signature += contacts[mail['id_from']].get_text() msg = Message(thread=thread, - id=int(time.strftime('%Y%m%d%H%M%S', parse_dt(mail['date']).timetuple())), + id=int(time.strftime('%Y%m%d%H%M%S', mail['date'].timetuple())), title=thread.title, sender=mail['id_from'], receivers=[my_name if mail['id_from'] != my_name else mails['member']['pseudo']], - date=parse_dt(mail['date']), - content=unescape(mail['message']).strip(), - signature='
%s
' % signature, + date=mail['date'], + content=mail['message'], + signature=signature, children=[], flags=flags) if child: @@ -329,7 +328,7 @@ class OkCBackend(BaseBackend, ICapMessages, ICapContact, ICapMessagesPost, ICapD def iter_contacts(self, status=Contact.STATUS_ALL, ids=None): with self.browser: - threads = self.browser.get_threads_list(count=100) + threads = self.browser.get_threads_list() for thread in threads: c = self.get_contact(thread['username']) diff --git a/modules/okc/browser.py b/modules/okc/browser.py index 8e3c6240..8644d65f 100644 --- a/modules/okc/browser.py +++ b/modules/okc/browser.py @@ -48,7 +48,7 @@ class OkCBrowser(BaseBrowser): ('http://%s/home' % DOMAIN, BasePage), ('http://%s/messages' % DOMAIN, ThreadPage), ('http://%s/messages\?compose=1' % DOMAIN, PostMessagePage), - ('http://%s/messages\?.*' % DOMAIN, MessagesPage), + ('http://\w+.okcupid.com/messages\?.*', MessagesPage), ('http://%s/profile/.*/photos' % DOMAIN, PhotosPage), ('http://%s/profile/[^/]*' % DOMAIN, ProfilePage), ('http://%s/visitors' % DOMAIN, VisitsPage), @@ -130,16 +130,16 @@ class OkCBrowser(BaseBrowser): return self.page.get_visits() @check_login - def get_threads_list(self, count=30): + def get_threads_list(self): self.location('http://m.okcupid.com/messages') return self.page.get_threads() @check_login - def get_thread_mails(self, id, count=30): + def get_thread_mails(self, id): id = int(id) - self.location(self.absurl('/messages?readmsg=true&threadid=%i&folder=1' % id)) + self.location('http://www.okcupid.com/messages?readmsg=true&threadid=%i&folder=1' % id) - return self.page.get_thread_mails(count) + return self.page.get_thread_mails() @check_login def post_mail(self, id, content): diff --git a/modules/okc/pages.py b/modules/okc/pages.py index 2fbbde43..0f85b11e 100644 --- a/modules/okc/pages.py +++ b/modules/okc/pages.py @@ -18,10 +18,12 @@ # along with weboob. If not, see . import re +from datetime import datetime from weboob.tools.browser import BasePage from weboob.tools.ordereddict import OrderedDict from weboob.capabilities.contact import ProfileNode +from weboob.tools.misc import local2utc, html2text class LoginPage(BasePage): @@ -50,27 +52,26 @@ class ThreadPage(BasePage): class MessagesPage(BasePage): - def get_thread_mails(self, count): - ul_item = self.parser.select(self.document.getroot(), "//ul[@id='rows']", method='xpath')[0] - + def get_thread_mails(self): mails = { 'member' : {}, 'messages' : [], } try: - mails['member']['pseudo'] = self.document.xpath('//li[starts-with(@id, "usr_")]')[0].attrib['id'].split('_', 1)[-1] + mails['member']['pseudo'] = self.parser.tocleanstring(self.document.getroot().cssselect('div#message_heading div.username span.name')[0]) except IndexError: mails['member']['pseudo'] = 'Unknown' - for li_msg in reversed(ul_item.getchildren()): - div = li_msg.getchildren()[1] - txt = self.parser.tostring(div.getchildren()[1]) - date = div.getchildren()[2].text - id_from = li_msg.getchildren()[0].get('href').split('/')[-1].split('?')[0] + for li in reversed(self.document.xpath('//ul[@id="thread"]//li[contains(@id, "message_")]')): + txt = self.parser.tostring(li.xpath('.//div[@class="message_body"]')[0]) + txt = html2text(txt).strip() - if date is not None: - date = unicode(date) + m = re.search(r'(\d+), ', li.xpath('.//span[@class="timestamp"]//script')[0].text) + assert m + date = local2utc(datetime.fromtimestamp(int(m.group(1)))) + + id_from = li.find('a').attrib['href'].split('/')[-1].split('?')[0] mails['messages'].append({ 'date' : date,