From 8f707e86e8c3867462cb7c2b58c7f93c3e5deb44 Mon Sep 17 00:00:00 2001
From: Bezleputh
Date: Thu, 29 May 2014 15:17:54 +0200
Subject: [PATCH] [twitter] display trendy tweets

---
Reviewer notes (text between "---" and the first "diff --git" is not part
of the change itself):
- This patch was re-flowed from a copy whose line breaks and indentation
  had been collapsed. Indentation and the position of blank context lines
  were inferred from the Python structure and from the hunk line counts;
  verify against commit 8f707e86 before applying.
- The named groups in the URL patterns had lost their "<name>" part.
  "<path>" is taken from the go(path=...) calls made in this patch;
  "<user>" in the thread_page context line is an assumption - TODO confirm.
- The author e-mail address is missing from the "From:" header above; it
  was not present in the copy this was rebuilt from.

 modules/twitter/backend.py | 42 ++++++++++++++++++++++++++++++++----------
 modules/twitter/browser.py | 23 +++++++++++++++++++++--
 modules/twitter/pages.py   | 29 +++++++++++++++++++++++++++--
 3 files changed, 80 insertions(+), 14 deletions(-)

diff --git a/modules/twitter/backend.py b/modules/twitter/backend.py
index e429e6ad..78c5d55b 100644
--- a/modules/twitter/backend.py
+++ b/modules/twitter/backend.py
@@ -118,23 +118,45 @@ class TwitterBackend(BaseBackend, ICapMessages, ICapMessagesPost, ICapCollection
         collection = self.get_collection(objs, split_path)
         if collection.path_level == 0:
             if self.config['username'].get():
-                me = self.browser.get_me()
-                yield Collection([me], me)
-            profils = self.config['profils_subscribe'].get()
-            if profils:
-                for profil in profils.split(','):
-                    yield Collection([profil], profil)
+                yield Collection([u'me'], u'me')
+            yield Collection([u'profils'], u'profils')
+            yield Collection([u'trendy'], u'trendy')
 
         if collection.path_level == 1:
-            for el in self.browser.get_tweets_from_collection(collection.split_path[0]):
-                yield el
+            if collection.split_path[0] == u'me':
+                for el in self.browser.get_tweets_from_profil(self.browser.get_me()):
+                    yield el
+
+            if collection.split_path[0] == u'profils':
+                profils = self.config['profils_subscribe'].get()
+                if profils:
+                    for profil in profils.split(','):
+                        yield Collection([profil], profil)
+
+            if collection.split_path[0] == u'trendy':
+                for obj in self.browser.get_trendy_subjects():
+                    yield Collection([obj.id], obj.id)
+
+        if collection.path_level == 2:
+            if collection.split_path[0] == u'profils':
+                for el in self.browser.get_tweets_from_profil(collection.split_path[1]):
+                    yield el
+
+            if collection.split_path[0] == u'trendy':
+                if collection.split_path[1].startswith('#'):
+                    for el in self.browser.get_tweets_from_hashtag(collection.split_path[1]):
+                        yield el
+                else:
+                    for el in self.browser.get_tweets_from_search(collection.split_path[1]):
+                        yield el
 
     def validate_collection(self, objs, collection):
         if collection.path_level == 0:
             return
-        if collection.path_level == 1:
+        if collection.path_level == 1 and collection.split_path[0] in [u'profils', u'trendy', u'me']:
+            return
+        if collection.path_level == 2:
             return
-
         raise CollectionNotFound(collection.split_path)
 
     OBJECTS = {Thread: fill_thread}
diff --git a/modules/twitter/browser.py b/modules/twitter/browser.py
index 7bf5a232..8583b78e 100644
--- a/modules/twitter/browser.py
+++ b/modules/twitter/browser.py
@@ -20,7 +20,7 @@
 from weboob.tools.browser2 import LoginBrowser, URL, need_login
 from weboob.tools.browser import BrowserIncorrectPassword
 from weboob.capabilities.messages import Message
-from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet
+from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage
 
 __all__ = ['TwitterBrowser']
 
@@ -32,6 +32,9 @@ class TwitterBrowser(LoginBrowser):
     thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage)
     login_error = URL(u'login/error.+', LoginErrorPage)
     tweet = URL(u'i/tweet/create', Tweet)
+    trends = URL(u'trends', TrendsPage)
+    hashtag = URL(u'hashtag/(?P<path>.+)', TwitterBasePage)
+    search = URL(u'search\?q="(?P<path>.+)"', TwitterBasePage)
     twitter_page = URL(u'(?P<path>.+)/with_replies', TwitterBasePage)
     login = URL(u'', LoginPage)
 
@@ -52,6 +55,16 @@ class TwitterBrowser(LoginBrowser):
     def iter_threads(self):
         return self.login.stay_or_go().iter_threads()
 
+    def get_trendy_subjects(self):
+        if self.username:
+            return self.get_logged_trendy_subject()
+        else:
+            return self.trends.open().get_trendy_subjects()
+
+    @need_login
+    def get_logged_trendy_subject(self):
+        return self.trends.open().get_trendy_subjects()
+
     @need_login
     def post(self, thread, message):
         datas = {'place_id': '',
@@ -96,5 +109,11 @@ class TwitterBrowser(LoginBrowser):
 
         return thread
 
-    def get_tweets_from_collection(self, path):
+    def get_tweets_from_profil(self, path):
         return self.twitter_page.go(path=path).iter_threads()
+
+    def get_tweets_from_hashtag(self, path):
+        return self.hashtag.go(path=path[1:]).iter_threads()
+
+    def get_tweets_from_search(self, path):
+        return self.search.go(path=path).iter_threads()
diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py
index 584780dc..f1d29c1b 100644
--- a/modules/twitter/pages.py
+++ b/modules/twitter/pages.py
@@ -19,12 +19,14 @@
 
 from datetime import datetime
 from weboob.tools.date import DATE_TRANSLATE_FR
+from io import StringIO
+import lxml.html as html
 from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound
 from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
 from weboob.capabilities.messages import Thread, Message
 
-
-__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet']
+from weboob.capabilities.base import CapBaseObject
+__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage']
 
 
 class DatetimeFromTimestamp(Filter):
@@ -32,6 +34,17 @@ class DatetimeFromTimestamp(Filter):
         return datetime.fromtimestamp(float(el))
 
 
+class TwitterJsonHMLPage(JsonPage):
+
+    ENCODING = None
+
+    def __init__(self, browser, response, *args, **kwargs):
+        super(TwitterJsonHMLPage, self).__init__(browser, response, *args, **kwargs)
+        self.encoding = self.ENCODING or response.encoding
+        parser = html.HTMLParser(encoding=self.encoding)
+        self.doc = html.parse(StringIO(self.doc['module_html']), parser)
+
+
 class TwitterBasePage(HTMLPage):
     @method
     class iter_threads(ListElement):
@@ -109,6 +122,18 @@ class ThreadPage(HTMLPage):
             obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
 
 
+class TrendsPage(TwitterJsonHMLPage):
+
+    @method
+    class get_trendy_subjects(ListElement):
+        item_xpath = '//li[@class="trend-item js-trend-item "]'
+
+        class item(ItemElement):
+            klass = CapBaseObject
+
+            obj_id = Attr('.', 'data-trend-name')
+
+
 class LoginErrorPage(HTMLPage):
     pass
 