[twitter] follow timelines in search requests

2014-06-01 16:37:37 +02:00 · 2014-06-01 16:37:37 +02:00 · a8fad76245
commit a8fad76245
parent c20e6123fc
2 changed files with 50 additions and 22 deletions
--- a/modules/twitter/browser.py
+++ b/modules/twitter/browser.py
@@ -20,7 +20,8 @@
 from weboob.tools.browser2 import LoginBrowser, URL, need_login
 from weboob.tools.browser import BrowserIncorrectPassword
 from weboob.capabilities.messages import Message
-from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage, HomeTimelinePage
+from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage,\
                   TimelinePage, HomeTimelinePage, SearchTimelinePage
 __all__ = ['TwitterBrowser']
@@ -34,8 +35,8 @@ class TwitterBrowser(LoginBrowser):
    tweet = URL(u'i/tweet/create', Tweet)
    trends = URL(u'trends', TrendsPage)
    hashtag = URL(u'hashtag/(?P<path>.+)\?f=realtime', TwitterBasePage)
-    search = URL(u'search\?q="(?P<path>.+)&f=realtime&src=typd"', TwitterBasePage)
+    search = URL(u'i/search/timeline', SearchTimelinePage)
-    profil = URL(u'i/profiles/show/(?P<path>.+)/timeline', HomeTimelinePage)
+    profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
    timeline = URL(u'i/timeline', TimelinePage)
    login = URL(u'', LoginPage)
@@ -117,4 +118,7 @@ class TwitterBrowser(LoginBrowser):
        return self.hashtag.go(path=path.lstrip('#')).iter_threads()
    def get_tweets_from_search(self, path):
-        return self.search.go(path=path).iter_threads()
+        params = {'q': "%s" % path,
                  'src': 'typd',
                  'f': 'realtime'}
        return self.search.go(params=params).iter_threads(params=params)
--- a/modules/twitter/pages.py
+++ b/modules/twitter/pages.py
@@ -21,12 +21,13 @@ from datetime import datetime
 from weboob.tools.date import DATE_TRANSLATE_FR
 from io import StringIO
 import lxml.html as html
 import urllib
 from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
 from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
 from weboob.capabilities.messages import Thread, Message
 from weboob.capabilities.base import CapBaseObject
-__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage']
+__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage', 'SearchTimelinePage']
 class DatetimeFromTimestamp(Filter):
@@ -38,14 +39,18 @@ class TwitterJsonHTMLPage(JsonPage):
    ENCODING = None
    has_next = None
    scroll_cursor = None
    def __init__(self, browser, response, *args, **kwargs):
        super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
        self.encoding = self.ENCODING or response.encoding
        parser = html.HTMLParser(encoding=self.encoding)
-        if hasattr(self.doc, 'module_html'):
+        if 'module_html' in self.doc:
            self.doc = html.parse(StringIO(self.doc['module_html']), parser)
        else:
            if 'scroll_cursor' in self.doc:
                self.scroll_cursor = self.doc['scroll_cursor']
            self.has_next = self.doc['has_more_items']
            self.doc = html.parse(StringIO(self.doc['items_html']), parser)
@@ -131,16 +136,9 @@ class TrendsPage(TwitterJsonHTMLPage):
            obj_id = Attr('.', 'data-trend-name')
-class TimelinePage(TwitterJsonHTMLPage):
+class TimelineListElement(ListElement):
    @pagination
    @method
    class iter_threads(ListElement):
    item_xpath = '//*[@data-item-type="tweet"]/div'
        def next_page(self):
            if self.page.has_next:
                return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
    def get_last_id(self):
        _el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
        return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
@@ -157,12 +155,38 @@ class TimelinePage(TwitterJsonHTMLPage):
        obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
-class HomeTimelinePage(TimelinePage):
+class TimelinePage(TwitterJsonHTMLPage):
    @pagination
    @method
    class iter_threads(TimelineListElement):
        def next_page(self):
            if self.page.has_next:
                return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
 class HomeTimelinePage(TwitterJsonHTMLPage):
    @pagination
    @method
    class iter_threads(TimelineListElement):
        def next_page(self):
            if self.page.has_next:
                return u'%s?max_id=%s' % (self.page.url.split('?')[0], self.get_last_id())
 class SearchTimelinePage(TwitterJsonHTMLPage):
    @pagination
    @method
    class iter_threads(TimelineListElement):
        def next_page(self):
            params = self.env['params']
            params['scroll_cursor'] = self.page.scroll_cursor
            if self.page.has_next:
                return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
 class LoginErrorPage(HTMLPage):
    pass