From a8fad76245f2470beb0c551e0f27a14153e6f8e8 Mon Sep 17 00:00:00 2001
From: Bezleputh <carton_ben@yahoo.fr>
Date: Sun, 1 Jun 2014 16:37:37 +0200
Subject: [PATCH] [twitter] follow timelines in search requests

---
 modules/twitter/browser.py | 12 +++++---
 modules/twitter/pages.py   | 62 ++++++++++++++++++++++++++------------
 2 files changed, 51 insertions(+), 23 deletions(-)
diff --git a/modules/twitter/browser.py b/modules/twitter/browser.py
index 6a5f9992..a2a40184 100644
--- a/modules/twitter/browser.py
+++ b/modules/twitter/browser.py
@@ -20,7 +20,8 @@
 from weboob.tools.browser2 import LoginBrowser, URL, need_login
 from weboob.tools.browser import BrowserIncorrectPassword
 from weboob.capabilities.messages import Message
-from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage, HomeTimelinePage
+from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage,\
+                   TimelinePage, HomeTimelinePage, SearchTimelinePage
 
 
 __all__ = ['TwitterBrowser']
@@ -34,8 +35,8 @@ class TwitterBrowser(LoginBrowser):
     tweet = URL(u'i/tweet/create', Tweet)
     trends = URL(u'trends', TrendsPage)
     hashtag = URL(u'hashtag/(?P<path>.+)\?f=realtime', TwitterBasePage)
-    search = URL(u'search\?q="(?P<path>.+)&f=realtime&src=typd"', TwitterBasePage)
-    profil = URL(u'i/profiles/show/(?P<path>.+)/timeline', HomeTimelinePage)
+    search = URL(u'i/search/timeline', SearchTimelinePage)
+    profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
     timeline = URL(u'i/timeline', TimelinePage)
     login = URL(u'', LoginPage)
 
@@ -117,4 +118,7 @@ class TwitterBrowser(LoginBrowser):
         return self.hashtag.go(path=path.lstrip('#')).iter_threads()
 
     def get_tweets_from_search(self, path):
-        return self.search.go(path=path).iter_threads()
+        params = {'q': "%s" % path,
+                  'src': 'typd',
+                  'f': 'realtime'}
+        return self.search.go(params=params).iter_threads(params=params)
diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py
index f292fad5..92444570 100644
--- a/modules/twitter/pages.py
+++ b/modules/twitter/pages.py
@@ -21,12 +21,13 @@ from datetime import datetime
 from weboob.tools.date import DATE_TRANSLATE_FR
 from io import StringIO
 import lxml.html as html
+import urllib
 
 from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
 from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
 from weboob.capabilities.messages import Thread, Message
 from weboob.capabilities.base import CapBaseObject
-__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage']
+__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage', 'SearchTimelinePage']
 
 
 class DatetimeFromTimestamp(Filter):
@@ -38,14 +39,18 @@ class TwitterJsonHTMLPage(JsonPage):
 
     ENCODING = None
     has_next = None
+    scroll_cursor = None
 
     def __init__(self, browser, response, *args, **kwargs):
         super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
         self.encoding = self.ENCODING or response.encoding
         parser = html.HTMLParser(encoding=self.encoding)
-        if hasattr(self.doc, 'module_html'):
+        if 'module_html' in self.doc:
             self.doc = html.parse(StringIO(self.doc['module_html']), parser)
         else:
+            if 'scroll_cursor' in self.doc:
+                self.scroll_cursor = self.doc['scroll_cursor']
+
             self.has_next = self.doc['has_more_items']
             self.doc = html.parse(StringIO(self.doc['items_html']), parser)
 
@@ -131,38 +136,57 @@ class TrendsPage(TwitterJsonHTMLPage):
             obj_id = Attr('.', 'data-trend-name')
 
 
+class TimelineListElement(ListElement):
+    item_xpath = '//*[@data-item-type="tweet"]/div'
+
+    def get_last_id(self):
+        _el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
+        return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
+
+    class item(ItemElement):
+        klass = Thread
+
+        obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
+        obj_title = Format('%s \n\t %s',
+                           CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
+                                     replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
+                           CleanText('./div/p',
+                                     replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
+        obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
+
+
 class TimelinePage(TwitterJsonHTMLPage):
     @pagination
     @method
-    class iter_threads(ListElement):
-        item_xpath = '//*[@data-item-type="tweet"]/div'
+    class iter_threads(TimelineListElement):
 
         def next_page(self):
             if self.page.has_next:
                 return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
 
-        def get_last_id(self):
-            _el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
-            return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
 
-        class item(ItemElement):
-            klass = Thread
+class HomeTimelinePage(TwitterJsonHTMLPage):
+    @pagination
+    @method
+    class iter_threads(TimelineListElement):
 
-            obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
-            obj_title = Format('%s \n\t %s',
-                               CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
-                                         replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
-                               CleanText('./div/p',
-                                         replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
-            obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
-
-
-class HomeTimelinePage(TimelinePage):
         def next_page(self):
             if self.page.has_next:
                 return u'%s?max_id=%s' % (self.page.url.split('?')[0], self.get_last_id())
 
 
+class SearchTimelinePage(TwitterJsonHTMLPage):
+    @pagination
+    @method
+    class iter_threads(TimelineListElement):
+
+        def next_page(self):
+            params = self.env['params']
+            params['scroll_cursor'] = self.page.scroll_cursor
+            if self.page.has_next:
+                return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
+
+
 class LoginErrorPage(HTMLPage):
     pass