From db91ac28485451e3efc50c059054cf65249b9fb5 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Mon, 27 Jul 2015 16:53:13 +0200 Subject: [PATCH] [twitter] fix search pagination --- modules/twitter/browser.py | 14 ++++++++------ modules/twitter/pages.py | 18 +++++++++++++----- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/modules/twitter/browser.py b/modules/twitter/browser.py index 588e33df..c52b7989 100644 --- a/modules/twitter/browser.py +++ b/modules/twitter/browser.py @@ -21,7 +21,7 @@ from weboob.browser import LoginBrowser, URL, need_login from weboob.exceptions import BrowserIncorrectPassword from weboob.capabilities.messages import Message from .pages import LoginPage, LoginErrorPage, ThreadPage, Tweet, TrendsPage,\ - TimelinePage, HomeTimelinePage, SearchTimelinePage, SearchHomePage + TimelinePage, HomeTimelinePage, SearchTimelinePage, SearchPage __all__ = ['TwitterBrowser'] @@ -35,9 +35,10 @@ class TwitterBrowser(LoginBrowser): thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage) login_error = URL(u'login/error.+', LoginErrorPage) tweet = URL(u'i/tweet/create', Tweet) - search_home = URL(u'search-home', SearchHomePage) trends = URL(u'i/trends\?pc=true&show_context=false&src=search-home&k=(?P<token>.*)', TrendsPage) search = URL(u'i/search/timeline', SearchTimelinePage) + search_page = URL(u'search\?q=(?P<pattern>.+)&src=sprv', + u'search-home', SearchPage) profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage) timeline = URL(u'i/timeline', TimelinePage) login = URL(u'', LoginPage) @@ -69,7 +70,7 @@ class TwitterBrowser(LoginBrowser): if not self.authenticity_token: self.do_login() - trends_token = self.search_home.open().get_trends_token() + trends_token = self.search_page.open().get_trends_token() return self.trends.open(token=trends_token).get_trendy_subjects() @need_login @@ -124,7 +125,8 @@ class TwitterBrowser(LoginBrowser): return self.get_tweets_from_search(u'#%s' % path if not path.startswith('#') else path) def 
get_tweets_from_search(self, path): + min_position = self.search_page.go(pattern=path).get_min_position() params = {'q': "%s" % path, - 'src': 'typd', - 'f': 'realtime'} - return self.search.go(params=params).iter_threads(params=params) + 'src': 'sprv'} + + return self.search.go(params=params).iter_threads(params=params, min_position=min_position) diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py index b018ae0f..ee7986fc 100644 --- a/modules/twitter/pages.py +++ b/modules/twitter/pages.py @@ -50,10 +50,12 @@ class TwitterJsonHTMLPage(JsonPage): if 'module_html' in self.doc: self.doc = html.parse(StringIO(self.doc['module_html']), parser) else: - if 'scroll_cursor' in self.doc: - self.scroll_cursor = self.doc['scroll_cursor'] - self.has_next = self.doc['has_more_items'] + + self.min_position = None + if 'min_position' in self.doc: + self.min_position = self.doc['min_position'] + if self.doc['items_html']: el = html.parse(StringIO(self.doc['items_html']), parser) self.doc = el if el.getroot() is not None else html.Element('brinbrin') @@ -115,11 +117,14 @@ class ThreadPage(HTMLPage): obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span | ./div/div[@class="ProfileTweet-authorDetails"]/span/a/span', 'data-time')) -class SearchHomePage(HTMLPage): +class SearchPage(HTMLPage): def get_trends_token(self): json_data = CleanText('//input[@id="init-data"]/@value')(self.doc) return json.loads(json_data)['trendsCacheKey'] + def get_min_position(self): + return CleanText('//div[@class="stream-container "]/@data-min-position')(self.doc) + class TrendsPage(TwitterJsonHTMLPage): @@ -180,7 +185,10 @@ class SearchTimelinePage(TwitterJsonHTMLPage): def next_page(self): params = self.env['params'] - params['scroll_cursor'] = self.page.scroll_cursor + params['max_position'] = self.page.min_position + if 'min_position' in self.env and not params['max_position']: + params['max_position'] = self.env['min_position'] + if 
self.page.has_next: return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))