[twitter] fix search pagination

This commit is contained in:
Bezleputh 2015-07-27 16:53:13 +02:00
commit db91ac2848
2 changed files with 21 additions and 11 deletions

View file

@@ -21,7 +21,7 @@ from weboob.browser import LoginBrowser, URL, need_login
from weboob.exceptions import BrowserIncorrectPassword from weboob.exceptions import BrowserIncorrectPassword
from weboob.capabilities.messages import Message from weboob.capabilities.messages import Message
from .pages import LoginPage, LoginErrorPage, ThreadPage, Tweet, TrendsPage,\ from .pages import LoginPage, LoginErrorPage, ThreadPage, Tweet, TrendsPage,\
TimelinePage, HomeTimelinePage, SearchTimelinePage, SearchHomePage TimelinePage, HomeTimelinePage, SearchTimelinePage, SearchPage
__all__ = ['TwitterBrowser'] __all__ = ['TwitterBrowser']
@@ -35,9 +35,10 @@ class TwitterBrowser(LoginBrowser):
thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage) thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage)
login_error = URL(u'login/error.+', LoginErrorPage) login_error = URL(u'login/error.+', LoginErrorPage)
tweet = URL(u'i/tweet/create', Tweet) tweet = URL(u'i/tweet/create', Tweet)
search_home = URL(u'search-home', SearchHomePage)
trends = URL(u'i/trends\?pc=true&show_context=false&src=search-home&k=(?P<token>.*)', TrendsPage) trends = URL(u'i/trends\?pc=true&show_context=false&src=search-home&k=(?P<token>.*)', TrendsPage)
search = URL(u'i/search/timeline', SearchTimelinePage) search = URL(u'i/search/timeline', SearchTimelinePage)
search_page = URL(u'search\?q=(?P<pattern>.+)&src=sprv',
u'search-home', SearchPage)
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage) profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
timeline = URL(u'i/timeline', TimelinePage) timeline = URL(u'i/timeline', TimelinePage)
login = URL(u'', LoginPage) login = URL(u'', LoginPage)
@@ -69,7 +70,7 @@ class TwitterBrowser(LoginBrowser):
if not self.authenticity_token: if not self.authenticity_token:
self.do_login() self.do_login()
trends_token = self.search_home.open().get_trends_token() trends_token = self.search_page.open().get_trends_token()
return self.trends.open(token=trends_token).get_trendy_subjects() return self.trends.open(token=trends_token).get_trendy_subjects()
@need_login @need_login
@@ -124,7 +125,8 @@ class TwitterBrowser(LoginBrowser):
return self.get_tweets_from_search(u'#%s' % path if not path.startswith('#') else path) return self.get_tweets_from_search(u'#%s' % path if not path.startswith('#') else path)
def get_tweets_from_search(self, path): def get_tweets_from_search(self, path):
min_position = self.search_page.go(pattern=path).get_min_position()
params = {'q': "%s" % path, params = {'q': "%s" % path,
'src': 'typd', 'src': 'sprv'}
'f': 'realtime'}
return self.search.go(params=params).iter_threads(params=params) return self.search.go(params=params).iter_threads(params=params, min_position=min_position)

View file

@@ -50,10 +50,12 @@ class TwitterJsonHTMLPage(JsonPage):
if 'module_html' in self.doc: if 'module_html' in self.doc:
self.doc = html.parse(StringIO(self.doc['module_html']), parser) self.doc = html.parse(StringIO(self.doc['module_html']), parser)
else: else:
if 'scroll_cursor' in self.doc:
self.scroll_cursor = self.doc['scroll_cursor']
self.has_next = self.doc['has_more_items'] self.has_next = self.doc['has_more_items']
self.min_position = None
if 'min_position' in self.doc:
self.min_position = self.doc['min_position']
if self.doc['items_html']: if self.doc['items_html']:
el = html.parse(StringIO(self.doc['items_html']), parser) el = html.parse(StringIO(self.doc['items_html']), parser)
self.doc = el if el.getroot() is not None else html.Element('brinbrin') self.doc = el if el.getroot() is not None else html.Element('brinbrin')
@@ -115,11 +117,14 @@ class ThreadPage(HTMLPage):
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span | ./div/div[@class="ProfileTweet-authorDetails"]/span/a/span', 'data-time')) obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span | ./div/div[@class="ProfileTweet-authorDetails"]/span/a/span', 'data-time'))
class SearchHomePage(HTMLPage): class SearchPage(HTMLPage):
def get_trends_token(self): def get_trends_token(self):
json_data = CleanText('//input[@id="init-data"]/@value')(self.doc) json_data = CleanText('//input[@id="init-data"]/@value')(self.doc)
return json.loads(json_data)['trendsCacheKey'] return json.loads(json_data)['trendsCacheKey']
def get_min_position(self):
return CleanText('//div[@class="stream-container "]/@data-min-position')(self.doc)
class TrendsPage(TwitterJsonHTMLPage): class TrendsPage(TwitterJsonHTMLPage):
@@ -180,7 +185,10 @@ class SearchTimelinePage(TwitterJsonHTMLPage):
def next_page(self): def next_page(self):
params = self.env['params'] params = self.env['params']
params['scroll_cursor'] = self.page.scroll_cursor params['max_position'] = self.page.min_position
if 'min_position' in self.env and not params['max_position']:
params['max_position'] = self.env['min_position']
if self.page.has_next: if self.page.has_next:
return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params)) return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))