[twitter] fix search pagination
This commit is contained in:
parent
d21565b292
commit
db91ac2848
2 changed files with 21 additions and 11 deletions
|
|
@@ -21,7 +21,7 @@ from weboob.browser import LoginBrowser, URL, need_login
|
||||||
from weboob.exceptions import BrowserIncorrectPassword
|
from weboob.exceptions import BrowserIncorrectPassword
|
||||||
from weboob.capabilities.messages import Message
|
from weboob.capabilities.messages import Message
|
||||||
from .pages import LoginPage, LoginErrorPage, ThreadPage, Tweet, TrendsPage,\
|
from .pages import LoginPage, LoginErrorPage, ThreadPage, Tweet, TrendsPage,\
|
||||||
TimelinePage, HomeTimelinePage, SearchTimelinePage, SearchHomePage
|
TimelinePage, HomeTimelinePage, SearchTimelinePage, SearchPage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['TwitterBrowser']
|
__all__ = ['TwitterBrowser']
|
||||||
|
|
@@ -35,9 +35,10 @@ class TwitterBrowser(LoginBrowser):
|
||||||
thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage)
|
thread_page = URL(u'(?P<user>.+)/status/(?P<_id>.+)', ThreadPage)
|
||||||
login_error = URL(u'login/error.+', LoginErrorPage)
|
login_error = URL(u'login/error.+', LoginErrorPage)
|
||||||
tweet = URL(u'i/tweet/create', Tweet)
|
tweet = URL(u'i/tweet/create', Tweet)
|
||||||
search_home = URL(u'search-home', SearchHomePage)
|
|
||||||
trends = URL(u'i/trends\?pc=true&show_context=false&src=search-home&k=(?P<token>.*)', TrendsPage)
|
trends = URL(u'i/trends\?pc=true&show_context=false&src=search-home&k=(?P<token>.*)', TrendsPage)
|
||||||
search = URL(u'i/search/timeline', SearchTimelinePage)
|
search = URL(u'i/search/timeline', SearchTimelinePage)
|
||||||
|
search_page = URL(u'search\?q=(?P<pattern>.+)&src=sprv',
|
||||||
|
u'search-home', SearchPage)
|
||||||
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
|
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
|
||||||
timeline = URL(u'i/timeline', TimelinePage)
|
timeline = URL(u'i/timeline', TimelinePage)
|
||||||
login = URL(u'', LoginPage)
|
login = URL(u'', LoginPage)
|
||||||
|
|
@@ -69,7 +70,7 @@ class TwitterBrowser(LoginBrowser):
|
||||||
if not self.authenticity_token:
|
if not self.authenticity_token:
|
||||||
self.do_login()
|
self.do_login()
|
||||||
|
|
||||||
trends_token = self.search_home.open().get_trends_token()
|
trends_token = self.search_page.open().get_trends_token()
|
||||||
return self.trends.open(token=trends_token).get_trendy_subjects()
|
return self.trends.open(token=trends_token).get_trendy_subjects()
|
||||||
|
|
||||||
@need_login
|
@need_login
|
||||||
|
|
@@ -124,7 +125,8 @@ class TwitterBrowser(LoginBrowser):
|
||||||
return self.get_tweets_from_search(u'#%s' % path if not path.startswith('#') else path)
|
return self.get_tweets_from_search(u'#%s' % path if not path.startswith('#') else path)
|
||||||
|
|
||||||
def get_tweets_from_search(self, path):
|
def get_tweets_from_search(self, path):
|
||||||
|
min_position = self.search_page.go(pattern=path).get_min_position()
|
||||||
params = {'q': "%s" % path,
|
params = {'q': "%s" % path,
|
||||||
'src': 'typd',
|
'src': 'sprv'}
|
||||||
'f': 'realtime'}
|
|
||||||
return self.search.go(params=params).iter_threads(params=params)
|
return self.search.go(params=params).iter_threads(params=params, min_position=min_position)
|
||||||
|
|
|
||||||
|
|
@@ -50,10 +50,12 @@ class TwitterJsonHTMLPage(JsonPage):
|
||||||
if 'module_html' in self.doc:
|
if 'module_html' in self.doc:
|
||||||
self.doc = html.parse(StringIO(self.doc['module_html']), parser)
|
self.doc = html.parse(StringIO(self.doc['module_html']), parser)
|
||||||
else:
|
else:
|
||||||
if 'scroll_cursor' in self.doc:
|
|
||||||
self.scroll_cursor = self.doc['scroll_cursor']
|
|
||||||
|
|
||||||
self.has_next = self.doc['has_more_items']
|
self.has_next = self.doc['has_more_items']
|
||||||
|
|
||||||
|
self.min_position = None
|
||||||
|
if 'min_position' in self.doc:
|
||||||
|
self.min_position = self.doc['min_position']
|
||||||
|
|
||||||
if self.doc['items_html']:
|
if self.doc['items_html']:
|
||||||
el = html.parse(StringIO(self.doc['items_html']), parser)
|
el = html.parse(StringIO(self.doc['items_html']), parser)
|
||||||
self.doc = el if el.getroot() is not None else html.Element('brinbrin')
|
self.doc = el if el.getroot() is not None else html.Element('brinbrin')
|
||||||
|
|
@@ -115,11 +117,14 @@ class ThreadPage(HTMLPage):
|
||||||
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span | ./div/div[@class="ProfileTweet-authorDetails"]/span/a/span', 'data-time'))
|
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span | ./div/div[@class="ProfileTweet-authorDetails"]/span/a/span', 'data-time'))
|
||||||
|
|
||||||
|
|
||||||
class SearchHomePage(HTMLPage):
|
class SearchPage(HTMLPage):
|
||||||
def get_trends_token(self):
|
def get_trends_token(self):
|
||||||
json_data = CleanText('//input[@id="init-data"]/@value')(self.doc)
|
json_data = CleanText('//input[@id="init-data"]/@value')(self.doc)
|
||||||
return json.loads(json_data)['trendsCacheKey']
|
return json.loads(json_data)['trendsCacheKey']
|
||||||
|
|
||||||
|
def get_min_position(self):
|
||||||
|
return CleanText('//div[@class="stream-container "]/@data-min-position')(self.doc)
|
||||||
|
|
||||||
|
|
||||||
class TrendsPage(TwitterJsonHTMLPage):
|
class TrendsPage(TwitterJsonHTMLPage):
|
||||||
|
|
||||||
|
|
@@ -180,7 +185,10 @@ class SearchTimelinePage(TwitterJsonHTMLPage):
|
||||||
|
|
||||||
def next_page(self):
|
def next_page(self):
|
||||||
params = self.env['params']
|
params = self.env['params']
|
||||||
params['scroll_cursor'] = self.page.scroll_cursor
|
params['max_position'] = self.page.min_position
|
||||||
|
if 'min_position' in self.env and not params['max_position']:
|
||||||
|
params['max_position'] = self.env['min_position']
|
||||||
|
|
||||||
if self.page.has_next:
|
if self.page.has_next:
|
||||||
return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
|
return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue