[twitter] follow timelines in search requests

This commit is contained in:
Bezleputh 2014-06-01 16:37:37 +02:00
commit a8fad76245
2 changed files with 50 additions and 22 deletions

View file

@ -20,7 +20,8 @@
from weboob.tools.browser2 import LoginBrowser, URL, need_login from weboob.tools.browser2 import LoginBrowser, URL, need_login
from weboob.tools.browser import BrowserIncorrectPassword from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities.messages import Message from weboob.capabilities.messages import Message
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage, HomeTimelinePage from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage,\
TimelinePage, HomeTimelinePage, SearchTimelinePage
__all__ = ['TwitterBrowser'] __all__ = ['TwitterBrowser']
@ -34,8 +35,8 @@ class TwitterBrowser(LoginBrowser):
tweet = URL(u'i/tweet/create', Tweet) tweet = URL(u'i/tweet/create', Tweet)
trends = URL(u'trends', TrendsPage) trends = URL(u'trends', TrendsPage)
hashtag = URL(u'hashtag/(?P<path>.+)\?f=realtime', TwitterBasePage) hashtag = URL(u'hashtag/(?P<path>.+)\?f=realtime', TwitterBasePage)
search = URL(u'search\?q="(?P<path>.+)&f=realtime&src=typd"', TwitterBasePage) search = URL(u'i/search/timeline', SearchTimelinePage)
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline', HomeTimelinePage) profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
timeline = URL(u'i/timeline', TimelinePage) timeline = URL(u'i/timeline', TimelinePage)
login = URL(u'', LoginPage) login = URL(u'', LoginPage)
@ -117,4 +118,7 @@ class TwitterBrowser(LoginBrowser):
return self.hashtag.go(path=path.lstrip('#')).iter_threads() return self.hashtag.go(path=path.lstrip('#')).iter_threads()
def get_tweets_from_search(self, path): def get_tweets_from_search(self, path):
return self.search.go(path=path).iter_threads() params = {'q': "%s" % path,
'src': 'typd',
'f': 'realtime'}
return self.search.go(params=params).iter_threads(params=params)

View file

@ -21,12 +21,13 @@ from datetime import datetime
from weboob.tools.date import DATE_TRANSLATE_FR from weboob.tools.date import DATE_TRANSLATE_FR
from io import StringIO from io import StringIO
import lxml.html as html import lxml.html as html
import urllib
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
from weboob.capabilities.messages import Thread, Message from weboob.capabilities.messages import Thread, Message
from weboob.capabilities.base import CapBaseObject from weboob.capabilities.base import CapBaseObject
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage'] __all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage', 'SearchTimelinePage']
class DatetimeFromTimestamp(Filter): class DatetimeFromTimestamp(Filter):
@ -38,14 +39,18 @@ class TwitterJsonHTMLPage(JsonPage):
ENCODING = None ENCODING = None
has_next = None has_next = None
scroll_cursor = None
def __init__(self, browser, response, *args, **kwargs): def __init__(self, browser, response, *args, **kwargs):
super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs) super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
self.encoding = self.ENCODING or response.encoding self.encoding = self.ENCODING or response.encoding
parser = html.HTMLParser(encoding=self.encoding) parser = html.HTMLParser(encoding=self.encoding)
if hasattr(self.doc, 'module_html'): if 'module_html' in self.doc:
self.doc = html.parse(StringIO(self.doc['module_html']), parser) self.doc = html.parse(StringIO(self.doc['module_html']), parser)
else: else:
if 'scroll_cursor' in self.doc:
self.scroll_cursor = self.doc['scroll_cursor']
self.has_next = self.doc['has_more_items'] self.has_next = self.doc['has_more_items']
self.doc = html.parse(StringIO(self.doc['items_html']), parser) self.doc = html.parse(StringIO(self.doc['items_html']), parser)
@ -131,16 +136,9 @@ class TrendsPage(TwitterJsonHTMLPage):
obj_id = Attr('.', 'data-trend-name') obj_id = Attr('.', 'data-trend-name')
class TimelinePage(TwitterJsonHTMLPage): class TimelineListElement(ListElement):
@pagination
@method
class iter_threads(ListElement):
item_xpath = '//*[@data-item-type="tweet"]/div' item_xpath = '//*[@data-item-type="tweet"]/div'
def next_page(self):
if self.page.has_next:
return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
def get_last_id(self): def get_last_id(self):
_el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1] _el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el) return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
@ -157,12 +155,38 @@ class TimelinePage(TwitterJsonHTMLPage):
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time')) obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
class HomeTimelinePage(TimelinePage): class TimelinePage(TwitterJsonHTMLPage):
@pagination
@method
class iter_threads(TimelineListElement):
def next_page(self):
if self.page.has_next:
return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
class HomeTimelinePage(TwitterJsonHTMLPage):
@pagination
@method
class iter_threads(TimelineListElement):
def next_page(self): def next_page(self):
if self.page.has_next: if self.page.has_next:
return u'%s?max_id=%s' % (self.page.url.split('?')[0], self.get_last_id()) return u'%s?max_id=%s' % (self.page.url.split('?')[0], self.get_last_id())
class SearchTimelinePage(TwitterJsonHTMLPage):
@pagination
@method
class iter_threads(TimelineListElement):
def next_page(self):
params = self.env['params']
params['scroll_cursor'] = self.page.scroll_cursor
if self.page.has_next:
return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
class LoginErrorPage(HTMLPage): class LoginErrorPage(HTMLPage):
pass pass