[twitter] follow timelines in search requests

This commit is contained in:
Bezleputh 2014-06-01 16:37:37 +02:00
commit a8fad76245
2 changed files with 50 additions and 22 deletions

View file

@ -20,7 +20,8 @@
from weboob.tools.browser2 import LoginBrowser, URL, need_login
from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities.messages import Message
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage, HomeTimelinePage
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage,\
TimelinePage, HomeTimelinePage, SearchTimelinePage
__all__ = ['TwitterBrowser']
@ -34,8 +35,8 @@ class TwitterBrowser(LoginBrowser):
tweet = URL(u'i/tweet/create', Tweet)
trends = URL(u'trends', TrendsPage)
hashtag = URL(u'hashtag/(?P<path>.+)\?f=realtime', TwitterBasePage)
search = URL(u'search\?q="(?P<path>.+)&f=realtime&src=typd"', TwitterBasePage)
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline', HomeTimelinePage)
search = URL(u'i/search/timeline', SearchTimelinePage)
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
timeline = URL(u'i/timeline', TimelinePage)
login = URL(u'', LoginPage)
@ -117,4 +118,7 @@ class TwitterBrowser(LoginBrowser):
return self.hashtag.go(path=path.lstrip('#')).iter_threads()
def get_tweets_from_search(self, path):
return self.search.go(path=path).iter_threads()
params = {'q': "%s" % path,
'src': 'typd',
'f': 'realtime'}
return self.search.go(params=params).iter_threads(params=params)

View file

@ -21,12 +21,13 @@ from datetime import datetime
from weboob.tools.date import DATE_TRANSLATE_FR
from io import StringIO
import lxml.html as html
import urllib
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
from weboob.capabilities.messages import Thread, Message
from weboob.capabilities.base import CapBaseObject
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage']
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage', 'SearchTimeLinePage']
class DatetimeFromTimestamp(Filter):
@ -38,14 +39,18 @@ class TwitterJsonHTMLPage(JsonPage):
ENCODING = None
has_next = None
scroll_cursor = None
def __init__(self, browser, response, *args, **kwargs):
super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
self.encoding = self.ENCODING or response.encoding
parser = html.HTMLParser(encoding=self.encoding)
if hasattr(self.doc, 'module_html'):
if 'module_html' in self.doc:
self.doc = html.parse(StringIO(self.doc['module_html']), parser)
else:
if 'scroll_cursor' in self.doc:
self.scroll_cursor = self.doc['scroll_cursor']
self.has_next = self.doc['has_more_items']
self.doc = html.parse(StringIO(self.doc['items_html']), parser)
@ -131,38 +136,57 @@ class TrendsPage(TwitterJsonHTMLPage):
obj_id = Attr('.', 'data-trend-name')
class TimelineListElement(ListElement):
item_xpath = '//*[@data-item-type="tweet"]/div'
def get_last_id(self):
_el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
class item(ItemElement):
klass = Thread
obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
obj_title = Format('%s \n\t %s',
CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
CleanText('./div/p',
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
class TimelinePage(TwitterJsonHTMLPage):
@pagination
@method
class iter_threads(ListElement):
item_xpath = '//*[@data-item-type="tweet"]/div'
class iter_threads(TimelineListElement):
def next_page(self):
if self.page.has_next:
return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
def get_last_id(self):
_el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
class item(ItemElement):
klass = Thread
class HomeTimelinePage(TwitterJsonHTMLPage):
@pagination
@method
class iter_threads(TimelineListElement):
obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
obj_title = Format('%s \n\t %s',
CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
CleanText('./div/p',
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
class HomeTimelinePage(TimelinePage):
def next_page(self):
if self.page.has_next:
return u'%s?max_id=%s' % (self.page.url.split('?')[0], self.get_last_id())
class SearchTimelinePage(TwitterJsonHTMLPage):
@pagination
@method
class iter_threads(TimelineListElement):
def next_page(self):
params = self.env['params']
params['scroll_cursor'] = self.page.scroll_cursor
if self.page.has_next:
return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
class LoginErrorPage(HTMLPage):
pass