[twitter] follow timelines in search requests
This commit is contained in:
parent
c20e6123fc
commit
a8fad76245
2 changed files with 50 additions and 22 deletions
|
|
@ -20,7 +20,8 @@
|
|||
from weboob.tools.browser2 import LoginBrowser, URL, need_login
|
||||
from weboob.tools.browser import BrowserIncorrectPassword
|
||||
from weboob.capabilities.messages import Message
|
||||
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage, HomeTimelinePage
|
||||
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage,\
|
||||
TimelinePage, HomeTimelinePage, SearchTimelinePage
|
||||
|
||||
|
||||
__all__ = ['TwitterBrowser']
|
||||
|
|
@ -34,8 +35,8 @@ class TwitterBrowser(LoginBrowser):
|
|||
tweet = URL(u'i/tweet/create', Tweet)
|
||||
trends = URL(u'trends', TrendsPage)
|
||||
hashtag = URL(u'hashtag/(?P<path>.+)\?f=realtime', TwitterBasePage)
|
||||
search = URL(u'search\?q="(?P<path>.+)&f=realtime&src=typd"', TwitterBasePage)
|
||||
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline', HomeTimelinePage)
|
||||
search = URL(u'i/search/timeline', SearchTimelinePage)
|
||||
profil = URL(u'i/profiles/show/(?P<path>.+)/timeline/with_replies', HomeTimelinePage)
|
||||
timeline = URL(u'i/timeline', TimelinePage)
|
||||
login = URL(u'', LoginPage)
|
||||
|
||||
|
|
@ -117,4 +118,7 @@ class TwitterBrowser(LoginBrowser):
|
|||
return self.hashtag.go(path=path.lstrip('#')).iter_threads()
|
||||
|
||||
def get_tweets_from_search(self, path):
|
||||
return self.search.go(path=path).iter_threads()
|
||||
params = {'q': "%s" % path,
|
||||
'src': 'typd',
|
||||
'f': 'realtime'}
|
||||
return self.search.go(params=params).iter_threads(params=params)
|
||||
|
|
|
|||
|
|
@ -21,12 +21,13 @@ from datetime import datetime
|
|||
from weboob.tools.date import DATE_TRANSLATE_FR
|
||||
from io import StringIO
|
||||
import lxml.html as html
|
||||
import urllib
|
||||
|
||||
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
|
||||
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
|
||||
from weboob.capabilities.messages import Thread, Message
|
||||
from weboob.capabilities.base import CapBaseObject
|
||||
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage']
|
||||
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage', 'HomeTimelinePage', 'SearchTimeLinePage']
|
||||
|
||||
|
||||
class DatetimeFromTimestamp(Filter):
|
||||
|
|
@ -38,14 +39,18 @@ class TwitterJsonHTMLPage(JsonPage):
|
|||
|
||||
ENCODING = None
|
||||
has_next = None
|
||||
scroll_cursor = None
|
||||
|
||||
def __init__(self, browser, response, *args, **kwargs):
|
||||
super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
|
||||
self.encoding = self.ENCODING or response.encoding
|
||||
parser = html.HTMLParser(encoding=self.encoding)
|
||||
if hasattr(self.doc, 'module_html'):
|
||||
if 'module_html' in self.doc:
|
||||
self.doc = html.parse(StringIO(self.doc['module_html']), parser)
|
||||
else:
|
||||
if 'scroll_cursor' in self.doc:
|
||||
self.scroll_cursor = self.doc['scroll_cursor']
|
||||
|
||||
self.has_next = self.doc['has_more_items']
|
||||
self.doc = html.parse(StringIO(self.doc['items_html']), parser)
|
||||
|
||||
|
|
@ -131,38 +136,57 @@ class TrendsPage(TwitterJsonHTMLPage):
|
|||
obj_id = Attr('.', 'data-trend-name')
|
||||
|
||||
|
||||
class TimelineListElement(ListElement):
|
||||
item_xpath = '//*[@data-item-type="tweet"]/div'
|
||||
|
||||
def get_last_id(self):
|
||||
_el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
|
||||
return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
|
||||
|
||||
class item(ItemElement):
|
||||
klass = Thread
|
||||
|
||||
obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
|
||||
obj_title = Format('%s \n\t %s',
|
||||
CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
|
||||
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
|
||||
CleanText('./div/p',
|
||||
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
|
||||
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
|
||||
|
||||
|
||||
class TimelinePage(TwitterJsonHTMLPage):
|
||||
@pagination
|
||||
@method
|
||||
class iter_threads(ListElement):
|
||||
item_xpath = '//*[@data-item-type="tweet"]/div'
|
||||
class iter_threads(TimelineListElement):
|
||||
|
||||
def next_page(self):
|
||||
if self.page.has_next:
|
||||
return u'%s?max_position=%s' % (self.page.url.split('?')[0], self.get_last_id())
|
||||
|
||||
def get_last_id(self):
|
||||
_el = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')[-1]
|
||||
return Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/.+/status/(.+)')(_el)
|
||||
|
||||
class item(ItemElement):
|
||||
klass = Thread
|
||||
class HomeTimelinePage(TwitterJsonHTMLPage):
|
||||
@pagination
|
||||
@method
|
||||
class iter_threads(TimelineListElement):
|
||||
|
||||
obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
|
||||
obj_title = Format('%s \n\t %s',
|
||||
CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a',
|
||||
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
|
||||
CleanText('./div/p',
|
||||
replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
|
||||
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
|
||||
|
||||
|
||||
class HomeTimelinePage(TimelinePage):
|
||||
def next_page(self):
|
||||
if self.page.has_next:
|
||||
return u'%s?max_id=%s' % (self.page.url.split('?')[0], self.get_last_id())
|
||||
|
||||
|
||||
class SearchTimelinePage(TwitterJsonHTMLPage):
|
||||
@pagination
|
||||
@method
|
||||
class iter_threads(TimelineListElement):
|
||||
|
||||
def next_page(self):
|
||||
params = self.env['params']
|
||||
params['scroll_cursor'] = self.page.scroll_cursor
|
||||
if self.page.has_next:
|
||||
return u'%s?%s' % (self.page.url.split('?')[0], urllib.urlencode(params))
|
||||
|
||||
|
||||
class LoginErrorPage(HTMLPage):
|
||||
pass
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue