[twitter] add pagination on timeline

This commit is contained in:
Bezleputh 2014-05-30 22:54:44 +02:00
commit 169b75b72f
2 changed files with 34 additions and 21 deletions

View file

@ -20,7 +20,7 @@
from weboob.tools.browser2 import LoginBrowser, URL, need_login
from weboob.tools.browser import BrowserIncorrectPassword
from weboob.capabilities.messages import Message
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage
__all__ = ['TwitterBrowser']
@ -36,6 +36,7 @@ class TwitterBrowser(LoginBrowser):
hashtag = URL(u'hashtag/(?P<path>.+)', TwitterBasePage)
search = URL(u'search\?q="(?P<path>.+)"', TwitterBasePage)
profil = URL(u'(?P<path>.+)/with_replies', TwitterBasePage)
timeline = URL(u'i/timeline', TimelinePage)
login = URL(u'', LoginPage)
def do_login(self):
@ -53,7 +54,7 @@ class TwitterBrowser(LoginBrowser):
# Lists the threads (tweets) available to the configured account.
# `need_login` is imported from weboob.tools.browser2; presumably it makes
# sure the session is authenticated before the body runs -- TODO confirm.
@need_login
def iter_threads(self):
# NOTE(review): this is a rendered diff whose +/- markers were lost. The two
# return statements below look like the removed/added pair of this commit;
# confirm which one is live before relying on either.
# Older form: reuse (or load) the page bound to the `login` URL.
return self.login.stay_or_go().iter_threads()
# Newer form: request the `timeline` URL (i/timeline, handled by TimelinePage)
# and delegate to that page's iter_threads.
return self.timeline.go().iter_threads()
def get_trendy_subjects(self):
if self.username:

View file

@ -22,11 +22,11 @@ from weboob.tools.date import DATE_TRANSLATE_FR
from io import StringIO
import lxml.html as html
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
from weboob.capabilities.messages import Thread, Message
from weboob.capabilities.base import CapBaseObject
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage']
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage']
class DatetimeFromTimestamp(Filter):
@ -34,15 +34,20 @@ class DatetimeFromTimestamp(Filter):
return datetime.fromtimestamp(float(el))
class TwitterJsonHTMLPage(JsonPage):
    """JSON response that wraps an HTML fragment.

    The decoded JSON is expected to carry the markup either under the
    ``module_html`` key or under the ``items_html`` key.  After construction
    ``self.doc`` is no longer the JSON mapping but the lxml tree parsed from
    that fragment, so XPath-based elements can run against it.

    Attributes:
        ENCODING: optional encoding override; when falsy the encoding of the
            HTTP response is used.
        has_next: value of ``has_more_items`` for ``items_html`` payloads;
            stays ``None`` for ``module_html`` payloads.
    """

    ENCODING = None
    has_next = None

    def __init__(self, browser, response, *args, **kwargs):
        super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
        self.encoding = self.ENCODING or response.encoding
        parser = html.HTMLParser(encoding=self.encoding)

        # self.doc is subscripted like a mapping here, so the payload kind
        # must be detected with a key membership test.  hasattr() looks for an
        # *attribute* and would never see the 'module_html' key.
        if 'module_html' in self.doc:
            self.doc = html.parse(StringIO(self.doc['module_html']), parser)
        else:
            # Remember the flag before self.doc is replaced by the HTML tree.
            self.has_next = self.doc['has_more_items']
            self.doc = html.parse(StringIO(self.doc['items_html']), parser)


# The class used to be spelled without the "T"; keep the old name importable.
TwitterJsonHMLPage = TwitterJsonHTMLPage
class TwitterBasePage(HTMLPage):
@ -79,19 +84,6 @@ class LoginPage(TwitterBasePage):
def get_me(self):
    """Return what the pattern '/(.+)' captures from the profile link.

    The link is the one located by ``//a[@data-nav="profile"]`` in the page
    document; presumably the captured text is the account's screen name.
    """
    profile_link = Link('//a[@data-nav="profile"]')
    after_slash = Regexp(profile_link, '/(.+)')
    return after_slash(self.doc)
# Declarative scraper: presumably yields one Thread per node matched by
# item_xpath (behaviour comes from ListElement/ItemElement -- confirm).
# NOTE(review): in this diff view these lines appear to be the ones removed
# from LoginPage by the commit; verify before treating them as current code.
@method
class iter_threads(ListElement):
# Inner <div> of every <li> flagged data-item-type="tweet".
item_xpath = '//li[@data-item-type="tweet"]/div'
class item(ItemElement):
klass = Thread
# id: the details link matched against /(.+)/status/(.+) and rewritten as
# "<group 1>#<group 2>".
obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]'), '/(.+)/status/(.+)', '\\1#\\2')
# title: header link text and tweet paragraph text combined through the
# '%s \n\t %s' template.
obj_title = Format('%s \n\t %s',
CleanText('./div/div[@class="stream-item-header"]/a'),
CleanText('./div/p'))
# date: the span's data-time attribute passed to DatetimeFromTimestamp, which
# converts it with datetime.fromtimestamp(float(...)).
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
class ThreadPage(HTMLPage):
@ -122,7 +114,7 @@ class ThreadPage(HTMLPage):
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
class TrendsPage(TwitterJsonHMLPage):
class TrendsPage(TwitterJsonHTMLPage):
@method
class get_trendy_subjects(ListElement):
@ -134,6 +126,26 @@ class TrendsPage(TwitterJsonHMLPage):
obj_id = Attr('.', 'data-trend-name')
class TimelinePage(TwitterJsonHTMLPage):
    """Timeline response (``i/timeline``) exposing its tweets as threads."""

    @pagination
    @method
    class iter_threads(ListElement):
        # Inner <div> of every node flagged data-item-type="tweet".
        item_xpath = '//*[@data-item-type="tweet"]/div'

        def next_page(self):
            """Return the URL of the next timeline chunk, or None to stop.

            The ``max_position`` parameter is the part after ``#`` of the
            last key stored in ``self.objects`` (keys follow the obj_id
            format built below, i.e. ``<user>#<status id>``).
            """
            # Stop when the server reports no more items, and also when this
            # page produced no object: there is then no id to continue from
            # and indexing [-1] would raise IndexError.
            if not self.page.has_next or not self.objects:
                return None

            # list(...) works whether keys() is a list or a view object.
            last_id = list(self.objects)[-1]
            return u'https://twitter.com/i/timeline?max_position=%s' % last_id.split('#')[-1]

        class item(ItemElement):
            klass = Thread

            # Each XPath offers two alternatives joined by "|", one per
            # markup variant (stream-item vs. ProfileTweet class names).
            obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
            obj_title = Format('%s \n\t %s',
                               CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a'),
                               CleanText('./div/p'))
            obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
class LoginErrorPage(HTMLPage):
    """HTML page with no members of its own.

    Presumably matched by the browser when a login attempt fails; the URL
    binding lives outside this class -- confirm against the browser.
    """