diff --git a/modules/twitter/browser.py b/modules/twitter/browser.py
index fb1d2e5e..41895477 100644
--- a/modules/twitter/browser.py
+++ b/modules/twitter/browser.py
@@ -20,7 +20,7 @@
 from weboob.tools.browser2 import LoginBrowser, URL, need_login
 from weboob.tools.browser import BrowserIncorrectPassword
 from weboob.capabilities.messages import Message
-from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage
+from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage
 
 
 __all__ = ['TwitterBrowser']
@@ -36,6 +36,7 @@ class TwitterBrowser(LoginBrowser):
     hashtag = URL(u'hashtag/(?P.+)', TwitterBasePage)
     search = URL(u'search\?q="(?P.+)"', TwitterBasePage)
     profil = URL(u'(?P.+)/with_replies', TwitterBasePage)
+    timeline = URL(u'i/timeline', TimelinePage)
     login = URL(u'', LoginPage)
 
     def do_login(self):
@@ -53,7 +54,7 @@ class TwitterBrowser(LoginBrowser):
 
     @need_login
     def iter_threads(self):
-        return self.login.stay_or_go().iter_threads()
+        return self.timeline.go().iter_threads()
 
     def get_trendy_subjects(self):
         if self.username:
diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py
index f1d29c1b..ad145b17 100644
--- a/modules/twitter/pages.py
+++ b/modules/twitter/pages.py
@@ -22,11 +22,11 @@ from weboob.tools.date import DATE_TRANSLATE_FR
 from io import StringIO
 import lxml.html as html
 
-from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound
+from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
 from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
 from weboob.capabilities.messages import Thread, Message
 from weboob.capabilities.base import CapBaseObject
-__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage']
+__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage']
 
 
 class DatetimeFromTimestamp(Filter):
@@ -34,15 +34,21 @@ class DatetimeFromTimestamp(Filter):
         return datetime.fromtimestamp(float(el))
 
 
-class TwitterJsonHMLPage(JsonPage):
+class TwitterJsonHTMLPage(JsonPage):
 
     ENCODING = None
+    has_next = None
 
     def __init__(self, browser, response, *args, **kwargs):
-        super(TwitterJsonHMLPage, self).__init__(browser, response, *args, **kwargs)
+        super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
         self.encoding = self.ENCODING or response.encoding
         parser = html.HTMLParser(encoding=self.encoding)
-        self.doc = html.parse(StringIO(self.doc['module_html']), parser)
+        # self.doc is subscripted by key below, so test key membership; hasattr() never sees mapping keys.
+        if 'module_html' in self.doc:
+            self.doc = html.parse(StringIO(self.doc['module_html']), parser)
+        else:
+            self.has_next = self.doc['has_more_items']
+            self.doc = html.parse(StringIO(self.doc['items_html']), parser)
 
 
 class TwitterBasePage(HTMLPage):
@@ -79,19 +85,6 @@ class LoginPage(TwitterBasePage):
     def get_me(self):
         return Regexp(Link('//a[@data-nav="profile"]'), '/(.+)')(self.doc)
 
-    @method
-    class iter_threads(ListElement):
-        item_xpath = '//li[@data-item-type="tweet"]/div'
-
-        class item(ItemElement):
-            klass = Thread
-
-            obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]'), '/(.+)/status/(.+)', '\\1#\\2')
-            obj_title = Format('%s \n\t %s',
-                               CleanText('./div/div[@class="stream-item-header"]/a'),
-                               CleanText('./div/p'))
-            obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
-
 
 class ThreadPage(HTMLPage):
 
@@ -122,7 +115,7 @@ class ThreadPage(HTMLPage):
             obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
 
 
-class TrendsPage(TwitterJsonHMLPage):
+class TrendsPage(TwitterJsonHTMLPage):
 
     @method
     class get_trendy_subjects(ListElement):
@@ -134,6 +127,27 @@ class TrendsPage(TwitterJsonHMLPage):
             obj_id = Attr('.', 'data-trend-name')
 
 
+class TimelinePage(TwitterJsonHTMLPage):
+    @pagination
+    @method
+    class iter_threads(ListElement):
+        item_xpath = '//*[@data-item-type="tweet"]/div'
+
+        def next_page(self):
+            # Only continue when this page yielded a last tweet id to page from.
+            if self.page.has_next and self.objects:
+                return u'https://twitter.com/i/timeline?max_position=%s' % self.objects.keys()[-1].split('#')[-1]
+
+        class item(ItemElement):
+            klass = Thread
+
+            obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
+            obj_title = Format('%s \n\t %s',
+                               CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a'),
+                               CleanText('./div/p'))
+            obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
+
+
 class LoginErrorPage(HTMLPage):
     pass
 