[twitter] add pagination on timeline
This commit is contained in:
parent
9cfdcd30f8
commit
169b75b72f
2 changed files with 34 additions and 21 deletions
|
|
@ -20,7 +20,7 @@
|
||||||
from weboob.tools.browser2 import LoginBrowser, URL, need_login
|
from weboob.tools.browser2 import LoginBrowser, URL, need_login
|
||||||
from weboob.tools.browser import BrowserIncorrectPassword
|
from weboob.tools.browser import BrowserIncorrectPassword
|
||||||
from weboob.capabilities.messages import Message
|
from weboob.capabilities.messages import Message
|
||||||
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage
|
from .pages import LoginPage, LoginErrorPage, ThreadPage, TwitterBasePage, Tweet, TrendsPage, TimelinePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['TwitterBrowser']
|
__all__ = ['TwitterBrowser']
|
||||||
|
|
@ -36,6 +36,7 @@ class TwitterBrowser(LoginBrowser):
|
||||||
hashtag = URL(u'hashtag/(?P<path>.+)', TwitterBasePage)
|
hashtag = URL(u'hashtag/(?P<path>.+)', TwitterBasePage)
|
||||||
search = URL(u'search\?q="(?P<path>.+)"', TwitterBasePage)
|
search = URL(u'search\?q="(?P<path>.+)"', TwitterBasePage)
|
||||||
profil = URL(u'(?P<path>.+)/with_replies', TwitterBasePage)
|
profil = URL(u'(?P<path>.+)/with_replies', TwitterBasePage)
|
||||||
|
timeline = URL(u'i/timeline', TimelinePage)
|
||||||
login = URL(u'', LoginPage)
|
login = URL(u'', LoginPage)
|
||||||
|
|
||||||
def do_login(self):
|
def do_login(self):
|
||||||
|
|
@ -53,7 +54,7 @@ class TwitterBrowser(LoginBrowser):
|
||||||
|
|
||||||
@need_login
def iter_threads(self):
    """Open the timeline URL and return the threads listed on that page.

    The listing itself is delegated to the ``iter_threads`` element of
    the page object returned by ``self.timeline.go()``.
    """
    timeline_page = self.timeline.go()
    return timeline_page.iter_threads()
|
||||||
|
|
||||||
def get_trendy_subjects(self):
|
def get_trendy_subjects(self):
|
||||||
if self.username:
|
if self.username:
|
||||||
|
|
|
||||||
|
|
@ -22,11 +22,11 @@ from weboob.tools.date import DATE_TRANSLATE_FR
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
import lxml.html as html
|
import lxml.html as html
|
||||||
|
|
||||||
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound
|
from weboob.tools.browser2.page import HTMLPage, JsonPage, method, ListElement, ItemElement, FormNotFound, pagination
|
||||||
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
|
from weboob.tools.browser2.filters import CleanText, Format, Link, Regexp, Env, DateTime, Attr, Filter
|
||||||
from weboob.capabilities.messages import Thread, Message
|
from weboob.capabilities.messages import Thread, Message
|
||||||
from weboob.capabilities.base import CapBaseObject
|
from weboob.capabilities.base import CapBaseObject
|
||||||
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage']
|
__all__ = ['LoginPage', 'LoginErrorPage', 'ThreadPage', 'TwitterBasePage', 'Tweet', 'TrendsPage', 'TimelinePage']
|
||||||
|
|
||||||
|
|
||||||
class DatetimeFromTimestamp(Filter):
|
class DatetimeFromTimestamp(Filter):
|
||||||
|
|
@ -34,15 +34,20 @@ class DatetimeFromTimestamp(Filter):
|
||||||
return datetime.fromtimestamp(float(el))
|
return datetime.fromtimestamp(float(el))
|
||||||
|
|
||||||
|
|
||||||
class TwitterJsonHTMLPage(JsonPage):
    """JSON page whose payload carries an HTML fragment.

    After the parent constructor has run, ``self.doc`` is indexed by key.
    The HTML fragment is taken from the ``module_html`` entry when it is
    present, otherwise from ``items_html``; the fragment is parsed with
    lxml and the resulting tree replaces ``self.doc``.
    """

    # Forced encoding for the embedded HTML; None falls back to the
    # encoding announced by the response.
    ENCODING = None
    # Copied from the ``has_more_items`` entry for ``items_html`` payloads;
    # stays None for ``module_html`` payloads.
    has_next = None

    def __init__(self, browser, response, *args, **kwargs):
        super(TwitterJsonHTMLPage, self).__init__(browser, response, *args, **kwargs)
        self.encoding = self.ENCODING or response.encoding
        parser = html.HTMLParser(encoding=self.encoding)

        # Membership test, not hasattr(): the document is accessed by key,
        # and hasattr() on a dict never sees its keys, which would send
        # every ``module_html`` payload down the ``items_html`` branch.
        if 'module_html' in self.doc:
            fragment = self.doc['module_html']
        else:
            self.has_next = self.doc['has_more_items']
            fragment = self.doc['items_html']

        self.doc = html.parse(StringIO(fragment), parser)
|
||||||
|
|
||||||
|
|
||||||
class TwitterBasePage(HTMLPage):
|
class TwitterBasePage(HTMLPage):
|
||||||
|
|
@ -79,19 +84,6 @@ class LoginPage(TwitterBasePage):
|
||||||
def get_me(self):
    """Return the profile identifier read from the navigation link.

    Takes the link of the ``data-nav="profile"`` anchor and keeps the
    group captured by the ``/(.+)`` pattern.
    """
    profile_link = Link('//a[@data-nav="profile"]')
    extract_name = Regexp(profile_link, '/(.+)')
    return extract_name(self.doc)
|
||||||
|
|
||||||
@method
|
|
||||||
class iter_threads(ListElement):
|
|
||||||
item_xpath = '//li[@data-item-type="tweet"]/div'
|
|
||||||
|
|
||||||
class item(ItemElement):
|
|
||||||
klass = Thread
|
|
||||||
|
|
||||||
obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]'), '/(.+)/status/(.+)', '\\1#\\2')
|
|
||||||
obj_title = Format('%s \n\t %s',
|
|
||||||
CleanText('./div/div[@class="stream-item-header"]/a'),
|
|
||||||
CleanText('./div/p'))
|
|
||||||
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
|
|
||||||
|
|
||||||
|
|
||||||
class ThreadPage(HTMLPage):
|
class ThreadPage(HTMLPage):
|
||||||
|
|
||||||
|
|
@ -122,7 +114,7 @@ class ThreadPage(HTMLPage):
|
||||||
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
|
obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time'))
|
||||||
|
|
||||||
|
|
||||||
class TrendsPage(TwitterJsonHMLPage):
|
class TrendsPage(TwitterJsonHTMLPage):
|
||||||
|
|
||||||
@method
|
@method
|
||||||
class get_trendy_subjects(ListElement):
|
class get_trendy_subjects(ListElement):
|
||||||
|
|
@ -134,6 +126,26 @@ class TrendsPage(TwitterJsonHMLPage):
|
||||||
obj_id = Attr('.', 'data-trend-name')
|
obj_id = Attr('.', 'data-trend-name')
|
||||||
|
|
||||||
|
|
||||||
|
class TimelinePage(TwitterJsonHTMLPage):
    """Timeline page: lists tweets as ``Thread`` objects, with pagination."""

    @pagination
    @method
    class iter_threads(ListElement):
        # One item per tweet container found in the HTML fragment.
        item_xpath = '//*[@data-item-type="tweet"]/div'

        def next_page(self):
            """Return the URL of the next timeline chunk, or None to stop.

            The ``max_position`` parameter is the part after ``#`` of the
            last collected object id (ids are built as ``<user>#<status>``
            by ``obj_id`` below).
            """
            if not self.page.has_next:
                return None

            # list() keeps this working where keys() returns a
            # non-subscriptable view.
            # NOTE(review): assumes self.objects preserves insertion order.
            collected_ids = list(self.objects.keys())
            if not collected_ids:
                # Nothing was collected, so there is no position to resume from.
                return None

            last_status = collected_ids[-1].split('#')[-1]
            return u'https://twitter.com/i/timeline?max_position=%s' % last_status

        class item(ItemElement):
            klass = Thread

            # Each xpath accepts two markup variants, joined with "|".
            obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2')
            obj_title = Format('%s \n\t %s',
                               CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a'),
                               CleanText('./div/p'))
            obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time'))
|
||||||
|
|
||||||
|
|
||||||
class LoginErrorPage(HTMLPage):
    """Marker page with no behaviour of its own; all of it comes from HTMLPage.

    NOTE(review): presumably matched when a login attempt fails; confirm
    against the browser's URL table.
    """
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue