From dec4d08184527cbbd68b930ecb7e2aa5296ec7b9 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Thu, 22 May 2014 15:16:43 +0200 Subject: [PATCH] [twitter] improve page parsing --- modules/twitter/pages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/twitter/pages.py b/modules/twitter/pages.py index 60aac702..584780dc 100644 --- a/modules/twitter/pages.py +++ b/modules/twitter/pages.py @@ -35,16 +35,16 @@ class DatetimeFromTimestamp(Filter): class TwitterBasePage(HTMLPage): @method class iter_threads(ListElement): - item_xpath = '//li[@data-item-type="tweet"]/div' + item_xpath = '//*[@data-item-type="tweet"]/div' class item(ItemElement): klass = Thread - obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]'), '/(.+)/status/(.+)', '\\1#\\2') + obj_id = Regexp(Link('./div/div/a[@class="details with-icn js-details"]|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]'), '/(.+)/status/(.+)', '\\1#\\2') obj_title = Format('%s \n\t %s', - CleanText('./div/div[@class="stream-item-header"]/a'), + CleanText('./div/div[@class="stream-item-header"]/a|./div/div[@class="ProfileTweet-authorDetails"]/a'), CleanText('./div/p')) - obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span', 'data-time')) + obj_date = DatetimeFromTimestamp(Attr('./div/div[@class="stream-item-header"]/small/a/span|./div/div/span/a[@class="ProfileTweet-timestamp js-permalink js-nav js-tooltip"]/span', 'data-time')) class LoginPage(TwitterBasePage):