All daily news in only one thread
This commit is contained in:
parent
1c4416b4cf
commit
abcc4067d2
3 changed files with 58 additions and 8 deletions
|
|
@ -47,8 +47,24 @@ class NewspaperPresseuropBackend(GenericNewspaperBackend, ICapMessages):
|
||||||
self.RSS_FEED = 'http://www.presseurop.eu/%s/rss.xml' % (self.config['lang'].get())
|
self.RSS_FEED = 'http://www.presseurop.eu/%s/rss.xml' % (self.config['lang'].get())
|
||||||
|
|
||||||
def iter_threads(self):
    """Iterate over news threads built from the RSS feed.

    Regular articles yield one thread each.  "news-brief" articles are
    aggregated: all briefs sharing the same day are represented by a
    single daily thread, yielded the first time that day is seen.
    """
    seen_days = []
    for article in Newsfeed(self.RSS_FEED, self.RSSID).iter_entries():
        if "/news-brief/" in article.link:
            day = self.browser.get_daily_date(article.link)
            if day and day not in seen_days:
                seen_days.append(day)
                # One aggregated thread per day.  `thread_id` avoids
                # shadowing the builtin `id`, and `thread_date` avoids
                # shadowing a `date` import.
                thread_id, title, thread_date = self.browser.get_daily_infos(day)
                thread = Thread(thread_id)
                thread.title = title
                thread.date = thread_date
                yield thread
            elif day is None:
                # The day could not be determined: fall back to a
                # standalone thread for this brief.
                thread = Thread(article.link)
                thread.title = article.title
                thread.date = article.datetime
                yield thread
            # Otherwise the day was already yielded: this brief is
            # covered by the existing daily thread, so emit nothing.
        else:
            # Ordinary article: one thread per entry.
            thread = Thread(article.link)
            thread.title = article.title
            thread.date = article.datetime
            yield thread
|
|
@ -18,7 +18,9 @@
|
||||||
# You should have received a copy of the GNU Affero General Public License
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from .pages.article import PresseuropPage, CartoonPage, DailyTitlesPage
|
from datetime import date, datetime, time
|
||||||
|
from .pages.article import PresseuropPage, CartoonPage, DailySinglePage,\
|
||||||
|
DailyTitlesPage
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
from weboob.tools.ordereddict import OrderedDict
|
from weboob.tools.ordereddict import OrderedDict
|
||||||
|
|
||||||
|
|
@ -26,8 +28,8 @@ from weboob.tools.ordereddict import OrderedDict
|
||||||
class NewspaperPresseuropBrowser(BaseBrowser):
|
class NewspaperPresseuropBrowser(BaseBrowser):
|
||||||
"NewspaperPresseuropBrowser class"
|
"NewspaperPresseuropBrowser class"
|
||||||
PAGES = OrderedDict((
|
PAGES = OrderedDict((
|
||||||
("http://www.presseurop.eu/.*/todays-front-pages/.*", DailyTitlesPage),
|
("http://www.presseurop.eu/.*/news-brief/.*", DailySinglePage),
|
||||||
("http://www.presseurop.eu/.*/front-page/.*", DailyTitlesPage),
|
("http://www.presseurop.eu/.*/today/.*", DailyTitlesPage),
|
||||||
("http://www.presseurop.eu/.*/cartoon/.*", CartoonPage),
|
("http://www.presseurop.eu/.*/cartoon/.*", CartoonPage),
|
||||||
("http://www.presseurop.eu/.*", PresseuropPage),
|
("http://www.presseurop.eu/.*", PresseuropPage),
|
||||||
))
|
))
|
||||||
|
|
@ -45,3 +47,17 @@ class NewspaperPresseuropBrowser(BaseBrowser):
|
||||||
"return page article content"
|
"return page article content"
|
||||||
self.location(_id)
|
self.location(_id)
|
||||||
return self.page.get_article(_id)
|
return self.page.get_article(_id)
|
||||||
|
|
||||||
|
def get_daily_date(self, _id):
    """Browse to *_id* and return the daily date parsed from that page.

    :param _id: URL of the news-brief page to inspect.
    :return: whatever the page object's ``get_daily_date`` yields
        (a day identifier, or ``None`` when none is found).
    """
    self.location(_id)
    current_page = self.page
    return current_page.get_daily_date()
|
def get_daily_infos(self, _id):
    """Build the metadata tuple for the daily thread of day *_id*.

    :param _id: day identifier formatted ``YYYY-MM-DD``.
    :return: ``(url, title, datetime)`` for the "today" page of that day.
    """
    url = "http://www.presseurop.eu/fr/today/" + _id
    self.location(url)
    page_title = self.page.get_title()
    # Daily threads are pinned at midnight of their day.
    parts = [int(piece) for piece in _id.split('-')]
    day_start = datetime.combine(date(*parts), time(0, 0, 0))
    return url, page_title, day_start
|
||||||
|
|
@ -50,7 +50,25 @@ class DailyTitlesPage(PresseuropPage):
|
||||||
self.main_div = self.document.getroot()
|
self.main_div = self.document.getroot()
|
||||||
self.element_title_selector = "title"
|
self.element_title_selector = "title"
|
||||||
self.element_author_selector = "div[id=content-author]>a"
|
self.element_author_selector = "div[id=content-author]>a"
|
||||||
self.element_body_selector = "ul.articlebody"
|
self.element_body_selector = "section.main"
|
||||||
|
|
||||||
|
def get_body(self):
    # Clean the article body: strip social/related/share widgets,
    # then rewrite relative URLs against the site root.
    element_body = self.get_element_body()
    try_drop_tree(self.parser, element_body, "li.button-social")
    try_drop_tree(self.parser, element_body, "aside.articlerelated")
    try_drop_tree(self.parser, element_body, "div.sharecount")
    clean_relativ_urls(element_body, "http://presseurop.eu")
    # NOTE(review): no explicit return — as written this mutates the
    # body in place and returns None; confirm callers expect that
    # rather than the cleaned element/serialized body.
|
|
||||||
|
|
||||||
|
class DailySinglePage(PresseuropPage):
    """Page for a single "news-brief" article.

    Used to determine which day's aggregated daily thread a brief
    belongs to.
    """

    def get_daily_date(self):
        """Return the day identifier (``YYYY-MM-DD`` string) for this brief.

        The day is read from the href of the first link in the
        "today" carousel.  Returns ``None`` when the carousel is
        missing or malformed instead of raising ``IndexError``.
        """
        carousels = self.document.getroot().xpath(
            "//ul[@class='carousel-skin carousel-today']")
        if not carousels:  # truthiness instead of len(...) > 0
            return None
        links = carousels[0].xpath('li/a')
        if not links:
            # Robustness: carousel present but without any link.
            return None
        # href shape: /<lang>/today/<YYYY-MM-DD>/... → segment 3.
        segments = links[0].attrib['href'].split('/')
        return segments[3] if len(segments) > 3 else None
|
|
||||||
|
|
||||||
class CartoonPage(PresseuropPage):
|
class CartoonPage(PresseuropPage):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue