diff --git a/modules/liberation/__init__.py b/modules/liberation/__init__.py new file mode 100644 index 00000000..13df3695 --- /dev/null +++ b/modules/liberation/__init__.py @@ -0,0 +1,24 @@ +"NewspaperLibeBackend init" +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Florent Fourcot +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from .backend import NewspaperLibeBackend + + +__all__ = ['NewspaperLibeBackend'] diff --git a/modules/liberation/backend.py b/modules/liberation/backend.py new file mode 100644 index 00000000..7ae136ee --- /dev/null +++ b/modules/liberation/backend.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2013 Florent Fourcot +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://liberation.fr"

from weboob.tools.newsfeed import Newsfeed
from weboob.capabilities.messages import ICapMessages, Thread
from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
from weboob.tools.backend import BackendConfig
from weboob.tools.value import Value
from .browser import NewspaperLibeBrowser
from .tools import rssid, url2id


class NewspaperLibeBackend(GenericNewspaperBackend, ICapMessages):
    """Backend that exposes a Libération RSS feed as message threads.

    The feed to read is selected through the ``feed`` configuration value;
    its key is appended to the site's RSS base URL in ``__init__``.
    """

    # Module metadata.
    MAINTAINER = u'Florent Fourcot'
    EMAIL = 'weboob@flo.fourcot.fr'
    VERSION = '0.h'
    LICENSE = 'AGPLv3+'
    STORAGE = {'seen': {}}
    NAME = 'liberation'
    DESCRIPTION = u'Libération newspaper website'

    # Browser class and the id helpers imported from the sibling modules.
    BROWSER = NewspaperLibeBrowser
    RSSID = staticmethod(rssid)
    URL2ID = staticmethod(url2id)
    RSSSIZE = 30

    # Keys are the feed identifiers substituted into the RSS URL.
    CONFIG = BackendConfig(
        Value(
            'feed',
            label='RSS feed',
            choices={
                '9': u'A la une sur Libération',
                '10': u'Monde',
                '11': u'Politiques',
                '12': u'Société',
                '13': u'Économie',
                '14': u'Sports',
                '17': u'Labo: audio, vidéo, diapos, podcasts',
                '18': u'Rebonds',
                '44': u'Les chroniques de Libération',
                '53': u'Écrans',
                '54': u'Next',
                '58': u'Cinéma',
            },
        )
    )

    def __init__(self, *args, **kwargs):
        """Initialise the generic backend, then build ``RSS_FEED``.

        All positional and keyword arguments are forwarded unchanged to
        ``GenericNewspaperBackend.__init__``.
        """
        GenericNewspaperBackend.__init__(self, *args, **kwargs)
        feed_id = self.config['feed'].get()
        self.RSS_FEED = "http://www.liberation.fr/rss/%s" % feed_id

    def iter_threads(self):
        """Yield one ``Thread`` per entry of the configured RSS feed.

        Each thread carries the entry's id, title and datetime.
        """
        feed = Newsfeed(self.RSS_FEED, self.RSSID)
        for entry in feed.iter_entries():
            thread = Thread(entry.id)
            thread.title = entry.title
            thread.date = entry.datetime
            yield thread


# ---------------------------------------------------------------------------
# modules/liberation/browser.py (new file, mode 100644)
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser


class NewspaperLibeBrowser(BaseBrowser):
    """Browser for liberation.fr articles.

    Every matching URL is handled by ``ArticlePage``. No authentication is
    performed by this browser.
    """

    # The dot of "liberation.fr" is escaped so it only matches a literal
    # '.', not an arbitrary character.
    PAGES = {r"http://.*liberation\.fr/.*": ArticlePage}

    def is_logged(self):
        """Always return False; this browser never logs in."""
        return False

    def login(self):
        """Do nothing."""
        pass

    def fillobj(self, obj, fields):
        """Do nothing; the arguments are ignored."""
        pass

    def get_content(self, _id):
        """Return the article content for ``_id``.

        ``_id`` is passed to ``self.location`` to load the page, then to the
        loaded page's ``get_article``, whose result is returned.
        """
        self.location(_id)
        return self.page.get_article(_id)


# ---------------------------------------------------------------------------
# modules/liberation/pages/__init__.py (new empty file, mode 100644)
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# modules/liberation/pages/article.py (new file, mode 100644)
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.browser import BrokenPageError
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage,\
    try_drop_tree, clean_relativ_urls, NoBodyElement, NoAuthorElement,\
    NoneMainDiv


class ArticlePage(GenericNewsPage):
    """Article page of liberation.fr.

    Sets the CSS selectors used to locate the title, author and body, and
    post-processes the values extracted with them.
    """

    def on_loaded(self):
        """Set the root element and the selectors for this page."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "span.author"
        self.element_body_selector = "div.article-body"

    def get_body(self):
        """Return the article body.

        The serialized body element is returned when it exists. Otherwise
        the ``content`` attribute of the page's ``description`` meta tag is
        used as a fallback.

        Raises:
            BrokenPageError: if neither a body element nor a usable
                description meta tag is present.
        """
        # Pages on this host use a different body container.
        if 'feuilletons.blogs.liberation.fr' in self.url:
            self.element_body_selector = "div.entry-content"
        try:
            return self.parser.tostring(self.get_element_body())
        except NoBodyElement:
            metas = self.document.xpath('//meta[@name="description"]')
            # Report a broken page explicitly rather than letting an
            # IndexError/KeyError escape from the fallback lookup.
            if not metas or 'content' not in metas[0].attrib:
                raise BrokenPageError(
                    'Unable to find an article body or a description meta tag')
            return metas[0].attrib['content']

    def get_title(self):
        """Return the page title with the u' - Libération' text removed."""
        title = GenericNewsPage.get_title(self)
        return title.replace(u' - Libération', '')

    def get_author(self):
        """Return the author name, or None when no author element is found.

        A leading 'Par ' is dropped from the extracted text.
        """
        try:
            author = self.get_element_author().text_content().strip()
        except (NoAuthorElement, NoneMainDiv):
            # TODO: emit a warning
            return None
        if author.startswith('Par '):
            return author.split('Par ', 1)[1]
        return author


# ---------------------------------------------------------------------------
# modules/liberation/test.py (new file, mode 100644)
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.tools.test import BackendTest


__all__ = ['LiberationTest']


class LiberationTest(BackendTest):
    """Smoke test for the 'liberation' backend."""

    BACKEND = 'liberation'

    def test_new_messages(self):
        """Consume every unread message; nothing is asserted on them."""
        for _ in self.backend.iter_unread_messages():
            pass


# ---------------------------------------------------------------------------
# modules/liberation/tools.py (new file, mode 100644)
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright(C) 2013 Florent Fourcot
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


import re


def url2id(url):
    """Extract an identifier from ``url``.

    Everything from the first '0Dxtor' marker onwards is discarded, then the
    text after the last '0I' marker is returned. If a marker is absent, the
    corresponding step leaves the string unchanged.
    """
    # NOTE(review): the markers look like escaped URL fragments from a feed
    # proxy - confirm against real feed links.
    head = url.partition('0Dxtor')[0]
    return head.rpartition('0I')[2]


def rssid(entry):
    """Return the ``link`` attribute of ``entry`` as its identifier."""
    return entry.link