diff --git a/modules/presseurop/__init__.py b/modules/presseurop/__init__.py new file mode 100644 index 00000000..225006b0 --- /dev/null +++ b/modules/presseurop/__init__.py @@ -0,0 +1,24 @@ +"NewspaperPresseuropBackend init" +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Julien Hebert +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from .backend import NewspaperPresseuropBackend + + +__all__ = ['NewspaperPresseuropBackend'] diff --git a/modules/presseurop/backend.py b/modules/presseurop/backend.py new file mode 100644 index 00000000..816f8080 --- /dev/null +++ b/modules/presseurop/backend.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Julien Hebert +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://www.presseurop.eu"

from weboob.capabilities.messages import ICapMessages, Thread
from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
from weboob.tools.backend import BackendConfig
from weboob.tools.value import Value
from .browser import NewspaperPresseuropBrowser
from .tools import rssid
from weboob.tools.newsfeed import Newsfeed


class NewspaperPresseuropBackend(GenericNewspaperBackend, ICapMessages):
    """Messages backend that lists Presseurop articles from an RSS feed.

    The ``lang`` configuration value selects which per-language feed URL
    is used; it is resolved once, when the backend is constructed.
    """
    MAINTAINER = 'Florent Fourcot'
    EMAIL = 'weboob@flo.fourcot.fr'
    VERSION = '0.a'
    LICENSE = 'AGPLv3+'
    STORAGE = {'seen': {}}
    NAME = 'presseurop'
    DESCRIPTION = u'Presseurop website'
    BROWSER = NewspaperPresseuropBrowser
    # Function handed to Newsfeed to derive an id from each feed entry.
    RSSID = rssid
    # Every selectable language code maps to itself.
    CONFIG = BackendConfig(
        Value('lang',
              label='Lang of articles',
              choices={
                  'fr': 'fr',
                  'de': 'de',
                  'en': 'en',
                  'cs': 'cs',
                  'es': 'es',
                  'it': 'it',
                  'nl': 'nl',
                  'pl': 'pl',
                  'pt': 'pt',
                  'ro': 'ro',
              },
              default='fr'))

    def __init__(self, *args, **kwargs):
        """Initialise the generic backend, then build the feed URL.

        All arguments are forwarded untouched to
        ``GenericNewspaperBackend.__init__``.
        """
        GenericNewspaperBackend.__init__(self, *args, **kwargs)
        lang = self.config['lang'].get()
        self.RSS_FEED = 'http://www.presseurop.eu/%s/rss.xml' % lang

    def iter_threads(self):
        """Yield a ``Thread`` for each entry of the configured RSS feed.

        The entry link is used as the thread identifier; title and date
        are copied from the feed entry.
        """
        feed = Newsfeed(self.RSS_FEED, self.RSSID)
        for entry in feed.iter_entries():
            thread = Thread(entry.link)
            thread.title = entry.title
            thread.date = entry.datetime
            yield thread
from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser


class NewspaperPresseuropBrowser(BaseBrowser):
    """Browser for http://www.presseurop.eu pages.

    Every URL under the site is mapped to ``ArticlePage``. This browser
    performs no login: ``is_logged`` always answers False and ``login``
    is a no-op.
    """
    PAGES = {
        "http://www.presseurop.eu/.*": ArticlePage,
    }

    def is_logged(self):
        """Return False unconditionally; no session state is tracked."""
        return False

    def login(self):
        """Do nothing; this browser has no login step."""

    def fillobj(self, obj, fields):
        """Do nothing; ``obj`` and ``fields`` are ignored."""

    def get_content(self, _id):
        """Open ``_id`` and return the article parsed from the loaded page.

        ``_id`` is handed unchanged to ``self.location`` and then to the
        page's ``get_article``.
        NOTE(review): presumably ``_id`` is the full article URL, since the
        backend builds threads from feed entry links -- confirm with callers.
        """
        # The former debug ``print _id`` was removed: it wrote to stdout on
        # every fetch and is not valid syntax outside Python 2.
        self.location(_id)
        return self.page.get_article(_id)
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage


class ArticlePage(GenericNewsPage):
    """Article page of presseurop, built on the generic news page."""

    def on_loaded(self):
        """Record the document root and the CSS selectors for this site."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "div.content-author>a"
        self.element_body_selector = "div.block"

    def get_body(self):
        """Return the body element serialised by the page parser."""
        return self.parser.tostring(self.get_element_body())

    def get_title(self):
        """Return the generic title cut at the first ``|``.

        Only the text before the first pipe is kept; it is returned as-is,
        without trimming surrounding whitespace.
        """
        full_title = GenericNewsPage.get_title(self)
        return full_title.partition('|')[0]
import re

# Patterns are compiled once at import time rather than on every call.
# Raw strings keep ``\w`` from being read as a string escape sequence.
_ID_RE = re.compile(r"(\w+).([0-9]+).(.*$)")
_URL_ID_RE = re.compile(r".*/([0-9]+)-.*")


def id2url(_id):
    """Return an url built from the three parts of ``_id``.

    ``_id`` must look like ``<word><sep><digits><sep><rest>``, where each
    separator is any single character.

    Raises:
        ValueError: if ``_id`` does not have that shape.
    """
    match = _ID_RE.match(_id)
    if match is None:
        raise ValueError("id doesn't match")
    # NOTE(review): the host is 20minutes.fr although this module belongs to
    # the presseurop backend -- looks like a copy-paste; confirm the intended
    # URL scheme before relying on this function.
    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()


def url2id(url):
    """Return the numeric id embedded in ``url``.

    The id is the run of digits that directly follows a ``/`` and is
    directly followed by ``-``; it is returned as a string.

    Raises:
        ValueError: if ``url`` contains no such segment (previously this
            surfaced as an AttributeError on ``None``).
    """
    match = _URL_ID_RE.match(url)
    if match is None:
        raise ValueError("url doesn't match")
    return match.group(1)


def rssid(self, entry):
    """Return the id of a feed ``entry``, derived from ``entry.link``.

    ``self`` is unused; the parameter is there because the backend stores
    this function as its ``RSSID`` class attribute.
    """
    return url2id(entry.link)