class GenericNewspaperBackend(BaseBackend, ICapMessages):
    """Generic messages backend for a newspaper website driven by an RSS feed.

    Each feed entry becomes a ``Thread`` whose root ``Message`` carries the
    article fetched through ``self.browser``. ``RSS_FEED`` is ``None`` here;
    it is the feed location handed to ``Newsfeed`` by ``iter_threads``.
    """
    MAINTAINER = 'Julien Hebert'
    EMAIL = 'juke@free.fr'
    VERSION = '0.7'
    LICENSE = 'GPLv3'
    STORAGE = {'seen': {}}
    RSS_FEED = None

    def get_thread(self, _id):
        """Return the thread for ``_id`` with its root message filled in.

        ``_id`` is either a ``Thread`` instance (which is completed in place)
        or a thread identifier (a new ``Thread`` is created for it). The
        article is fetched with ``self.browser.get_content``.
        """
        if isinstance(_id, Thread):
            thread = _id
            _id = thread.id
        else:
            thread = None

        with self.browser:
            content = self.browser.get_content(_id)

        if not thread:
            thread = Thread(_id)

        # A thread counts as unread until its id shows up in the 'seen' storage.
        flags = Message.IS_HTML
        if thread.id not in self.storage.get('seen', default={}):
            flags |= Message.IS_UNREAD
        thread.title = content.title
        # Keep a date that is already set on the thread (e.g. by iter_threads).
        if not thread.date:
            thread.date = content.date

        thread.root = Message(
            thread=thread,
            id=0,
            title=content.title,
            sender=content.author,
            receivers=None,
            date=thread.date,
            parent=None,
            content=content.body,
            signature='URL: %s' % content.url,
            flags=flags,
            children=[])
        return thread

    def iter_threads(self):
        """Yield one ``Thread`` (id, title, date) per entry of ``RSS_FEED``."""
        for article in Newsfeed(self.RSS_FEED, rssid).iter_entries():
            thread = Thread(article.id)
            thread.title = article.title
            thread.date = article.datetime
            yield thread

    def fill_thread(self, thread):
        """Fill ``thread`` by delegating to ``get_thread`` and return it."""
        return self.get_thread(thread)

    def iter_unread_messages(self, thread=None):
        """Yield every message flagged ``IS_UNREAD`` across all feed threads.

        The ``thread`` argument is accepted but not used: every thread
        returned by ``iter_threads`` is filled and scanned.
        """
        # A distinct loop name makes it explicit that the parameter is ignored
        # (the original loop silently rebound it).
        for feed_thread in self.iter_threads():
            self.fill_thread(feed_thread)
            for msg in feed_thread.iter_all_messages():
                if msg.flags & msg.IS_UNREAD:
                    yield msg

    def set_message_read(self, message):
        """Record ``message`` as read in the 'seen' storage and save it."""
        seen = self.storage.get(
            'seen',
            message.thread.id,
            'comments',
            default=[])
        # Do not store the same message id twice when a message is marked
        # read repeatedly; build a new list rather than mutating the default.
        if message.id not in seen:
            seen = seen + [message.id]
        self.storage.set('seen', message.thread.id, 'comments', seen)
        self.storage.save()
+ +from .backend import NewspaperEcransBackend +__all__ = ['NewspaperEcransBackend'] diff --git a/weboob/backends/ecrans/backend.py b/weboob/backends/ecrans/backend.py new file mode 100644 index 00000000..5c2eb68b --- /dev/null +++ b/weboob/backends/ecrans/backend.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Julien Hebert +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
    """Messages backend for the Ecrans website (http://www.ecrans.fr)."""
    # Backend identity and where its content comes from.
    NAME = 'ecrans'
    DESCRIPTION = u'Ecrans French news website'
    RSS_FEED = 'http://www.ecrans.fr/spip.php?page=backend'
    BROWSER = NewspaperEcransBrowser
    STORAGE = {'seen': {}}

    # Packaging metadata.
    VERSION = '0.7'
    LICENSE = 'GPLv3'
    MAINTAINER = 'Julien Hebert'
    EMAIL = 'juke@free.fr'
class NewspaperEcransBrowser(BaseBrowser):
    """Browser for the Ecrans website; every URL under the site maps to ArticlePage."""
    PAGES = {"http://www.ecrans.fr/.*": ArticlePage}

    def is_logged(self):
        """Always return False."""
        return False

    def login(self):
        """Do nothing."""

    def fillobj(self, obj, fields):
        """Do nothing; ``obj`` and ``fields`` are ignored."""

    def get_content(self, _id):
        """Open ``_id`` as a location and return the article read from the loaded page."""
        self.location(_id)
        loaded_page = self.page
        return loaded_page.get_article(_id)
class ArticlePage(GenericNewsPage):
    """Article page of the ecrans backend."""

    def on_loaded(self):
        """Set the root element and the CSS selectors for title, author and body."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "h3"
        self.element_author_selector = "p.auteur>a"
        self.element_body_selector = "div.bloc_article_01"

    def get_body(self):
        """Return the body element as a string after stripping unwanted nodes."""
        body = self.get_element_body()

        # "h4" is listed twice on purpose of fidelity: the selector list is
        # passed through exactly as the page definition gives it.
        to_remove = [self.element_title_selector, "p.auteur", "h4", "h4"]
        remove_from_selector_list(body, to_remove)

        to_try_remove = ["p.tag", "div.alire"]
        try_remove_from_selector_list(body, to_try_remove)

        try_drop_tree(body, "script")

        parser = self.browser.parser
        return parser.tostring(body)
class FlashActuPage(GenericNewsPage):
    """Flash-news page of the ecrans backend."""

    def on_loaded(self):
        """Set the root element and the CSS selectors for title, author and body."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "h1"
        self.element_author_selector = "div.name>span"
        self.element_body_selector = "h2"

    def get_body(self):
        """Return the body element as a string, retagged as a ``div``."""
        body = self.get_element_body()
        # The body selector targets an "h2"; it is turned into a "div"
        # before being serialised.
        body.tag = "div"
        parser = self.browser.parser
        return parser.tostring(body)
# Defined in weboob/backends/ecrans/pages/simple.py
class SimplePage(GenericNewsPage):
    """Simple article page of the ecrans backend."""

    def on_loaded(self):
        """Set the root element and the CSS selectors for author and body."""
        self.main_div = self.document.getroot()
        self.element_author_selector = "div.mna-signature"
        self.element_body_selector = "#article"


# Defined in weboob/backends/ecrans/test.py
class EcransTest(BackendTest):
    """Backend test for the 'ecrans' backend."""
    BACKEND = 'ecrans'

    def test_new_messages(self):
        """Iterate over all unread messages; the test only checks that this runs."""
        for _message in self.backend.iter_unread_messages():
            continue
import re

# Identifier layout: a word, a number and a free-form tail. The two "."
# separators are unescaped, so each matches any single character.
# Raw string: "\w" in a plain string literal is an invalid escape sequence.
# Compiled once at import time instead of on every id2url() call.
_ID_PATTERN = re.compile(r"(\w+).([0-9]+).(.*$)")


def id2url(_id):
    """Return the URL built from the three parts of ``_id``.

    Raises ValueError when ``_id`` does not match the expected layout.

    NOTE(review): the URL prefix points at www.20minutes.fr although this
    module lives in the ecrans backend -- looks copied from another backend;
    confirm before relying on it.
    """
    match = _ID_PATTERN.match(_id)
    if match is None:
        raise ValueError("id doesn't match")
    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()


def url2id(url):
    """Return the identifier for ``url``; the URL itself is used as the id."""
    return url


def rssid(entry):
    """Return the identifier of an RSS ``entry``, derived from ``entry.id``."""
    return url2id(entry.id)