diff --git a/weboob/backends/inrocks/backend.py b/weboob/backends/inrocks/backend.py index 7a9ef487..0cb29265 100644 --- a/weboob/backends/inrocks/backend.py +++ b/weboob/backends/inrocks/backend.py @@ -25,8 +25,6 @@ from weboob.tools.newsfeed import Newsfeed from .tools import url2id from .browser import NewspaperInrocksBrowser -__all__ = ['NewspaperInrocksBackend'] - class NewspaperInrocksBackend(BaseBackend, ICapMessages): MAINTAINER = 'Julien Hebert' EMAIL = 'juke@free.fr' @@ -89,7 +87,6 @@ class NewspaperInrocksBackend(BaseBackend, ICapMessages): if msg.flags & msg.IS_UNREAD: yield msg - def set_message_read(self, message): self.storage.set( 'seen', diff --git a/weboob/backends/inrocks/browser.py b/weboob/backends/inrocks/browser.py index 70915bee..dabab44a 100644 --- a/weboob/backends/inrocks/browser.py +++ b/weboob/backends/inrocks/browser.py @@ -19,7 +19,6 @@ from .pages.article import ArticlePage from weboob.tools.browser import BaseBrowser from .tools import id2url -__all__ = ['NewspaperInrocksBrowser'] class NewspaperInrocksBrowser(BaseBrowser): @@ -35,4 +34,4 @@ class NewspaperInrocksBrowser(BaseBrowser): def get_content(self, _id): url = _id self.location(url) - return self.page.article + return self.page.get_article(_id) diff --git a/weboob/backends/inrocks/pages/article.py b/weboob/backends/inrocks/pages/article.py index 11ba5b72..b7fd8753 100644 --- a/weboob/backends/inrocks/pages/article.py +++ b/weboob/backends/inrocks/pages/article.py @@ -17,7 +17,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from weboob.tools.parsers.lxmlparser import select, SelectElementException -from .inrocks import InrocksPage +from .genericArticle import GenericNewsPage def try_remove(base_element, selector): try : @@ -25,18 +25,22 @@ def try_remove(base_element, selector): except (SelectElementException, ValueError): pass -class ArticlePage(InrocksPage): +class ArticlePage(GenericNewsPage): "ArticlePage object for inrocks" + def on_loaded(self): + self.main_div = self.document.getroot() + self.element_author_selector = "div.name>span" + self.element_body_selector = "div.maincol" + def get_body(self): - try_remove(self.element_body, "div.sidebar") - details = select(self.element_body, "div.details", 1) + element_body = self.get_element_body() + try_remove(element_body, "div.sidebar") + details = select(element_body, "div.details", 1) try_remove(details, "div.footer") - header = select(self.element_body, "div.header", 1) + header = select(element_body, "div.header", 1) for selector in ["h1", "div.picture", "div.date", "div.news-single-img", "div.metas_img", "strong"]: try_remove(header, selector) - - return self.browser.parser.tostring(self.element_body) - + return self.browser.parser.tostring(element_body) diff --git a/weboob/backends/minutes20/pages/minutes20.py b/weboob/backends/inrocks/pages/genericArticle.py similarity index 67% rename from weboob/backends/minutes20/pages/minutes20.py rename to weboob/backends/inrocks/pages/genericArticle.py index 07bb8659..0bcef723 100644 --- a/weboob/backends/minutes20/pages/minutes20.py +++ b/weboob/backends/inrocks/pages/genericArticle.py @@ -16,8 +16,6 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from weboob.tools.browser import BasePage from weboob.tools.parsers.lxmlparser import select, SelectElementException -from weboob.backends.minutes20.tools import url2id -__all__ = ['Minutes20Page', 'Article', 'NoAuthorElement'] class NoAuthorElement(SelectElementException): pass @@ -32,13 +30,13 @@ class Article(object): self.author = u'' self.date = None -class Minutes20Page(BasePage): - __main_div = NotImplementedError +class GenericNewsPage(BasePage): __element_body = NotImplementedError __article = Article - __element_author_selector = ValueError - __element_title_selector = ValueError - __element_body_selector = ValueError + __element_title_selector = "h1" + main_div = NotImplementedError + element_body_selector = NotImplementedError + element_author_selector = NotImplementedError def get_body(self): return self.browser.parser.tostring(self.get_element_body()) @@ -50,30 +48,25 @@ class Minutes20Page(BasePage): return None def get_title(self): - return select(self.__main_div, self.__element_title_selector, 1).text_content().strip() + return select( + self.main_div, + self.__element_title_selector, + 1).text_content().strip() def get_element_body(self): - return select(self.__main_div, self.__element_body_selector, 1) + return select(self.main_div, self.element_body_selector, 1) def get_element_author(self): try: - return select(self.__main_div, self.__element_author_selector, 1) + return select(self.main_div, self.element_author_selector, 1) except SelectElementException: raise NoAuthorElement() - def get_article(self): - __article = Article(self.browser, url2id(self.url) ) + def get_article(self, id): + __article = Article(self.browser, id) __article.author = self.get_author() __article.title = self.get_title() __article.url = self.url __article.body = self.get_body() return __article - - def on_loaded(self): - self.__main_div = self.document.getroot() - - self.__element_author_selector = "div.mna-signature" - self.__element_title_selector = "h1" - self.__element_body_selector = "div.mna-body" - diff --git a/weboob/backends/minutes20/backend.py b/weboob/backends/minutes20/backend.py index e9ae34f5..82061c1c 100644 --- a/weboob/backends/minutes20/backend.py +++ b/weboob/backends/minutes20/backend.py @@ -25,8 +25,6 @@ from weboob.tools.newsfeed import Newsfeed from .tools import url2id from .browser import Newspaper20minutesBrowser -__all__ = ['Newspaper20minutesBackend'] - class Newspaper20minutesBackend(BaseBackend, ICapMessages): MAINTAINER = 'Julien Hebert' EMAIL = 'juke@free.fr' diff --git a/weboob/backends/minutes20/browser.py b/weboob/backends/minutes20/browser.py index c0d3bac1..feb46808 100644 --- a/weboob/backends/minutes20/browser.py +++ b/weboob/backends/minutes20/browser.py @@ -16,17 +16,16 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from .pages.article import ArticlePage +from .pages.simple import SimplePage from weboob.tools.browser import BaseBrowser from .tools import id2url -from .pages.minutes20 import Minutes20Page -__all__ = ['Newspaper20minutesBrowser'] class Newspaper20minutesBrowser(BaseBrowser): PAGES = { 'http://www.20minutes.fr/article/?.*': ArticlePage, - 'http://www.20minutes.fr/ledirect/?.*': Minutes20Page, - 'http://www.20minutes.fr/preums/?.*': Minutes20Page + 'http://www.20minutes.fr/ledirect/?.*': SimplePage, + 'http://www.20minutes.fr/preums/?.*': SimplePage } def is_logged(self): @@ -44,4 +43,4 @@ class Newspaper20minutesBrowser(BaseBrowser): raise ValueError("thread id is empty") else: raise - return self.page.get_article() + return self.page.get_article(_id) diff --git a/weboob/backends/minutes20/pages/article.py b/weboob/backends/minutes20/pages/article.py index 6ba57a45..7f71a3b1 100644 --- a/weboob/backends/minutes20/pages/article.py +++ b/weboob/backends/minutes20/pages/article.py @@ -17,7 +17,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from weboob.tools.parsers.lxmlparser import select, SelectElementException -from .minutes20 import Minutes20Page, NoAuthorElement +from .genericArticle import GenericNewsPage, NoAuthorElement def try_remove(base_element, selector): try : @@ -25,8 +25,13 @@ def try_remove(base_element, selector): except (SelectElementException, ValueError): pass -class ArticlePage(Minutes20Page): +class ArticlePage(GenericNewsPage): "ArticlePage object for minutes20" + def on_loaded(self): + self.main_div = self.document.getroot() + self.element_author_selector = "div.mna-signature" + self.element_body_selector = "div.mna-body" + def get_body(self): element_body = self.get_element_body() try_remove(element_body, "div.mna-tools") @@ -36,3 +41,5 @@ class ArticlePage(Minutes20Page): except NoAuthorElement: pass return self.browser.parser.tostring(element_body) + + diff --git a/weboob/backends/inrocks/pages/inrocks.py b/weboob/backends/minutes20/pages/genericArticle.py similarity index 52% rename from weboob/backends/inrocks/pages/inrocks.py rename to weboob/backends/minutes20/pages/genericArticle.py index 5042ce0f..0bcef723 100644 --- a/weboob/backends/inrocks/pages/inrocks.py +++ b/weboob/backends/minutes20/pages/genericArticle.py @@ -16,10 +16,8 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from weboob.tools.browser import BasePage from weboob.tools.parsers.lxmlparser import select, SelectElementException -from weboob.backends.inrocks.tools import url2id -__all__ = ['InrocksPage', 'Article', 'NoAuthorElement'] -class NoAuthorElement(Exception): +class NoAuthorElement(SelectElementException): pass class Article(object): @@ -32,40 +30,43 @@ class Article(object): self.author = u'' self.date = None -class InrocksPage(BasePage): +class GenericNewsPage(BasePage): + __element_body = NotImplementedError + __article = Article + __element_title_selector = "h1" main_div = NotImplementedError - element_body = NotImplementedError - article = Article - element_author_selector = ValueError - element_title_selector = ValueError - element_body_selector = ValueError + element_body_selector = NotImplementedError + element_author_selector = NotImplementedError def get_body(self): - return self.browser.parser.tostring(self.element_body) + return self.browser.parser.tostring(self.get_element_body()) def get_author(self): - try : - return select(self.main_div, self.element_author_selector, 1).text_content().strip() - except SelectElementException: - #TODO: test nombre d'element en retour - pass + try: + return self.get_element_author().text_content().strip() + except NoAuthorElement: + return None def get_title(self): - return select(self.main_div, self.element_title_selector, 1).text_content().strip() + return select( + self.main_div, + self.__element_title_selector, + 1).text_content().strip() - def on_loaded(self): - self.article = Article(self.browser, url2id(self.url) ) - self.main_div = self.document.getroot() + def get_element_body(self): + return select(self.main_div, self.element_body_selector, 1) - self.element_author_selector = "div.name>span" - self.element_title_selector = "h1" - self.element_body_selector = "div.maincol" + def get_element_author(self): + try: + return select(self.main_div, self.element_author_selector, 1) + except SelectElementException: + raise NoAuthorElement() - self.element_body = select(self.main_div, self.element_body_selector, 1) + def get_article(self, id): + __article = Article(self.browser, id) + __article.author = self.get_author() + __article.title = self.get_title() + __article.url = self.url + __article.body = self.get_body() - self.article.author = self.get_author() - self.article.title = self.get_title() - self.article.url = self.url - self.article.body = self.get_body() - - + return __article diff --git a/weboob/backends/minutes20/pages/simple.py b/weboob/backends/minutes20/pages/simple.py new file mode 100644 index 00000000..8d98323c --- /dev/null +++ b/weboob/backends/minutes20/pages/simple.py @@ -0,0 +1,34 @@ +"ArticlePage object for minutes20" +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Julien Hebert +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +from weboob.tools.parsers.lxmlparser import select, SelectElementException +from .genericArticle import GenericNewsPage, NoAuthorElement + +def try_remove(base_element, selector): + try : + base_element.remove(select(base_element, selector, 1 )) + except (SelectElementException, ValueError): + pass + +class SimplePage(GenericNewsPage): + "ArticlePage object for minutes20" + def on_loaded(self): + self.main_div = self.document.getroot() + self.element_author_selector = "div.mna-signature" + self.element_body_selector = "div.mna-body" +