From 5a7ae0a5bdbef0683f2d179e9a7b502451bdff3f Mon Sep 17 00:00:00 2001 From: Juke Date: Mon, 21 Feb 2011 23:31:38 +0100 Subject: [PATCH] move genericArticle to tools --- weboob/backends/inrocks/browser.py | 2 - weboob/backends/inrocks/pages/article.py | 12 +-- weboob/backends/lefigaro/pages/article.py | 24 +++--- weboob/backends/lefigaro/pages/flashactu.py | 9 +-- .../backends/lefigaro/pages/genericArticle.py | 81 ------------------- weboob/backends/lefigaro/pages/simple.py | 2 +- weboob/backends/minutes20/pages/article.py | 9 +-- .../minutes20/pages/genericArticle.py | 81 ------------------- weboob/backends/minutes20/pages/simple.py | 3 +- .../inrocks/pages => tools}/genericArticle.py | 7 ++ 10 files changed, 24 insertions(+), 206 deletions(-) delete mode 100644 weboob/backends/lefigaro/pages/genericArticle.py delete mode 100644 weboob/backends/minutes20/pages/genericArticle.py rename weboob/{backends/inrocks/pages => tools}/genericArticle.py (93%) diff --git a/weboob/backends/inrocks/browser.py b/weboob/backends/inrocks/browser.py index dabab44a..179af218 100644 --- a/weboob/backends/inrocks/browser.py +++ b/weboob/backends/inrocks/browser.py @@ -17,13 +17,11 @@ from .pages.article import ArticlePage from weboob.tools.browser import BaseBrowser -from .tools import id2url class NewspaperInrocksBrowser(BaseBrowser): PAGES = { - 'http://www.lesinrocks.com/actualite/actu-article/t/60121/date/2011-02-15/article/accuse-davoir-participe-a-une-mutinerie-un-detenu-porte-plainte/': ArticlePage, '.*': ArticlePage, } diff --git a/weboob/backends/inrocks/pages/article.py b/weboob/backends/inrocks/pages/article.py index 473224f8..c37e9300 100644 --- a/weboob/backends/inrocks/pages/article.py +++ b/weboob/backends/inrocks/pages/article.py @@ -16,22 +16,16 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.parsers.lxmlparser import select, SelectElementException -from .genericArticle import GenericNewsPage - -def try_remove(base_element, selector): - try : - base_element.remove(select(base_element, selector, 1 )) - except (SelectElementException, ValueError): - pass +from weboob.tools.parsers.lxmlparser import select +from weboob.tools.genericArticle import GenericNewsPage, try_remove class ArticlePage(GenericNewsPage): "ArticlePage object for inrocks" def on_loaded(self): self.main_div = self.document.getroot() self.element_author_selector = "div.name>span" - self.element_body_selector = "div.maincol" self.element_title_selector = "h1" + self.element_body_selector = "div.maincol" def get_body(self): element_body = self.get_element_body() diff --git a/weboob/backends/lefigaro/pages/article.py b/weboob/backends/lefigaro/pages/article.py index 56ab9981..19d8c337 100644 --- a/weboob/backends/lefigaro/pages/article.py +++ b/weboob/backends/lefigaro/pages/article.py @@ -16,31 +16,25 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.parsers.lxmlparser import select, SelectElementException -from .genericArticle import GenericNewsPage - -def try_remove(base_element, selector): - try : - base_element.remove(select(base_element, selector, 1 )) - except (SelectElementException, ValueError): - pass +from weboob.tools.parsers.lxmlparser import select +from weboob.tools.genericArticle import GenericNewsPage, try_remove class ArticlePage(GenericNewsPage): "ArticlePage object for inrocks" def on_loaded(self): self.main_div = self.document.getroot() self.element_author_selector = "div.name>span" - self.element_body_selector = "#article" self.element_title_selector = "h1" + self.element_body_selector = "#article" def get_body(self): - element_body = self.get_element_body() - h1 = select(element_body, self.element_title_selector, 1) - div_infos = select(element_body, "div.infos", 1) - toolsbar = select(element_body, "#toolsbar", 1) - el_script = select(element_body, "script", 1) + element_body = self.get_element_body() + h1_title = select(element_body, self.element_title_selector, 1) + div_infos = select(element_body, "div.infos", 1) + toolsbar = select(element_body, "#toolsbar", 1) + el_script = select(element_body, "script", 1) - element_body.remove(h1) + element_body.remove(h1_title) element_body.remove(div_infos) element_body.remove(toolsbar) diff --git a/weboob/backends/lefigaro/pages/flashactu.py b/weboob/backends/lefigaro/pages/flashactu.py index 451c2b91..69e88138 100644 --- a/weboob/backends/lefigaro/pages/flashactu.py +++ b/weboob/backends/lefigaro/pages/flashactu.py @@ -16,14 +16,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.parsers.lxmlparser import select, SelectElementException -from .genericArticle import GenericNewsPage - -def try_remove(base_element, selector): - try : - base_element.remove(select(base_element, selector, 1 )) - except (SelectElementException, ValueError): - pass +from weboob.tools.genericArticle import GenericNewsPage class FlashActuPage(GenericNewsPage): "ArticlePage object for inrocks" diff --git a/weboob/backends/lefigaro/pages/genericArticle.py b/weboob/backends/lefigaro/pages/genericArticle.py deleted file mode 100644 index 748c6c97..00000000 --- a/weboob/backends/lefigaro/pages/genericArticle.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Julien Hebert -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.browser import BasePage -from weboob.tools.parsers.lxmlparser import select, SelectElementException - -class NoAuthorElement(SelectElementException): - pass - -class NoneMainDiv(AttributeError): - pass - -class Article(object): - author = u'' - - def __init__(self, browser, _id): - self.browser = browser - self.id = _id - self.title = u'' - self.body = u'' - self.url = u'' - self.date = None - -class GenericNewsPage(BasePage): - __element_body = NotImplementedError - __article = Article - element_title_selector = NotImplementedError - main_div = NotImplementedError - element_body_selector = NotImplementedError - element_author_selector = NotImplementedError - - def get_body(self): - return self.browser.parser.tostring(self.get_element_body()) - - def get_author(self): - try: - return self.get_element_author().text_content().strip() - except NoAuthorElement: - return self.__article.author - - def get_title(self): - return select( - self.main_div, - self.element_title_selector, - 1).text_content().strip() - - def get_element_body(self): - return select(self.main_div, self.element_body_selector, 1) - - def get_element_author(self): - try: - return select(self.main_div, self.element_author_selector, 1) - except SelectElementException: - raise NoAuthorElement() - except AttributeError: - if self.main_div == None: - raise NoneMainDiv("main_div is none on %s" % (self.browser)) - else: - raise - - def get_article(self, _id): - __article = Article(self.browser, _id) - __article.author = self.get_author() - __article.title = self.get_title() - __article.url = self.url - __article.body = self.get_body() - - return __article diff --git a/weboob/backends/lefigaro/pages/simple.py b/weboob/backends/lefigaro/pages/simple.py index 706e68c2..5782c31e 100644 --- a/weboob/backends/lefigaro/pages/simple.py +++ b/weboob/backends/lefigaro/pages/simple.py @@ -16,7 +16,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from .genericArticle import GenericNewsPage +from weboob.tools.genericArticle import GenericNewsPage class SimplePage(GenericNewsPage): "ArticlePage object for minutes20" diff --git a/weboob/backends/minutes20/pages/article.py b/weboob/backends/minutes20/pages/article.py index 287e8b62..9c6c8409 100644 --- a/weboob/backends/minutes20/pages/article.py +++ b/weboob/backends/minutes20/pages/article.py @@ -16,16 +16,9 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.parsers.lxmlparser import select, SelectElementException -from .genericArticle import NoAuthorElement +from weboob.tools.genericArticle import NoAuthorElement, try_remove from .simple import SimplePage -def try_remove(base_element, selector): - try : - base_element.remove(select(base_element, selector, 1 )) - except (SelectElementException, ValueError): - pass - class ArticlePage(SimplePage): "ArticlePage object for minutes20" diff --git a/weboob/backends/minutes20/pages/genericArticle.py b/weboob/backends/minutes20/pages/genericArticle.py deleted file mode 100644 index 748c6c97..00000000 --- a/weboob/backends/minutes20/pages/genericArticle.py +++ /dev/null @@ -1,81 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Julien Hebert -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.browser import BasePage -from weboob.tools.parsers.lxmlparser import select, SelectElementException - -class NoAuthorElement(SelectElementException): - pass - -class NoneMainDiv(AttributeError): - pass - -class Article(object): - author = u'' - - def __init__(self, browser, _id): - self.browser = browser - self.id = _id - self.title = u'' - self.body = u'' - self.url = u'' - self.date = None - -class GenericNewsPage(BasePage): - __element_body = NotImplementedError - __article = Article - element_title_selector = NotImplementedError - main_div = NotImplementedError - element_body_selector = NotImplementedError - element_author_selector = NotImplementedError - - def get_body(self): - return self.browser.parser.tostring(self.get_element_body()) - - def get_author(self): - try: - return self.get_element_author().text_content().strip() - except NoAuthorElement: - return self.__article.author - - def get_title(self): - return select( - self.main_div, - self.element_title_selector, - 1).text_content().strip() - - def get_element_body(self): - return select(self.main_div, self.element_body_selector, 1) - - def get_element_author(self): - try: - return select(self.main_div, self.element_author_selector, 1) - except SelectElementException: - raise NoAuthorElement() - except AttributeError: - if self.main_div == None: - raise NoneMainDiv("main_div is none on %s" % (self.browser)) - else: - raise - - def get_article(self, _id): - __article = Article(self.browser, _id) - __article.author = self.get_author() - __article.title = self.get_title() - __article.url = self.url - __article.body = self.get_body() - - return __article diff --git a/weboob/backends/minutes20/pages/simple.py b/weboob/backends/minutes20/pages/simple.py index 307f079d..2ba4bc65 100644 --- a/weboob/backends/minutes20/pages/simple.py +++ b/weboob/backends/minutes20/pages/simple.py @@ -16,7 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from .genericArticle import GenericNewsPage +from weboob.tools.genericArticle import GenericNewsPage + class SimplePage(GenericNewsPage): "ArticlePage object for minutes20" diff --git a/weboob/backends/inrocks/pages/genericArticle.py b/weboob/tools/genericArticle.py similarity index 93% rename from weboob/backends/inrocks/pages/genericArticle.py rename to weboob/tools/genericArticle.py index 748c6c97..36b42348 100644 --- a/weboob/backends/inrocks/pages/genericArticle.py +++ b/weboob/tools/genericArticle.py @@ -17,6 +17,13 @@ from weboob.tools.browser import BasePage from weboob.tools.parsers.lxmlparser import select, SelectElementException +def try_remove(base_element, selector): + try : + base_element.remove(select(base_element, selector, 1 )) + except (SelectElementException, ValueError): + pass + + class NoAuthorElement(SelectElementException): pass