From 3c6348d7ad027f658903c89ac7300f54f8188acd Mon Sep 17 00:00:00 2001 From: Florent Date: Tue, 8 Jan 2013 19:21:53 +0100 Subject: [PATCH] Fix encoding and remove some unused files fix #819 --- modules/lefigaro/browser.py | 2 +- modules/lefigaro/pages/simple.py | 30 -------------------------- modules/lefigaro/pages/special.py | 35 ------------------------------- 3 files changed, 1 insertion(+), 66 deletions(-) delete mode 100644 modules/lefigaro/pages/simple.py delete mode 100644 modules/lefigaro/pages/special.py diff --git a/modules/lefigaro/browser.py b/modules/lefigaro/browser.py index 718b34a5..4f67995e 100644 --- a/modules/lefigaro/browser.py +++ b/modules/lefigaro/browser.py @@ -29,10 +29,10 @@ class IndexPage(BasePage): class NewspaperFigaroBrowser(BaseBrowser): "NewspaperFigaroBrowser class" + ENCODING = None PAGES = { "http://\w+.lefigaro.fr/flash-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, "http://\w+.lefigaro.fr/bd/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, - "http://\w+.lefigaro.fr/actualite/(\d{4})/(\d{2})/(\d{2})/(.*$)": SpecialPage, "http://\w+.lefigaro.fr/(?!flash-|bd|actualite).+/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://\w+.lefigaro.fr/actualite-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://\w+.lefigaro.fr/": IndexPage, diff --git a/modules/lefigaro/pages/simple.py b/modules/lefigaro/pages/simple.py deleted file mode 100644 index 3c2b1863..00000000 --- a/modules/lefigaro/pages/simple.py +++ /dev/null @@ -1,30 +0,0 @@ -"ArticlePage object for minutes20" -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Julien Hebert -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - -from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage - - -class SimplePage(GenericNewsPage): - "ArticlePage object for minutes20" - - def on_loaded(self): - self.main_div = self.document.getroot() - self.element_author_selector = "div.mna-signature" - self.element_body_selector = "#article" diff --git a/modules/lefigaro/pages/special.py b/modules/lefigaro/pages/special.py deleted file mode 100644 index f2ff5c3d..00000000 --- a/modules/lefigaro/pages/special.py +++ /dev/null @@ -1,35 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Romain Bignon -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - -from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, try_remove_from_selector_list - - -class SpecialPage(GenericNewsPage): - "ArticlePage object for inrocks" - def on_loaded(self): - self.main_div = self.document.getroot() - self.element_title_selector = "h2" - self.element_author_selector = "div.name>span" - self.element_body_selector = ".block-text" - - def get_body(self): - element_body = self.get_element_body() - try_remove_from_selector_list(self.parser, element_body, ['div']) - element_body.tag = "div" - return self.parser.tostring(element_body)