From 6444ee91467522d73ae328e805685a966d6432a9 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Tue, 25 Oct 2011 14:24:08 +0200 Subject: [PATCH] fix broken backend --- weboob/backends/lefigaro/browser.py | 11 ++++--- weboob/backends/lefigaro/pages/special.py | 35 +++++++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 weboob/backends/lefigaro/pages/special.py diff --git a/weboob/backends/lefigaro/browser.py b/weboob/backends/lefigaro/browser.py index d55df381..718b34a5 100644 --- a/weboob/backends/lefigaro/browser.py +++ b/weboob/backends/lefigaro/browser.py @@ -20,6 +20,7 @@ from .pages.article import ArticlePage from .pages.flashactu import FlashActuPage +from .pages.special import SpecialPage from weboob.tools.browser import BaseBrowser, BasePage class IndexPage(BasePage): @@ -29,10 +30,12 @@ class IndexPage(BasePage): class NewspaperFigaroBrowser(BaseBrowser): "NewspaperFigaroBrowser class" PAGES = { - "http://www.lefigaro.fr/flash-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, - "http://www.lefigaro.fr/bd/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, - "http://www.lefigaro.fr/(?!flash-|bd).+/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, - "http://www.lefigaro.fr/": IndexPage, + "http://\w+.lefigaro.fr/flash-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, + "http://\w+.lefigaro.fr/bd/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, + "http://\w+.lefigaro.fr/actualite/(\d{4})/(\d{2})/(\d{2})/(.*$)": SpecialPage, + "http://\w+.lefigaro.fr/(?!flash-|bd|actualite).+/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://\w+.lefigaro.fr/actualite-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://\w+.lefigaro.fr/": IndexPage, } def is_logged(self): diff --git a/weboob/backends/lefigaro/pages/special.py b/weboob/backends/lefigaro/pages/special.py new file mode 100644 index 00000000..3cabddd0 --- /dev/null +++ b/weboob/backends/lefigaro/pages/special.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 
# Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, try_remove_from_selector_list


class SpecialPage(GenericNewsPage):
    """Page object for lefigaro "actualite" (special) articles.

    Configures the selectors inherited from GenericNewsPage for this page
    layout and overrides the body extraction.
    """

    def on_loaded(self):
        """Set the root element and the selectors used to locate content.

        The selector attributes are presumably consumed by GenericNewsPage
        when it looks up the title, author and body (its code is not
        visible here -- confirm against genericArticle).
        """
        # Searches start from the root of the parsed document.
        self.main_div = self.document.getroot()

        # (attribute name, selector) pairs, applied in the original order.
        selector_settings = (
            ("element_title_selector", "h2"),
            ("element_author_selector", "div.name>span"),
            ("element_body_selector", ".block-text"),
        )
        for attribute_name, selector in selector_settings:
            setattr(self, attribute_name, selector)

    def get_body(self):
        """Return the article body serialized by the page parser.

        The body element is fetched with get_element_body(), cleaned with
        try_remove_from_selector_list() for the 'div' selector, renamed to
        a "div" tag and finally passed to self.parser.tostring().
        """
        body = self.get_element_body()
        # Presumably strips nested <div> blocks from the body; the helper is
        # defined in genericArticle and not visible here.
        try_remove_from_selector_list(self.parser, body, ['div'])
        # Whatever tag the body container had, expose it as a plain <div>.
        body.tag = "div"
        return self.parser.tostring(body)