From 7492c0bdc49965270e2e77fdc9d8eb509b264ef7 Mon Sep 17 00:00:00 2001 From: Bezleputh Date: Wed, 26 Feb 2014 20:05:59 +0100 Subject: [PATCH] [lefigaro] fix : site changed --- modules/lefigaro/browser.py | 8 ++++---- modules/lefigaro/pages/article.py | 5 ++--- modules/lefigaro/pages/flashactu.py | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/lefigaro/browser.py b/modules/lefigaro/browser.py index a453a345..a7e52e08 100644 --- a/modules/lefigaro/browser.py +++ b/modules/lefigaro/browser.py @@ -29,15 +29,15 @@ class IndexPage(BasePage): class NewspaperFigaroBrowser(BaseBrowser): "NewspaperFigaroBrowser class" - ENCODING = None - PAGES = { - "http://\w+.lefigaro.fr/flash-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, + ENCODING = "UTF-8" + PAGES = {"http://\w+.lefigaro.fr/flash-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, "http://\w+.lefigaro.fr/bd/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, "http://\w+.lefigaro.fr/(?!flash-|bd|actualite).+/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://\w+.lefigaro.fr/actualite/(\d{4})/(\d{2})/(\d{2})/(.*$)": ActuPage, "http://\w+.lefigaro.fr/actualite-.*/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://\w+.lefigaro.fr/": IndexPage, - } + "http://feeds.lefigaro.fr/c/32266/f/438190/s/\w+/sc/\d{2}/\d{1}/\w+/story01.htm": FlashActuPage, + } def is_logged(self): return False diff --git a/modules/lefigaro/pages/article.py b/modules/lefigaro/pages/article.py index d216600f..6f2f9aa6 100644 --- a/modules/lefigaro/pages/article.py +++ b/modules/lefigaro/pages/article.py @@ -18,7 +18,7 @@ # You should have received a copy of the GNU Affero General Public License # along with weboob. If not, see <http://www.gnu.org/licenses/>. 
-from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, remove_from_selector_list, drop_comments, try_drop_tree, try_remove_from_selector_list +from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, drop_comments, try_drop_tree, try_remove_from_selector_list class ArticlePage(GenericNewsPage): @@ -27,11 +27,10 @@ class ArticlePage(GenericNewsPage): self.main_div = self.document.getroot() self.element_title_selector = "h1" self.element_author_selector = "span.auteur>a, span.auteur_long>div" - self.element_body_selector = "#article, div.article" + self.element_body_selector = "article div.fig-article-body" def get_body(self): element_body = self.get_element_body() - remove_from_selector_list(self.parser, element_body, [self.element_title_selector]) drop_comments(element_body) try_drop_tree(self.parser, element_body, "script") try_drop_tree(self.parser, element_body, "liste") diff --git a/modules/lefigaro/pages/flashactu.py b/modules/lefigaro/pages/flashactu.py index 44b1d5cc..1b4a50c4 100644 --- a/modules/lefigaro/pages/flashactu.py +++ b/modules/lefigaro/pages/flashactu.py @@ -27,7 +27,7 @@ class FlashActuPage(GenericNewsPage): self.main_div = self.document.getroot() self.element_title_selector = "h1" self.element_author_selector = "div.name>span" - self.element_body_selector = "h2" + self.element_body_selector = "article, fig-article-body" def get_body(self): element_body = self.get_element_body()