From e8bdf594cc75a2b7b1df6223c5c37a6b2ed3a85f Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sat, 27 Oct 2012 10:41:29 +0200 Subject: [PATCH] fix parsing live articles --- modules/lefigaro/pages/article.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/lefigaro/pages/article.py b/modules/lefigaro/pages/article.py index ecf80612..a8ec04b2 100644 --- a/modules/lefigaro/pages/article.py +++ b/modules/lefigaro/pages/article.py @@ -27,7 +27,7 @@ class ArticlePage(GenericNewsPage): self.main_div = self.document.getroot() self.element_title_selector = "h1" self.element_author_selector = "div.name>span" - self.element_body_selector = "#article" + self.element_body_selector = "#article, div.article" def get_body(self): element_body = self.get_element_body() @@ -63,6 +63,8 @@ class ArticlePage(GenericNewsPage): break crappy_title.drop_tree() - element_body.find_class("texte")[0].drop_tag() + txts = element_body.find_class("texte") + if len(txts) > 0: + txts[0].drop_tag() element_body.tag = "div" return self.parser.tostring(element_body)