diff --git a/weboob/backends/lefigaro/browser.py b/weboob/backends/lefigaro/browser.py index f2ea0286..3fe4d543 100644 --- a/weboob/backends/lefigaro/browser.py +++ b/weboob/backends/lefigaro/browser.py @@ -44,6 +44,7 @@ class NewspaperFigaroBrowser(BaseBrowser): "http://www.lefigaro.fr/programmes-tele/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, "http://www.lefigaro.fr/le-talk/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, "http://www.lefigaro.fr/sortir-paris/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, + "http://www.lefigaro.fr/autres-sports/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, "http://www.lefigaro.fr/immobilier/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, "http://www.lefigaro.fr/environnement/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, "http://www.lefigaro.fr/rugby/([0-9][0-9][0-9][0-9])/([0-9][0-9])/([0-9][0-9])/(.*$)": ArticlePage, diff --git a/weboob/backends/lefigaro/pages/article.py b/weboob/backends/lefigaro/pages/article.py index 31475c5b..a47fb6d7 100644 --- a/weboob/backends/lefigaro/pages/article.py +++ b/weboob/backends/lefigaro/pages/article.py @@ -16,7 +16,7 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -from weboob.tools.parsers.lxmlparser import select +from weboob.tools.parsers.lxmlparser import select, SelectElementException from weboob.tools.genericArticle import GenericNewsPage, try_remove class ArticlePage(GenericNewsPage): @@ -30,19 +30,24 @@ class ArticlePage(GenericNewsPage): def get_body(self): element_body = self.get_element_body() h1_title = select(element_body, self.element_title_selector, 1) - div_infos = select(element_body, "div.infos", 1) - el_script = select(element_body, "script", 1) + + try: + el_script = select(element_body, "script", 1) + except SelectElementException: + pass + else: + el_script.drop_tree() + element_body.remove(h1_title) - element_body.remove(div_infos) + try_remove(element_body, "div.infos") try_remove(element_body, "div.photo") try_remove(element_body, "div.art_bandeau_bottom") try_remove(element_body, "div.view") try_remove(element_body, "span.auteur_long") try_remove(element_body, "#toolsbar") - el_script.drop_tree() element_body.find_class("texte")[0].drop_tag() element_body.tag = "div" return self.browser.parser.tostring(element_body)