From c9aadbfb34a22857e89b28fac9590f20ce280703 Mon Sep 17 00:00:00 2001
From: Florent
Date: Mon, 30 Apr 2012 11:55:46 +0200
Subject: [PATCH] Replace relative links with absolute links

---
Review notes (this section is ignored when the patch is applied):
 * The line structure of this patch was restored from a copy whose
   whitespace had been collapsed. Blank context lines and the spacing
   inside context lines are reconstructed; verify them against the
   target file before applying.
 * The commit id above and the blob ids on the "index" line come from the
   original submission and do not describe the revised content below.
 * The link rewriting no longer raises KeyError on <a>/<img> elements that
   lack href/src, leaves https://, protocol-relative and mailto: URLs
   untouched, and avoids a double slash for root-relative paths.

 modules/ecrans/pages/article.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/modules/ecrans/pages/article.py b/modules/ecrans/pages/article.py
index f33a97c7..65df62a5 100644
--- a/modules/ecrans/pages/article.py
+++ b/modules/ecrans/pages/article.py
@@ -1,4 +1,4 @@
-"ArticlePage object for inrocks"
+"ArticlePage object for ecrans"
 # -*- coding: utf-8 -*-
 
 # Copyright(C) 2011 Julien Hebert
@@ -22,7 +22,7 @@ from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, r
 
 
 class ArticlePage(GenericNewsPage):
-    "ArticlePage object for inrocks"
+    "ArticlePage object for ecrans"
     def on_loaded(self):
         self.main_div = self.document.getroot()
         self.element_title_selector = "title"
@@ -34,5 +34,15 @@ class ArticlePage(GenericNewsPage):
         remove_from_selector_list(self.parser, element_body, ["p.auteur", "h4"])
         try_remove_from_selector_list(self.parser, element_body, ["p.tag", "div.alire", self.element_title_selector, "h4"])
         try_drop_tree(self.parser, element_body, "script")
+        # Rewrite site-relative links and image sources as absolute URLs.
+        absolute_prefixes = ("http://", "https://", "//", "mailto:")
+        for tag, attribute in (("a", "href"), ("img", "src")):
+            for element in element_body.findall(".//" + tag):
+                url = element.get(attribute)
+                # Leave alone elements lacking the attribute (e.g. named
+                # anchors) and URLs that are already absolute.
+                if url is None or url.startswith(absolute_prefixes):
+                    continue
+                element.set(attribute, "http://ecrans.fr/" + url.lstrip("/"))
 
         return self.parser.tostring(element_body)