Use clean_relativ_urls

2013-01-24 22:21:12 +01:00 · 2013-01-24 22:21:12 +01:00 · fcd8432045
commit fcd8432045
parent b100f77f95
1 changed file with 2 additions and 10 deletions
--- a/modules/taz/pages/article.py
+++ b/modules/taz/pages/article.py
@@ -19,7 +19,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

 from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage,\
-        try_drop_tree
+        try_drop_tree, clean_relativ_urls


 class ArticlePage(GenericNewsPage):
@@ -33,15 +33,7 @@ class ArticlePage(GenericNewsPage):
    def get_body(self):
        div = self.document.getroot().find('.//div[@class="sectbody"]')
        try_drop_tree(self.parser, div, "div.anchor")
-        for a in div.findall('.//a'):
-            try:
-                if a.attrib["href"][0:7] != "http://":
-                    a.attrib["href"] = "http://taz.de/" + a.attrib["href"]
-            except:
-                continue
-        for img in div.findall('.//img'):
-            if img.attrib["src"][0:7] != "http://":
-                img.attrib["src"] = "http://taz.de/" + img.attrib["src"]
+        clean_relativ_urls(div, "http://taz.de")

        return self.parser.tostring(div)