Return HTML to applications

And change relative links to absolute links
This commit is contained in:
Florent 2012-04-30 12:27:32 +02:00
commit 49a476d61c

View file

@ -30,14 +30,18 @@ class ArticlePage(GenericNewsPage):
self.element_author_selector = ".content-author>a" self.element_author_selector = ".content-author>a"
def get_body(self): def get_body(self):
part = self.document.getroot().xpath('//p[@class="article"]') div = self.document.getroot().find('.//div[@class="sectbody"]')
total = "" for a in div.findall('.//a'):
for p in part: try:
if p.text: if a.attrib["href"][0:7] != "http://":
total += "<p>" a.attrib["href"] = "http://taz.de/" + a.attrib["href"]
total += self.browser.parser.tostring(p) except:
total += "</p>" continue
return total for img in div.findall('.//img'):
if img.attrib["src"][0:7] != "http://":
img.attrib["src"] = "http://taz.de/" + img.attrib["src"]
return self.parser.tostring(div)
def get_title(self): def get_title(self):
title = GenericNewsPage.get_title(self) title = GenericNewsPage.get_title(self)
@ -47,5 +51,3 @@ class ArticlePage(GenericNewsPage):
author = self.document.getroot().xpath('//span[@class="author"]') author = self.document.getroot().xpath('//span[@class="author"]')
if author: if author:
return author[0].text.replace('von ', '') return author[0].text.replace('von ', '')
else:
return ""