Return HTML to applications
And change relative links to absolute links
This commit is contained in:
parent
c9aadbfb34
commit
49a476d61c
1 changed file with 12 additions and 10 deletions
|
|
@@ -30,14 +30,18 @@ class ArticlePage(GenericNewsPage):
|
||||||
self.element_author_selector = ".content-author>a"
|
self.element_author_selector = ".content-author>a"
|
||||||
|
|
||||||
def get_body(self):
|
def get_body(self):
|
||||||
part = self.document.getroot().xpath('//p[@class="article"]')
|
div = self.document.getroot().find('.//div[@class="sectbody"]')
|
||||||
total = ""
|
for a in div.findall('.//a'):
|
||||||
for p in part:
|
try:
|
||||||
if p.text:
|
if a.attrib["href"][0:7] != "http://":
|
||||||
total += "<p>"
|
a.attrib["href"] = "http://taz.de/" + a.attrib["href"]
|
||||||
total += self.browser.parser.tostring(p)
|
except:
|
||||||
total += "</p>"
|
continue
|
||||||
return total
|
for img in div.findall('.//img'):
|
||||||
|
if img.attrib["src"][0:7] != "http://":
|
||||||
|
img.attrib["src"] = "http://taz.de/" + img.attrib["src"]
|
||||||
|
|
||||||
|
return self.parser.tostring(div)
|
||||||
|
|
||||||
def get_title(self):
|
def get_title(self):
|
||||||
title = GenericNewsPage.get_title(self)
|
title = GenericNewsPage.get_title(self)
|
||||||
|
|
@@ -47,5 +51,3 @@ class ArticlePage(GenericNewsPage):
|
||||||
author = self.document.getroot().xpath('//span[@class="author"]')
|
author = self.document.getroot().xpath('//span[@class="author"]')
|
||||||
if author:
|
if author:
|
||||||
return author[0].text.replace('von ', '')
|
return author[0].text.replace('von ', '')
|
||||||
else:
|
|
||||||
return ""
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue