Use clean_relativ_urls
This commit is contained in:
parent
b100f77f95
commit
fcd8432045
1 changed file with 2 additions and 10 deletions
|
|
@@ -19,7 +19,7 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage,\
|
||||
try_drop_tree
|
||||
try_drop_tree, clean_relativ_urls
|
||||
|
||||
|
||||
class ArticlePage(GenericNewsPage):
|
||||
|
|
@@ -33,15 +33,7 @@ class ArticlePage(GenericNewsPage):
|
|||
def get_body(self):
|
||||
div = self.document.getroot().find('.//div[@class="sectbody"]')
|
||||
try_drop_tree(self.parser, div, "div.anchor")
|
||||
for a in div.findall('.//a'):
|
||||
try:
|
||||
if a.attrib["href"][0:7] != "http://":
|
||||
a.attrib["href"] = "http://taz.de/" + a.attrib["href"]
|
||||
except:
|
||||
continue
|
||||
for img in div.findall('.//img'):
|
||||
if img.attrib["src"][0:7] != "http://":
|
||||
img.attrib["src"] = "http://taz.de/" + img.attrib["src"]
|
||||
clean_relativ_urls(div, "http://taz.de")
|
||||
|
||||
return self.parser.tostring(div)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue