# -*- coding: utf-8 -*-

# Copyright(C) 2010-2011 Nicolas Duhamel
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


import re

from weboob.tools.browser import BasePage
from .base import DownparadisePage
from weboob.capabilities.collection import Collection
from weboob.capabilities.messages import Thread


__all__ = ['ViewforumPage']


# Compiled once at import time instead of on every call.  ``[^>]*`` is used
# rather than ``.*?`` so that a tag whose attributes wrap onto several lines
# is still matched ('.' does not match a newline by default).
_HTML_TAG_RE = re.compile(r'<[^>]*>')


def remove_html_tags(data):
    """Return ``data`` with every ``<...>`` markup tag removed.

    Only the tags themselves are dropped; the text between them is kept
    untouched (entities are not decoded, whitespace is not normalised).

    :param data: string containing HTML markup
    :return: the same string without its tags
    """
    return _HTML_TAG_RE.sub('', data)


class ViewforumPage(DownparadisePage):
    """Forum listing page, from which the topic links are extracted."""

    def on_loaded(self):
        """Nothing to do once the page is loaded."""
        pass

    def iter_threads(self):
        """Yield one ``Thread`` per topic link found on the page.

        Each thread is created from the link's ``href`` attribute and gets
        the link's text (markup stripped) as its title.

        :raises IndexError: if the page holds fewer than two tables under
                            the ``pagecontent`` div
        """
        # The topic links are looked up in the second table of the
        # 'pagecontent' div.
        maintable = self.document.xpath("//div[@id='pagecontent']/table")[1]
        for link in maintable.xpath(".//a[@class='topictitle']"):
            thread = Thread(link.get("href"))
            markup = self.parser.tostring(link)
            # NOTE(review): the encode/decode round-trip re-reads the title
            # as UTF-8 -- presumably a workaround for a page decoded with the
            # wrong charset; confirm before changing it.
            thread.title = remove_html_tags(markup).strip().encode('raw_unicode_escape').decode('utf-8')
            yield thread