Fix bug #473: can't parse ledirect URL

This commit is contained in:
Juke 2011-02-05 13:21:49 +01:00 committed by Romain Bignon
commit 119df19f5b
5 changed files with 119 additions and 31 deletions

View file

@ -15,28 +15,17 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select
from .minutes20 import Minutes20Page
class Article(object):
    """Plain container for the fields extracted from one article page."""

    def __init__(self):
        """Create an empty article: blank title/body, no author or date."""
        # Text fields start as empty unicode strings so they can always be
        # treated as text, even before a page has filled them in.
        self.title = u''
        self.body = u''
        # Author and date are optional; None means "not set".
        self.author = None
        self.date = None
class ArticlePage(BasePage):
    """Page object that extracts an Article from a loaded article document."""

    def on_loaded(self):
        """Populate ``self.article`` from ``self.document``.

        The title is the text of the ``h1`` element, the author is the
        stripped text of ``#mna-signature``, and the body is the serialized
        ``div.mna-body`` element once the tools, comment-call and signature
        sub-elements have been removed from it.
        """
        self.article = Article()
        main_div = self.document.getroot()
        # NOTE(review): the trailing ``1`` passed to select() presumably
        # requests exactly one match -- confirm against lxmlparser.select.
        self.article.title = select(main_div, "h1", 1).text_content()

        element_body = select(main_div, "div.mn-line>div.mna-body", 1)
        element_tools = select(element_body, "div.mna-tools", 1)
        element_comment = select(element_body, "div.mna-comment-call", 1)
        element_author = select(element_body, "#mna-signature", 1)

        # Strip the non-article parts out of the body before serializing it.
        element_body.remove(element_tools)
        element_body.remove(element_comment)
        element_body.remove(element_author)

        # The signature element is still usable after being detached above.
        self.article.author = element_author.text_content().strip()
        self.article.body = self.browser.parser.tostring(element_body)
class ArticlePage(Minutes20Page):
    """Article page built on the shared Minutes20Page behaviour."""

    def set_body(self):
        """Select the article body, strip its extras and store it serialized.

        Sets ``self.element_body`` to the ``div.mna-body`` element found under
        ``self.main_div``, removes the tools, comment-call and author elements
        from it, then stores the serialized result in ``self.article.body``.
        """
        # NOTE(review): self.main_div, self.article and get_element_author()
        # are not defined in this class; presumably they come from
        # Minutes20Page -- confirm against that base class.
        self.element_body = select(self.main_div, "div.mna-body", 1)
        self.element_body.remove(select(self.element_body, "div.mna-tools", 1))
        self.element_body.remove(select(self.element_body, "div.mna-comment-call", 1))
        self.element_body.remove(self.get_element_author())
        self.article.body = self.browser.parser.tostring(self.element_body)