From 119df19f5b4751c35218e67681b8aea919e24104 Mon Sep 17 00:00:00 2001 From: Juke Date: Sat, 5 Feb 2011 13:21:49 +0100 Subject: [PATCH] fix bug #473 can't parse ledirect url --- weboob/backends/minutes20/backend.py | 36 ++++++++++---- weboob/backends/minutes20/browser.py | 8 ++- weboob/backends/minutes20/pages/article.py | 31 ++++-------- weboob/backends/minutes20/pages/ledirect.py | 23 +++++++++ weboob/backends/minutes20/pages/minutes20.py | 52 ++++++++++++++++++++ 5 files changed, 119 insertions(+), 31 deletions(-) create mode 100644 weboob/backends/minutes20/pages/ledirect.py create mode 100644 weboob/backends/minutes20/pages/minutes20.py diff --git a/weboob/backends/minutes20/backend.py b/weboob/backends/minutes20/backend.py index 9c4c7c62..5fe6e198 100644 --- a/weboob/backends/minutes20/backend.py +++ b/weboob/backends/minutes20/backend.py @@ -58,12 +58,19 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages): if not thread.date: thread.date = content.date - #thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, signature=None, children = [], flags=flags) - - thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, flags=flags, children= []) + thread.root = Message( + thread=thread, + id=0, + title=content.title, + sender=content.author, + receivers=None, + date=thread.date, + parent=None, + content=content.body, + flags=flags, + children= []) return thread - def iter_threads(self): for article in Newsfeed('http://www.20minutes.fr/rss/une.xml').iter_entries(): thread = Thread(article.id) @@ -71,14 +78,25 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages): thread.date = article.datetime yield(thread) + def fill_thread(self, thread): + return self.get_thread(thread) + def iter_unread_messages(self, thread=None): for thread in self.iter_threads(): 
- self.fill_thread(thread, 'root') - for m in thread.iter_all_messages(): - if m.flags & m.IS_UNREAD: - yield m + self.fill_thread(thread) + for msg in thread.iter_all_messages(): + if msg.flags & msg.IS_UNREAD: + yield msg def set_message_read(self, message): - self.storage.set('seen', message.thread.id, 'comments', self.storage.get('seen', message.thread.id, 'comments', default=[]) + [message.id]) + self.storage.set( + 'seen', + message.thread.id, + 'comments', + self.storage.get( + 'seen', + message.thread.id, + 'comments', + default=[]) + [message.id]) self.storage.save() diff --git a/weboob/backends/minutes20/browser.py b/weboob/backends/minutes20/browser.py index b1378eba..e9cee79d 100644 --- a/weboob/backends/minutes20/browser.py +++ b/weboob/backends/minutes20/browser.py @@ -16,6 +16,7 @@ # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from .pages.article import ArticlePage +from .pages.ledirect import LeDirectPage from weboob.tools.browser import BaseBrowser __all__ = ['Newspaper20minutesBrowser'] @@ -23,9 +24,14 @@ __all__ = ['Newspaper20minutesBrowser'] class Newspaper20minutesBrowser(BaseBrowser): PAGES = { - 'http://www.20minutes.fr/article/?.*': ArticlePage + 'http://www.20minutes.fr/article/?.*': ArticlePage, + 'http://www.20minutes.fr/ledirect/?.*': LeDirectPage } + + def is_logged(self): + return False + def get_content(self, url): self.location(url) return self.page.article diff --git a/weboob/backends/minutes20/pages/article.py b/weboob/backends/minutes20/pages/article.py index 74516807..14cb8dd7 100644 --- a/weboob/backends/minutes20/pages/article.py +++ b/weboob/backends/minutes20/pages/article.py @@ -15,28 +15,17 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-from weboob.tools.browser import BasePage + from weboob.tools.parsers.lxmlparser import select +from .minutes20 import Minutes20Page -class Article(object): - def __init__(self): - self.title = u'' - self.body = u'' - self.author = None - self.date = None -class ArticlePage(BasePage): - def on_loaded(self): - self.article = Article() - main_div = self.document.getroot() - self.article.title = select(main_div, "h1", 1).text_content() - element_body = select(main_div, "div.mn-line>div.mna-body", 1) - element_tools = select(element_body, "div.mna-tools", 1) - element_comment = select(element_body, "div.mna-comment-call", 1) - element_author = select(element_body, "#mna-signature", 1) - element_body.remove(element_tools) - element_body.remove(element_comment) - element_body.remove(element_author) - self.article.author = element_author.text_content().strip() - self.article.body = self.browser.parser.tostring(element_body) +class ArticlePage(Minutes20Page): + def set_body(self): + self.element_body = select(self.main_div, "div.mna-body", 1) + self.element_body.remove(select(self.element_body, "div.mna-tools", 1)) + self.element_body.remove(select(self.element_body, "div.mna-comment-call", 1)) + self.element_body.remove(self.get_element_author()) + self.article.body = self.browser.parser.tostring(self.element_body) + diff --git a/weboob/backends/minutes20/pages/ledirect.py b/weboob/backends/minutes20/pages/ledirect.py new file mode 100644 index 00000000..3f3fd1bc --- /dev/null +++ b/weboob/backends/minutes20/pages/ledirect.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Julien Hebert +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.parsers.lxmlparser import select
+from .minutes20 import Minutes20Page
+
+class LeDirectPage(Minutes20Page):
+    def set_body(self):
+        self.article.body = self.browser.parser.tostring(select(self.main_div, "div.mna-body", 1))
diff --git a/weboob/backends/minutes20/pages/minutes20.py b/weboob/backends/minutes20/pages/minutes20.py
new file mode 100644
index 00000000..cddfa8a7
--- /dev/null
+++ b/weboob/backends/minutes20/pages/minutes20.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011 Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.browser import BasePage
+from weboob.tools.parsers.lxmlparser import select
+
+__all__ = ['Minutes20Page', 'Article']
+
+
+class Article(object):
+    """Plain container for the fields parsed from one 20minutes page."""
+
+    def __init__(self):
+        self.title = u''
+        self.body = u''
+        self.author = None
+        self.date = None
+
+
+class Minutes20Page(BasePage):
+    """Shared parsing logic for 20minutes.fr pages.
+
+    on_loaded() builds a new Article and fills in its title and author;
+    each subclass supplies set_body() to extract the body markup.
+    """
+
+    # Placeholders only: on_loaded() assigns the real values on the instance.
+    # None is used so that no Article object is shared between page
+    # instances through the class.
+    main_div = None
+    element_body = None
+    article = None
+
+    def set_author(self):
+        """Store the stripped text of the signature element as the author."""
+        self.article.author = self.get_element_author().text_content().strip()
+
+    def get_element_author(self):
+        """Return the element selected by "div.mna-signature" in main_div."""
+        return select(self.main_div, "div.mna-signature", 1)
+
+    def set_body(self):
+        """Fill self.article.body; subclasses must override this."""
+        raise NotImplementedError
+
+    def on_loaded(self):
+        """Parse the loaded document into a fresh Article."""
+        self.article = Article()
+        self.main_div = self.document.getroot()
+        self.article.title = select(self.main_div, "h1", 1).text_content()
+        self.set_author()
+        self.set_body()