Fix bug #473: can't parse ledirect URL

2011-02-05 13:21:49 +01:00 · 2011-02-05 13:21:49 +01:00 · 119df19f5b
commit 119df19f5b
parent 63efb6f96a
5 changed files with 119 additions and 31 deletions
--- a/weboob/backends/minutes20/backend.py
+++ b/weboob/backends/minutes20/backend.py
@ -58,12 +58,19 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages):
        if not thread.date:
            thread.date = content.date
-        #thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, signature=None, children = [], flags=flags)
+        thread.root = Message(
-
+            thread=thread,
-        thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, flags=flags, children= [])
+            id=0,
            title=content.title,
            sender=content.author,
            receivers=None,
            date=thread.date,
            parent=None,
            content=content.body,
            flags=flags,
            children= [])
        return thread
    def iter_threads(self):
        for article in Newsfeed('http://www.20minutes.fr/rss/une.xml').iter_entries():
            thread = Thread(article.id)
@ -71,14 +78,25 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages):
            thread.date = article.datetime
            yield(thread)
    def fill_thread(self, thread):
        return self.get_thread(thread)
    def iter_unread_messages(self, thread=None):
        for thread in self.iter_threads():
-            self.fill_thread(thread, 'root')
+            self.fill_thread(thread)
-            for m in thread.iter_all_messages():
+            for msg in thread.iter_all_messages():
-                if m.flags & m.IS_UNREAD:
+                if msg.flags & msg.IS_UNREAD:
-                    yield m
+                    yield msg
    def set_message_read(self, message):
-        self.storage.set('seen', message.thread.id, 'comments', self.storage.get('seen', message.thread.id, 'comments', default=[]) + [message.id])
+        self.storage.set(
            'seen',
            message.thread.id,
            'comments',
            self.storage.get(
                'seen',
                message.thread.id,
                'comments',
                default=[]) + [message.id])
        self.storage.save()
--- a/weboob/backends/minutes20/browser.py
+++ b/weboob/backends/minutes20/browser.py
@ -16,6 +16,7 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 from .pages.article import ArticlePage
 from .pages.ledirect import LeDirectPage
 from weboob.tools.browser import BaseBrowser
 __all__ = ['Newspaper20minutesBrowser']
@ -23,9 +24,14 @@ __all__ = ['Newspaper20minutesBrowser']
 class Newspaper20minutesBrowser(BaseBrowser):
    PAGES = {
-             'http://www.20minutes.fr/article/?.*': ArticlePage
+             'http://www.20minutes.fr/article/?.*': ArticlePage,
             'http://www.20minutes.fr/ledirect/?.*': LeDirectPage
            }
    def is_logged(self):
        return False
    def get_content(self, url):
        self.location(url)
        return self.page.article
--- a/weboob/backends/minutes20/pages/article.py
+++ b/weboob/backends/minutes20/pages/article.py
@ -15,28 +15,17 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-from weboob.tools.browser import BasePage
+
 from weboob.tools.parsers.lxmlparser import select
 from .minutes20 import Minutes20Page
 class Article(object):
    def __init__(self):
        self.title = u''
        self.body = u''
        self.author = None 
        self.date = None
-class ArticlePage(BasePage):
+class ArticlePage(Minutes20Page):
-    def on_loaded(self):
+    def set_body(self):
-        self.article = Article()
+        self.element_body = select(self.main_div, "div.mna-body", 1) 
-        main_div = self.document.getroot()
+        self.element_body.remove(select(self.element_body, "div.mna-tools", 1))
-        self.article.title = select(main_div, "h1", 1).text_content()
+        self.element_body.remove(select(self.element_body, "div.mna-comment-call", 1))
-        element_body = select(main_div, "div.mn-line>div.mna-body", 1) 
+        self.element_body.remove(self.get_element_author())
-        element_tools = select(element_body, "div.mna-tools", 1)
+        self.article.body = self.browser.parser.tostring(self.element_body) 
        element_comment = select(element_body, "div.mna-comment-call", 1)
        element_author = select(element_body, "#mna-signature", 1)
        element_body.remove(element_tools)
        element_body.remove(element_comment)
        element_body.remove(element_author)
        self.article.author = element_author.text_content().strip()
        self.article.body = self.browser.parser.tostring(element_body)
--- a/weboob/backends/minutes20/pages/ledirect.py
+++ b/weboob/backends/minutes20/pages/ledirect.py
@ -0,0 +1,23 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2011  Julien Hebert
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, version 3 of the License.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 from weboob.tools.parsers.lxmlparser import select
 from .minutes20 import Minutes20Page
 class LeDirectPage(Minutes20Page):
    """Page whose article body is the serialized ``div.mna-body`` element."""
    def set_body(self):
        """Store the serialized ``div.mna-body`` element as the article body.

        The element is serialized as found; nothing is removed from it first.
        """
        # Resolve the serializer first, then look up the node, mirroring the
        # order in which the single-expression form evaluates its operands.
        to_string = self.browser.parser.tostring
        # NOTE(review): the trailing 1 presumably asks select() for exactly
        # one match -- confirm against weboob.tools.parsers.lxmlparser.
        body_node = select(self.main_div, "div.mna-body", 1)
        self.article.body = to_string(body_node)
--- a/weboob/backends/minutes20/pages/minutes20.py
+++ b/weboob/backends/minutes20/pages/minutes20.py
@ -0,0 +1,52 @@
 # -*- coding: utf-8 -*-
 # Copyright(C) 2011  Julien Hebert
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
 # the Free Software Foundation, version 3 of the License.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 from weboob.tools.browser import BasePage
 from weboob.tools.parsers.lxmlparser import select 
 __all__ = ['Minutes20Page', 'Article']
 class Article(object):
    """Plain holder for an article's title, body, author and date."""
    def __init__(self):
        """Create an article with empty text fields and no author or date."""
        # Text fields start out as empty unicode strings.
        self.title = self.body = u''
        # Author and date stay None until something assigns them.
        self.author = self.date = None
 class Minutes20Page(BasePage):
    """Base page that fills an ``Article`` from the loaded document.

    ``on_loaded`` sets the title and author itself and delegates the body to
    ``set_body``, which subclasses must implement.
    """
    # Class-level placeholders; on_loaded() rebinds main_div and article on
    # the instance. element_body is not assigned anywhere in this class.
    main_div = NotImplementedError
    element_body = NotImplementedError
    article = Article()
    def set_author(self):
        """Store the stripped text of the author element on the article."""
        author_node = self.get_element_author()
        author_text = author_node.text_content()
        self.article.author = author_text.strip()
    def get_element_author(self):
        """Return the ``div.mna-signature`` element selected from ``main_div``."""
        # NOTE(review): the trailing 1 presumably asks select() for exactly
        # one match -- confirm against weboob.tools.parsers.lxmlparser.
        signature = select(self.main_div, "div.mna-signature", 1)
        return signature
    def set_body(self):
        """Fill ``self.article.body``; must be overridden by subclasses."""
        raise NotImplementedError
    def on_loaded(self):
        """Build a fresh ``Article`` from the document: title, author, body."""
        # A new Article per call, so the class-level instance is not mutated.
        self.article = Article()
        self.main_div = self.document.getroot()
        heading = select(self.main_div, "h1", 1)
        self.article.title = heading.text_content()
        # Author is set before the body, in the same order as before.
        self.set_author()
        self.set_body()