Fix bug #473: can't parse ledirect URLs

2011-02-05 13:21:49 +01:00 · 2011-02-05 13:21:49 +01:00 · 119df19f5b
commit 119df19f5b
parent 63efb6f96a
5 changed files with 119 additions and 31 deletions
--- a/weboob/backends/minutes20/backend.py
+++ b/weboob/backends/minutes20/backend.py
@ -58,12 +58,19 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages):
        if not thread.date:
            thread.date = content.date

-        #thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, signature=None, children = [], flags=flags)
-
-        thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, flags=flags, children= [])
+        thread.root = Message(
+            thread=thread,
+            id=0,
+            title=content.title,
+            sender=content.author,
+            receivers=None,
+            date=thread.date,
+            parent=None,
+            content=content.body,
+            flags=flags,
+            children= [])
        return thread

-    
    def iter_threads(self):
        for article in Newsfeed('http://www.20minutes.fr/rss/une.xml').iter_entries():
            thread = Thread(article.id)
@ -71,14 +78,25 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages):
            thread.date = article.datetime
            yield(thread)

+    def fill_thread(self, thread):
+        """Fill *thread* by delegating to get_thread() and return its result."""
+        filled_thread = self.get_thread(thread)
+        return filled_thread
+
    def iter_unread_messages(self, thread=None):
+        """Yield each message flagged IS_UNREAD, thread by thread.
+
+        Every thread produced by iter_threads() is filled with fill_thread()
+        before its messages are inspected.
+
+        NOTE(review): the ``thread`` argument is never read -- the loop
+        variable below rebinds the name immediately; confirm this is intended.
+        """
        for thread in self.iter_threads():
-            self.fill_thread(thread, 'root')
-            for m in thread.iter_all_messages():
-                if m.flags & m.IS_UNREAD:
-                    yield m
+            self.fill_thread(thread)
+            unread = (message for message in thread.iter_all_messages()
+                      if message.flags & message.IS_UNREAD)
+            for message in unread:
+                yield message


    def set_message_read(self, message):
-        self.storage.set('seen', message.thread.id, 'comments', self.storage.get('seen', message.thread.id, 'comments', default=[]) + [message.id])
+        """Record message.id in the thread's stored 'seen' comments, then save.
+
+        The list kept under ('seen', <thread id>, 'comments') is read (an
+        empty list when absent), extended with message.id and written back.
+        """
+        thread_id = message.thread.id
+        seen_ids = self.storage.get('seen', thread_id, 'comments', default=[])
+        self.storage.set(
+            'seen', thread_id, 'comments', seen_ids + [message.id])
        self.storage.save()
--- a/weboob/backends/minutes20/browser.py
+++ b/weboob/backends/minutes20/browser.py
@ -16,6 +16,7 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 from .pages.article import ArticlePage
+from .pages.ledirect import LeDirectPage
 from weboob.tools.browser import BaseBrowser

 __all__ = ['Newspaper20minutesBrowser']
@ -23,9 +24,14 @@ __all__ = ['Newspaper20minutesBrowser']

 class Newspaper20minutesBrowser(BaseBrowser):
    PAGES = {
-             'http://www.20minutes.fr/article/?.*': ArticlePage
+             'http://www.20minutes.fr/article/?.*': ArticlePage,
+             'http://www.20minutes.fr/ledirect/?.*': LeDirectPage
            }

+
+    def is_logged(self):
+        """Always report this browser as not logged in."""
+        return False
+
    def get_content(self, url):
        self.location(url)
        return self.page.article
--- a/weboob/backends/minutes20/pages/article.py
+++ b/weboob/backends/minutes20/pages/article.py
@ -15,28 +15,17 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

-from weboob.tools.browser import BasePage
+
 from weboob.tools.parsers.lxmlparser import select
+from .minutes20 import Minutes20Page

-class Article(object):
-    def __init__(self):
-        self.title = u''
-        self.body = u''
-        self.author = None 
-        self.date = None

-class ArticlePage(BasePage):
-    def on_loaded(self):
-        self.article = Article()
-        main_div = self.document.getroot()
-        self.article.title = select(main_div, "h1", 1).text_content()
-        element_body = select(main_div, "div.mn-line>div.mna-body", 1) 
-        element_tools = select(element_body, "div.mna-tools", 1)
-        element_comment = select(element_body, "div.mna-comment-call", 1)
-        element_author = select(element_body, "#mna-signature", 1)
-        element_body.remove(element_tools)
-        element_body.remove(element_comment)
-        element_body.remove(element_author)
-        self.article.author = element_author.text_content().strip()
-        self.article.body = self.browser.parser.tostring(element_body)
+class ArticlePage(Minutes20Page):
+    """Page handler for 20minutes.fr "article" URLs."""
+
+    def set_body(self):
+        """Store the serialized body element in self.article.body.
+
+        The tools, comment-call and author elements are removed from the
+        body element before it is serialized.
+        """
+        body = select(self.main_div, "div.mna-body", 1)
+        self.element_body = body
+        for css in ("div.mna-tools", "div.mna-comment-call"):
+            body.remove(select(body, css, 1))
+        body.remove(self.get_element_author())
+        self.article.body = self.browser.parser.tostring(body)

+    
--- a/weboob/backends/minutes20/pages/ledirect.py
+++ b/weboob/backends/minutes20/pages/ledirect.py
@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.parsers.lxmlparser import select
+from .minutes20 import Minutes20Page
+
+class LeDirectPage(Minutes20Page):
+    """Page handler for 20minutes.fr "ledirect" URLs."""
+
+    def set_body(self):
+        """Serialize the "div.mna-body" element into self.article.body."""
+        body_element = select(self.main_div, "div.mna-body", 1)
+        self.article.body = self.browser.parser.tostring(body_element)
--- a/weboob/backends/minutes20/pages/minutes20.py
+++ b/weboob/backends/minutes20/pages/minutes20.py
@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from weboob.tools.browser import BasePage
+from weboob.tools.parsers.lxmlparser import select 
+
+__all__ = ['Minutes20Page', 'Article']
+
+
+class Article(object):
+    """Plain container for the fields parsed from a page."""
+
+    def __init__(self):
+        # Text fields start as empty unicode strings, the others as None.
+        self.title = self.body = u''
+        self.author = self.date = None
+
+class Minutes20Page(BasePage):
+    """Base page for 20minutes.fr pages.
+
+    on_loaded() fills self.article with the title, author and body of the
+    loaded document; subclasses must implement set_body().
+    """
+
+    # Per-instance values are assigned in on_loaded(); None marks
+    # "not loaded yet".
+    main_div = None
+    element_body = None
+    article = None
+
+    def set_author(self):
+        """Store the stripped text of the author element as the author."""
+        self.article.author = self.get_element_author().text_content().strip()
+
+    def get_element_author(self):
+        """Return the "div.mna-signature" element selected under main_div."""
+        return select(self.main_div, "div.mna-signature", 1)
+
+    def set_body(self):
+        """Fill self.article.body; subclasses must override this."""
+        raise NotImplementedError
+
+    def on_loaded(self):
+        """Create a fresh Article and fill it from the loaded document."""
+        self.article = Article()
+        self.main_div = self.document.getroot()
+        self.article.title = select(self.main_div, "h1", 1).text_content()
+        self.set_author()
+        self.set_body()
+
+