new backend

2011-02-10 23:47:17 +01:00 · 2011-02-10 23:47:17 +01:00 · 08252358eb
commit 08252358eb
parent 111a49e00e
6 changed files with 234 additions and 0 deletions
--- a/weboob/backends/inrocks/__init__.py
+++ b/weboob/backends/inrocks/__init__.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+from .backend import NewspaperInrocksBackend
+
+# Public API of the package; must name the class imported above.
+__all__ = ['NewspaperInrocksBackend']
--- a/weboob/backends/inrocks/backend.py
+++ b/weboob/backends/inrocks/backend.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+"backend for http://20minutes.fr"
+
+# python2.5 compatibility
+from __future__ import with_statement
+
+from weboob.capabilities.messages import ICapMessages, Message, Thread
+from weboob.tools.backend import BaseBackend
+
+from .browser import NewspaperInrocksBrowser
+from weboob.tools.newsfeed import Newsfeed
+from .tools import url2id
+
+__all__ = ['NewspaperInrocksBackend']
+
+
+
+
+class NewspaperInrocksBackend(BaseBackend, ICapMessages):
+    """Messages backend that exposes newspaper articles as threads.
+
+    Every article becomes a Thread whose root Message carries the article
+    body. Read state is kept in the backend storage under the 'seen' key.
+    """
+    # NOTE(review): the feed URL used below points at 20minutes.fr although
+    # the backend is named 'inrocks' -- confirm this is intended.
+    NAME = 'inrocks'
+    MAINTAINER = 'Julien Hebert'
+    EMAIL = 'juke@free.fr'
+    VERSION = '0.6'
+    LICENSE = 'GPLv3'
+    DESCRIPTION = u'Inrock French news website'
+    STORAGE = {'seen': {}}
+    BROWSER = NewspaperInrocksBrowser
+
+    def get_thread(self, _id):
+        """Fetch an article and return it as a filled Thread.
+
+        _id is either a thread id or an existing Thread object. When a
+        Thread is given, that same object is filled and returned; otherwise
+        a new Thread is created for the id.
+        """
+        thread = _id if isinstance(_id, Thread) else None
+        if thread is not None:
+            _id = thread.id
+
+        with self.browser:
+            content = self.browser.get_content(_id)
+
+        if not thread:
+            thread = Thread(_id)
+
+        # A thread is unread until its id shows up in the 'seen' storage.
+        already_seen = self.storage.get('seen', default={})
+        if thread.id not in already_seen:
+            flags = Message.IS_HTML | Message.IS_UNREAD
+        else:
+            flags = Message.IS_HTML
+
+        thread.title = content.title
+        # Keep a date that was already set (e.g. by iter_threads).
+        if not thread.date:
+            thread.date = content.date
+
+        root = Message(
+            thread=thread,
+            id=0,
+            title=content.title,
+            sender=content.author,
+            receivers=None,
+            date=thread.date,
+            parent=None,
+            content=content.body,
+            flags=flags,
+            children=[])
+        thread.root = root
+        return thread
+
+    def iter_threads(self):
+        """Yield one Thread (id, title and date only) per feed entry."""
+        feed = Newsfeed('http://www.20minutes.fr/rss/20minutes.xml', url2id)
+        for entry in feed.iter_entries():
+            thread = Thread(entry.id)
+            thread.title = entry.title
+            thread.date = entry.datetime
+            yield thread
+
+    def fill_thread(self, thread):
+        """Fill the given Thread by delegating to get_thread."""
+        return self.get_thread(thread)
+
+    def iter_unread_messages(self, thread=None):
+        """Yield every message flagged unread across all threads.
+
+        The ``thread`` argument is accepted but not used: all threads
+        returned by iter_threads are always scanned.
+        """
+        for current in self.iter_threads():
+            self.fill_thread(current)
+            for msg in current.iter_all_messages():
+                if msg.flags & msg.IS_UNREAD:
+                    yield msg
+
+    def set_message_read(self, message):
+        """Record the message id as seen for its thread and save storage."""
+        thread_id = message.thread.id
+        seen_ids = self.storage.get(
+            'seen', thread_id, 'comments', default=[])
+        self.storage.set(
+            'seen', thread_id, 'comments', seen_ids + [message.id])
+        self.storage.save()
--- a/weboob/backends/inrocks/browser.py
+++ b/weboob/backends/inrocks/browser.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+from .pages.article import ArticlePage
+from weboob.tools.browser import BaseBrowser
+from .tools import id2url
+__all__ = ['NewspaperInrocksBrowser']
+
+class NewspaperInrocksBrowser(BaseBrowser):
+    """Browser used by the backend to retrieve article pages."""
+
+    # URL pattern -> page class; presumably consumed by BaseBrowser to pick
+    # the parser for a visited location (verify against BaseBrowser).
+    PAGES = {'http://www.20minutes.fr/article/?.*': ArticlePage}
+
+    def is_logged(self):
+        """Always return False."""
+        return False
+
+    def get_content(self, _id):
+        """Open the URL built from _id and return the page's article."""
+        article_url = id2url(_id)
+        self.location(article_url)
+        return self.page.article
--- a/weboob/backends/inrocks/pages/__init__.py
+++ b/weboob/backends/inrocks/pages/__init__.py
--- a/weboob/backends/inrocks/pages/article.py
+++ b/weboob/backends/inrocks/pages/article.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+from weboob.tools.parsers.lxmlparser import select, SelectElementException
+from .minutes20 import Minutes20Page, NoAuthorElement
+
+class ArticlePage(Minutes20Page):
+    """Article page that strips non-content blocks from the body."""
+
+    def set_body(self):
+        """Select the body element, prune it and store it serialized.
+
+        The tools block, the comment-call block and the author element are
+        removed from the body when present; the result is written to
+        ``self.article.body``.
+        """
+        self.element_body = select(self.main_div, "div.mna-body", 1)
+
+        # This select() is not guarded: whatever it raises propagates.
+        tools_block = select(self.element_body, "div.mna-tools", 1)
+        try:
+            self.element_body.remove(tools_block)
+        except ValueError:
+            pass
+
+        # The comment-call block is optional, as is being a direct child.
+        try:
+            self.element_body.remove(
+                select(self.element_body, "div.mna-comment-call", 1))
+        except (SelectElementException, ValueError):
+            pass
+
+        # Same best-effort removal for the author element.
+        try:
+            self.element_body.remove(self.get_element_author())
+        except (NoAuthorElement, ValueError):
+            pass
+
+        self.article.body = self.browser.parser.tostring(self.element_body)
--- a/weboob/backends/inrocks/tools.py
+++ b/weboob/backends/inrocks/tools.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+
+import re
+
+
+# Article ids look like '<section>.<number>.<slug>'; the two separators are
+# literal dots, hence the escapes and the raw string.
+_ID_PATTERN = re.compile(r"(\w+)\.(\w+)\.(.*$)")
+
+
+def id2url(_id):
+    """Return the article URL corresponding to an article id.
+
+    _id is a string of the form '<section>.<number>.<slug>'; the slug may
+    itself contain dots.
+
+    Raises ValueError when _id does not have that form.
+    """
+    match = _ID_PATTERN.match(_id)
+    if match is None:
+        raise ValueError('not a valid article id: %r' % (_id,))
+    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()
+
+
+# Dots in the host name are escaped so that only the real site matches.
+_URL_PATTERN = re.compile(
+    r"http://www\.20minutes\.fr/(\w+)/([0-9]+)/(.*$)")
+
+
+def url2id(url):
+    """Return the article id '<section>.<number>.<slug>' for an article URL.
+
+    The numeric part goes through int(), so leading zeros are dropped.
+
+    Raises ValueError when url is not a recognised article URL.
+    """
+    match = _URL_PATTERN.match(url)
+    if match is None:
+        raise ValueError('not a recognised article URL: %r' % (url,))
+    section, number, slug = match.groups()
+    return '%s.%d.%s' % (section, int(number), slug)