move GenericBackend and GenericArticle modules into weboob.tools.capabilities.messages

2011-05-11 11:54:33 +02:00 · 2011-05-11 11:54:33 +02:00 · 72850a0a79
commit 72850a0a79
parent e0cb6d6dfe
18 changed files with 22 additions and 312 deletions
--- a/weboob/tools/capabilities/messages/GenericBackend.py
+++ b/weboob/tools/capabilities/messages/GenericBackend.py
@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+# python2.5 compatibility
+from __future__ import with_statement
+
+from weboob.capabilities.messages import ICapMessages, Message, Thread
+from weboob.tools.backend import BaseBackend
+from weboob.tools.newsfeed import Newsfeed
+
+class GenericNewspaperBackend(BaseBackend, ICapMessages):
+    """
+    Generic backend exposing the entries of an RSS feed as message threads.
+
+    RSS_FEED and RSSID default to None and are handed to Newsfeed by
+    iter_threads(); they are presumably meant to be overridden by the
+    concrete newspaper backends. Article content is obtained from
+    self.browser.get_content().
+    """
+    MAINTAINER = 'Julien Hebert'
+    EMAIL = 'juke@free.fr'
+    VERSION = '0.9'
+    LICENSE = 'AGPLv3+'
+    # Initial storage layout; the keys of 'seen' are ids of threads in
+    # which at least one message was marked as read.
+    STORAGE = {'seen': {}}
+    RSS_FEED = None
+    RSSID = None
+
+    def get_thread(self, _id):
+        """
+        Build the thread of one article.
+
+        _id is either an article id or an existing Thread object; in the
+        latter case that object is filled in place. The article becomes the
+        root message of the thread, with an empty list of children.
+
+        Returns the Thread.
+        """
+        if isinstance(_id, Thread):
+            thread = _id
+            _id = thread.id
+        else:
+            thread = None
+
+        # The browser is used as a context manager around the fetch
+        # (presumably a lock -- confirm against the browser class).
+        with self.browser:
+            content = self.browser.get_content(_id)
+
+        if not thread:
+            thread = Thread(_id)
+
+        flags = Message.IS_HTML
+        if thread.id not in self.storage.get('seen', default={}):
+            flags |= Message.IS_UNREAD
+        thread.title = content.title
+        # Keep a date that is already set on the thread (iter_threads()
+        # assigns one from the feed entry).
+        if not thread.date:
+            thread.date = content.date
+
+        thread.root = Message(
+            thread=thread,
+            id=0,
+            title=content.title,
+            sender=content.author,
+            receivers=None,
+            date=thread.date,
+            parent=None,
+            content=content.body,
+            signature='URL: %s' % content.url,
+            flags=flags,
+            children=[])
+        return thread
+
+    def iter_threads(self):
+        """
+        Yield one Thread per entry of the feed, with only the id, title
+        and date set.
+        """
+        # Read RSSID through self, like RSS_FEED, so that a value set on a
+        # subclass is honoured instead of always using this class' None.
+        feed = Newsfeed(self.RSS_FEED, self.RSSID)
+        for article in feed.iter_entries():
+            thread = Thread(article.id)
+            thread.title = article.title
+            thread.date = article.datetime
+            yield thread
+
+    def fill_thread(self, thread):
+        "Fill the given thread; delegates to get_thread()."
+        return self.get_thread(thread)
+
+    def iter_unread_messages(self, thread=None):
+        """
+        Yield every message carrying the IS_UNREAD flag.
+
+        The thread argument is accepted but not used: all the threads
+        returned by iter_threads() are filled and scanned.
+        """
+        for current in self.iter_threads():
+            self.fill_thread(current)
+            for msg in current.iter_all_messages():
+                if msg.flags & msg.IS_UNREAD:
+                    yield msg
+
+    def set_message_read(self, message):
+        """
+        Record message as read and save the storage.
+
+        The message id is appended to the list stored under
+        'seen' / <thread id> / 'comments'.
+        """
+        thread_id = message.thread.id
+        already_read = self.storage.get(
+            'seen',
+            thread_id,
+            'comments',
+            default=[])
+        self.storage.set(
+            'seen',
+            thread_id,
+            'comments',
+            already_read + [message.id])
+        self.storage.save()
--- a/weboob/tools/capabilities/messages/init.py
+++ b/weboob/tools/capabilities/messages/init.py
--- a/weboob/tools/capabilities/messages/genericArticle.py
+++ b/weboob/tools/capabilities/messages/genericArticle.py
@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Julien Hebert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.browser import BasePage
+from weboob.tools.browser import BrokenPageError
+from lxml.etree import Comment
+
+
+def try_remove(parser, base_element, selector):
+    """
+    Remove from base_element the element selected by selector, if possible.
+
+    Best effort: a BrokenPageError or a ValueError raised while selecting
+    or removing the element is silently ignored.
+    """
+    try:
+        target = parser.select(base_element, selector, 1)
+        base_element.remove(target)
+    except (BrokenPageError, ValueError):
+        pass
+
+
+def try_drop_tree(parser, base_element, selector):
+    """
+    Call drop_tree() on the element selected by selector under base_element.
+
+    Best effort: a BrokenPageError raised while selecting or dropping the
+    element is silently ignored.
+    """
+    try:
+        target = parser.select(base_element, selector, 1)
+        target.drop_tree()
+    except BrokenPageError:
+        pass
+
+def remove_from_selector_list(parser, base_element, selector_list):
+    """
+    Remove from base_element the element selected by each selector of
+    selector_list, in order.
+
+    Nothing is caught here: any error raised while selecting or removing
+    an element propagates to the caller.
+    """
+    for selector in selector_list:
+        target = parser.select(base_element, selector, 1)
+        base_element.remove(target)
+
+
+def try_remove_from_selector_list(parser, base_element, selector_list):
+    """
+    Apply try_remove() to base_element for each selector of selector_list,
+    in order.
+    """
+    for current_selector in selector_list:
+        try_remove(parser, base_element, current_selector)
+
+def drop_comments(base_element):
+    """
+    Call drop_tree() on every Comment node found under base_element.
+    """
+    # Snapshot the comment nodes before dropping any of them, so that the
+    # removals cannot disturb the traversal that finds them.
+    comments = list(base_element.getiterator(Comment))
+    for comment in comments:
+        comment.drop_tree()
+
+
+
+class NoAuthorElement(BrokenPageError):
+    "Raised by GenericNewsPage.get_element_author() when the author selection fails."
+
+class NoBodyElement(BrokenPageError):
+    "Raised by GenericNewsPage.get_element_body() when the body selection fails."
+
+class NoTitleException(BrokenPageError):
+    "Raised by GenericNewsPage.get_title() when no title can be found."
+
+class NoneMainDiv(AttributeError):
+    "Raised by GenericNewsPage element getters when main_div is None."
+
+class Article(object):
+    """
+    Plain container for the data of one article.
+
+    author and title have empty class-level defaults; the other fields are
+    initialised per instance.
+    """
+    author = u''
+    title = u''
+
+    def __init__(self, browser, _id):
+        "Store the browser and the article id; the other fields start empty."
+        self.id = _id
+        self.browser = browser
+        self.url = u''
+        self.body = u''
+        self.date = None
+
+class GenericNewsPage(BasePage):
+    """
+    Generic page of a news article.
+
+    main_div is the element under which the selectors are evaluated, and
+    the element_*_selector attributes are the selectors handed to
+    self.parser.select(). All of them default to a NotImplementedError
+    placeholder and are presumably set by the concrete pages.
+    """
+    __element_body = NotImplementedError
+    # Class providing the default author and title values.
+    __article = Article
+    element_title_selector = NotImplementedError
+    main_div = NotImplementedError
+    element_body_selector = NotImplementedError
+    element_author_selector = NotImplementedError
+
+    def get_body(self):
+        "Return the body element serialised with self.parser.tostring()."
+        return self.parser.tostring(self.get_element_body())
+
+    def get_author(self):
+        """
+        Return the stripped text of the author element.
+
+        When the element cannot be selected or main_div is None, the
+        default Article author is returned instead.
+        """
+        try:
+            return self.get_element_author().text_content().strip()
+        except (NoAuthorElement, NoneMainDiv):
+            #TODO: emit a warning
+            return self.__article.author
+
+    def get_title(self):
+        """
+        Return the stripped text of the title element.
+
+        The element is selected with element_title_selector; when that
+        selection raises BrokenPageError, "h1" is tried once as a fallback.
+        When main_div is None the default Article title is returned.
+
+        Raises NoTitleException when the fallback selection fails too.
+        """
+        try:
+            return self.parser.select(
+                self.main_div,
+                self.element_title_selector,
+                1).text_content().strip()
+        except AttributeError:
+            if self.main_div is None:
+                #TODO: emit a warning
+                return self.__article.title
+            raise
+        except BrokenPageError:
+            # Do the "h1" lookup right here rather than by calling
+            # get_title() again: a recursive call would catch its own
+            # BrokenPageError and recurse without end when there is no
+            # "h1" either, so NoTitleException would never be raised.
+            # The configured selector is left untouched.
+            try:
+                return self.parser.select(
+                    self.main_div,
+                    "h1",
+                    1).text_content().strip()
+            except BrokenPageError:
+                raise NoTitleException("no title on %s" % (self.browser))
+
+    def get_element_body(self):
+        """
+        Return the element selected by element_body_selector under main_div.
+
+        Raises NoBodyElement when the selection fails, and NoneMainDiv when
+        main_div is None.
+        """
+        try:
+            return self.parser.select(self.main_div, self.element_body_selector, 1)
+        except BrokenPageError:
+            raise NoBodyElement("no body on %s" % (self.browser))
+        except AttributeError:
+            if self.main_div is None:
+                raise NoneMainDiv("main_div is none on %s" % (self.browser))
+            raise
+
+    def get_element_author(self):
+        """
+        Return the element selected by element_author_selector under
+        main_div.
+
+        Raises NoAuthorElement when the selection fails, and NoneMainDiv
+        when main_div is None.
+        """
+        try:
+            return self.parser.select(self.main_div, self.element_author_selector, 1)
+        except BrokenPageError:
+            raise NoAuthorElement()
+        except AttributeError:
+            if self.main_div is None:
+                raise NoneMainDiv("main_div is none on %s" % (self.browser))
+            raise
+
+    def get_article(self, _id):
+        """
+        Build and return an Article for this page.
+
+        The author, title, url and body are filled in; the date is left to
+        its initial value.
+        """
+        article = Article(self.browser, _id)
+        article.author = self.get_author()
+        article.title = self.get_title()
+        article.url = self.url
+        article.body = self.get_body()
+
+        return article