diff --git a/weboob/applications/cleanboob/cleanboob.py b/weboob/applications/cleanboob/cleanboob.py index e74178a4..a1906324 100644 --- a/weboob/applications/cleanboob/cleanboob.py +++ b/weboob/applications/cleanboob/cleanboob.py @@ -18,7 +18,7 @@ import sys -from weboob.capabilities.content import ICapContent +from weboob.capabilities.messages import ICapMessages, Message from weboob.tools.application.repl import ReplApplication @@ -30,9 +30,9 @@ class CleanBoob(ReplApplication): VERSION = '0.1' COPYRIGHT = 'Copyright(C) 2011-2012 Julien Hébert' DESCRIPTION = "CleanBoob is a console application to extract article from website." - CAPS = ICapContent + CAPS = ICapMessages def main(self, argv): - for backend, content in self.do('get_content', argv[1]): + for backend, content in self.do('get_thread', argv[1]): self.format(content) return 0 diff --git a/weboob/backends/minutes20/backend.py b/weboob/backends/minutes20/backend.py index 8d8db1f5..3d32cd76 100644 --- a/weboob/backends/minutes20/backend.py +++ b/weboob/backends/minutes20/backend.py @@ -19,7 +19,7 @@ # python2.5 compatibility from __future__ import with_statement -from weboob.capabilities.content import ICapContent, Content +from weboob.capabilities.messages import ICapMessages, Message, Thread from weboob.tools.backend import BaseBackend from .browser import Newspaper20minutesBrowser @@ -28,7 +28,7 @@ from .browser import Newspaper20minutesBrowser __all__ = ['Newspaper20minutesBackend'] -class Newspaper20minutesBackend(BaseBackend, ICapContent): +class Newspaper20minutesBackend(BaseBackend, ICapMessages): NAME = 'minutes20' MAINTAINER = 'Julien Hebert' EMAIL = 'juke@free.fr' @@ -39,7 +39,7 @@ class Newspaper20minutesBackend(BaseBackend, ICapContent): # Value('password', label='Password', masked=True)) BROWSER = Newspaper20minutesBrowser - def get_content(self, url): + """def get_content(self, url): if isinstance(url, basestring): content = Content(url) else: @@ -58,3 +58,32 @@ class Newspaper20minutesBackend(BaseBackend, ICapContent): def push_content(self, content, message = None): raise NotImplementedError() + """ + + def get_thread(self, id): + if isinstance(id, Thread): + thread = id + id = thread.id + else: + thread = None + + with self.browser: + content = self.browser.get_content(id) + + if not thread: + thread = Thread(id) + + flags = Message.IS_HTML + if not thread.id in self.storage.get('seen', default={}): + flags |= Message.IS_UNREAD + + + thread.title = content.title + if not thread.date: + thread.date = content.date + + #thread.root = Message(thread=thread, id=0, title=content.title, sender=content.author, receivers=None, date=thread.date, parent=None, content=content.body, signature=None, children = [], flags=flags) + + thread.root = Message(thread=thread, id=0, title=content.title) + return thread + diff --git a/weboob/backends/minutes20/pages/article.py b/weboob/backends/minutes20/pages/article.py index d63a3cb6..53a75547 100644 --- a/weboob/backends/minutes20/pages/article.py +++ b/weboob/backends/minutes20/pages/article.py @@ -19,9 +19,27 @@ from weboob.tools.browser import BasePage from weboob.tools.parsers.lxmlparser import select +class Article(object): + def __init__(self): + self.title = u'' + self.body = u'' + self.author =None + self.date = None + class ArticlePage(BasePage): + def on_loaded(self): + self.article = None + self.set_article() + + def set_article(self): + self.article = Article() + #elp(self.get_title().encode('iso8859-1')) + self.article.title = self.get_title() + self.article.body = self.get_article() + + def get_title(self): - return select(self.document.getroot(), "h1", 1).text_content() + return self.browser.parser.tostring(select(self.document.getroot(), "h1", 1)) def get_article(self): main_div = self.document.getroot() @@ -32,6 +50,4 @@ class ArticlePage(BasePage): return txt_article.replace(txt_to_remove, '', 1).replace( txt_to_remove2, '', 1) def get_content(self): - title = self.get_title() - content = self.get_article() - return [title, content] + return self.article