try to move to ICapMessages

This commit is contained in:
juke 2011-02-02 17:49:31 +01:00 committed by Romain Bignon
commit 717a579008
3 changed files with 55 additions and 10 deletions

View file

@ -18,7 +18,7 @@
import sys
from weboob.capabilities.content import ICapContent
from weboob.capabilities.messages import ICapMessages, Message
from weboob.tools.application.repl import ReplApplication
@ -30,9 +30,9 @@ class CleanBoob(ReplApplication):
# Application metadata for CleanBoob (the class named in the hunk header above).
VERSION = '0.1'
COPYRIGHT = 'Copyright(C) 2011-2012 Julien Hébert'
DESCRIPTION = "CleanBoob is a console application to extract article from website."
# NOTE(review): CAPS is assigned twice because this text is a rendered diff
# whose +/- markers are missing. If both lines ran, the later assignment
# (ICapMessages) would be the one in effect; presumably ICapContent is the
# removed side -- confirm against the repository.
CAPS = ICapContent
CAPS = ICapMessages
# Entry point: passes argv[1] to a backend call through self.do(), hands every
# returned `content` to self.format(), and returns 0.
# argv[1] is read without a length check, so a missing argument raises
# IndexError.
def main(self, argv):
# NOTE(review): the two `for` headers below are the two sides of a rendered
# diff (markers missing): one calls 'get_content', the other 'get_thread'.
# Presumably only the 'get_thread' line remains after the commit -- confirm.
for backend, content in self.do('get_content', argv[1]):
for backend, content in self.do('get_thread', argv[1]):
self.format(content)
return 0

View file

@ -19,7 +19,7 @@
# python2.5 compatibility
from __future__ import with_statement
from weboob.capabilities.content import ICapContent, Content
from weboob.capabilities.messages import ICapMessages, Message, Thread
from weboob.tools.backend import BaseBackend
from .browser import Newspaper20minutesBrowser
@ -28,7 +28,7 @@ from .browser import Newspaper20minutesBrowser
__all__ = ['Newspaper20minutesBackend']
class Newspaper20minutesBackend(BaseBackend, ICapContent):
class Newspaper20minutesBackend(BaseBackend, ICapMessages):
NAME = 'minutes20'
MAINTAINER = 'Julien Hebert'
EMAIL = 'juke@free.fr'
@ -39,7 +39,7 @@ class Newspaper20minutesBackend(BaseBackend, ICapContent):
# Value('password', label='Password', masked=True))
BROWSER = Newspaper20minutesBrowser
def get_content(self, url):
"""def get_content(self, url):
if isinstance(url, basestring):
content = Content(url)
else:
@ -58,3 +58,32 @@ class Newspaper20minutesBackend(BaseBackend, ICapContent):
def push_content(self, content, message = None):
raise NotImplementedError()
"""
def get_thread(self, id):
    """Fetch one article through the browser and expose it as a Thread.

    ``id`` is either an existing ``Thread`` or an article identifier.
    When a ``Thread`` is given it is updated in place (title, date if not
    already set, root message) and returned; otherwise a new ``Thread`` is
    created for the identifier.

    Returns the ``Thread`` whose ``root`` is a ``Message`` carrying the
    article title.
    """
    if isinstance(id, Thread):
        thread = id
        id = thread.id
    else:
        thread = None

    # The browser is used as a context manager around the fetch.
    with self.browser:
        content = self.browser.get_content(id)

    if thread is None:
        thread = Thread(id)

    # An article not recorded under the 'seen' storage key is flagged unread.
    flags = Message.IS_HTML
    if thread.id not in self.storage.get('seen', default={}):
        flags |= Message.IS_UNREAD

    thread.title = content.title
    # Keep a date that is already set on the thread.
    if not thread.date:
        thread.date = content.date

    # The flags computed above were previously discarded; pass them on so
    # the root message is actually marked HTML/unread.
    # TODO: also fill sender (content.author), date, content (content.body)
    # and children once the page parser provides them.
    thread.root = Message(thread=thread, id=0, title=content.title,
                          flags=flags)
    return thread

View file

@ -19,9 +19,27 @@
from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select
class Article(object):
    """Plain holder for an article's title, body, author and date."""

    def __init__(self):
        # Text fields start out as empty unicode strings.
        self.title = self.body = u''
        # Author and date stay None until something assigns them.
        self.author = self.date = None
class ArticlePage(BasePage):
def on_loaded(self):
    """Clear ``self.article``, then call ``set_article()``."""
    self.article = None
    self.set_article()
def set_article(self):
    """Create a fresh Article on ``self.article`` and fill title and body.

    The Article is attached to the page before the getters run, so it is
    already present on ``self`` while ``get_title()`` and ``get_article()``
    execute.
    """
    article = self.article = Article()
    article.title = self.get_title()
    article.body = self.get_article()
# Return the page title taken from the "h1" selected under the document root.
# The third argument to select() (1) is presumably the expected number of
# matches -- verify against weboob.tools.parsers.lxmlparser.select.
# NOTE(review): the two return lines are the two sides of a rendered diff
# (markers missing). As written, only the first can execute. One returns the
# element's text content, the other the element serialized through
# self.browser.parser.tostring(); which one survives the commit must be
# confirmed against the repository.
def get_title(self):
return select(self.document.getroot(), "h1", 1).text_content()
return self.browser.parser.tostring(select(self.document.getroot(), "h1", 1))
def get_article(self):
main_div = self.document.getroot()
@ -32,6 +50,4 @@ class ArticlePage(BasePage):
return txt_article.replace(txt_to_remove, '', 1).replace( txt_to_remove2, '', 1)
# Return what was extracted from this page.
# NOTE(review): this span interleaves both sides of a rendered diff (markers
# missing): the three lines that build and return [title, content], and the
# single line returning self.article. As written, the list return runs first
# and `return self.article` is never reached. Presumably the commit replaces
# the list with self.article -- confirm against the repository.
def get_content(self):
title = self.get_title()
content = self.get_article()
return [title, content]
return self.article