new backend ecrans (closes #591)

This commit is contained in:
Juke 2011-03-25 23:31:43 +01:00
commit 4515541639
11 changed files with 364 additions and 1 deletions

View file

@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# python2.5 compatibility
from __future__ import with_statement
from weboob.capabilities.messages import ICapMessages, Message, Thread
from weboob.tools.backend import BaseBackend
from weboob.tools.newsfeed import Newsfeed
from .tools import rssid
class GenericNewspaperBackend(BaseBackend, ICapMessages):
    """Generic backend exposing a newspaper RSS feed as message threads.

    Every entry of the feed at RSS_FEED becomes a Thread; filling a thread
    fetches the article through ``self.browser`` and stores it as the
    thread's single root Message. Concrete backends are expected to
    override RSS_FEED (it is None here).
    """
    MAINTAINER = 'Julien Hebert'
    EMAIL = 'juke@free.fr'
    VERSION = '0.7'
    LICENSE = 'GPLv3'
    # Initial persistent state; 'seen' is keyed by thread id (see
    # set_message_read) and consulted by get_thread to flag unread articles.
    STORAGE = {'seen': {}}
    # URL handed to Newsfeed in iter_threads.
    RSS_FEED = None

    def get_thread(self, _id):
        """Fetch one article and return it as a Thread.

        _id is either an existing Thread, which is filled in place, or an
        article id that is passed unchanged to ``browser.get_content``.
        The returned thread has its title, date (only if not already set)
        and root message populated from the fetched content.
        """
        if isinstance(_id, Thread):
            thread = _id
            _id = thread.id
        else:
            thread = None

        with self.browser:
            content = self.browser.get_content(_id)

        if not thread:
            thread = Thread(_id)

        # An article is unread until its thread id appears under 'seen'.
        flags = Message.IS_HTML
        if thread.id not in self.storage.get('seen', default={}):
            flags |= Message.IS_UNREAD

        thread.title = content.title
        if not thread.date:
            # Keep a date that was already set (e.g. by iter_threads).
            thread.date = content.date

        thread.root = Message(
            thread=thread,
            id=0,
            title=content.title,
            sender=content.author,
            receivers=None,
            date=thread.date,
            parent=None,
            content=content.body,
            signature='URL: %s' % content.url,
            flags=flags,
            children=[])
        return thread

    def iter_threads(self):
        """Yield one Thread per feed entry, with id, title and date set.

        The threads carry no root message; use fill_thread/get_thread to
        fetch the article itself.
        """
        for article in Newsfeed(self.RSS_FEED, rssid).iter_entries():
            thread = Thread(article.id)
            thread.title = article.title
            thread.date = article.datetime
            yield thread

    def fill_thread(self, thread):
        """Fill the given thread by delegating to get_thread; return it."""
        return self.get_thread(thread)

    def iter_unread_messages(self, thread=None):
        """Yield every message flagged IS_UNREAD across all feed threads.

        The ``thread`` argument is accepted but not used: every thread
        returned by iter_threads is fetched and inspected.
        """
        for feed_thread in self.iter_threads():
            self.fill_thread(feed_thread)
            for msg in feed_thread.iter_all_messages():
                if msg.flags & msg.IS_UNREAD:
                    yield msg

    def set_message_read(self, message):
        """Record the message as read in persistent storage.

        The message id is stored under 'seen' / <thread id> / 'comments'.
        A message that is already recorded is left alone, so repeated
        calls neither grow the list with duplicates nor rewrite storage.
        """
        thread_id = message.thread.id
        seen_ids = self.storage.get('seen', thread_id, 'comments', default=[])
        if message.id in seen_ids:
            return
        self.storage.set('seen', thread_id, 'comments',
                         seen_ids + [message.id])
        self.storage.save()

View file

@ -0,0 +1,20 @@
"NewspaperEcransBackend init"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from .backend import NewspaperEcransBackend
# Public API of the package: only the backend class is exported.
__all__ = ['NewspaperEcransBackend']

View file

@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"backend for http://www.ecrans.fr"
# python2.5 compatibility
from __future__ import with_statement
from weboob.capabilities.messages import ICapMessages
from .browser import NewspaperEcransBrowser
from .GenericBackend import GenericNewspaperBackend
class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
    """Backend for the Ecrans news website.

    All behaviour comes from GenericNewspaperBackend; this class only
    supplies the backend identity, the browser class and the feed URL.
    """
    # Backend identity.
    NAME = 'ecrans'
    DESCRIPTION = u'Ecrans French news website'
    VERSION = '0.7'
    LICENSE = 'GPLv3'
    MAINTAINER = 'Julien Hebert'
    EMAIL = 'juke@free.fr'

    # Site-specific wiring: browser class and the RSS feed to read.
    BROWSER = NewspaperEcransBrowser
    RSS_FEED = 'http://www.ecrans.fr/spip.php?page=backend'

    # Initial persistent state (same value as the generic backend).
    STORAGE = {'seen': {}}

View file

@ -0,0 +1,42 @@
"browser for ecrans website"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser
class NewspaperEcransBrowser(BaseBrowser):
    """Browser for www.ecrans.fr; every URL of the site is an ArticlePage."""
    PAGES = {"http://www.ecrans.fr/.*": ArticlePage}

    def is_logged(self):
        """Report that no session is logged in (always False)."""
        return False

    def login(self):
        """Do nothing."""

    def fillobj(self, obj, fields):
        """Do nothing; both arguments are ignored."""

    def get_content(self, _id):
        """Open ``_id`` as a location and return the article of that page.

        The same ``_id`` is passed to the page's ``get_article``.
        """
        self.location(_id)
        current_page = self.page
        return current_page.get_article(_id)

View file

View file

@ -0,0 +1,35 @@
"ArticlePage object for inrocks"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.genericArticle import GenericNewsPage, remove_from_selector_list, try_remove_from_selector_list, try_drop_tree
class ArticlePage(GenericNewsPage):
    """Article page described by CSS selectors for title, author and body.

    NOTE(review): the original docstring said "for inrocks", but this
    class is what the ecrans browser maps its URLs to -- confirm.
    """

    def on_loaded(self):
        """Set the document root and the selectors used to find the article."""
        self.main_div = self.document.getroot()
        self.element_body_selector = "div.bloc_article_01"
        self.element_author_selector = "p.auteur>a"
        self.element_title_selector = "h3"

    def get_body(self):
        """Return the article body serialized by the browser's parser.

        Title, author and heading elements, tag/related blocks and scripts
        are removed from the body element before serialization.
        """
        body = self.get_element_body()
        # "h4" is listed twice on purpose in the original; presumably two
        # distinct h4 elements are removed -- TODO confirm.
        selectors = [self.element_title_selector, "p.auteur", "h4", "h4"]
        remove_from_selector_list(body, selectors)
        # The try_* helpers presumably tolerate missing elements -- verify
        # against weboob.tools.genericArticle.
        optional_selectors = ["p.tag", "div.alire"]
        try_remove_from_selector_list(body, optional_selectors)
        try_drop_tree(body, "script")
        return self.browser.parser.tostring(body)

View file

@ -0,0 +1,33 @@
"ArticlePage object for inrocks"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.genericArticle import GenericNewsPage
class FlashActuPage(GenericNewsPage):
    """News page whose body is the element matched by the "h2" selector."""

    def on_loaded(self):
        """Set the document root and the title/author/body selectors."""
        self.main_div = self.document.getroot()
        self.element_body_selector = "h2"
        self.element_author_selector = "div.name>span"
        self.element_title_selector = "h1"

    def get_body(self):
        """Return the body element, retagged as a div, as a string."""
        body = self.get_element_body()
        # The selected element is renamed to <div> before serialization.
        body.tag = "div"
        parser = self.browser.parser
        return parser.tostring(body)

View file

@ -0,0 +1,27 @@
"ArticlePage object for minutes20"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.genericArticle import GenericNewsPage
class SimplePage(GenericNewsPage):
    """News page that only customizes the author and body selectors."""

    def on_loaded(self):
        """Set the document root plus the author and body selectors.

        No title selector is set here; whatever GenericNewsPage provides
        is used.
        """
        self.main_div = self.document.getroot()
        self.element_body_selector = "#article"
        self.element_author_selector = "div.mna-signature"

View file

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.test import BackendTest
# Only the test case class is exported from this module.
__all__ = ['EcransTest']
class EcransTest(BackendTest):
    """Smoke test for the 'ecrans' backend."""
    BACKEND = 'ecrans'

    def test_new_messages(self):
        """Consume every unread message; passes if nothing raises."""
        unread = self.backend.iter_unread_messages()
        for _message in unread:
            pass

View file

@ -0,0 +1,37 @@
"tools for lefigaro backend"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import re
def id2url(_id):
    """Build a 20minutes.fr article URL from an id.

    The id must consist of a word, a run of digits and a free-form tail,
    each separated by exactly one arbitrary character. The three parts
    become the three path segments of the returned URL.

    Raises ValueError when the id does not have that shape.

    NOTE(review): the URL targets www.20minutes.fr although this module
    ships with the ecrans backend -- looks like a leftover; confirm.
    """
    # Raw string: same pattern text as before, but "\w" is no longer an
    # invalid string escape (which modern Python warns about).
    match = re.match(r"(\w+).([0-9]+).(.*$)", _id)
    if not match:
        raise ValueError("id doesn't match")
    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()
def url2id(url):
    """Map an article URL to its id.

    The URL itself is used as the id, so the argument is returned as is.
    """
    return url
def rssid(entry):
    """Return the id for a feed entry: ``entry.id`` passed through url2id."""
    entry_id = entry.id
    return url2id(entry_id)

View file

@ -101,7 +101,11 @@ class GenericNewsPage(BasePage):
else:
raise
except SelectElementException:
raise NoTitleException("no title on %s" % (self.browser))
try :
self.element_title_selector = "h1"
return self.get_title()
except SelectElementException:
raise NoTitleException("no title on %s" % (self.browser))
def get_element_body(self):
try :