New module liberation

This commit is contained in:
Florent 2013-10-30 14:20:42 +01:00
commit 3d2ec0afe0
7 changed files with 249 additions and 0 deletions

View file

@ -0,0 +1,24 @@
"NewspaperLibeBackend init"
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import NewspaperLibeBackend
__all__ = ['NewspaperLibeBackend']

View file

@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://liberation.fr"
from weboob.tools.newsfeed import Newsfeed
from weboob.capabilities.messages import ICapMessages, Thread
from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
from weboob.tools.backend import BackendConfig
from weboob.tools.value import Value
from .browser import NewspaperLibeBrowser
from .tools import rssid, url2id
class NewspaperLibeBackend(GenericNewspaperBackend, ICapMessages):
    """Messages backend for the liberation.fr newspaper website.

    The RSS feed to follow is chosen through the ``feed`` configuration
    value; every entry of that feed is exposed as a ``Thread``.
    """

    MAINTAINER = u'Florent Fourcot'
    EMAIL = 'weboob@flo.fourcot.fr'
    VERSION = '0.h'
    LICENSE = 'AGPLv3+'
    STORAGE = {'seen': {}}
    NAME = 'liberation'
    DESCRIPTION = u'Libération newspaper website'
    BROWSER = NewspaperLibeBrowser
    # Helpers from .tools, wrapped so they are not bound as methods.
    RSSID = staticmethod(rssid)
    URL2ID = staticmethod(url2id)
    RSSSIZE = 30
    # Keys are the numeric feed ids appended to the RSS base URL in
    # __init__; values are the labels offered to the user.
    CONFIG = BackendConfig(
        Value(
            'feed',
            label='RSS feed',
            choices={
                '9': u'A la une sur Libération',
                '10': u'Monde',
                '11': u'Politiques',
                '12': u'Société',
                '13': u'Économie',
                '14': u'Sports',
                '17': u'Labo: audio, vidéo, diapos, podcasts',
                '18': u'Rebonds',
                '44': u'Les chroniques de Libération',
                '53': u'Écrans',
                '54': u'Next',
                '58': u'Cinéma',
            },
        )
    )

    def __init__(self, *args, **kwargs):
        """Initialise the generic backend, then build the RSS feed URL.

        The URL is derived from the configured ``feed`` value and stored
        in ``self.RSS_FEED``.
        """
        GenericNewspaperBackend.__init__(self, *args, **kwargs)
        feed_id = self.config['feed'].get()
        self.RSS_FEED = "http://www.liberation.fr/rss/%s" % feed_id

    def iter_threads(self):
        """Yield one ``Thread`` per entry of the configured RSS feed.

        Each thread gets the id, title and datetime of its feed entry.
        """
        entries = Newsfeed(self.RSS_FEED, self.RSSID).iter_entries()
        for entry in entries:
            thread = Thread(entry.id)
            thread.title = entry.title
            thread.date = entry.datetime
            yield thread

View file

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser
class NewspaperLibeBrowser(BaseBrowser):
    """Browser for liberation.fr article pages (no authentication)."""

    # Any URL matching this pattern is handled by ArticlePage.
    PAGES = {"http://.*liberation.fr/.*": ArticlePage}

    def is_logged(self):
        """Always report the browser as not logged in."""
        return False

    def login(self):
        """Do nothing: this browser performs no login."""
        pass

    def fillobj(self, obj, fields):
        """Do nothing: no extra fields are filled on ``obj``."""
        pass

    def get_content(self, _id):
        """Open ``_id`` as a location and return the parsed article.

        The article is obtained from the loaded page's ``get_article``.
        """
        self.location(_id)
        page = self.page
        return page.get_article(_id)

View file

View file

@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BrokenPageError
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage,\
try_drop_tree, clean_relativ_urls, NoBodyElement, NoAuthorElement,\
NoneMainDiv
class ArticlePage(GenericNewsPage):
    """Article page of liberation.fr, configured through CSS selectors."""

    def on_loaded(self):
        """Set the root element and the title/author/body selectors."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "span.author"
        self.element_body_selector = "div.article-body"

    def get_body(self):
        """Return the article body as a string.

        Pages on feuilletons.blogs.liberation.fr use a different body
        selector. When no body element is found, the content of the
        page's ``description`` meta tag is returned instead.
        """
        if 'feuilletons.blogs.liberation.fr' in self.url:
            self.element_body_selector = "div.entry-content"
        try:
            body = self.get_element_body()
            return self.parser.tostring(body)
        except NoBodyElement:
            descriptions = self.document.xpath('//meta[@name="description"]')
            return descriptions[0].attrib['content']

    def get_title(self):
        """Return the page title with the ' - Libération' suffix removed."""
        return GenericNewsPage.get_title(self).replace(u' - Libération', '')

    def get_author(self):
        """Return the author name without a leading 'Par ', or None.

        None is returned when the author element or the main div is
        missing.
        """
        try:
            author = self.get_element_author().text_content().strip()
        except (NoAuthorElement, NoneMainDiv):
            # TODO: emit a warning
            return None
        prefix = 'Par '
        if author.startswith(prefix):
            return author.split(prefix, 1)[1]
        return author

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
__all__ = ['LiberationTest']
class LiberationTest(BackendTest):
    """Smoke test for the 'liberation' backend."""

    BACKEND = 'liberation'

    def test_new_messages(self):
        """Consume every unread message; the test only checks it runs."""
        unread = self.backend.iter_unread_messages()
        for _message in unread:
            pass

View file

@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2013 Florent Fourcot
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
def url2id(url):
    """Extract the article id embedded in ``url``.

    Everything from the first '0Dxtor' marker onwards is dropped, then
    the text following the last '0I' marker of what remains is returned.
    A missing marker leaves that side of the string untouched.
    """
    before_xtor = url.partition('0Dxtor')[0]
    return before_xtor.rpartition('0I')[2]
def rssid(entry):
    """Return the ``link`` attribute of ``entry``.

    Presumably ``entry`` is an RSS feed entry whose link serves as its
    identifier -- confirm against the caller.
    """
    link = entry.link
    return link