First implementation of « presseurop » module (europeans news aggregator).

This commit is contained in:
Florent 2012-02-17 17:24:09 +01:00 committed by Romain Bignon
commit e502bb2f69
7 changed files with 234 additions and 0 deletions

View file

@ -0,0 +1,24 @@
"NewspaperEcransBackend init"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import NewspaperPresseuropBackend
__all__ = ['NewspaperPresseuropBackend']

View file

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://www.presseurop.eu"
from weboob.capabilities.messages import ICapMessages, Thread
from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
from weboob.tools.backend import BackendConfig
from weboob.tools.value import Value
from .browser import NewspaperPresseuropBrowser
from .tools import rssid
from weboob.tools.newsfeed import Newsfeed
class NewspaperPresseuropBackend(GenericNewspaperBackend, ICapMessages):
MAINTAINER = 'Florent Fourcot'
EMAIL = 'weboob@flo.fourcot.fr'
VERSION = '0.a'
LICENSE = 'AGPLv3+'
STORAGE = {'seen': {}}
NAME = 'presseurop'
DESCRIPTION = u'Presseurop website'
BROWSER = NewspaperPresseuropBrowser
RSSID = rssid
CONFIG = BackendConfig(Value('lang', label='Lang of articles',
choices={'fr': 'fr', 'de': 'de', 'en': 'en', 'cs': 'cs', 'es' : 'es', 'it' : 'it', 'nl' : 'nl', 'pl' : 'pl', 'pt' : 'pt', 'ro' : 'ro'}, default='fr'))
def __init__(self, *args, **kwargs):
GenericNewspaperBackend.__init__(self, *args, **kwargs)
self.RSS_FEED = 'http://www.presseurop.eu/%s/rss.xml' % (self.config['lang'].get())
def iter_threads(self):
for article in Newsfeed(self.RSS_FEED, self.RSSID).iter_entries():
thread = Thread(article.link)
thread.title = article.title
thread.date = article.datetime
yield(thread)

View file

@ -0,0 +1,45 @@
"browser for ecrans website"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser
class NewspaperPresseuropBrowser(BaseBrowser):
"NewspaperPresseuropBrowser class"
PAGES = {
"http://www.presseurop.eu/.*": ArticlePage,
}
def is_logged(self):
return False
def login(self):
pass
def fillobj(self, obj, fields):
pass
def get_content(self, _id):
"return page article content"
print _id
self.location(_id)
return self.page.get_article(_id)

View file

View file

@ -0,0 +1,39 @@
"ArticlePage object for inrocks"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage
class ArticlePage(GenericNewsPage):
"ArticlePage object for presseurop"
def on_loaded(self):
self.main_div = self.document.getroot()
self.element_title_selector = "title"
self.element_author_selector = "div.content-author>a"
self.element_body_selector = "div.block"
def get_body(self):
element_body = self.get_element_body()
return self.parser.tostring(element_body)
def get_title(self):
title = GenericNewsPage.get_title(self)
title = title.split('|')[0]
return title

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
__all__ = ['PresseuropTest']
class PresseuropTest(BackendTest):
BACKEND = 'presseurop'
def test_new_messages(self):
for message in self.backend.iter_unread_messages():
pass

View file

@ -0,0 +1,41 @@
"tools for lefigaro backend"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
def id2url(_id):
"return an url from an id"
regexp2 = re.compile("(\w+).([0-9]+).(.*$)")
match = regexp2.match(_id)
if match:
return 'http://www.20minutes.fr/%s/%s/%s' % ( match.group(1),
match.group(2),
match.group(3))
else:
raise ValueError("id doesn't match")
def url2id(url):
"return an id from an url"
regexp = re.compile(".*/([0-9]+)-.*")
id = regexp.match(url).group(1)
return id
def rssid(self, entry):
return url2id(entry.link)