First implementation of taz.de module

This commit is contained in:
Florent 2012-03-15 15:18:05 +01:00 committed by Romain Bignon
commit 642cfbb41c
7 changed files with 221 additions and 0 deletions

24
modules/taz/__init__.py Normal file
View file

@ -0,0 +1,24 @@
"NewspaperTazBackend init"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import NewspaperTazBackend
__all__ = ['NewspaperTazBackend']

39
modules/taz/backend.py Normal file
View file

@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
"backend for http://www.taz.de"
from weboob.capabilities.messages import ICapMessages
from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
from .browser import NewspaperTazBrowser
from .tools import rssid, url2id
class NewspaperTazBackend(GenericNewspaperBackend, ICapMessages):
    """Backend for the taz newspaper website (http://www.taz.de).

    The class body is purely declarative: it defines no methods and only
    sets the constants below, which are presumably consumed by
    GenericNewspaperBackend (confirm against that class).
    """

    # Backend identity.
    NAME = 'taz'
    DESCRIPTION = u'Taz newspaper website'
    VERSION = '0.c'
    LICENSE = 'AGPLv3+'

    # Maintainer contact details.
    MAINTAINER = 'Florent Fourcot'
    EMAIL = 'weboob@flo.fourcot.fr'

    # Initial storage layout: a single, empty 'seen' mapping.
    STORAGE = {'seen': {}}

    # Browser class associated with this backend.
    BROWSER = NewspaperTazBrowser

    # RSS feed address and the size setting that goes with it.
    RSS_FEED = "http://www.taz.de/!p3270;rss/"
    RSSSIZE = 30

    # staticmethod keeps these plain functions from being bound as methods
    # (and receiving ``self``) when they are looked up on an instance.
    RSSID = staticmethod(rssid)
    URL2ID = staticmethod(url2id)

43
modules/taz/browser.py Normal file
View file

@ -0,0 +1,43 @@
"browser for taz website"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser
class NewspaperTazBrowser(BaseBrowser):
    """Browser for the taz website.

    Every URL below http://www.taz.de/ is associated with ArticlePage.
    ``is_logged`` always answers False and ``login`` is a no-op.
    """

    PAGES = {"http://www.taz.de/.*": ArticlePage}

    def is_logged(self):
        """Return False unconditionally."""
        return False

    def login(self):
        """Do nothing and return None."""

    def fillobj(self, obj, fields):
        """Do nothing and return None; both arguments are ignored."""

    def get_content(self, _id):
        """Open ``_id`` and return the article read from the loaded page.

        ``_id`` is handed to ``self.location`` and then to the page's
        ``get_article``.
        """
        self.location(_id)
        # self.page is read only after location() has been called.
        loaded_page = self.page
        return loaded_page.get_article(_id)

View file

View file

@ -0,0 +1,51 @@
"ArticlePage object for Taz newspaper"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage
class ArticlePage(GenericNewsPage):
    """Article page of the taz website.

    Overrides body, title and author extraction of GenericNewsPage with
    taz-specific lookups in the parsed document.
    """

    # Prefix removed from the start of the author text.
    _AUTHOR_PREFIX = 'von '

    def on_loaded(self):
        """Store the document root and the title/author selectors on self."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = ".content-author>a"

    def get_body(self):
        """Return the article body as one HTML string.

        Every ``<p class="article">`` element that has non-empty leading
        text is serialised with the browser's parser and wrapped in
        ``<p>...</p>``. Elements without leading text are skipped. An
        empty string is returned when nothing qualifies.
        """
        paragraphs = self.document.getroot().xpath('//p[@class="article"]')
        to_html = self.browser.parser.tostring
        pieces = []
        for paragraph in paragraphs:
            if paragraph.text:
                pieces.extend(("<p>", to_html(paragraph), "</p>"))
        # Single join instead of repeated string concatenation.
        return "".join(pieces)

    def get_title(self):
        """Return the title computed by GenericNewsPage.get_title."""
        return GenericNewsPage.get_title(self)

    def get_author(self):
        """Return the author name, or "" when none is available.

        The text of the first ``<span class="author">`` is used. A leading
        'von ' (ignoring whitespace before it) is removed; any later
        'von ' is kept, so a name such as "Max von Sydow" stays intact.
        """
        authors = self.document.getroot().xpath('//span[@class="author"]')
        if not authors:
            return ""
        # lxml gives None when the element has no leading text.
        text = authors[0].text or ""
        trimmed = text.lstrip()
        if trimmed.startswith(self._AUTHOR_PREFIX):
            return trimmed[len(self._AUTHOR_PREFIX):]
        return text

32
modules/taz/test.py Normal file
View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
__all__ = ['TazTest']
class TazTest(BackendTest):
    """Test case for the 'taz' backend."""

    BACKEND = 'taz'

    def test_new_messages(self):
        """Consume every unread message.

        Nothing is asserted about the messages themselves; the test
        succeeds when the iteration finishes without raising.
        """
        unread = self.backend.iter_unread_messages()
        for _ in unread:
            continue

32
modules/taz/tools.py Normal file
View file

@ -0,0 +1,32 @@
"tools for taz module"
# -*- coding: utf-8 -*-
# Copyright(C) 2012 Florent Fourcot
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import re
def url2id(url):
    """Return the numeric id embedded in an url, as a string.

    The id is the run of digits between ``/!`` and the following ``/``.
    Because the leading ``.*`` is greedy, the last such segment wins when
    the url contains several.

    Raises AttributeError when the url has no such segment, since the
    failed match is None.
    """
    found = re.match(".*/!([0-9]+)/", url)
    return found.group(1)
def rssid(entry):
    "return the id of an rss entry, computed from its link attribute"
    link = entry.link
    return url2id(link)