From 642cfbb41c405bcee19ebeba24c5ff2dbd0b373f Mon Sep 17 00:00:00 2001 From: Florent Date: Thu, 15 Mar 2012 15:18:05 +0100 Subject: [PATCH] First implementation of taz.de module --- modules/taz/__init__.py | 24 +++++++++++++++++ modules/taz/backend.py | 39 +++++++++++++++++++++++++++ modules/taz/browser.py | 43 +++++++++++++++++++++++++++++ modules/taz/pages/__init__.py | 0 modules/taz/pages/article.py | 51 +++++++++++++++++++++++++++++++++++ modules/taz/test.py | 32 ++++++++++++++++++++++ modules/taz/tools.py | 32 ++++++++++++++++++++++ 7 files changed, 221 insertions(+) create mode 100644 modules/taz/__init__.py create mode 100644 modules/taz/backend.py create mode 100644 modules/taz/browser.py create mode 100644 modules/taz/pages/__init__.py create mode 100644 modules/taz/pages/article.py create mode 100644 modules/taz/test.py create mode 100644 modules/taz/tools.py diff --git a/modules/taz/__init__.py b/modules/taz/__init__.py new file mode 100644 index 00000000..e2141f51 --- /dev/null +++ b/modules/taz/__init__.py @@ -0,0 +1,24 @@ +"NewspaperTazBackend init" +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Florent Fourcot +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
"backend for http://www.taz.de"

from weboob.capabilities.messages import ICapMessages
from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBackend
from .browser import NewspaperTazBrowser
from .tools import rssid, url2id


class NewspaperTazBackend(GenericNewspaperBackend, ICapMessages):
    """Newspaper backend for the taz.de website.

    The class only declares configuration constants; all behaviour comes
    from the ``GenericNewspaperBackend`` and ``ICapMessages`` bases.
    """

    # --- module identity ---------------------------------------------------
    NAME = 'taz'
    DESCRIPTION = u'Taz newspaper website'
    VERSION = '0.c'
    LICENSE = 'AGPLv3+'
    MAINTAINER = 'Florent Fourcot'
    EMAIL = 'weboob@flo.fourcot.fr'

    # --- storage and browser -----------------------------------------------
    # Initial storage content: a single, empty 'seen' mapping.
    STORAGE = {'seen': {}}
    BROWSER = NewspaperTazBrowser

    # --- RSS configuration -------------------------------------------------
    # NOTE(review): presumably read by GenericNewspaperBackend -- confirm
    # there how RSS_FEED and RSSSIZE are used.
    RSS_FEED = "http://www.taz.de/!p3270;rss/"
    RSSSIZE = 30

    # Wrapped in staticmethod so that looking them up through an instance
    # yields the plain function rather than a bound method.
    RSSID = staticmethod(rssid)
    URL2ID = staticmethod(url2id)
from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser


class NewspaperTazBrowser(BaseBrowser):
    """Browser for the taz.de website.

    ``PAGES`` associates the URL pattern ``http://www.taz.de/.*`` with
    ``ArticlePage``. The login-related hooks are no-ops.
    """

    PAGES = {"http://www.taz.de/.*": ArticlePage}

    def is_logged(self):
        """Always return False."""
        return False

    def login(self):
        """Do nothing and return None."""
        return None

    def fillobj(self, obj, fields):
        """Do nothing and return None; ``obj`` and ``fields`` are ignored."""
        return None

    def get_content(self, _id):
        """Open ``_id`` and return the article built by the current page.

        ``_id`` is handed first to ``self.location`` and then to the
        ``get_article`` method of the page that is current afterwards.
        """
        self.location(_id)
        current_page = self.page
        return current_page.get_article(_id)
# --- modules/taz/pages/article.py ---------------------------------------

from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage


class ArticlePage(GenericNewsPage):
    """Article page of the taz.de website.

    Body and author extraction are overridden with XPath lookups on the
    loaded document; the title is delegated to ``GenericNewsPage``.
    """

    # Leading byline word removed from the author text.
    # NOTE(review): "von" looks like the German byline prefix ("by") --
    # confirm against the live markup.
    AUTHOR_PREFIX = 'von '

    def on_loaded(self):
        """Set the root element and the CSS selectors on the page object.

        NOTE(review): these attributes are presumably read by
        ``GenericNewsPage`` -- confirm there.
        """
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = ".content-author>a"

    def get_body(self):
        """Return the article body as one HTML string.

        Every ``<p class="article">`` element that has leading text is
        serialized with the browser's parser and wrapped; elements without
        leading text are skipped. Returns "" when nothing matches.
        """
        paragraphs = self.document.getroot().xpath('//p[@class="article"]')
        serialize = self.browser.parser.tostring
        # NOTE(review): the two wrapper literals were garbled in the patch
        # (split across lines, markup apparently stripped). They are
        # reconstructed here as paragraph tags -- confirm against upstream.
        return "".join(
            "<p>" + serialize(paragraph) + "</p>"
            for paragraph in paragraphs
            if paragraph.text
        )

    def get_title(self):
        """Return the title computed by ``GenericNewsPage.get_title``."""
        return GenericNewsPage.get_title(self)

    def get_author(self):
        """Return the author name, or "" when no author span is present.

        The text of the first ``<span class="author">`` is used. Only a
        leading ``AUTHOR_PREFIX`` is removed, so names that merely contain
        "von " (e.g. "Devon Smith") are left intact.
        """
        spans = self.document.getroot().xpath('//span[@class="author"]')
        if not spans:
            return ""
        # .text is None when the span has no text before its first child.
        text = spans[0].text or ""
        # Tolerate whitespace in front of the prefix.
        stripped = text.lstrip()
        if stripped.startswith(self.AUTHOR_PREFIX):
            return stripped[len(self.AUTHOR_PREFIX):]
        return text


# --- modules/taz/test.py --------------------------------------------------

from weboob.tools.test import BackendTest


__all__ = ['TazTest']


class TazTest(BackendTest):
    """Smoke test for the 'taz' backend."""

    BACKEND = 'taz'

    def test_new_messages(self):
        """Consume every unread message; passes if nothing raises."""
        for _message in self.backend.iter_unread_messages():
            pass
import re


# Matches ".../!<digits>/" and captures the digits. Compiled once at import
# time instead of on every call.
_ARTICLE_ID_RE = re.compile(r".*/!([0-9]+)/")


def url2id(url):
    """Return the numeric article id embedded in a taz.de URL.

    The id is the run of digits between "/!" and the following "/". The
    leading ``.*`` is greedy, so if several such groups exist the last
    one is returned.

    Raises ValueError if ``url`` contains no such group (previously this
    surfaced as an AttributeError on ``None``).
    """
    match = _ARTICLE_ID_RE.match(url)
    if match is None:
        raise ValueError("no article id found in url %r" % (url,))
    return match.group(1)


def rssid(entry):
    """Return the article id for an RSS entry, taken from ``entry.link``."""
    return url2id(entry.link)