From 85c7cf3093c64c637b4784293069e2f862a417c7 Mon Sep 17 00:00:00 2001 From: Juke Date: Wed, 9 Feb 2011 01:50:46 +0100 Subject: [PATCH] use id2url closes #480 --- weboob/backends/minutes20/backend.py | 21 ++++++++------ weboob/backends/minutes20/browser.py | 7 ++--- weboob/backends/minutes20/pages/minutes20.py | 17 ++++++++---- weboob/backends/minutes20/tools.py | 29 ++++++++++++++++++++ 4 files changed, 55 insertions(+), 19 deletions(-) create mode 100644 weboob/backends/minutes20/tools.py diff --git a/weboob/backends/minutes20/backend.py b/weboob/backends/minutes20/backend.py index 2bb0d6f2..f839245a 100644 --- a/weboob/backends/minutes20/backend.py +++ b/weboob/backends/minutes20/backend.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - +"backend for http://20minutes.fr" # python2.5 compatibility from __future__ import with_statement @@ -24,11 +24,13 @@ from weboob.tools.backend import BaseBackend from .browser import Newspaper20minutesBrowser from weboob.tools.newsfeed import Newsfeed - +from .tools import url2id __all__ = ['Newspaper20minutesBackend'] + + class Newspaper20minutesBackend(BaseBackend, ICapMessages): NAME = 'minutes20' MAINTAINER = 'Julien Hebert' @@ -40,18 +42,18 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages): # Value('password', label='Password', masked=True)) BROWSER = Newspaper20minutesBrowser - def get_thread(self, id): - if isinstance(id, Thread): - thread = id - id = thread.id + def get_thread(self, _id): + if isinstance(_id, Thread): + thread = _id + _id = thread.id else: thread = None with self.browser: - content = self.browser.get_content(id) + content = self.browser.get_content(_id) if not thread: - thread = Thread(id) + thread = Thread(_id) flags = Message.IS_HTML @@ -75,7 +77,8 @@ class Newspaper20minutesBackend(BaseBackend, ICapMessages): 
return thread def iter_threads(self): - for article in Newsfeed('http://www.20minutes.fr/rss/20minutes.xml').iter_entries(): + for article in Newsfeed('http://www.20minutes.fr/rss/20minutes.xml', + url2id).iter_entries(): thread = Thread(article.id) thread.title = article.title thread.date = article.datetime diff --git a/weboob/backends/minutes20/browser.py b/weboob/backends/minutes20/browser.py index a2eb20d4..006af2f5 100644 --- a/weboob/backends/minutes20/browser.py +++ b/weboob/backends/minutes20/browser.py @@ -18,10 +18,9 @@ from .pages.article import ArticlePage from .pages.minutes20 import Minutes20Page from weboob.tools.browser import BaseBrowser - +from .tools import id2url __all__ = ['Newspaper20minutesBrowser'] - class Newspaper20minutesBrowser(BaseBrowser): PAGES = { 'http://www.20minutes.fr/article/?.*': ArticlePage, @@ -33,6 +32,6 @@ class Newspaper20minutesBrowser(BaseBrowser): def is_logged(self): return False - def get_content(self, url): - self.location(url) + def get_content(self, _id): + self.location(id2url(_id)) return self.page.article diff --git a/weboob/backends/minutes20/pages/minutes20.py b/weboob/backends/minutes20/pages/minutes20.py index dfecf856..357df070 100644 --- a/weboob/backends/minutes20/pages/minutes20.py +++ b/weboob/backends/minutes20/pages/minutes20.py @@ -14,26 +14,28 @@ # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- from weboob.tools.browser import BasePage from weboob.tools.parsers.lxmlparser import select, SelectElementException - +from weboob.backends.minutes20.tools import url2id __all__ = ['Minutes20Page', 'Article', 'NoAuthorElement'] class NoAuthorElement(Exception): pass class Article(object): - def __init__(self): + def __init__(self, browser, _id): + self.browser = browser + self.id = _id self.title = u'' self.body = u'' + self.url = u'' self.author = None self.date = None class Minutes20Page(BasePage): main_div = NotImplementedError element_body = NotImplementedError - article = Article() + article = Article def set_author(self): self.article.author = self.get_element_author().text_content().strip() @@ -45,13 +47,16 @@ class Minutes20Page(BasePage): raise NoAuthorElement() def set_body(self): - self.article.body = self.browser.parser.tostring(select(self.main_div, "div.mna-body", 1)) + self.article.body = self.browser.parser.tostring(select(self.main_div, + "div.mna-body", + 1)) def on_loaded(self): - self.article = Article() + self.article = Article(self.browser, url2id(self.url) ) self.main_div = self.document.getroot() self.article.title = select(self.main_div, "h1", 1).text_content() + self.article.url = self.url try : self.set_author() except NoAuthorElement: diff --git a/weboob/backends/minutes20/tools.py b/weboob/backends/minutes20/tools.py new file mode 100644 index 00000000..10ccfd7c --- /dev/null +++ b/weboob/backends/minutes20/tools.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Julien Hebert +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.


import re

# Helpers converting between 20minutes.fr article URLs and the dotted ids
# "<section>.<number>.<slug>" used for threads.

# The dots are literal separators, so they are escaped; unescaped they would
# match any character and let malformed ids through.
_ID_RE = re.compile(r"(\w+)\.(\w+)\.(.*)$")

# Same remark for the dots of the host name.
_URL_RE = re.compile(r"http://www\.20minutes\.fr/(\w+)/([0-9]+)/(.*)$")


def id2url(_id):
    """Build the 20minutes.fr URL designated by a dotted id.

    _id has the form "<section>.<number>.<slug>", as produced by url2id().
    Returns 'http://www.20minutes.fr/<section>/<number>/<slug>'.
    Raises ValueError when _id does not have that form.
    """
    match = _ID_RE.match(_id)
    if match is None:
        raise ValueError('not a valid 20minutes id: %r' % (_id,))
    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()


def url2id(url):
    """Build the dotted id "<section>.<number>.<slug>" for an article URL.

    url must look like 'http://www.20minutes.fr/<section>/<digits>/<slug>'.
    The numeric part is passed through int(), so leading zeros are dropped.
    Raises ValueError when url does not have that form.
    """
    match = _URL_RE.match(url)
    if match is None:
        raise ValueError('not a valid 20minutes article url: %r' % (url,))
    section, number, slug = match.groups()
    return '%s.%d.%s' % (section, int(number), slug)