From 4b10f6705825d10187e68341c2cbd5155069d837 Mon Sep 17 00:00:00 2001 From: juke Date: Wed, 2 Mar 2011 20:26:29 +0100 Subject: [PATCH] add new categories --- weboob/backends/lefigaro/browser.py | 10 ++++++++++ weboob/backends/minutes20/pages/simple.py | 2 +- weboob/tools/genericArticle.py | 14 ++++++++++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/weboob/backends/lefigaro/browser.py b/weboob/backends/lefigaro/browser.py index 566a7e9f..ecc7923f 100644 --- a/weboob/backends/lefigaro/browser.py +++ b/weboob/backends/lefigaro/browser.py @@ -29,6 +29,8 @@ class NewspaperFigaroBrowser(BaseBrowser): "http://www.lefigaro.fr/flash-sport/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, "http://www.lefigaro.fr/politique/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/sciences/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/sport/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/sport-business/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/football-ligue-1-et-2/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/international/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/livres/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, @@ -59,6 +61,14 @@ class NewspaperFigaroBrowser(BaseBrowser): "http://www.lefigaro.fr/sciences/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/assurance/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/retraite/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/tennis/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/emploi/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/impots/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/culture/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/musique/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/photos/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/formation/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, + "http://www.lefigaro.fr/lefigaromagazine/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/creation-gestion-entreprise/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage, "http://www.lefigaro.fr/flash-eco/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage, } diff --git a/weboob/backends/minutes20/pages/simple.py b/weboob/backends/minutes20/pages/simple.py index 2ba4bc65..43b58cb3 100644 --- a/weboob/backends/minutes20/pages/simple.py +++ b/weboob/backends/minutes20/pages/simple.py @@ -25,6 +25,6 @@ class SimplePage(GenericNewsPage): self.main_div = self.document.getroot() self.element_author_selector = "div.mna-signature" self.element_body_selector = "div.mna-body" - self.element_title_selector = "div.mn-left>h1" + self.element_title_selector = "h1" diff --git a/weboob/tools/genericArticle.py b/weboob/tools/genericArticle.py index 36b42348..ae4ead8b 100644 --- a/weboob/tools/genericArticle.py +++ b/weboob/tools/genericArticle.py @@ -27,6 +27,9 @@ def try_remove(base_element, selector): class NoAuthorElement(SelectElementException): pass +class NoTitleException(SelectElementException): + pass + class NoneMainDiv(AttributeError): pass @@ -59,10 +62,13 @@ class GenericNewsPage(BasePage): return self.__article.author def get_title(self): - return select( - self.main_div, - self.element_title_selector, - 1).text_content().strip() + try : + return select( + self.main_div, + self.element_title_selector, + 1).text_content().strip() + except SelectElementException: + raise NoTitleException("no title on %s" % (self.browser)) def get_element_body(self): return select(self.main_div, self.element_body_selector, 1)