diff --git a/weboob/backends/lefigaro/browser.py b/weboob/backends/lefigaro/browser.py
index dd454407..bafc13ba 100644
--- a/weboob/backends/lefigaro/browser.py
+++ b/weboob/backends/lefigaro/browser.py
@@ -16,13 +16,29 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 from .pages.article import ArticlePage
+from .pages.flashactu import FlashActuPage
 from weboob.tools.browser import BaseBrowser
 
 
 
 class NewspaperFigaroBrowser(BaseBrowser):
     PAGES = {
-             '.*': ArticlePage,
+             # One entry per site section; each pattern captures year, month, day and slug.
+             # NOTE(review): there is no catch-all entry any more, so URLs outside these
+             # sections presumably map to no page class -- confirm get_content() copes.
+             r"http://www\.lefigaro\.fr/flash-actu/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": FlashActuPage,
+             r"http://www\.lefigaro\.fr/flash-sport/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": FlashActuPage,
+             r"http://www\.lefigaro\.fr/flash-eco/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": FlashActuPage,
+             r"http://www\.lefigaro\.fr/politique/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/football-ligue-1-et-2/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/international/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/actualite-france/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/cinema/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/conjoncture/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/automobile/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/actualites/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/matieres-premieres/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
+             r"http://www\.lefigaro\.fr/le-talk/([0-9]{4})/([0-9]{2})/([0-9]{2})/(.*$)": ArticlePage,
             }
 
     def is_logged(self):
diff --git a/weboob/backends/lefigaro/pages/flashactu.py b/weboob/backends/lefigaro/pages/flashactu.py
new file mode 100644
index 00000000..451c2b91
--- /dev/null
+++ b/weboob/backends/lefigaro/pages/flashactu.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011 Julien Hebert
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"FlashActuPage object for lefigaro"
+
+from weboob.tools.parsers.lxmlparser import select, SelectElementException
+from .genericArticle import GenericNewsPage
+
+
+def try_remove(base_element, selector):
+    """Remove from base_element the element matched by selector, if any.
+
+    Best-effort helper: a failed selection (SelectElementException) or a
+    failed removal (ValueError) is deliberately ignored.
+    """
+    try:
+        base_element.remove(select(base_element, selector, 1))
+    except (SelectElementException, ValueError):
+        pass
+
+
+class FlashActuPage(GenericNewsPage):
+    "Page object for lefigaro flash news pages"
+
+    def on_loaded(self):
+        "Set the root element and the selectors used for author and body"
+        self.main_div = self.document.getroot()
+        self.element_author_selector = "div.name>span"
+        self.element_body_selector = "h2"
+
+    def get_body(self):
+        "Return the body element, retagged as a div, rendered by the browser's parser"
+        element_body = self.get_element_body()
+        element_body.tag = "div"
+        return self.browser.parser.tostring(element_body)