add new categories

This commit is contained in:
juke 2011-03-02 20:26:29 +01:00 committed by Romain Bignon
commit 4b10f67058
3 changed files with 21 additions and 5 deletions

View file

@ -29,6 +29,8 @@ class NewspaperFigaroBrowser(BaseBrowser):
"http://www.lefigaro.fr/flash-sport/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage,
"http://www.lefigaro.fr/politique/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/sciences/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/sport/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/sport-business/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/football-ligue-1-et-2/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/international/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/livres/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
@ -59,6 +61,14 @@ class NewspaperFigaroBrowser(BaseBrowser):
"http://www.lefigaro.fr/sciences/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/assurance/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/retraite/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/tennis/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/emploi/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/impots/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/culture/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/musique/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/photos/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/formation/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/lefigaromagazine/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/creation-gestion-entreprise/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
"http://www.lefigaro.fr/flash-eco/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage,
}

View file

@ -25,6 +25,6 @@ class SimplePage(GenericNewsPage):
self.main_div = self.document.getroot()
self.element_author_selector = "div.mna-signature"
self.element_body_selector = "div.mna-body"
self.element_title_selector = "div.mn-left>h1"
self.element_title_selector = "h1"

View file

@ -27,6 +27,9 @@ def try_remove(base_element, selector):
class NoAuthorElement(SelectElementException):
    """Selection error specific to the author element.

    NOTE(review): presumably raised when the author element cannot be
    selected on a page -- the raising site is not visible here; confirm.
    """
class NoTitleException(SelectElementException):
    """Selection error raised by get_title() when the title lookup fails."""
class NoneMainDiv(AttributeError):
    """AttributeError subclass related to a page's main div.

    NOTE(review): the name suggests it signals a ``main_div`` that is
    None; no raising site is visible here -- confirm.
    """
@ -59,10 +62,13 @@ class GenericNewsPage(BasePage):
return self.__article.author
def get_title(self):
    """Return the page title as stripped text.

    Looks up ``self.element_title_selector`` under ``self.main_div``
    through ``select`` (called with ``1`` as third argument) and returns
    the text content of the result with surrounding whitespace removed.

    Raises:
        NoTitleException: when ``select`` fails with a
            SelectElementException.
    """
    # Only the lookup is guarded: it is the call expected to raise
    # SelectElementException.
    try:
        title_element = select(
            self.main_div,
            self.element_title_selector,
            1)
    except SelectElementException:
        # 1-tuple so %-formatting is well defined whatever type
        # self.browser has.
        raise NoTitleException("no title on %s" % (self.browser,))
    return title_element.text_content().strip()
def get_element_body(self):
    """Return the element holding the article body.

    The lookup is delegated to ``select``, called with ``self.main_div``,
    ``self.element_body_selector`` and ``1``.
    """
    body_element = select(self.main_div, self.element_body_selector, 1)
    return body_element