add new categories
This commit is contained in:
parent
fdf82a6ba8
commit
4b10f67058
3 changed files with 21 additions and 5 deletions
|
|
@ -29,6 +29,8 @@ class NewspaperFigaroBrowser(BaseBrowser):
|
|||
"http://www.lefigaro.fr/flash-sport/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage,
|
||||
"http://www.lefigaro.fr/politique/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/sciences/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/sport/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/sport-business/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/football-ligue-1-et-2/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/international/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/livres/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
|
|
@ -59,6 +61,14 @@ class NewspaperFigaroBrowser(BaseBrowser):
|
|||
"http://www.lefigaro.fr/sciences/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/assurance/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/retraite/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/tennis/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/emploi/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/impots/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/culture/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/musique/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/photos/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/formation/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/lefigaromagazine/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/creation-gestion-entreprise/(\d{4})/(\d{2})/(\d{2})/(.*$)": ArticlePage,
|
||||
"http://www.lefigaro.fr/flash-eco/(\d{4})/(\d{2})/(\d{2})/(.*$)": FlashActuPage,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,6 @@ class SimplePage(GenericNewsPage):
|
|||
self.main_div = self.document.getroot()
|
||||
self.element_author_selector = "div.mna-signature"
|
||||
self.element_body_selector = "div.mna-body"
|
||||
self.element_title_selector = "div.mn-left>h1"
|
||||
self.element_title_selector = "h1"
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,9 @@ def try_remove(base_element, selector):
|
|||
class NoAuthorElement(SelectElementException):
    """Selection error specific to the author element.

    NOTE(review): the raising site is not visible in this view; presumably
    raised when the author selector matches nothing -- confirm at the caller.
    """
|
||||
|
||||
class NoTitleException(SelectElementException):
    """Raised by ``get_title`` when selecting the title element fails.

    Subclasses SelectElementException, so existing handlers for the generic
    selection error still catch it.
    """
|
||||
|
||||
class NoneMainDiv(AttributeError):
    """AttributeError subclass concerning the page's main div.

    NOTE(review): the raising site is not visible in this view; the name
    suggests it signals that ``main_div`` is None -- confirm at the caller.
    """
|
||||
|
||||
|
|
@ -59,10 +62,13 @@ class GenericNewsPage(BasePage):
|
|||
return self.__article.author
|
||||
|
||||
def get_title(self):
    """Return the article title as whitespace-stripped text.

    Looks up ``self.element_title_selector`` under ``self.main_div`` via
    ``select`` and returns the matched element's text content.

    Raises:
        NoTitleException: if ``select`` raises SelectElementException.
    """
    try:
        # Third argument 1: presumably the expected number of matches --
        # confirm against the definition of ``select``.
        return select(
            self.main_div,
            self.element_title_selector,
            1).text_content().strip()
    except SelectElementException:
        # Translate the generic selection failure into a title-specific one.
        # One-element tuple so a tuple-valued browser cannot break formatting.
        raise NoTitleException("no title on %s" % (self.browser,))
|
||||
|
||||
def get_element_body(self):
    """Return the element matching ``self.element_body_selector``.

    The lookup is done with ``select`` under ``self.main_div``; any exception
    raised by ``select`` propagates unchanged to the caller.
    """
    # Third argument 1: presumably the expected number of matches --
    # confirm against the definition of ``select``.
    return select(self.main_div, self.element_body_selector, 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue