Handle article pages that have no main div.

* ArticlePage.get_body (inrocks) returns None when get_element_body raises
  NoneMainDiv instead of failing.
* GenericNewsPage.get_element_body raises NoneMainDiv when an AttributeError
  occurs and self.main_div is None; other AttributeErrors are re-raised.
* GenericNewsPage.get_author / get_title fall back to self.__article.author /
  self.__article.title in that situation.
* Article.title becomes a class-level default (u'') instead of being set in
  __init__.

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation and blank-line placement were reconstructed from the hunk line
counts -- verify against the target tree before applying.

diff --git a/weboob/backends/inrocks/pages/article.py b/weboob/backends/inrocks/pages/article.py
index f3908f5f..4eba0216 100644
--- a/weboob/backends/inrocks/pages/article.py
+++ b/weboob/backends/inrocks/pages/article.py
@@ -19,7 +19,7 @@
 from weboob.tools.parsers.lxmlparser import select, SelectElementException
 from weboob.tools.genericArticle import GenericNewsPage, try_remove, \
                                         try_remove_from_selector_list, \
-                                        drop_comments
+                                        drop_comments, NoneMainDiv
 
 class ArticlePage(GenericNewsPage):
     "ArticlePage object for inrocks"
@@ -30,45 +30,49 @@ class ArticlePage(GenericNewsPage):
         self.element_body_selector = "div.maincol"
 
     def get_body(self):
-        element_body = self.get_element_body()
-        div_header_element = select(element_body, "div.header", 1)
-        element_detail = select(element_body, "div.details", 1)
-        div_content_element = select(element_body, "div.content", 1)
-
-        drop_comments(element_body)
-        try_remove(element_body, "div.sidebar")
-        try_remove(element_detail, "div.footer")
-        try_remove_from_selector_list(div_header_element,
-                                      ["h1", "div.picture", "div.date",
-                                       "div.news-single-img",
-                                       "div.metas_img", "strong"])
-        try_remove_from_selector_list(div_content_element,
-                                      ["div.tw_button", "div.wpfblike"])
-
         try :
-            description_element = select(div_header_element,
-                                         "div.description", 1)
-        except SelectElementException:
-            pass
+            element_body = self.get_element_body()
+        except NoneMainDiv:
+            return None
         else:
-            text_content = description_element.text_content()
-            if len(text_content.strip()) == 0 :
-                description_element.drop_tree()
+            div_header_element = select(element_body, "div.header", 1)
+            element_detail = select(element_body, "div.details", 1)
+            div_content_element = select(element_body, "div.content", 1)
+
+            drop_comments(element_body)
+            try_remove(element_body, "div.sidebar")
+            try_remove(element_detail, "div.footer")
+            try_remove_from_selector_list(div_header_element,
+                                          ["h1", "div.picture", "div.date",
+                                           "div.news-single-img",
+                                           "div.metas_img", "strong"])
+            try_remove_from_selector_list(div_content_element,
+                                          ["div.tw_button", "div.wpfblike"])
+
+            try :
+                description_element = select(div_header_element,
+                                             "div.description", 1)
+            except SelectElementException:
+                pass
             else:
-                if len(description_element) == 1:
-                    description_element.drop_tag()
+                text_content = description_element.text_content()
+                if len(text_content.strip()) == 0 :
+                    description_element.drop_tree()
+                else:
+                    if len(description_element) == 1:
+                        description_element.drop_tag()
 
-        if len(div_header_element.text_content().strip()) == 0:
-            div_header_element.drop_tree()
+            if len(div_header_element.text_content().strip()) == 0:
+                div_header_element.drop_tree()
 
-        if len(div_header_element) == 1:
-            div_header_element.drop_tag()
+            if len(div_header_element) == 1:
+                div_header_element.drop_tag()
 
-        if len(element_detail) == 1:
-            element_detail.drop_tag()
+            if len(element_detail) == 1:
+                element_detail.drop_tag()
 
-        div_content_element.drop_tag()
+            div_content_element.drop_tag()
 
-        return self.browser.parser.tostring(element_body)
+            return self.browser.parser.tostring(element_body)
 
 
diff --git a/weboob/tools/genericArticle.py b/weboob/tools/genericArticle.py
index 929d3ffb..452fc681 100644
--- a/weboob/tools/genericArticle.py
+++ b/weboob/tools/genericArticle.py
@@ -61,11 +61,11 @@ class NoneMainDiv(AttributeError):
 
 class Article(object):
     author = u''
+    title = u''
 
     def __init__(self, browser, _id):
         self.browser = browser
         self.id = _id
-        self.title = u''
         self.body = u''
         self.url = u''
         self.date = None
@@ -84,7 +84,8 @@ class GenericNewsPage(BasePage):
     def get_author(self):
         try:
             return self.get_element_author().text_content().strip()
-        except NoAuthorElement:
+        except (NoAuthorElement, NoneMainDiv):
+            #TODO: emit a warning
             return self.__article.author
 
     def get_title(self):
@@ -93,6 +94,12 @@ class GenericNewsPage(BasePage):
                 self.main_div,
                 self.element_title_selector,
                 1).text_content().strip()
+        except AttributeError:
+            if self.main_div is None:
+                #TODO: emit a warning
+                return self.__article.title
+            else:
+                raise
         except SelectElementException:
             raise NoTitleException("no title on %s" % (self.browser))
 
@@ -100,7 +107,12 @@ class GenericNewsPage(BasePage):
         try :
             return select(self.main_div, self.element_body_selector, 1)
         except SelectElementException:
-            raise NoBodyElement("no body on %s" % (self.browser))
+            raise NoBodyElement("no body on %s" % (self.browser))
+        except AttributeError:
+            if self.main_div is None:
+                raise NoneMainDiv("main_div is none on %s" % (self.browser))
+            else:
+                raise
 
     def get_element_author(self):
         try: