new branch inrocks

This commit is contained in:
Juke 2011-02-19 16:50:04 +01:00 committed by Romain Bignon
commit 3ea6b143b8
9 changed files with 103 additions and 71 deletions

View file

@ -25,8 +25,6 @@ from weboob.tools.newsfeed import Newsfeed
from .tools import url2id from .tools import url2id
from .browser import NewspaperInrocksBrowser from .browser import NewspaperInrocksBrowser
__all__ = ['NewspaperInrocksBackend']
class NewspaperInrocksBackend(BaseBackend, ICapMessages): class NewspaperInrocksBackend(BaseBackend, ICapMessages):
MAINTAINER = 'Julien Hebert' MAINTAINER = 'Julien Hebert'
EMAIL = 'juke@free.fr' EMAIL = 'juke@free.fr'
@ -89,7 +87,6 @@ class NewspaperInrocksBackend(BaseBackend, ICapMessages):
if msg.flags & msg.IS_UNREAD: if msg.flags & msg.IS_UNREAD:
yield msg yield msg
def set_message_read(self, message): def set_message_read(self, message):
self.storage.set( self.storage.set(
'seen', 'seen',

View file

@ -19,7 +19,6 @@ from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from .tools import id2url from .tools import id2url
__all__ = ['NewspaperInrocksBrowser']
class NewspaperInrocksBrowser(BaseBrowser): class NewspaperInrocksBrowser(BaseBrowser):
@ -35,4 +34,4 @@ class NewspaperInrocksBrowser(BaseBrowser):
def get_content(self, _id): def get_content(self, _id):
url = _id url = _id
self.location(url) self.location(url)
return self.page.article return self.page.get_article(_id)

View file

@ -17,7 +17,7 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.parsers.lxmlparser import select, SelectElementException from weboob.tools.parsers.lxmlparser import select, SelectElementException
from .inrocks import InrocksPage from .genericArticle import GenericNewsPage
def try_remove(base_element, selector): def try_remove(base_element, selector):
try : try :
@ -25,18 +25,22 @@ def try_remove(base_element, selector):
except (SelectElementException, ValueError): except (SelectElementException, ValueError):
pass pass
class ArticlePage(InrocksPage): class ArticlePage(GenericNewsPage):
"ArticlePage object for inrocks" "ArticlePage object for inrocks"
def on_loaded(self):
self.main_div = self.document.getroot()
self.element_author_selector = "div.name>span"
self.element_body_selector = "div.maincol"
def get_body(self): def get_body(self):
try_remove(self.element_body, "div.sidebar") element_body = self.get_element_body()
details = select(self.element_body, "div.details", 1) try_remove(element_body, "div.sidebar")
details = select(element_body, "div.details", 1)
try_remove(details, "div.footer") try_remove(details, "div.footer")
header = select(self.element_body, "div.header", 1) header = select(element_body, "div.header", 1)
for selector in ["h1", "div.picture", "div.date", "div.news-single-img", for selector in ["h1", "div.picture", "div.date", "div.news-single-img",
"div.metas_img", "strong"]: "div.metas_img", "strong"]:
try_remove(header, selector) try_remove(header, selector)
return self.browser.parser.tostring(element_body)
return self.browser.parser.tostring(self.element_body)

View file

@ -16,8 +16,6 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select, SelectElementException from weboob.tools.parsers.lxmlparser import select, SelectElementException
from weboob.backends.minutes20.tools import url2id
__all__ = ['Minutes20Page', 'Article', 'NoAuthorElement']
class NoAuthorElement(SelectElementException): class NoAuthorElement(SelectElementException):
pass pass
@ -32,13 +30,13 @@ class Article(object):
self.author = u'' self.author = u''
self.date = None self.date = None
class Minutes20Page(BasePage): class GenericNewsPage(BasePage):
__main_div = NotImplementedError
__element_body = NotImplementedError __element_body = NotImplementedError
__article = Article __article = Article
__element_author_selector = ValueError __element_title_selector = "h1"
__element_title_selector = ValueError main_div = NotImplementedError
__element_body_selector = ValueError element_body_selector = NotImplementedError
element_author_selector = NotImplementedError
def get_body(self): def get_body(self):
return self.browser.parser.tostring(self.get_element_body()) return self.browser.parser.tostring(self.get_element_body())
@ -50,30 +48,25 @@ class Minutes20Page(BasePage):
return None return None
def get_title(self): def get_title(self):
return select(self.__main_div, self.__element_title_selector, 1).text_content().strip() return select(
self.main_div,
self.__element_title_selector,
1).text_content().strip()
def get_element_body(self): def get_element_body(self):
return select(self.__main_div, self.__element_body_selector, 1) return select(self.main_div, self.element_body_selector, 1)
def get_element_author(self): def get_element_author(self):
try: try:
return select(self.__main_div, self.__element_author_selector, 1) return select(self.main_div, self.element_author_selector, 1)
except SelectElementException: except SelectElementException:
raise NoAuthorElement() raise NoAuthorElement()
def get_article(self): def get_article(self, id):
__article = Article(self.browser, url2id(self.url) ) __article = Article(self.browser, id)
__article.author = self.get_author() __article.author = self.get_author()
__article.title = self.get_title() __article.title = self.get_title()
__article.url = self.url __article.url = self.url
__article.body = self.get_body() __article.body = self.get_body()
return __article return __article
def on_loaded(self):
self.__main_div = self.document.getroot()
self.__element_author_selector = "div.mna-signature"
self.__element_title_selector = "h1"
self.__element_body_selector = "div.mna-body"

View file

@ -25,8 +25,6 @@ from weboob.tools.newsfeed import Newsfeed
from .tools import url2id from .tools import url2id
from .browser import Newspaper20minutesBrowser from .browser import Newspaper20minutesBrowser
__all__ = ['Newspaper20minutesBackend']
class Newspaper20minutesBackend(BaseBackend, ICapMessages): class Newspaper20minutesBackend(BaseBackend, ICapMessages):
MAINTAINER = 'Julien Hebert' MAINTAINER = 'Julien Hebert'
EMAIL = 'juke@free.fr' EMAIL = 'juke@free.fr'

View file

@ -16,17 +16,16 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from .pages.article import ArticlePage from .pages.article import ArticlePage
from .pages.simple import SimplePage
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from .tools import id2url from .tools import id2url
from .pages.minutes20 import Minutes20Page
__all__ = ['Newspaper20minutesBrowser']
class Newspaper20minutesBrowser(BaseBrowser): class Newspaper20minutesBrowser(BaseBrowser):
PAGES = { PAGES = {
'http://www.20minutes.fr/article/?.*': ArticlePage, 'http://www.20minutes.fr/article/?.*': ArticlePage,
'http://www.20minutes.fr/ledirect/?.*': Minutes20Page, 'http://www.20minutes.fr/ledirect/?.*': SimplePage,
'http://www.20minutes.fr/preums/?.*': Minutes20Page 'http://www.20minutes.fr/preums/?.*': SimplePage
} }
def is_logged(self): def is_logged(self):
@ -44,4 +43,4 @@ class Newspaper20minutesBrowser(BaseBrowser):
raise ValueError("thread id is empty") raise ValueError("thread id is empty")
else: else:
raise raise
return self.page.get_article() return self.page.get_article(_id)

View file

@ -17,7 +17,7 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.parsers.lxmlparser import select, SelectElementException from weboob.tools.parsers.lxmlparser import select, SelectElementException
from .minutes20 import Minutes20Page, NoAuthorElement from .genericArticle import GenericNewsPage, NoAuthorElement
def try_remove(base_element, selector): def try_remove(base_element, selector):
try : try :
@ -25,8 +25,13 @@ def try_remove(base_element, selector):
except (SelectElementException, ValueError): except (SelectElementException, ValueError):
pass pass
class ArticlePage(Minutes20Page): class ArticlePage(GenericNewsPage):
"ArticlePage object for minutes20" "ArticlePage object for minutes20"
def on_loaded(self):
self.main_div = self.document.getroot()
self.element_author_selector = "div.mna-signature"
self.element_body_selector = "div.mna-body"
def get_body(self): def get_body(self):
element_body = self.get_element_body() element_body = self.get_element_body()
try_remove(element_body, "div.mna-tools") try_remove(element_body, "div.mna-tools")
@ -36,3 +41,5 @@ class ArticlePage(Minutes20Page):
except NoAuthorElement: except NoAuthorElement:
pass pass
return self.browser.parser.tostring(element_body) return self.browser.parser.tostring(element_body)

View file

@ -16,10 +16,8 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.browser import BasePage from weboob.tools.browser import BasePage
from weboob.tools.parsers.lxmlparser import select, SelectElementException from weboob.tools.parsers.lxmlparser import select, SelectElementException
from weboob.backends.inrocks.tools import url2id
__all__ = ['InrocksPage', 'Article', 'NoAuthorElement']
class NoAuthorElement(Exception): class NoAuthorElement(SelectElementException):
pass pass
class Article(object): class Article(object):
@ -32,40 +30,43 @@ class Article(object):
self.author = u'' self.author = u''
self.date = None self.date = None
class InrocksPage(BasePage): class GenericNewsPage(BasePage):
__element_body = NotImplementedError
__article = Article
__element_title_selector = "h1"
main_div = NotImplementedError main_div = NotImplementedError
element_body = NotImplementedError element_body_selector = NotImplementedError
article = Article element_author_selector = NotImplementedError
element_author_selector = ValueError
element_title_selector = ValueError
element_body_selector = ValueError
def get_body(self): def get_body(self):
return self.browser.parser.tostring(self.element_body) return self.browser.parser.tostring(self.get_element_body())
def get_author(self): def get_author(self):
try: try:
return select(self.main_div, self.element_author_selector, 1).text_content().strip() return self.get_element_author().text_content().strip()
except SelectElementException: except NoAuthorElement:
#TODO: test nombre d'element en retour return None
pass
def get_title(self): def get_title(self):
return select(self.main_div, self.element_title_selector, 1).text_content().strip() return select(
self.main_div,
self.__element_title_selector,
1).text_content().strip()
def on_loaded(self): def get_element_body(self):
self.article = Article(self.browser, url2id(self.url) ) return select(self.main_div, self.element_body_selector, 1)
self.main_div = self.document.getroot()
self.element_author_selector = "div.name>span" def get_element_author(self):
self.element_title_selector = "h1" try:
self.element_body_selector = "div.maincol" return select(self.main_div, self.element_author_selector, 1)
except SelectElementException:
self.element_body = select(self.main_div, self.element_body_selector, 1) raise NoAuthorElement()
self.article.author = self.get_author()
self.article.title = self.get_title()
self.article.url = self.url
self.article.body = self.get_body()
def get_article(self, id):
__article = Article(self.browser, id)
__article.author = self.get_author()
__article.title = self.get_title()
__article.url = self.url
__article.body = self.get_body()
return __article

View file

@ -0,0 +1,34 @@
"ArticlePage object for minutes20"
# -*- coding: utf-8 -*-
# Copyright(C) 2011 Julien Hebert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from weboob.tools.parsers.lxmlparser import select, SelectElementException
from .genericArticle import GenericNewsPage, NoAuthorElement
def try_remove(base_element, selector):
try :
base_element.remove(select(base_element, selector, 1 ))
except (SelectElementException, ValueError):
pass
class SimplePage(GenericNewsPage):
"ArticlePage object for minutes20"
def on_loaded(self):
self.main_div = self.document.getroot()
self.element_author_selector = "div.mna-signature"
self.element_body_selector = "div.mna-body"