Apply PEP 8 formatting to the ecrans module

This commit is contained in:
Florent 2012-03-13 17:31:24 +01:00
commit b0e8c10499
4 changed files with 18 additions and 14 deletions

View file

@ -23,6 +23,7 @@ from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBa
from .browser import NewspaperEcransBrowser
from .tools import rssid, url2id
class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
MAINTAINER = 'Julien Hebert'
EMAIL = 'juke@free.fr'
@ -34,7 +35,6 @@ class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
BROWSER = NewspaperEcransBrowser
RSS_FEED = 'http://www.ecrans.fr/spip.php?page=backend'
RSSID = staticmethod(rssid)
URL2ID = staticmethod(url2id)
# RSS Size is actually 10, but some articles are not sorted by publication date
RSSSIZE = 40
URL2ID = staticmethod(url2id)
# RSS Size is actually 10, but some articles are not sorted by publication date
RSSSIZE = 40

View file

@ -22,7 +22,6 @@ from .pages.article import ArticlePage
from weboob.tools.browser import BaseBrowser
class NewspaperEcransBrowser(BaseBrowser):
"NewspaperEcransBrowser class"
PAGES = {

View file

@ -19,19 +19,20 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, remove_from_selector_list, try_remove_from_selector_list, try_drop_tree
class ArticlePage(GenericNewsPage):
    "ArticlePage object for ecrans"

    def on_loaded(self):
        """Record the document root and the CSS selectors for title, author and body."""
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "p.auteur>a"
        self.element_body_selector = "div.bloc_article_01"

    def get_body(self):
        """Return the body element serialized by the parser, after clean-up.

        The author paragraph and ``h4`` headings are removed first, then
        tags, "a lire" boxes, the title and any scripts are removed.
        """
        element_body = self.get_element_body()
        # Run exactly once: a second identical call would look for elements
        # that have already been removed.
        remove_from_selector_list(self.parser, element_body, ["p.auteur", "h4"])
        # NOTE(review): the try_* helpers presumably tolerate missing
        # elements -- confirm against genericArticle.
        try_remove_from_selector_list(
            self.parser,
            element_body,
            ["p.tag", "div.alire", self.element_title_selector, "h4"])
        try_drop_tree(self.parser, element_body, "script")
        return self.parser.tostring(element_body)

View file

@ -20,25 +20,29 @@
import re
def id2url(_id):
    """Return the article URL built from the three parts of an id.

    The id must be a word, a run of digits and a trailing part, each
    separated by a single character. The three captured parts are joined
    with slashes under the site root.

    Raises ValueError when the id does not have that shape.
    """
    # NOTE(review): the host is 20minutes.fr although this appears to be the
    # ecrans module -- confirm this is intended.
    # Raw string: a backslash-w in a plain literal is an invalid escape
    # sequence and triggers warnings on recent Python versions.
    match = re.match(r"(\w+).([0-9]+).(.*$)", _id)
    if match is None:
        raise ValueError("id doesn't match")
    return 'http://www.20minutes.fr/%s/%s/%s' % match.groups()
def url2id(url):
    """Return the numeric id embedded at the end of an article URL.

    The URL must end with a comma, a run of digits and ``.html``; the
    digits are returned as a string.

    Raises ValueError when the URL does not end that way.
    """
    # Raw string so the escaped dot reaches the regex engine unchanged and
    # no invalid-escape warning is emitted.
    match = re.match(r"(^.*),([0-9]+)\.html$", url)
    if match is None:
        raise ValueError("Can't find an id for the url")
    return match.group(2)
def rssid(entry):
    """Return the id that url2id extracts from the entry's ``id`` attribute."""
    entry_url = entry.id
    return url2id(entry_url)