Pep8 on ecrans module
This commit is contained in:
parent
871a43b3be
commit
b0e8c10499
4 changed files with 18 additions and 14 deletions
|
|
@ -23,6 +23,7 @@ from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBa
|
|||
from .browser import NewspaperEcransBrowser
|
||||
from .tools import rssid, url2id
|
||||
|
||||
|
||||
class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
|
||||
MAINTAINER = 'Julien Hebert'
|
||||
EMAIL = 'juke@free.fr'
|
||||
|
|
@ -34,7 +35,6 @@ class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
|
|||
BROWSER = NewspaperEcransBrowser
|
||||
RSS_FEED = 'http://www.ecrans.fr/spip.php?page=backend'
|
||||
RSSID = staticmethod(rssid)
|
||||
URL2ID = staticmethod(url2id)
|
||||
# RSS Size is actually 10, but some articles are not sorted by publication date
|
||||
RSSSIZE = 40
|
||||
|
||||
URL2ID = staticmethod(url2id)
|
||||
# RSS Size is actually 10, but some articles are not sorted by publication date
|
||||
RSSSIZE = 40
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ from .pages.article import ArticlePage
|
|||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
|
||||
|
||||
class NewspaperEcransBrowser(BaseBrowser):
|
||||
"NewspaperEcransBrowser class"
|
||||
PAGES = {
|
||||
|
|
|
|||
|
|
@ -19,19 +19,20 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, remove_from_selector_list, try_remove_from_selector_list, try_drop_tree
|
||||
|
||||
|
||||
class ArticlePage(GenericNewsPage):
    """ArticlePage object for ecrans."""

    def on_loaded(self):
        """Set the document root and the CSS selectors used for this site.

        The selectors are stored as attributes; how they are consumed is
        defined by GenericNewsPage (not visible here).
        """
        self.main_div = self.document.getroot()
        self.element_title_selector = "title"
        self.element_author_selector = "p.auteur>a"
        self.element_body_selector = "div.bloc_article_01"

    def get_body(self):
        """Return the article body serialized with ``self.parser.tostring``.

        Before serializing, the body element is passed through the generic
        article helpers with a list of selectors (author, headings, tags,
        "a lire" box, title, scripts).
        """
        element_body = self.get_element_body()
        remove_from_selector_list(self.parser, element_body,
                                  ["p.auteur", "h4"])
        # NOTE(review): the try_* helpers presumably tolerate selectors that
        # match nothing -- confirm in genericArticle.
        try_remove_from_selector_list(self.parser, element_body,
                                      ["p.tag", "div.alire",
                                       self.element_title_selector, "h4"])
        try_drop_tree(self.parser, element_body, "script")

        return self.parser.tostring(element_body)
|
||||
|
||||
|
|
|
|||
|
|
@ -20,25 +20,29 @@
|
|||
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def id2url(_id):
    """Return an url from an id.

    The id must look like ``<word><sep><digits><sep><rest>``; the three
    captured parts are joined into the path of the returned url.

    Raises ValueError if the id does not match that shape.
    """
    # Raw string so that ``\w`` reaches the regex engine instead of being
    # treated as an (invalid) string escape. The separators are unescaped
    # dots, so they match any single character, exactly as before.
    regexp2 = re.compile(r"(\w+).([0-9]+).(.*$)")
    match = regexp2.match(_id)
    if not match:
        raise ValueError("id doesn't match")
    # NOTE(review): the host is 20minutes.fr although this module targets
    # ecrans.fr -- looks copied from another backend; confirm before changing.
    return 'http://www.20minutes.fr/%s/%s/%s' % (match.group(1),
                                                 match.group(2),
                                                 match.group(3))
|
||||
|
||||
|
||||
def url2id(url):
    """Return an id from an url.

    The id is the run of digits between the last comma and the trailing
    ``.html`` of the url.

    Raises ValueError if the url does not end with ``,<digits>.html``.
    """
    # Raw string so that ``\.`` is passed to the regex engine untouched.
    regexp = re.compile(r"(^.*),([0-9]+)\.html$")
    match = regexp.match(url)
    if not match:
        raise ValueError("Can't find an id for the url")
    return match.group(2)
|
||||
|
||||
|
||||
def rssid(entry):
    """Return the id of an RSS entry, computed by url2id from ``entry.id``.

    Raises ValueError (from url2id) if ``entry.id`` is not a recognized url.
    """
    return url2id(entry.id)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue