Apply PEP 8 style fixes to the ecrans module
This commit is contained in:
parent
871a43b3be
commit
b0e8c10499
4 changed files with 18 additions and 14 deletions
|
|
@ -23,6 +23,7 @@ from weboob.tools.capabilities.messages.GenericBackend import GenericNewspaperBa
|
||||||
from .browser import NewspaperEcransBrowser
|
from .browser import NewspaperEcransBrowser
|
||||||
from .tools import rssid, url2id
|
from .tools import rssid, url2id
|
||||||
|
|
||||||
|
|
||||||
class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
|
class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
|
||||||
MAINTAINER = 'Julien Hebert'
|
MAINTAINER = 'Julien Hebert'
|
||||||
EMAIL = 'juke@free.fr'
|
EMAIL = 'juke@free.fr'
|
||||||
|
|
@ -34,7 +35,6 @@ class NewspaperEcransBackend(GenericNewspaperBackend, ICapMessages):
|
||||||
BROWSER = NewspaperEcransBrowser
|
BROWSER = NewspaperEcransBrowser
|
||||||
RSS_FEED = 'http://www.ecrans.fr/spip.php?page=backend'
|
RSS_FEED = 'http://www.ecrans.fr/spip.php?page=backend'
|
||||||
RSSID = staticmethod(rssid)
|
RSSID = staticmethod(rssid)
|
||||||
URL2ID = staticmethod(url2id)
|
URL2ID = staticmethod(url2id)
|
||||||
# RSS Size is actually 10, but some articles are not sorted by publication date
|
# RSS Size is actually 10, but some articles are not sorted by publication date
|
||||||
RSSSIZE = 40
|
RSSSIZE = 40
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,6 @@ from .pages.article import ArticlePage
|
||||||
from weboob.tools.browser import BaseBrowser
|
from weboob.tools.browser import BaseBrowser
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class NewspaperEcransBrowser(BaseBrowser):
|
class NewspaperEcransBrowser(BaseBrowser):
|
||||||
"NewspaperEcransBrowser class"
|
"NewspaperEcransBrowser class"
|
||||||
PAGES = {
|
PAGES = {
|
||||||
|
|
|
||||||
|
|
@ -19,19 +19,20 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, remove_from_selector_list, try_remove_from_selector_list, try_drop_tree
|
from weboob.tools.capabilities.messages.genericArticle import GenericNewsPage, remove_from_selector_list, try_remove_from_selector_list, try_drop_tree
|
||||||
|
|
||||||
|
|
||||||
class ArticlePage(GenericNewsPage):
|
class ArticlePage(GenericNewsPage):
|
||||||
"ArticlePage object for inrocks"
|
"ArticlePage object for inrocks"
|
||||||
def on_loaded(self):
|
def on_loaded(self):
|
||||||
self.main_div = self.document.getroot()
|
self.main_div = self.document.getroot()
|
||||||
self.element_title_selector = "title"
|
self.element_title_selector = "title"
|
||||||
self.element_author_selector = "p.auteur>a"
|
self.element_author_selector = "p.auteur>a"
|
||||||
self.element_body_selector = "div.bloc_article_01"
|
self.element_body_selector = "div.bloc_article_01"
|
||||||
|
|
||||||
def get_body(self):
|
def get_body(self):
|
||||||
element_body = self.get_element_body()
|
element_body = self.get_element_body()
|
||||||
remove_from_selector_list(self.parser, element_body, ["p.auteur", "h4" ])
|
remove_from_selector_list(self.parser, element_body, ["p.auteur", "h4"])
|
||||||
try_remove_from_selector_list(self.parser, element_body, ["p.tag", "div.alire", self.element_title_selector, "h4"])
|
try_remove_from_selector_list(self.parser, element_body, ["p.tag", "div.alire", self.element_title_selector, "h4"])
|
||||||
try_drop_tree(self.parser, element_body, "script")
|
try_drop_tree(self.parser, element_body, "script")
|
||||||
|
|
||||||
return self.parser.tostring(element_body)
|
return self.parser.tostring(element_body)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,25 +20,29 @@
|
||||||
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
def id2url(_id):
|
def id2url(_id):
|
||||||
"return an url from an id"
|
"return an url from an id"
|
||||||
regexp2 = re.compile("(\w+).([0-9]+).(.*$)")
|
regexp2 = re.compile("(\w+).([0-9]+).(.*$)")
|
||||||
match = regexp2.match(_id)
|
match = regexp2.match(_id)
|
||||||
if match:
|
if match:
|
||||||
return 'http://www.20minutes.fr/%s/%s/%s' % ( match.group(1),
|
return 'http://www.20minutes.fr/%s/%s/%s' % (match.group(1),
|
||||||
match.group(2),
|
match.group(2),
|
||||||
match.group(3))
|
match.group(3))
|
||||||
else:
|
else:
|
||||||
raise ValueError("id doesn't match")
|
raise ValueError("id doesn't match")
|
||||||
|
|
||||||
|
|
||||||
def url2id(url):
|
def url2id(url):
|
||||||
"return an id from an url"
|
"return an id from an url"
|
||||||
regexp = re.compile("(^.*),([0-9]+)\.html$")
|
regexp = re.compile("(^.*),([0-9]+)\.html$")
|
||||||
match = regexp.match(url)
|
match = regexp.match(url)
|
||||||
if match:
|
if match:
|
||||||
return match.group(2)
|
return match.group(2)
|
||||||
else:
|
else:
|
||||||
raise ValueError("Can't find an id for the url")
|
raise ValueError("Can't find an id for the url")
|
||||||
|
|
||||||
|
|
||||||
def rssid(entry):
|
def rssid(entry):
|
||||||
return url2id(entry.id)
|
return url2id(entry.id)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue