I ran autopep8 on some files and carefully checked the changes. I ignored E501, E302, E231, E225, E222, E221, E241 and E203 in my search, and at least E501 on every autopep8 run. Other style fixes not related to PEP 8: * Only use new-style classes. I don't think the use of old-style classes was intentional, and old-style classes are removed in Python 3. * In mediawiki, convert an if/else to a one-liner, change the docstring style, and turn into a comment something that wasn't really appropriate for a docstring. * Remove an unneeded first if condition in meteofrance.
217 lines
7.5 KiB
Python
217 lines
7.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright(C) 2010-2011 Romain Bignon
|
|
#
|
|
# This file is part of weboob.
|
|
#
|
|
# weboob is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# weboob is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
from datetime import datetime
|
|
|
|
from weboob.tools.browser import BrokenPageError
|
|
from weboob.tools.misc import local2utc
|
|
from ..tools import url2id
|
|
|
|
from .index import DLFPPage
|
|
|
|
class RSSComment(DLFPPage):
    """Page class whose load hook is intentionally empty."""

    def on_loaded(self):
        """Do nothing; this page needs no processing in ``on_loaded``."""
|
|
|
|
class Content(object):
    """Base container for the fields shared by the content classes.

    Subclasses state whether they can be tagged through the ``TAGGABLE``
    class attribute, which ``is_taggable()`` reports.
    """

    # Overridden by subclasses that accept tags.
    TAGGABLE = False

    def __init__(self, browser):
        """Initialise every field to an empty default.

        :param browser: browser object stored on the instance as-is.
        """
        self.browser = browser

        # Textual fields start as empty unicode strings.
        self.url = u''
        self.id = u''
        self.title = u''
        self.author = u''
        self.username = u''
        self.body = u''

        self.date = None
        self.score = 0
        # A fresh list per instance, so contents never share their children.
        self.comments = []

        self.relevance_url = None
        self.relevance_token = None

    def is_taggable(self):
        """Return the ``TAGGABLE`` flag of the concrete class."""
        # Read the class attribute instead of a hard-coded False, so that a
        # subclass declaring ``TAGGABLE = True`` is actually reported as
        # taggable.
        return self.TAGGABLE
|
|
|
|
class Comment(Content):
    """A comment together with the tree of its replies.

    Construction only records the element and builds the nested replies;
    the remaining fields are read from the element by ``parse()``.
    """

    def __init__(self, article, div, reply_id):
        """Create the comment and, recursively, every reply below it.

        :param article: content the comment belongs to; its ``browser`` and
                        ``url`` attributes are read.
        :param div: element of the comment; its ``id`` attribute is split on
                    ``-`` and the second piece becomes ``self.id``.
        :param reply_id: id of the comment this one replies to.
        """
        Content.__init__(self, article.browser)
        self.reply_id = reply_id
        self.signature = u''
        self.preurl = article.url
        self.div = div
        self.id = div.attrib['id'].split('-')[1]

        # Replies live in a nested <ul>; each <li> is itself a comment whose
        # parent is this one.
        subs = div.find('ul')
        if subs is not None:
            for sub in subs.findall('li'):
                self.comments.append(Comment(article, sub, self.id))

    def parse(self):
        """Fill in url, title, author, date, body, signature, score and
        relevance fields from the element given to the constructor.

        Exceptions raised by the parser for missing mandatory nodes (title,
        time, score) are not caught here.
        """
        select = self.browser.parser.select
        # The first <p> child is queried for author, date and score.
        meta = self.div.find('p')

        self.url = '%s#%s' % (self.preurl, self.div.attrib['id'])
        self.title = unicode(select(self.div.find('h2'), 'a.title', 1).text)

        try:
            a = select(meta, 'a[rel=author]', 1)
        except BrokenPageError:
            # No author link found: fall back to the anonymous author name.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(a.text)
            self.username = unicode(a.attrib['href'].split('/')[2])

        # Drop everything from '+' onwards before parsing the timestamp.
        self.date = datetime.strptime(select(meta, 'time', 1).attrib['datetime'].split('+')[0],
                                      '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)

        content = self.div.find('div')
        try:
            signature = select(content, 'p.signature', 1)
        except BrokenPageError:
            # No signature.
            pass
        else:
            # Detach the signature so it is not duplicated inside the body.
            content.remove(signature)
            self.signature = self.browser.parser.tostring(signature)
        self.body = self.browser.parser.tostring(content)

        self.score = int(select(meta, 'span.score', 1).text)

        forms = select(self.div.find('footer'), 'form.button_to')
        if len(forms) > 0:
            action = forms[0].attrib['action']
            # str.rstrip() removes a *set of characters*, not a suffix, so it
            # could eat unrelated trailing characters of the URL. Remove the
            # trailing 'for' / 'against' word explicitly instead.
            for suffix in ('for', 'against'):
                if action.endswith(suffix):
                    action = action[:-len(suffix)]
                    break
            self.relevance_url = action
            self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value']

    def iter_all_comments(self):
        """Yield every reply below this comment, depth first, parents before
        their own replies."""
        for comment in self.comments:
            yield comment
            for c in comment.iter_all_comments():
                yield c

    def __repr__(self):
        return u"<Comment id=%r author=%r title=%r>" % (self.id, self.author, self.title)
|
|
|
|
class Article(Content):
    """An article and the comments attached to it."""

    TAGGABLE = True

    def __init__(self, browser, url, tree):
        """Create the article and, when a tree is given, read its fields.

        :param browser: browser object; its ``parser`` is used to query the
                        tree.
        :param url: URL of the article; the id is derived from it with
                    ``url2id``.
        :param tree: element of the article, or ``None`` to build an object
                     carrying only ``url`` and ``id``.
        """
        Content.__init__(self, browser)
        self.url = url
        self.id = url2id(self.url)

        if tree is None:
            return

        select = self.browser.parser.select
        header = tree.find('header')
        self.title = u' — '.join([a.text for a in header.find('h1').findall('a')])

        try:
            a = select(header, 'a[rel=author]', 1)
        except BrokenPageError:
            # No author link found: fall back to the anonymous author name.
            self.author = 'Anonyme'
            self.username = None
        else:
            self.author = unicode(a.text)
            self.username = unicode(a.attrib['href'].split('/')[2])

        self.body = self.browser.parser.tostring(select(tree, 'div.content', 1))

        try:
            # Drop everything from '+' onwards before parsing the timestamp.
            self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
                                          '%Y-%m-%dT%H:%M:%S')
            self.date = local2utc(self.date)
        except BrokenPageError:
            # The date is optional here; keep the default None.
            pass

        for form in select(tree.find('footer'), 'form.button_to'):
            action = form.attrib['action']
            if action.endswith('/for'):
                # str.rstrip() strips a set of characters rather than a
                # suffix; slice the known trailing 'for' off explicitly and
                # keep the '/' that precedes it.
                self.relevance_url = action[:-len('for')]
                self.relevance_token = select(form, 'input[name=authenticity_token]', 1).attrib['value']

        self.score = int(select(tree, 'div.figures figure.score', 1).text)

    def append_comment(self, comment):
        """Add ``comment`` to the top-level comments of the article."""
        self.comments.append(comment)

    def iter_all_comments(self):
        """Yield every comment of the article, depth first, each comment
        before its own replies."""
        for comment in self.comments:
            yield comment
            for c in comment.iter_all_comments():
                yield c
|
|
|
|
class CommentPage(DLFPPage):
    """Page from which a single comment can be read."""

    def get_comment(self):
        """Return a Comment built from the single ``li.comment`` element.

        The comment is attached to an Article created without a tree, so
        only the article's url and id are set; 0 is passed as reply id.
        """
        parent = Article(self.browser, self.url, None)
        node = self.parser.select(self.document.getroot(), 'li.comment', 1)
        return Comment(parent, node, 0)
|
|
|
|
class ContentPage(DLFPPage):
    """Page showing an article with its comment threads."""

    def on_loaded(self):
        """Reset the cached article."""
        self.article = None

    def is_taggable(self):
        """Always report the page as taggable."""
        return True

    def get_comment(self, id):
        """Return the comment whose element is ``li#comment-<id>``.

        Returns ``None`` when looking the element up raises
        BrokenPageError.
        """
        parent = Article(self.browser, self.url, None)
        selector = 'li#comment-%s' % id
        try:
            node = self.parser.select(self.document.getroot(), selector, 1)
        except BrokenPageError:
            return None
        return Comment(parent, node, 0)

    def get_article(self):
        """Return the article of the page, building it on first use.

        The article is cached on the instance; its top-level comments are
        attached right after it is created.
        """
        if self.article:
            return self.article

        root = self.document.getroot()
        tree = self.parser.select(root, 'div#contents article', 1)
        self.article = Article(self.browser, self.url, tree)

        try:
            threads = self.parser.select(root, 'ul.threads', 1)
        except BrokenPageError:
            # no comments
            return self.article

        for item in threads.findall('li'):
            self.article.append_comment(Comment(self.article, item, 0))
        return self.article

    def get_post_comment_url(self):
        """Return the href of the link inside ``p#send-comment``."""
        paragraph = self.parser.select(self.document.getroot(), 'p#send-comment', 1)
        link = paragraph.find('a')
        return link.attrib['href']

    def get_tag_url(self):
        """Return the href of the link inside ``div.tag_in_place``."""
        container = self.parser.select(self.document.getroot(), 'div.tag_in_place', 1)
        link = container.find('a')
        return link.attrib['href']
|
|
|
|
class NewCommentPage(DLFPPage):
    """Page class adding nothing to its base class."""
|
|
|
|
class NewTagPage(DLFPPage):
    """Page holding the form used to submit tags."""

    def _is_tag_form(self, form):
        """Tell whether ``form`` posts to an action ending in ``/tags``."""
        target = form.action
        return target.endswith('/tags')

    def tag(self, tag):
        """Select the tag form, set its ``tags`` field to ``tag`` and
        submit it."""
        browser = self.browser
        browser.select_form(predicate=self._is_tag_form)
        browser['tags'] = tag
        browser.submit()
|
|
|
|
class NodePage(DLFPPage):
    """Page from which a list of error messages can be read."""

    def get_errors(self):
        """Return the text of every ``<li>`` found under ``div.errors``.

        An empty list is returned when looking up ``div.errors`` raises
        BrokenPageError.
        """
        try:
            errors_div = self.parser.select(self.document.getroot(), 'div.errors', 1)
        except BrokenPageError:
            return []

        return [item.text for item in errors_div.find('ul').findall('li')]
|