Comment and Article are now children of Content

This commit is contained in:
Romain Bignon 2011-03-18 10:46:44 +01:00
commit 57cbc29883
2 changed files with 56 additions and 41 deletions

View file

@ -17,6 +17,7 @@
import urllib
import re
from weboob.tools.browser import BaseBrowser, BrowserHTTPError, BrowserIncorrectPassword
from weboob.capabilities.messages import CantSendMessage
@ -47,6 +48,9 @@ class DLFP(BaseBrowser):
return self.location('https://linuxfr.org')
def parse_id(self, _id):
if re.match('^https?://linuxfr.org/nodes/\d+/comments/\d+$', _id):
return _id, None
url = id2url(_id)
if url is None:
if url2id(_id) is not None:
@ -64,10 +68,19 @@ class DLFP(BaseBrowser):
return None
self.location(url)
assert self.is_on_page(ContentPage)
self.page.url = self.absurl(url)
content = self.page.get_article()
content.id = _id
if self.is_on_page(CommentPage):
content = self.page.get_comment()
elif self.is_on_page(ContentPage):
m = re.match('.*#comment-(\d+)$', url)
if m:
content = self.page.get_comment(int(m.group(1)))
else:
content = self.page.get_article()
if _id is not None:
content.id = _id
return content
def _is_comment_submit_form(self, form):
@ -118,19 +131,6 @@ class DLFP(BaseBrowser):
# Open the relative path '/compte/deconnexion' through self.openurl().
# No value is returned.
# NOTE(review): presumably this is the site's logout endpoint -- confirm.
def close_session(self):
self.openurl('/compte/deconnexion')
# Load `url` and return the comment it designates, or None when the page
# reached is neither a CommentPage nor a ContentPage.
#
# url: address handed to self.location(); on a ContentPage it has to carry a
#      '#comment-<number>' fragment naming the wanted comment.
def get_comment(self, url):
self.location(url)
comment = None
# Store self.absurl(url) on the page object (presumably the absolute URL)
# before asking the page for the comment.
self.page.url = self.absurl(url)
if self.is_on_page(CommentPage):
comment = self.page.get_comment()
elif self.is_on_page(ContentPage):
# Whatever follows the last '#comment-' is used as the comment number.
# NOTE(review): the two-name unpacking raises ValueError when `url` holds no
# '#comment-' marker, int() raises ValueError on a non-numeric suffix, and
# the local name `id` shadows the builtin.
ignored, id = url.rsplit('#comment-', 1)
comment = self.page.get_comment(int(id))
return comment
# Shortcut for self.relevance(url, 'for'); returns whatever relevance() returns.
def plusse(self, url):
return self.relevance(url, 'for')
@ -138,7 +138,7 @@ class DLFP(BaseBrowser):
return self.relevance(url, 'against')
def relevance(self, url, what):
comment = self.get_comment(url)
comment = self.get_content(url)
if comment is None:
raise ValueError('The given URL isn\'t a comment.')

View file

@ -24,20 +24,31 @@ from weboob.backends.dlfp.tools import url2id
from .index import DLFPPage
class Comment(object):
def __init__(self, article, div, reply_id):
self.browser = article.browser
self.id = ''
self.reply_id = reply_id
# Base class holding the attributes shared by the content objects of this
# module; Comment and Article both derive from it and call Content.__init__().
# NOTE(review): in this listing username, date and url are each assigned more
# than once inside __init__; for username the later value u'' replaces None.
class Content(object):
# Class-level flag; Article sets it to True.
TAGGABLE = False
# Give every shared attribute an empty or neutral default.
# browser: kept unchanged on self.browser.
def __init__(self, browser):
self.browser = browser
self.url = u''
self.id = u''
self.title = u''
self.author = u''
self.username = None
self.date = None
self.username = u''
self.body = u''
self.signature = u''
self.date = None
self.score = 0
self.url = u''
# Fresh list per instance.
self.comments = []
# Both relevance fields stay None unless a subclass finds a voting form.
self.relevance_url = None
self.relevance_token = None
# Returns the literal False; the TAGGABLE class attribute is not consulted.
def is_taggable(self):
return False
class Comment(Content):
def __init__(self, article, div, reply_id):
Content.__init__(self, article.browser)
self.reply_id = reply_id
self.signature = u''
self.id = div.attrib['id'].split('-')[1]
self.url = '%s#%s' % (article.url, div.attrib['id'])
@ -67,10 +78,7 @@ class Comment(object):
self.score = int(select(div.find('p'), 'span.score', 1).text)
forms = select(div.find('footer'), 'form.button_to')
if len(forms) == 0:
self.relevance_url = None
self.relevance_token = None
else:
if len(forms) > 0:
self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value']
@ -89,16 +97,13 @@ class Comment(object):
# Debug representation: a unicode string showing the %r form of the
# instance's id, author and title inside a "<Comment ...>" tag.
def __repr__(self):
return u"<Comment id=%r author=%r title=%r>" % (self.id, self.author, self.title)
class Article(object):
class Article(Content):
TAGGABLE = True
def __init__(self, browser, url, tree):
self.browser = browser
Content.__init__(self, browser)
self.url = url
self.id = url2id(self.url)
self.title = None
self.author = None
self.body = None
self.date = None
self.comments = []
if tree is None:
return
@ -106,13 +111,23 @@ class Article(object):
header = tree.find('header')
self.title = u''.join([a.text for a in header.find('h1').findall('a')])
try:
self.author = select(header, 'a[rel=author]', 1).text
a = select(header, 'a[rel=author]', 1)
except SelectElementException:
self.author = 'Anonyme'
self.username = None
else:
self.author = unicode(a.text)
self.username = unicode(a.attrib['href'].split('/')[2])
self.body = self.browser.parser.tostring(select(tree, 'div.content', 1))
self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
'%Y-%m-%dT%H:%M:%S')
self.date = local2utc(self.date)
forms = select(tree.find('footer'), 'form.button_to')
if len(forms) > 0:
self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value']
self.score = int(select(tree, 'div.figures figure.score', 1).text)
def append_comment(self, comment):
self.comments.append(comment)
@ -123,9 +138,6 @@ class Article(object):
for c in comment.iter_all_comments():
yield c
# Serialise `div` with self.browser.parser.tostring() and keep the result in
# self.part2. No value is returned.
def parse_part2(self, div):
self.part2 = self.browser.parser.tostring(div)
class CommentPage(DLFPPage):
def get_comment(self):
article = Article(self.browser, self.url, None)
@ -135,6 +147,9 @@ class ContentPage(DLFPPage):
# Reset self.article to None.
# NOTE(review): the name suggests a hook run once the page is loaded -- confirm
# against the DLFPPage base class.
def on_loaded(self):
self.article = None
# Always returns the literal True for this page class.
def is_taggable(self):
return True
def get_comment(self, id):
article = Article(self.browser, self.url, None)
try: