From 57cbc29883008e3ae82cf76bdb67f077d1afbff7 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Fri, 18 Mar 2011 10:46:44 +0100 Subject: [PATCH] Comment and Article are now children of Content --- weboob/backends/dlfp/browser.py | 34 ++++++++-------- weboob/backends/dlfp/pages/news.py | 63 ++++++++++++++++++------------ 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/weboob/backends/dlfp/browser.py b/weboob/backends/dlfp/browser.py index dabee0c5..ddf4cd61 100644 --- a/weboob/backends/dlfp/browser.py +++ b/weboob/backends/dlfp/browser.py @@ -17,6 +17,7 @@ import urllib +import re from weboob.tools.browser import BaseBrowser, BrowserHTTPError, BrowserIncorrectPassword from weboob.capabilities.messages import CantSendMessage @@ -47,6 +48,9 @@ class DLFP(BaseBrowser): return self.location('https://linuxfr.org') def parse_id(self, _id): + if re.match('^https?://linuxfr.org/nodes/\d+/comments/\d+$', _id): + return _id, None + url = id2url(_id) if url is None: if url2id(_id) is not None: @@ -64,10 +68,19 @@ class DLFP(BaseBrowser): return None self.location(url) - assert self.is_on_page(ContentPage) self.page.url = self.absurl(url) - content = self.page.get_article() - content.id = _id + + if self.is_on_page(CommentPage): + content = self.page.get_comment() + elif self.is_on_page(ContentPage): + m = re.match('.*#comment-(\d+)$', url) + if m: + content = self.page.get_comment(int(m.group(1))) + else: + content = self.page.get_article() + + if _id is not None: + content.id = _id return content def _is_comment_submit_form(self, form): @@ -118,19 +131,6 @@ class DLFP(BaseBrowser): def close_session(self): self.openurl('/compte/deconnexion') - def get_comment(self, url): - self.location(url) - - comment = None - self.page.url = self.absurl(url) - if self.is_on_page(CommentPage): - comment = self.page.get_comment() - elif self.is_on_page(ContentPage): - ignored, id = url.rsplit('#comment-', 1) - comment = self.page.get_comment(int(id)) - - return comment - def plusse(self, url): return self.relevance(url, 'for') @@ -138,7 +138,7 @@ class DLFP(BaseBrowser): return self.relevance(url, 'against') def relevance(self, url, what): - comment = self.get_comment(url) + comment = self.get_content(url) if comment is None: raise ValueError('The given URL isn\'t a comment.') diff --git a/weboob/backends/dlfp/pages/news.py b/weboob/backends/dlfp/pages/news.py index 010062bd..91ae098e 100644 --- a/weboob/backends/dlfp/pages/news.py +++ b/weboob/backends/dlfp/pages/news.py @@ -24,20 +24,31 @@ from weboob.backends.dlfp.tools import url2id from .index import DLFPPage -class Comment(object): - def __init__(self, article, div, reply_id): - self.browser = article.browser - self.id = '' - self.reply_id = reply_id +class Content(object): + TAGGABLE = False + + def __init__(self, browser): + self.browser = browser + self.url = u'' + self.id = u'' self.title = u'' self.author = u'' - self.username = None - self.date = None + self.username = u'' self.body = u'' - self.signature = u'' + self.date = None self.score = 0 - self.url = u'' self.comments = [] + self.relevance_url = None + self.relevance_token = None + + def is_taggable(self): + return False + +class Comment(Content): + def __init__(self, article, div, reply_id): + Content.__init__(self, article.browser) + self.reply_id = reply_id + self.signature = u'' self.id = div.attrib['id'].split('-')[1] self.url = '%s#%s' % (article.url, div.attrib['id']) @@ -67,10 +78,7 @@ class Comment(object): self.score = int(select(div.find('p'), 'span.score', 1).text) forms = select(div.find('footer'), 'form.button_to') - if len(forms) == 0: - self.relevance_url = None - self.relevance_token = None - else: + if len(forms) > 0: self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against') self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value'] @@ -89,16 +97,13 @@ class Comment(object): def __repr__(self): return u"" % (self.id, self.author, self.title) -class Article(object): +class Article(Content): + TAGGABLE = True + def __init__(self, browser, url, tree): - self.browser = browser + Content.__init__(self, browser) self.url = url self.id = url2id(self.url) - self.title = None - self.author = None - self.body = None - self.date = None - self.comments = [] if tree is None: return @@ -106,13 +111,23 @@ class Article(object): header = tree.find('header') self.title = u' — '.join([a.text for a in header.find('h1').findall('a')]) try: - self.author = select(header, 'a[rel=author]', 1).text + a = select(header, 'a[rel=author]', 1) except SelectElementException: self.author = 'Anonyme' + self.username = None + else: + self.author = unicode(a.text) + self.username = unicode(a.attrib['href'].split('/')[2]) self.body = self.browser.parser.tostring(select(tree, 'div.content', 1)) self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0], '%Y-%m-%dT%H:%M:%S') self.date = local2utc(self.date) + forms = select(tree.find('footer'), 'form.button_to') + if len(forms) > 0: + self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against') + self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value'] + + self.score = int(select(tree, 'div.figures figure.score', 1).text) def append_comment(self, comment): self.comments.append(comment) @@ -123,9 +138,6 @@ class Article(object): for c in comment.iter_all_comments(): yield c - def parse_part2(self, div): - self.part2 = self.browser.parser.tostring(div) - class CommentPage(DLFPPage): def get_comment(self): article = Article(self.browser, self.url, None) @@ -135,6 +147,9 @@ class ContentPage(DLFPPage): def on_loaded(self): self.article = None + def is_taggable(self): + return True + def get_comment(self, id): article = Article(self.browser, self.url, None) try: