Comment and Article are now children of Content

2011-03-18 10:46:44 +01:00 · 2011-03-18 10:46:44 +01:00 · 57cbc29883
commit 57cbc29883
parent 308153a7c5
2 changed files with 56 additions and 41 deletions
--- a/weboob/backends/dlfp/browser.py
+++ b/weboob/backends/dlfp/browser.py
@@ -17,6 +17,7 @@


 import urllib
+import re

 from weboob.tools.browser import BaseBrowser, BrowserHTTPError, BrowserIncorrectPassword
 from weboob.capabilities.messages import CantSendMessage
@@ -47,6 +48,9 @@ class DLFP(BaseBrowser):
        return self.location('https://linuxfr.org')

    def parse_id(self, _id):
+        if re.match('^https?://linuxfr.org/nodes/\d+/comments/\d+$', _id):
+            return _id, None
+
        url = id2url(_id)
        if url is None:
            if url2id(_id) is not None:
@@ -64,10 +68,19 @@ class DLFP(BaseBrowser):
            return None

        self.location(url)
-        assert self.is_on_page(ContentPage)
        self.page.url = self.absurl(url)
-        content = self.page.get_article()
-        content.id = _id
+
+        if self.is_on_page(CommentPage):
+            content = self.page.get_comment()
+        elif self.is_on_page(ContentPage):
+            m = re.match('.*#comment-(\d+)$', url)
+            if m:
+                content = self.page.get_comment(int(m.group(1)))
+            else:
+                content = self.page.get_article()
+
+        if _id is not None:
+            content.id = _id
        return content

    def _is_comment_submit_form(self, form):
@@ -118,19 +131,6 @@ class DLFP(BaseBrowser):
    def close_session(self):
        self.openurl('/compte/deconnexion')

-    def get_comment(self, url):
-        self.location(url)
-
-        comment = None
-        self.page.url = self.absurl(url)
-        if self.is_on_page(CommentPage):
-            comment = self.page.get_comment()
-        elif self.is_on_page(ContentPage):
-            ignored, id = url.rsplit('#comment-', 1)
-            comment = self.page.get_comment(int(id))
-
-        return comment
-
    def plusse(self, url):
        return self.relevance(url, 'for')

@@ -138,7 +138,7 @@ class DLFP(BaseBrowser):
        return self.relevance(url, 'against')

    def relevance(self, url, what):
-        comment = self.get_comment(url)
+        comment = self.get_content(url)

        if comment is None:
            raise ValueError('The given URL isn\'t a comment.')
--- a/weboob/backends/dlfp/pages/news.py
+++ b/weboob/backends/dlfp/pages/news.py
@@ -24,20 +24,31 @@ from weboob.backends.dlfp.tools import url2id

 from .index import DLFPPage

-class Comment(object):
-    def __init__(self, article, div, reply_id):
-        self.browser = article.browser
-        self.id = ''
-        self.reply_id = reply_id
+class Content(object):
+    TAGGABLE = False
+
+    def __init__(self, browser):
+        self.browser = browser
+        self.url = u''
+        self.id = u''
        self.title = u''
        self.author = u''
-        self.username = None
-        self.date = None
+        self.username = u''
        self.body = u''
-        self.signature = u''
+        self.date = None
        self.score = 0
-        self.url = u''
        self.comments = []
+        self.relevance_url = None
+        self.relevance_token = None
+
+    def is_taggable(self):
+        return False
+
+class Comment(Content):
+    def __init__(self, article, div, reply_id):
+        Content.__init__(self, article.browser)
+        self.reply_id = reply_id
+        self.signature = u''

        self.id = div.attrib['id'].split('-')[1]
        self.url = '%s#%s' % (article.url, div.attrib['id'])
@@ -67,10 +78,7 @@ class Comment(object):

        self.score = int(select(div.find('p'), 'span.score', 1).text)
        forms = select(div.find('footer'), 'form.button_to')
-        if len(forms) == 0:
-            self.relevance_url = None
-            self.relevance_token = None
-        else:
+        if len(forms) > 0:
            self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
            self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value']

@@ -89,16 +97,13 @@ class Comment(object):
    def __repr__(self):
        return u"<Comment id=%r author=%r title=%r>" % (self.id, self.author, self.title)

-class Article(object):
+class Article(Content):
+    TAGGABLE = True
+
    def __init__(self, browser, url, tree):
-        self.browser = browser
+        Content.__init__(self, browser)
        self.url = url
        self.id = url2id(self.url)
-        self.title = None
-        self.author = None
-        self.body = None
-        self.date = None
-        self.comments = []

        if tree is None:
            return
@@ -106,13 +111,23 @@ class Article(object):
        header = tree.find('header')
        self.title = u' — '.join([a.text for a in header.find('h1').findall('a')])
        try:
-            self.author = select(header, 'a[rel=author]', 1).text
+            a = select(header, 'a[rel=author]', 1)
        except SelectElementException:
            self.author = 'Anonyme'
+            self.username = None
+        else:
+            self.author = unicode(a.text)
+            self.username = unicode(a.attrib['href'].split('/')[2])
        self.body = self.browser.parser.tostring(select(tree, 'div.content', 1))
        self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
                                      '%Y-%m-%dT%H:%M:%S')
        self.date = local2utc(self.date)
+        forms = select(tree.find('footer'), 'form.button_to')
+        if len(forms) > 0:
+            self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
+            self.relevance_token = select(forms[0], 'input[name=authenticity_token]', 1).attrib['value']
+
+        self.score = int(select(tree, 'div.figures figure.score', 1).text)

    def append_comment(self, comment):
        self.comments.append(comment)
@@ -123,9 +138,6 @@ class Article(object):
            for c in comment.iter_all_comments():
                yield c

-    def parse_part2(self, div):
-        self.part2 = self.browser.parser.tostring(div)
-
 class CommentPage(DLFPPage):
    def get_comment(self):
        article = Article(self.browser, self.url, None)
@@ -135,6 +147,9 @@ class ContentPage(DLFPPage):
    def on_loaded(self):
        self.article = None

+    def is_taggable(self):
+        return True
+
    def get_comment(self, id):
        article = Article(self.browser, self.url, None)
        try: