new backend 'phpbb'

2011-08-24 16:25:17 +02:00 · 2011-08-24 16:25:17 +02:00 · 2184951c4a
commit 2184951c4a
parent 8564f52898
8 changed files with 612 additions and 0 deletions
--- a/weboob/backends/phpbb/pages/forum.py
+++ b/weboob/backends/phpbb/pages/forum.py
@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from urlparse import urlsplit, parse_qs
+from datetime import datetime
+
+from .index import PhpBBPage
+from ..tools import parse_date
+
+
+__all__ = ['Link', 'ForumPage', 'TopicPage']
+
+
+class Link(object):
+    (FORUM,
+     TOPIC) = xrange(2)
+
+    def __init__(self, type, url):
+        self.type = type
+        self.url = url
+        self.title = u''
+        self.date = None
+        self.nb_messages = 0
+
+class ForumPage(PhpBBPage):
+    def iter_links(self):
+        for li in self.parser.select(self.document.getroot(), 'ul.forums li.row'):
+            title = li.cssselect('a.forumtitle')[0]
+            link = Link(Link.FORUM, title.attrib['href'])
+            link.title = title.text.strip()
+            yield link
+
+        for li in self.parser.select(self.document.getroot(), 'ul.topics li.row'):
+            title = li.cssselect('a.topictitle')[0]
+            link = Link(Link.TOPIC, title.attrib['href'])
+            link.title = title.text.strip()
+            link.date = parse_date(li.find('dl').find('dt').findall('a')[-1].tail.strip(u'» \r\n'))
+            # it only lists number of answers, so we add 1.
+            link.nb_messages = int(li.cssselect('dd.posts')[0].text.strip()) + 1
+            yield link
+
+class Post(object):
+    def __init__(self, topic, id):
+        self.id = int(id)
+        self.topic = topic
+        self.title = u''
+        self.author = u''
+        self.date = None
+        self.content = u''
+        self.signature = u''
+        self.parent = 0
+
+class TopicPage(PhpBBPage):
+    def on_loaded(self):
+        div = self.document.getroot().cssselect('div.pagination')[0]
+        strongs = div.cssselect('strong')
+        self.cur_page = int(strongs[0].text.strip())
+        self.tot_pages = int(strongs[1].text.strip())
+
+        v = urlsplit(self.url)
+        args = parse_qs(v.query)
+        self.topic_id = int(args['t'][0])
+
+        nav = self.parser.select(self.document.getroot(), 'li.icon-home', 1)
+        self.forum_title = u''
+        for a in nav.findall('a')[1:]:
+            text = a.text.strip()
+            if len(text) >= 20:
+                text = text[:20] + u'…'
+            self.forum_title = '[%s]' % text
+
+    def next_page_url(self):
+        return self.document.getroot().cssselect('a.right-box')[0].attrib['href']
+
+    def prev_page_url(self):
+        return self.document.getroot().cssselect('a.left-box')[0].attrib['href']
+
+    def iter_posts(self):
+        for div in self.parser.select(self.document.getroot(), 'div.post'):
+            yield self._get_post(div)
+
+    def riter_posts(self):
+        for div in reversed(self.parser.select(self.document.getroot(), 'div.post')):
+            yield self._get_post(div)
+
+    def get_post(self, id):
+        parent = 0
+        for div in self.parser.select(self.document.getroot(), 'div.post'):
+            if div.attrib['id'] == 'p%d' % id:
+                post = self._get_post(div)
+                post.parent = parent
+                return post
+            else:
+                parent = int(div.attrib['id'][1:])
+
+    def _get_post(self, div):
+        body = div.cssselect('div.postbody')[0]
+        profile = div.cssselect('dl.postprofile')[0]
+
+        id = div.attrib['id'][1:]
+        post = Post(self.topic_id, id)
+        post.title = '%s %s' % (self.forum_title, body.cssselect('h3 a')[0].text.strip())
+        post.author = profile.cssselect('dt a')[-1].text.strip()
+        post.date = parse_date(body.cssselect('p.author')[0].find('strong').tail.strip(u'» \n\r'))
+        post.content = self.parser.tostring(body.cssselect('div.content')[0])
+
+        signature = body.cssselect('div.signature')
+        if len(signature) > 0:
+            post.signature = self.parser.tostring(signature[0])
+        return post
+
+    def get_last_post_id(self):
+        id = 0
+        for div in self.parser.select(self.document.getroot(), 'div.post'):
+            id = int(div.attrib['id'][1:])
+        return id
+