From c0773c1f921739c55469b5eeeacbc6c93efa0d14 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Wed, 24 Aug 2011 17:19:37 +0200 Subject: [PATCH] more flexible code --- weboob/backends/phpbb/backend.py | 26 +++++++------ weboob/backends/phpbb/browser.py | 4 +- weboob/backends/phpbb/pages/forum.py | 58 +++++++++++++++++++++++----- weboob/backends/phpbb/tools.py | 1 + 4 files changed, 68 insertions(+), 21 deletions(-) diff --git a/weboob/backends/phpbb/backend.py b/weboob/backends/phpbb/backend.py index ef910307..302c05e4 100644 --- a/weboob/backends/phpbb/backend.py +++ b/weboob/backends/phpbb/backend.py @@ -41,17 +41,21 @@ class PhpBBBackend(BaseBackend, ICapMessages): LICENSE = 'AGPLv3+' DESCRIPTION = "phpBB forum" CONFIG = BackendConfig(Value('url', label='URL of forum', regexp='https?://.*'), - Value('username', label='Username'), - ValueBackendPassword('password', label='Password'), + Value('username', label='Username', default=''), + ValueBackendPassword('password', label='Password', default=''), ValueInt('thread_unread_messages', label='Limit number of unread messages to retrieve for a thread', default=500) ) STORAGE = {'seen': {}} BROWSER = PhpBB def create_default_browser(self): + username = self.config['username'].get() + if len(username) > 0: + password = self.config['password'].get() + else: + password = None return self.create_browser(self.config['url'].get(), - self.config['username'].get(), - self.config['password'].get()) + username, password) #### ICapMessages ############################################## @@ -174,13 +178,13 @@ class PhpBBBackend(BaseBackend, ICapMessages): return self.get_thread(thread) #### ICapMessagesReply ######################################### - #def post_message(self, message): - # assert message.thread + def post_message(self, message): + assert message.thread - # with self.browser: - # return self.browser.post_comment(message.thread.id, - # message.parent.id, - # message.title, - # message.content) + with self.browser: + return self.browser.post_comment(message.thread.id, + message.parent.id, + message.title, + message.content) OBJECTS = {Thread: fill_thread} diff --git a/weboob/backends/phpbb/browser.py b/weboob/backends/phpbb/browser.py index e145c806..82e10ae2 100644 --- a/weboob/backends/phpbb/browser.py +++ b/weboob/backends/phpbb/browser.py @@ -31,8 +31,10 @@ from .tools import id2url, url2id # Browser class PhpBB(BaseBrowser): PAGES = {'https?://.*/index.php': ForumPage, + 'https?://.*/': ForumPage, 'https?://.*/viewforum.php\?f=(\d+)': ForumPage, - 'https?://.*/viewtopic.php\?.*': TopicPage, + 'https?://.*/search.php\?.*': ForumPage, + 'https?://.*/viewtopic.php\?.*': TopicPage, 'https?://.*/ucp.php\?mode=login.*': LoginPage, } diff --git a/weboob/backends/phpbb/pages/forum.py b/weboob/backends/phpbb/pages/forum.py index 1161745c..a36d24d9 100644 --- a/weboob/backends/phpbb/pages/forum.py +++ b/weboob/backends/phpbb/pages/forum.py @@ -19,7 +19,8 @@ from urlparse import urlsplit, parse_qs -from datetime import datetime + +from weboob.tools.browser import BrokenPageError from .index import PhpBBPage from ..tools import parse_date @@ -51,7 +52,16 @@ class ForumPage(PhpBBPage): title = li.cssselect('a.topictitle')[0] link = Link(Link.TOPIC, title.attrib['href']) link.title = title.text.strip() - link.date = parse_date(li.find('dl').find('dt').findall('a')[-1].tail.strip(u'» \r\n')) + for a in li.find('dl').find('dt').findall('a'): + for text in (a.text, a.tail): + if text is None: + continue + try: + link.date = parse_date(text.strip(u'» \r\n')) + except ValueError: + continue + else: + break # it only lists number of answers, so we add 1. link.nb_messages = int(li.cssselect('dd.posts')[0].text.strip()) + 1 yield link @@ -78,13 +88,13 @@ class TopicPage(PhpBBPage): args = parse_qs(v.query) self.topic_id = int(args['t'][0]) - nav = self.parser.select(self.document.getroot(), 'li.icon-home', 1) self.forum_title = u'' - for a in nav.findall('a')[1:]: - text = a.text.strip() + nav = self.parser.select(self.document.getroot(), 'li.icon-home') + if len(nav) > 0: + text = nav[0].findall('a')[-1].text.strip() if len(text) >= 20: text = text[:20] + u'…' - self.forum_title = '[%s]' % text + self.forum_title = '[%s] ' % text def next_page_url(self): return self.document.getroot().cssselect('a.right-box')[0].attrib['href'] @@ -116,9 +126,39 @@ class TopicPage(PhpBBPage): id = div.attrib['id'][1:] post = Post(self.topic_id, id) - post.title = '%s %s' % (self.forum_title, body.cssselect('h3 a')[0].text.strip()) - post.author = profile.cssselect('dt a')[-1].text.strip() - post.date = parse_date(body.cssselect('p.author')[0].find('strong').tail.strip(u'» \n\r')) + + title_tags = body.cssselect('h3 a') + if len(title_tags) == 0: + title_tags = self.document.getroot().cssselect('h2 a') + if len(title_tags) == 0: + title = u'' + self.logger.warning('Unable to parse title') + else: + title = title_tags[0].text.strip() + + post.title = self.forum_title + title + for a in profile.cssselect('dt a'): + if a.text: + post.author = a.text.strip() + + p_tags = body.cssselect('p.author') + if len(p_tags) == 0: + p_tags = body.find('p') + if len(p_tags) == 0: + post.date = None + self.logger.warning('Unable to parse datetime') + else: + p = p_tags[0] + text = p.find('strong') and p.find('strong').tail + if not text: + text = p.text[4:] + + text = text.strip(u'» \n\r') + try: + post.date = parse_date(text) + except ValueError: + self.logger.warning(u'Unable to parse datetime "%s"' % text) + post.content = self.parser.tostring(body.cssselect('div.content')[0]) signature = body.cssselect('div.signature') diff --git a/weboob/backends/phpbb/tools.py b/weboob/backends/phpbb/tools.py index 78ff64cf..8813a5b7 100644 --- a/weboob/backends/phpbb/tools.py +++ b/weboob/backends/phpbb/tools.py @@ -55,5 +55,6 @@ def parse_date(s): .replace(u'Juin', 'Jun') \ .replace(u'Juil', 'Jul') \ .replace(u'Aoû', 'Aug') \ + .replace(u'Ao\xfbt', 'Aug') \ .replace(u'Déc', 'Dec') return local2utc(_parse_dt(s))