From 4a864a97af60bb5c84a0b085d47046e09111d60a Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Sat, 19 Mar 2011 09:27:42 +0100 Subject: [PATCH] support wiki and forums --- weboob/backends/dlfp/browser.py | 2 ++ weboob/backends/dlfp/pages/news.py | 9 ++++++--- weboob/backends/dlfp/tools.py | 26 +++++++++++++++++--------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/weboob/backends/dlfp/browser.py b/weboob/backends/dlfp/browser.py index ddf4cd61..95ffc744 100644 --- a/weboob/backends/dlfp/browser.py +++ b/weboob/backends/dlfp/browser.py @@ -34,7 +34,9 @@ class DLFP(BaseBrowser): PAGES = {'https://linuxfr.org/?': IndexPage, 'https://linuxfr.org/login.html': LoginPage, 'https://linuxfr.org/news/[^\.]+': ContentPage, + 'https://linuxfr.org/wiki/[^\.]+': ContentPage, 'https://linuxfr.org/users/[\w\-_]+/journaux/[^\.]+': ContentPage, + 'https://linuxfr.org/forums/[\w\-_]+/posts/[^\.]+': ContentPage, 'https://linuxfr.org/nodes/(\d+)/comments/(\d+)$': CommentPage, 'https://linuxfr.org/nodes/(\d+)/comments/nouveau': NewCommentPage, 'https://linuxfr.org/nodes/(\d+)/comments$': NodePage, diff --git a/weboob/backends/dlfp/pages/news.py b/weboob/backends/dlfp/pages/news.py index 91ae098e..8ebd6c2b 100644 --- a/weboob/backends/dlfp/pages/news.py +++ b/weboob/backends/dlfp/pages/news.py @@ -119,9 +119,12 @@ class Article(Content): self.author = unicode(a.text) self.username = unicode(a.attrib['href'].split('/')[2]) self.body = self.browser.parser.tostring(select(tree, 'div.content', 1)) - self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0], - '%Y-%m-%dT%H:%M:%S') - self.date = local2utc(self.date) + try: + self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0], + '%Y-%m-%dT%H:%M:%S') + self.date = local2utc(self.date) + except SelectElementException: + pass forms = select(tree.find('footer'), 'form.button_to') if len(forms) > 0: self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against') diff --git a/weboob/backends/dlfp/tools.py b/weboob/backends/dlfp/tools.py index 98f55706..912759ef 100644 --- a/weboob/backends/dlfp/tools.py +++ b/weboob/backends/dlfp/tools.py @@ -22,6 +22,8 @@ RSSID_RE = re.compile('tag:.*:(\w)\w+/(\d+)') ID2URL_RE = re.compile('^(\w)([\w\-_]*)\.([^\.]+)$') URL2ID_DIARY_RE = re.compile('.*/users/([\w\-_]+)/journaux/([^\.]+)') URL2ID_NEWSPAPER_RE = re.compile('.*/news/(.+)') +URL2ID_WIKI_RE = re.compile('.*/wiki/(.+)') +URL2ID_FORUM_RE = re.compile('.*/forums/([\w\-_]+)/posts/([^\.]+)') def rssid(entry): m = RSSID_RE.match(entry.id) @@ -32,6 +34,11 @@ def rssid(entry): if not mm: return return 'D%s.%s' % (mm.group(1), m.group(2)) + if m.group(1) == 'F': + mm = URL2ID_FORUM_RE.match(entry.link) + if not mm: + return + return 'F%s.%s' % (mm.group(1), m.group(2)) return '%s.%s' % (m.group(1), m.group(2)) def id2url(id): @@ -43,6 +50,10 @@ def id2url(id): return '/news/%s' % m.group(3) if m.group(1) == 'D': return '/users/%s/journaux/%s' % (m.group(2), m.group(3)) + if m.group(1) == 'W': + return '/wiki/%s' % m.group(3) + if m.group(1) == 'F': + return '/forums/%s/posts/%s' % (m.group(2), m.group(3)) def url2id(url): m = URL2ID_NEWSPAPER_RE.match(url) @@ -51,17 +62,14 @@ def url2id(url): m = URL2ID_DIARY_RE.match(url) if m: return 'D%s.%s' % (m.group(1), m.group(2)) + m = URL2ID_WIKI_RE.match(url) + if m: + return 'W.%s' % (m.group(1)) + m = URL2ID_FORUM_RE.match(url) + if m: + return 'F%s.%s' % (m.group(1), m.group(2)) def id2threadid(id): m = ID2URL_RE.match(id) if m: return m.group(3) - -def id2contenttype(_id): - if not _id: - return None - if _id[0] == 'N': - return 1 - if _id[0] == 'D': - return 5 - return None