# Reconstructed post-patch content of weboob/backends/dlfp/tools.py.
#
# Provenance: patch "factorize" by Romain Bignon, Mon, 21 Mar 2011,
# commit 65a8470335e26d4fb059084bfc69f30e0c632b44 (file index
# 7433278e..a7578d9c). The patch replaced six hand-written URL2ID_*_RE
# regexps and the matching if-chains in rssid()/id2url()/url2id() with the
# single REGEXPS table below.
#
# Review fix applied on top of the patch: the forum entry was 'B.%s', which
# has one placeholder for a URL template with two. That made url2id() and
# id2url() raise TypeError for forum posts and made rssid() drop the forum
# name. It is now 'B%s.%s', consistent with the diary entry.
#
# NOTE(review): the patch context shows that a function id2threadid(id)
# follows in the module; only its first two lines are visible, so it is not
# reproduced here.

import re

# Matches ids of the form "tag:<anything>:<word>/<digits>".
# group(1) is the word (content type name), group(2) the number.
RSSID_RE = re.compile(r'tag:.*:(\w+)/(\d+)')

# Matches internal ids "<letter><optional owner>.<name>".
# group(1) is the one-letter prefix, group(2) the (possibly empty) owner,
# group(3) the trailing name.
ID2URL_RE = re.compile(r'^(\w)([\w\-_]*)\.([^ \.]+)$')

# URL template -> id template. Both sides use '%s' placeholders and MUST
# carry the same number of them: url2id() feeds every captured URL group
# into the id template, and id2url() uses the placeholder count of the id
# template to decide how many values to feed into the URL template.
# The first character of each id template is its unique type prefix.
REGEXPS = {'/users/%s/journaux/%s': 'D%s.%s',
           '/news/%s': 'N.%s',
           '/wiki/%s': 'W.%s',
           '/suivi/%s': 'T.%s',
           '/sondages/%s': 'P.%s',
           '/forums/%s/posts/%s': 'B%s.%s',
           }


def f2re(f):
    """Turn a '%s' URL template into a regexp source string.

    Each '%s' becomes a capturing group matching one path segment (no space,
    no slash), and '.*' is prepended so the template may appear anywhere
    after an arbitrary prefix when used with re.match().
    """
    return '.*' + f.replace('%s', '([^ /]+)')


def _formats_for_prefix(prefix):
    """Return the (url_template, id_template) pair for a one-letter prefix.

    Returns None when no id template in REGEXPS starts with *prefix*.
    """
    for url_format, id_format in REGEXPS.items():
        if id_format[0] == prefix:
            return url_format, id_format
    return None


def rssid(entry):
    """Build the internal id for an RSS entry.

    *entry* must expose string attributes ``id`` (matched against RSSID_RE)
    and ``link`` (the entry URL).

    Returns '<prefix>.<number>' for single-placeholder types and
    '<prefix><owner>.<number>' for two-placeholder types, where <owner> is
    the first path segment captured from ``entry.link``. Returns None when
    the id does not match, the type is unknown, or the link does not match
    the URL template of a two-placeholder type.
    """
    m = RSSID_RE.match(entry.id)
    if not m:
        return None

    # The type prefix is the first letter of the type name; 'Post' is
    # renamed to 'Board' first so that it maps to 'B' rather than 'P'.
    ind = m.group(1).replace('Post', 'Board')[0]

    formats = _formats_for_prefix(ind)
    if formats is None:
        return None
    url_format, id_format = formats

    if id_format.count('%s') != 2:
        return '%s.%s' % (ind, m.group(2))

    # Two-placeholder ids need the owner segment, which only the link holds.
    mm = re.match(f2re(url_format), entry.link)
    if not mm:
        return None
    return '%s%s.%s' % (ind, mm.group(1), m.group(2))


def id2url(id):
    """Convert an internal id back to its site-relative URL.

    Returns None when *id* does not match ID2URL_RE or its prefix is not
    present in REGEXPS.
    """
    m = ID2URL_RE.match(id)
    if not m:
        return None

    formats = _formats_for_prefix(m.group(1))
    if formats is None:
        return None
    url_format, id_format = formats

    if id_format.count('%s') == 2:
        return url_format % (m.group(2), m.group(3))
    return url_format % m.group(3)


def url2id(url):
    """Convert a URL to an internal id.

    The URL is tried against every template in REGEXPS; the captured groups
    of the first matching one are substituted into its id template. Returns
    None when no template matches.
    """
    for url_format, id_format in REGEXPS.items():
        m = re.match(f2re(url_format), url)
        if m:
            return id_format % m.groups()
    return None