support wiki and forums
This commit is contained in:
parent
e5d40200a7
commit
4a864a97af
3 changed files with 25 additions and 12 deletions
|
|
@ -34,7 +34,9 @@ class DLFP(BaseBrowser):
|
||||||
PAGES = {'https://linuxfr.org/?': IndexPage,
|
PAGES = {'https://linuxfr.org/?': IndexPage,
|
||||||
'https://linuxfr.org/login.html': LoginPage,
|
'https://linuxfr.org/login.html': LoginPage,
|
||||||
'https://linuxfr.org/news/[^\.]+': ContentPage,
|
'https://linuxfr.org/news/[^\.]+': ContentPage,
|
||||||
|
'https://linuxfr.org/wiki/[^\.]+': ContentPage,
|
||||||
'https://linuxfr.org/users/[\w\-_]+/journaux/[^\.]+': ContentPage,
|
'https://linuxfr.org/users/[\w\-_]+/journaux/[^\.]+': ContentPage,
|
||||||
|
'https://linuxfr.org/forums/[\w\-_]+/posts/[^\.]+': ContentPage,
|
||||||
'https://linuxfr.org/nodes/(\d+)/comments/(\d+)$': CommentPage,
|
'https://linuxfr.org/nodes/(\d+)/comments/(\d+)$': CommentPage,
|
||||||
'https://linuxfr.org/nodes/(\d+)/comments/nouveau': NewCommentPage,
|
'https://linuxfr.org/nodes/(\d+)/comments/nouveau': NewCommentPage,
|
||||||
'https://linuxfr.org/nodes/(\d+)/comments$': NodePage,
|
'https://linuxfr.org/nodes/(\d+)/comments$': NodePage,
|
||||||
|
|
|
||||||
|
|
@ -119,9 +119,12 @@ class Article(Content):
|
||||||
self.author = unicode(a.text)
|
self.author = unicode(a.text)
|
||||||
self.username = unicode(a.attrib['href'].split('/')[2])
|
self.username = unicode(a.attrib['href'].split('/')[2])
|
||||||
self.body = self.browser.parser.tostring(select(tree, 'div.content', 1))
|
self.body = self.browser.parser.tostring(select(tree, 'div.content', 1))
|
||||||
|
try:
|
||||||
self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
|
self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
|
||||||
'%Y-%m-%dT%H:%M:%S')
|
'%Y-%m-%dT%H:%M:%S')
|
||||||
self.date = local2utc(self.date)
|
self.date = local2utc(self.date)
|
||||||
|
except SelectElementException:
|
||||||
|
pass
|
||||||
forms = select(tree.find('footer'), 'form.button_to')
|
forms = select(tree.find('footer'), 'form.button_to')
|
||||||
if len(forms) > 0:
|
if len(forms) > 0:
|
||||||
self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
|
self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,8 @@ RSSID_RE = re.compile('tag:.*:(\w)\w+/(\d+)')
|
||||||
# Patterns used to convert between linuxfr.org URLs and internal content ids.
# They are written as raw strings so that regex escapes such as \w, \- and \.
# reach the regex engine untouched instead of being treated as (invalid)
# Python string escapes.

# Internal id: one type letter, an optional name part, a dot, then the slug.
ID2URL_RE = re.compile(r'^(\w)([\w\-_]*)\.([^\.]+)$')
# Diary URL: captures the user name and the slug.
URL2ID_DIARY_RE = re.compile(r'.*/users/([\w\-_]+)/journaux/([^\.]+)')
# News URL: captures everything after "/news/".
URL2ID_NEWSPAPER_RE = re.compile(r'.*/news/(.+)')
# Wiki URL: captures everything after "/wiki/".
URL2ID_WIKI_RE = re.compile(r'.*/wiki/(.+)')
# Forum URL: captures the forum name and the post slug.
URL2ID_FORUM_RE = re.compile(r'.*/forums/([\w\-_]+)/posts/([^\.]+)')
|
||||||
|
|
||||||
def rssid(entry):
|
def rssid(entry):
|
||||||
m = RSSID_RE.match(entry.id)
|
m = RSSID_RE.match(entry.id)
|
||||||
|
|
@ -32,6 +34,11 @@ def rssid(entry):
|
||||||
if not mm:
|
if not mm:
|
||||||
return
|
return
|
||||||
return 'D%s.%s' % (mm.group(1), m.group(2))
|
return 'D%s.%s' % (mm.group(1), m.group(2))
|
||||||
|
if m.group(1) == 'F':
|
||||||
|
mm = URL2ID_FORUM_RE.match(entry.link)
|
||||||
|
if not mm:
|
||||||
|
return
|
||||||
|
return 'F%s.%s' % (mm.group(1), m.group(2))
|
||||||
return '%s.%s' % (m.group(1), m.group(2))
|
return '%s.%s' % (m.group(1), m.group(2))
|
||||||
|
|
||||||
def id2url(id):
|
def id2url(id):
|
||||||
|
|
@ -43,6 +50,10 @@ def id2url(id):
|
||||||
return '/news/%s' % m.group(3)
|
return '/news/%s' % m.group(3)
|
||||||
if m.group(1) == 'D':
|
if m.group(1) == 'D':
|
||||||
return '/users/%s/journaux/%s' % (m.group(2), m.group(3))
|
return '/users/%s/journaux/%s' % (m.group(2), m.group(3))
|
||||||
|
if m.group(1) == 'W':
|
||||||
|
return '/wiki/%s' % m.group(3)
|
||||||
|
if m.group(1) == 'F':
|
||||||
|
return '/forums/%s/posts/%s' % (m.group(2), m.group(3))
|
||||||
|
|
||||||
def url2id(url):
|
def url2id(url):
|
||||||
m = URL2ID_NEWSPAPER_RE.match(url)
|
m = URL2ID_NEWSPAPER_RE.match(url)
|
||||||
|
|
@ -51,17 +62,14 @@ def url2id(url):
|
||||||
m = URL2ID_DIARY_RE.match(url)
|
m = URL2ID_DIARY_RE.match(url)
|
||||||
if m:
|
if m:
|
||||||
return 'D%s.%s' % (m.group(1), m.group(2))
|
return 'D%s.%s' % (m.group(1), m.group(2))
|
||||||
|
m = URL2ID_WIKI_RE.match(url)
|
||||||
|
if m:
|
||||||
|
return 'W.%s' % (m.group(1))
|
||||||
|
m = URL2ID_FORUM_RE.match(url)
|
||||||
|
if m:
|
||||||
|
return 'F%s.%s' % (m.group(1), m.group(2))
|
||||||
|
|
||||||
def id2threadid(id):
    """
    Extract the part of a content id that follows the dot.

    The id is matched against ID2URL_RE; the third captured group is
    returned, or None when the id does not match the pattern.
    """
    matched = ID2URL_RE.match(id)
    return matched.group(3) if matched else None
|
||||||
|
|
||||||
def id2contenttype(_id):
    """
    Map a content id to a numeric content type based on its first character.

    Returns 1 when the id starts with 'N', 5 when it starts with 'D', and
    None for an empty/missing id or any other leading character.
    NOTE(review): the meaning of the numeric codes is not visible here --
    presumably they are site-side content-type identifiers; confirm.
    """
    if _id:
        prefix = _id[0]
        # Compared in the same order as the original chain of tests.
        for letter, code in (('N', 1), ('D', 5)):
            if prefix == letter:
                return code
    return None
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue