support wiki and forums
This commit is contained in:
parent
e5d40200a7
commit
4a864a97af
3 changed files with 25 additions and 12 deletions
|
|
@ -34,7 +34,9 @@ class DLFP(BaseBrowser):
|
|||
PAGES = {'https://linuxfr.org/?': IndexPage,
|
||||
'https://linuxfr.org/login.html': LoginPage,
|
||||
'https://linuxfr.org/news/[^\.]+': ContentPage,
|
||||
'https://linuxfr.org/wiki/[^\.]+': ContentPage,
|
||||
'https://linuxfr.org/users/[\w\-_]+/journaux/[^\.]+': ContentPage,
|
||||
'https://linuxfr.org/forums/[\w\-_]+/posts/[^\.]+': ContentPage,
|
||||
'https://linuxfr.org/nodes/(\d+)/comments/(\d+)$': CommentPage,
|
||||
'https://linuxfr.org/nodes/(\d+)/comments/nouveau': NewCommentPage,
|
||||
'https://linuxfr.org/nodes/(\d+)/comments$': NodePage,
|
||||
|
|
|
|||
|
|
@ -119,9 +119,12 @@ class Article(Content):
|
|||
self.author = unicode(a.text)
|
||||
self.username = unicode(a.attrib['href'].split('/')[2])
|
||||
self.body = self.browser.parser.tostring(select(tree, 'div.content', 1))
|
||||
self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
|
||||
'%Y-%m-%dT%H:%M:%S')
|
||||
self.date = local2utc(self.date)
|
||||
try:
|
||||
self.date = datetime.strptime(select(header, 'time', 1).attrib['datetime'].split('+')[0],
|
||||
'%Y-%m-%dT%H:%M:%S')
|
||||
self.date = local2utc(self.date)
|
||||
except SelectElementException:
|
||||
pass
|
||||
forms = select(tree.find('footer'), 'form.button_to')
|
||||
if len(forms) > 0:
|
||||
self.relevance_url = forms[0].attrib['action'].rstrip('for').rstrip('against')
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ RSSID_RE = re.compile('tag:.*:(\w)\w+/(\d+)')
|
|||
# All patterns are raw strings so that regex escapes such as ``\w`` and ``\.``
# reach the ``re`` module verbatim instead of relying on Python leaving
# unrecognised string escapes untouched (which now triggers a warning).

# Content id: one type letter, an optional slug (user name or forum
# section), a dot, then a dot-free remainder.
ID2URL_RE = re.compile(r'^(\w)([\w\-_]*)\.([^\.]+)$')
# Diary URL: captures the user name and the diary slug.
URL2ID_DIARY_RE = re.compile(r'.*/users/([\w\-_]+)/journaux/([^\.]+)')
# News URL: captures everything after ``/news/``.
URL2ID_NEWSPAPER_RE = re.compile(r'.*/news/(.+)')
# Wiki URL: captures everything after ``/wiki/``.
URL2ID_WIKI_RE = re.compile(r'.*/wiki/(.+)')
# Forum post URL: captures the forum section and the post slug.
URL2ID_FORUM_RE = re.compile(r'.*/forums/([\w\-_]+)/posts/([^\.]+)')
|
||||
|
||||
def rssid(entry):
|
||||
m = RSSID_RE.match(entry.id)
|
||||
|
|
@ -32,6 +34,11 @@ def rssid(entry):
|
|||
if not mm:
|
||||
return
|
||||
return 'D%s.%s' % (mm.group(1), m.group(2))
|
||||
if m.group(1) == 'F':
|
||||
mm = URL2ID_FORUM_RE.match(entry.link)
|
||||
if not mm:
|
||||
return
|
||||
return 'F%s.%s' % (mm.group(1), m.group(2))
|
||||
return '%s.%s' % (m.group(1), m.group(2))
|
||||
|
||||
def id2url(id):
|
||||
|
|
@ -43,6 +50,10 @@ def id2url(id):
|
|||
return '/news/%s' % m.group(3)
|
||||
if m.group(1) == 'D':
|
||||
return '/users/%s/journaux/%s' % (m.group(2), m.group(3))
|
||||
if m.group(1) == 'W':
|
||||
return '/wiki/%s' % m.group(3)
|
||||
if m.group(1) == 'F':
|
||||
return '/forums/%s/posts/%s' % (m.group(2), m.group(3))
|
||||
|
||||
def url2id(url):
|
||||
m = URL2ID_NEWSPAPER_RE.match(url)
|
||||
|
|
@ -51,17 +62,14 @@ def url2id(url):
|
|||
m = URL2ID_DIARY_RE.match(url)
|
||||
if m:
|
||||
return 'D%s.%s' % (m.group(1), m.group(2))
|
||||
m = URL2ID_WIKI_RE.match(url)
|
||||
if m:
|
||||
return 'W.%s' % (m.group(1))
|
||||
m = URL2ID_FORUM_RE.match(url)
|
||||
if m:
|
||||
return 'F%s.%s' % (m.group(1), m.group(2))
|
||||
|
||||
def id2threadid(id):
    """Return the thread part of a content id.

    The id is matched against ID2URL_RE and its third captured group (the
    text following the dot) is returned. None is returned when the id does
    not match the pattern.
    """
    match = ID2URL_RE.match(id)
    if match is None:
        return None
    return match.group(3)
|
||||
|
||||
def id2contenttype(_id):
    """Map a content id to its numeric content type.

    Ids whose first character is 'N' map to 1 and those starting with 'D'
    map to 5. An empty or None id, or any other leading character, yields
    None.
    """
    if not _id:
        return None
    prefix = _id[0]
    if prefix == 'N':
        return 1
    return 5 if prefix == 'D' else None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue