Fix parsing of comments when Templeet sucks, and display comment URLs in signatures
This commit is contained in:
parent
5a33463fb9
commit
d1f805145d
2 changed files with 12 additions and 4 deletions
|
|
@ -110,7 +110,8 @@ class DLFPBackend(BaseBackend, ICapMessages, ICapMessagesPost):
|
||||||
date=com.date,
|
date=com.date,
|
||||||
parent=parent,
|
parent=parent,
|
||||||
content=com.body,
|
content=com.body,
|
||||||
signature='Score: %d' % com.score,
|
signature='<br />'.join(['Score: %d' % com.score,
|
||||||
|
'URL: %s' % com.url]),
|
||||||
children=[],
|
children=[],
|
||||||
flags=flags)
|
flags=flags)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,27 +34,34 @@ class Comment(object):
|
||||||
self.date = None
|
self.date = None
|
||||||
self.body = u''
|
self.body = u''
|
||||||
self.score = 0
|
self.score = 0
|
||||||
|
self.url = u''
|
||||||
self.comments = []
|
self.comments = []
|
||||||
|
|
||||||
for sub in div.getchildren():
|
for sub in div.getchildren():
|
||||||
if sub.tag == 'a':
|
if sub.tag == 'a':
|
||||||
self.id = sub.attrib['name']
|
self.id = sub.attrib['name']
|
||||||
|
self.url = u'https://linuxfr.org/comments/%s.html#%s' % (self.id, self.id)
|
||||||
elif sub.tag == 'h1':
|
elif sub.tag == 'h1':
|
||||||
try:
|
try:
|
||||||
self.title = sub.find('b').text
|
self.title = sub.find('b').text
|
||||||
except UnicodeError:
|
except UnicodeError:
|
||||||
warning('Bad encoded title, but DLFP sucks')
|
warning('Bad encoded title, but DLFP sucks')
|
||||||
elif sub.tag == 'div' and sub.attrib.get('class', '').startswith('comment'):
|
elif sub.tag == 'div' and sub.attrib.get('class', '').startswith('comment'):
|
||||||
self.author = sub.find('a').text
|
self.author = sub.find('a').text if sub.find('a') is not None else 'Unknown'
|
||||||
self.date = self.parse_date(sub.find('i').tail)
|
self.date = self.parse_date(sub.find('i').tail)
|
||||||
self.score = int(sub.findall('i')[1].find('span').text)
|
self.score = int(sub.findall('i')[-1].find('span').text)
|
||||||
self.body = self.browser.parser.tostring(sub.find('p'))
|
self.body = self.browser.parser.tostring(sub.find('p'))
|
||||||
elif sub.attrib.get('class', '') == 'commentsul':
|
elif sub.attrib.get('class', '') == 'commentsul':
|
||||||
comment = Comment(self.browser, sub.find('li'), self.id)
|
comment = Comment(self.browser, sub.find('li'), self.id)
|
||||||
self.comments.append(comment)
|
self.comments.append(comment)
|
||||||
|
|
||||||
def parse_date(self, date_s):
|
def parse_date(self, date_s):
|
||||||
return local2utc(datetime.strptime(date_s.strip().encode('utf-8'), u'le %d/%m/%Y \xe0 %H:%M.'.encode('utf-8')))
|
date_s = date_s.strip().encode('utf-8')
|
||||||
|
if not date_s:
|
||||||
|
date = datetime.now()
|
||||||
|
else:
|
||||||
|
date = datetime.strptime(date_s, u'le %d/%m/%Y \xe0 %H:%M.'.encode('utf-8'))
|
||||||
|
return local2utc(date)
|
||||||
|
|
||||||
def iter_all_comments(self):
|
def iter_all_comments(self):
|
||||||
for comment in self.comments:
|
for comment in self.comments:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue