Fix bug: bad characters in titles
The parser returns double-encoded unicode titles, so we have to convert them to UTF-8.
This commit is contained in:
parent
d5460c4f8c
commit
4b9d48a391
1 changed files with 2 additions and 1 deletions
|
|
@ -48,7 +48,8 @@ class SearchPage(BasePage):
|
|||
video.thumbnail = BaseImage(u'http://boutique.ina.fr%s' % url)
|
||||
video.thumbnail.url = video.thumbnail.id
|
||||
|
||||
video.title = unicode(self.parser.select(li, 'p.titre', 1).text)
|
||||
# The title is poorly encoded in the source, so we have to encode/decode it again
|
||||
video.title = unicode(self.parser.select(li, 'p.titre', 1).text).encode('raw_unicode_escape').decode('utf8')
|
||||
|
||||
date = self.parser.select(li, 'p.date', 1).text
|
||||
day, month, year = [int(s) for s in date.split('/')]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue