Fix bug: bad characters in titles

The parser returns double-encoded unicode titles, which we have to convert to UTF-8.
This commit is contained in:
Vincent Texier 2014-06-19 17:56:38 +02:00 committed by Florent
commit 4b9d48a391

View file

@ -48,7 +48,8 @@ class SearchPage(BasePage):
video.thumbnail = BaseImage(u'http://boutique.ina.fr%s' % url)
video.thumbnail.url = video.thumbnail.id
video.title = unicode(self.parser.select(li, 'p.titre', 1).text)
# The title is poorly encoded in the source, so we have to encode/decode it again
video.title = unicode(self.parser.select(li, 'p.titre', 1).text).encode('raw_unicode_escape').decode('utf8')
date = self.parser.select(li, 'p.date', 1).text
day, month, year = [int(s) for s in date.split('/')]