diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py index 5015705e..01b8eccb 100644 --- a/modules/dailymotion/pages.py +++ b/modules/dailymotion/pages.py @@ -45,7 +45,7 @@ class IndexPage(BasePage): video = DailymotionVideo(_id) video.title = unicode(self.parser.select(div, 'h3 a', 1).text).strip() video.author = unicode(self.parser.select(div, 'div.dmpi_user_login', 1).find('a').find('span').text).strip() - video.description = html2text(self.parser.tostring(self.parser.select(div, 'div.dmpi_video_description', 1))).strip() + video.description = html2text(self.parser.tostring(self.parser.select(div, 'div.dmpi_video_description', 1))).strip() or unicode() try: parts = self.parser.select(div, 'div.duration', 1).text.split(':') except BrokenPageError: @@ -68,7 +68,7 @@ class IndexPage(BasePage): url = re.sub('\?\d+', '', url) # use the bigger thumbnail url = url.replace('jpeg_preview_medium.jpg', 'jpeg_preview_large.jpg') - video.thumbnail = Thumbnail(url) + video.thumbnail = Thumbnail(unicode(url)) rating_div = self.parser.select(div, 'div.small_stars', 1) video.rating_max = self.get_rate(rating_div) @@ -95,7 +95,7 @@ class VideoPage(BasePage): video.title = unicode(self.parser.select(div, 'span.title', 1).text).strip() video.author = unicode(self.parser.select(div, 'a.name, span.name', 1).text).strip() try: - video.description = html2text(self.parser.tostring(self.parser.select(div, 'div#video_description', 1))).strip() + video.description = html2text(self.parser.tostring(self.parser.select(div, 'div#video_description', 1))).strip() or unicode() except BrokenPageError: video.description = u'' for script in self.parser.select(self.document.getroot(), 'div.dmco_html'): diff --git a/modules/ehentai/pages.py b/modules/ehentai/pages.py index 8afc7cd2..4b627e16 100644 --- a/modules/ehentai/pages.py +++ b/modules/ehentai/pages.py @@ -96,7 +96,7 @@ class GalleryPage(BasePage): thumbnail_style = self.document.xpath("//div[@class='gdtm']/div/attribute::style")[0] thumbnail_url = re.search(r"background:[^;]+url\((.+?)\)", thumbnail_style).group(1) - gallery.thumbnail = Thumbnail(thumbnail_url) + gallery.thumbnail = Thumbnail(unicode(thumbnail_url)) def _prev_page_link(self): try: diff --git a/modules/francetelevisions/pages.py b/modules/francetelevisions/pages.py index 27ee7d52..9d3db11d 100644 --- a/modules/francetelevisions/pages.py +++ b/modules/francetelevisions/pages.py @@ -61,7 +61,7 @@ class IndexPage(BasePage): minute) url = self.parser.select(div, 'img.illustration', 1).attrib['src'] - video.thumbnail = Thumbnail('http://www.pluzz.fr/%s' % url) + video.thumbnail = Thumbnail(u'http://www.pluzz.fr/%s' % url) yield video diff --git a/modules/francetelevisions/video.py b/modules/francetelevisions/video.py index 199acf40..52d97206 100644 --- a/modules/francetelevisions/video.py +++ b/modules/francetelevisions/video.py @@ -27,7 +27,7 @@ __all__ = ['PluzzVideo'] class PluzzVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) - self.ext = 'wmv' + self.ext = u'wmv' @classmethod def id2url(cls, _id): diff --git a/modules/ina/pages/search.py b/modules/ina/pages/search.py index 426ff34e..623ccd97 100644 --- a/modules/ina/pages/search.py +++ b/modules/ina/pages/search.py @@ -44,9 +44,9 @@ class SearchPage(BasePage): video = InaVideo('boutique.%s' % id) - video.thumbnail = Thumbnail('http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src']) + video.thumbnail = Thumbnail(u'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src']) - video.title = self.parser.select(li, 'p.titre', 1).text + video.title = unicode(self.parser.select(li, 'p.titre', 1).text) date = self.parser.select(li, 'p.date', 1).text day, month, year = [int(s) for s in date.split('/')] diff --git a/modules/ina/pages/video.py b/modules/ina/pages/video.py index 37c28050..aa951190 100644 --- a/modules/ina/pages/video.py +++ b/modules/ina/pages/video.py @@ -60,7 +60,7 @@ class BaseVideoPage(BasePage): qs = parse_qs(self.document.getroot().cssselect('param[name="flashvars"]')[0].attrib['value']) s = self.browser.readurl('http://boutique.ina.fr/player/infovideo/id_notice/%s' % qs['id_notice'][0]) s = s[s.find('')+7:s.find('')] - return '%s/pkey/%s' % (s, qs['pkey'][0]) + return u'%s/pkey/%s' % (s, qs['pkey'][0]) def parse_date_and_duration(self, text): duration_regexp = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s') @@ -99,10 +99,10 @@ class VideoPage(BaseVideoPage): def get_title(self): qr = self.parser.select(self.document.getroot(), 'div.container-global-qr')[0].find('div').findall('div')[1] - return qr.find('h2').text.strip() + return unicode(qr.find('h2').text.strip()) def get_description(self): - return self.parser.select(self.document.getroot(), 'div.container-global-qr')[1].find('div').find('p').text.strip() + return unicode(self.parser.select(self.document.getroot(), 'div.container-global-qr')[1].find('div').find('p').text.strip()) class BoutiqueVideoPage(BaseVideoPage): @@ -114,7 +114,7 @@ class BoutiqueVideoPage(BaseVideoPage): def get_description(self): el = self.document.getroot().cssselect('div.bloc-produit-haut div.contenu p')[0] if el is not None: - return el.text.strip() + return unicode(el.text.strip()) def get_date_and_duration(self): el = self.document.getroot().cssselect('div.bloc-produit-haut p.date')[0] diff --git a/modules/radiofrance/backend.py b/modules/radiofrance/backend.py index 5fabe340..7c53df9b 100644 --- a/modules/radiofrance/backend.py +++ b/modules/radiofrance/backend.py @@ -193,7 +193,7 @@ class RadioFranceBackend(BaseBackend, ICapRadio, ICapCollection, ICapVideo): def fill_video(self, video, fields): if 'url' in fields: with self.browser: - video.url = self.browser.get_url(video.id) + video.url = unicode(self.browser.get_url(video.id)) return video diff --git a/modules/radiofrance/browser.py b/modules/radiofrance/browser.py index e34f4c14..2305889f 100644 --- a/modules/radiofrance/browser.py +++ b/modules/radiofrance/browser.py @@ -40,7 +40,7 @@ class RadioFranceVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) - self.ext = 'mp3' + self.ext = u'mp3' @classmethod def id2url(cls, _id): diff --git a/modules/youjizz/pages/index.py b/modules/youjizz/pages/index.py index 42c99275..8641a5b6 100644 --- a/modules/youjizz/pages/index.py +++ b/modules/youjizz/pages/index.py @@ -23,6 +23,7 @@ import re from weboob.tools.browser import BasePage, BrokenPageError from weboob.tools.capabilities.thumbnail import Thumbnail +from weboob.tools.misc import to_unicode from ..video import YoujizzVideo @@ -40,10 +41,10 @@ class IndexPage(BasePage): video = YoujizzVideo(_id) - video.thumbnail = Thumbnail(span.find('.//img').attrib['src']) + video.thumbnail = Thumbnail(unicode(span.find('.//img').attrib['src'])) title_el = self.parser.select(span, 'span#title1', 1) - video.title = title_el.text.strip() + video.title = to_unicode(title_el.text.strip()) time_span = self.parser.select(span, 'span.thumbtime span', 1) time_txt = time_span.text.strip().replace(';', ':') diff --git a/modules/youjizz/pages/video.py b/modules/youjizz/pages/video.py index 43b98850..e227af25 100644 --- a/modules/youjizz/pages/video.py +++ b/modules/youjizz/pages/video.py @@ -62,6 +62,6 @@ class VideoPage(BasePage): elif len(video_file_urls) > 1: raise BrokenPageError('Many video file URL found') else: - video.url = video_file_urls[0] + video.url = to_unicode(video_file_urls[0]) return video diff --git a/modules/youjizz/video.py b/modules/youjizz/video.py index dce129b0..06bb0bc5 100644 --- a/modules/youjizz/video.py +++ b/modules/youjizz/video.py @@ -28,7 +28,7 @@ class YoujizzVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) self.nsfw = True - self.ext = 'flv' + self.ext = u'flv' @classmethod def id2url(cls, _id): diff --git a/modules/youporn/pages/video.py b/modules/youporn/pages/video.py index c15ec75e..5b4a25b8 100644 --- a/modules/youporn/pages/video.py +++ b/modules/youporn/pages/video.py @@ -52,7 +52,7 @@ class VideoPage(PornPage): if m: ext = m.group(1).lower() else: - ext = 'flv' + ext = u'flv' return unicode(a.attrib['href']), unicode(ext) def get_title(self): diff --git a/modules/youtube/backend.py b/modules/youtube/backend.py index 11a03f37..21722844 100644 --- a/modules/youtube/backend.py +++ b/modules/youtube/backend.py @@ -89,8 +89,8 @@ class YoutubeBackend(BaseBackend, ICapVideo, ICapCollection): with self.browser: url, ext = self.browser.get_video_url(player_url) - video.url = url - video.ext = ext + video.url = unicode(url) + video.ext = unicode(ext) def get_video(self, _id): m = self.URL_RE.match(_id) diff --git a/weboob/tools/capabilities/thumbnail.py b/weboob/tools/capabilities/thumbnail.py index a6cb2d83..932da335 100644 --- a/weboob/tools/capabilities/thumbnail.py +++ b/weboob/tools/capabilities/thumbnail.py @@ -33,7 +33,7 @@ class Thumbnail(CapBaseObject): def __init__(self, url): CapBaseObject.__init__(self, url) - self.url = url.replace(' ', '%20') + self.url = url.replace(u' ', u'%20') def __str__(self): return self.url