From 9294f3e964dfbed206e5f95350384d81e095a928 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Fri, 8 Jun 2012 11:24:57 +0200 Subject: [PATCH] fix parsing authors on anyclip videos --- modules/dailymotion/pages.py | 10 ++++++---- modules/dailymotion/video.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py index 640200f5..2b3ee5d0 100644 --- a/modules/dailymotion/pages.py +++ b/modules/dailymotion/pages.py @@ -92,14 +92,16 @@ class VideoPage(BasePage): div = self.parser.select(self.document.getroot(), 'div#content', 1) - video.title = self.parser.select(div, 'span.title', 1).text - video.author = self.parser.select(div, 'a.name', 1).text + video.title = unicode(self.parser.select(div, 'span.title', 1).text) + video.author = unicode(self.parser.select(div, 'a.name, span.name', 1).text) try: - video.description = self.parser.select(div, 'div#video_description', 1).text + video.description = unicode(self.parser.select(div, 'div#video_description', 1).text) except BrokenPageError: video.description = u'' for script in self.parser.select(self.document.getroot(), 'div.dmco_html'): - if 'id' in script.attrib and script.attrib['id'].startswith('container_player_'): + # TODO support videos from anyclip, cf http://www.dailymotion.com/video/xkyjiv for example + if 'id' in script.attrib and script.attrib['id'].startswith('container_player_') and \ + script.find('script') is not None: text = script.find('script').text mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', text) if mobj is None: diff --git a/modules/dailymotion/video.py b/modules/dailymotion/video.py index cff4e23e..1ff2e841 100644 --- a/modules/dailymotion/video.py +++ b/modules/dailymotion/video.py @@ -27,7 +27,7 @@ __all__ = ['DailymotionVideo'] class DailymotionVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) - self.ext = 'flv' + self.ext = u'flv' @classmethod def id2url(cls, _id):