From a79ccec8bb1087104be65c31ff4c62c9a3d34965 Mon Sep 17 00:00:00 2001 From: Vincent Texier Date: Sun, 12 Jan 2014 12:06:01 +0100 Subject: [PATCH] Fix empty fields in dailymotion plugin Change extension from flv to mp4, because all quality formats are h264/mp4 Signed-off-by: Vincent Texier Signed-off-by: Romain Bignon --- modules/dailymotion/pages.py | 33 +++++++++++++++++++++++++++++---- modules/dailymotion/video.py | 2 +- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py index ec3e0abf..fda40a40 100644 --- a/modules/dailymotion/pages.py +++ b/modules/dailymotion/pages.py @@ -86,12 +86,37 @@ class VideoPage(BasePage): if video is None: video = DailymotionVideo(self.group_dict['id']) - div = self.parser.select(self.document.getroot(), 'div#content', 1) + head = self.parser.select(self.document.getroot(), 'head', 1) + + video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip() + video.author = unicode(self.parser.select(head, 'meta[name="author"]', 1).get("content")).strip() + + url = unicode(self.parser.select(head, 'meta[property="og:image"]', 1).get("content")).strip() + # remove the useless anti-caching + url = re.sub('\?\d+', '', url) + video.thumbnail = BaseImage(url) + video.thumbnail.url = video.thumbnail.id - video.title = unicode(self.parser.select(div, 'div, meta[itemprop=name]', 1).get("content")).strip() - video.author = unicode(self.parser.select(div, 'div, meta[itemprop=author]', 1).get("content")).strip() try: - video.description = html2text(self.parser.tostring(self.parser.select(div, 'div, meta[itemprop=description]', 1))).strip() or unicode() + parts = self.parser.select(head, 'meta[property="video:duration"]', 1).get("content").strip().split(':') + except BrokenPageError: + # it's probably a live, np. 
+ video.duration = NotAvailable + else: + if len(parts) == 1: + seconds = parts[0] + hours = minutes = 0 + elif len(parts) == 2: + minutes, seconds = parts + hours = 0 + elif len(parts) == 3: + hours, minutes, seconds = parts + else: + raise BrokenPageError('Unable to parse duration %r' % parts) + video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds)) + + try: + video.description = html2text(self.parser.select(head, 'meta[property="og:description"]', 1).get("content")).strip() or unicode() except BrokenPageError: video.description = u'' diff --git a/modules/dailymotion/video.py b/modules/dailymotion/video.py index 1ff2e841..8e2f0847 100644 --- a/modules/dailymotion/video.py +++ b/modules/dailymotion/video.py @@ -27,7 +27,7 @@ __all__ = ['DailymotionVideo'] class DailymotionVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) - self.ext = u'flv' + self.ext = u'mp4' @classmethod def id2url(cls, _id):