add support for videos available at kids.dailymotion.com

2014-03-10 19:53:57 +01:00 · 2014-03-10 19:53:57 +01:00 · 9010ffb025
commit 9010ffb025
parent 158e8c5fdc
3 changed files with 103 additions and 6 deletions
--- a/modules/dailymotion/browser.py
+++ b/modules/dailymotion/browser.py
@@ -22,7 +22,7 @@ from urllib import quote_plus
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url

-from .pages import IndexPage, VideoPage
+from .pages import IndexPage, VideoPage, KidsVideoPage
 from .video import DailymotionVideo


@@ -36,7 +36,8 @@ class DailymotionBrowser(BaseBrowser):
             r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/1': IndexPage,
             r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/(\w+/)?search/.*': IndexPage,
             r'http://[w\.]*dailymotion\.com/video/(?P<id>.+)': VideoPage,
-            }
+             r'http://kids\.dailymotion\.com/(?P<from>[^\/#]+)#(.*&)?video=(?P<id>.+)': KidsVideoPage,
+             }

    @id2url(DailymotionVideo.id2url)
    def get_video(self, url, video=None):
--- a/modules/dailymotion/pages.py
+++ b/modules/dailymotion/pages.py
@@ -20,6 +20,9 @@
 from weboob.tools.json import json
 import datetime
 import re
+import urllib
+import urlparse
+import mechanize

 from weboob.capabilities import NotAvailable
 from weboob.capabilities.image import BaseImage
@@ -30,7 +33,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
 from .video import DailymotionVideo


-__all__ = ['IndexPage', 'VideoPage']
+__all__ = ['IndexPage', 'VideoPage', 'KidsVideoPage']


 class IndexPage(BasePage):
@@ -86,6 +89,16 @@ class VideoPage(BasePage):
        if video is None:
            video = DailymotionVideo(self.group_dict['id'])

+        self.set_video_metadata(video)
+        self.set_video_url(video)
+
+        video.set_empty_fields(NotAvailable)
+
+        return video
+
+
+    def set_video_metadata(self, video):
+
        head = self.parser.select(self.document.getroot(), 'head', 1)

        video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip()
@@ -120,6 +133,9 @@ class VideoPage(BasePage):
        except BrokenPageError:
            video.description = u''

+
+    def set_video_url(self, video):
+
        embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id)

        m = re.search('var info = ({.*?}),[^{"]', embed_page)
@@ -136,8 +152,68 @@ class VideoPage(BasePage):
        else:
            raise BrokenPageError(u'Unable to extract video URL')

-        video.url = info[max_quality]
+        video.url = unicode(info[max_quality])

-        video.set_empty_fields(NotAvailable)

-        return video
+class KidsVideoPage(VideoPage):
+    """Video page of kids.dailymotion.com.
+
+    Only set_video_metadata() is overridden here; everything else is
+    inherited from VideoPage.
+    """
+
+    CONTROLLER_PAGE = 'http://kids.dailymotion.com/controller/Page_Kids_KidsUserHome?%s'
+
+    def set_video_metadata(self, video):
+        """Fill in the metadata of *video* on a best-effort basis.
+
+        Sets title, author, description, thumbnail and duration from the
+        player markup. Extraction errors are swallowed so that a video
+        without metadata can still be downloaded.
+
+        :param video: object to fill in; its ``id`` is used in the request
+        """
+
+        # The player html code with all the required information is loaded
+        # after the main page using javascript and a special XmlHttpRequest;
+        # we emulate this behaviour.
+        from_request = self.group_dict['from']
+
+        query = urllib.urlencode({
+            'from_request': from_request,
+            'request': '/video/%s?get_video=1' % video.id
+            })
+
+        request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
+        # This header is mandatory to have the correct answer from dailymotion
+        request.add_header('X-Requested-With', 'XMLHttpRequest')
+        player_html = self.browser.readurl(request)
+
+        try:
+            m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"', player_html)
+            flashvars = urlparse.parse_qs(m.group('flashvars'))
+            info = json.loads(flashvars['sequence'][0])
+
+            # The video parameters seem to be always located at the same place
+            # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
+            #   [0]['layerList'][0]['param']['extraParams'])
+            #
+            # but to be more tolerant to future changes in the structure, we
+            # prefer to look for the parameters everywhere in the structure
+
+            def find_video_params(data):
+                """Return the first non-empty 'extraParams' found in *data*, else None."""
+                if isinstance(data, dict):
+                    if 'param' in data and 'extraParams' in data['param']:
+                        return data['param']['extraParams']
+                    # Force a real list so the check below does not depend on
+                    # values() returning one.
+                    data = list(data.values())
+
+                if not isinstance(data, list):
+                    return None
+
+                for item in data:
+                    ret = find_video_params(item)
+                    if ret:
+                        return ret
+
+                return None
+
+            params = find_video_params(info['sequence'])
+
+            video.title = unicode(params['videoTitle'])
+            video.author = unicode(params['videoOwnerLogin'])
+            video.description = unicode(params['videoDescription'])
+            video.thumbnail = BaseImage(params['videoPreviewURL'])
+            video.thumbnail.url = unicode(params['videoPreviewURL'])
+            video.duration = datetime.timedelta(seconds=params['mediaDuration'])
+
+        except Exception:
+            # If anything goes wrong, we prefer to return normally, this will
+            # allow video download to work even if we don't have the metadata.
+            # Only Exception is caught so that KeyboardInterrupt and
+            # SystemExit still propagate.
+            pass
--- a/modules/dailymotion/test.py
+++ b/modules/dailymotion/test.py
@@ -27,6 +27,10 @@ from random import choice
 class DailymotionTest(BackendTest):
    BACKEND = 'dailymotion'

+    # It is not easy to find a kids video that is guaranteed to stay online,
+    # so this test might break in the future.
+    KIDS_VIDEO_TITLE = 'Telmo et Tula'
+
    def test_search(self):
        l = list(self.backend.search_videos('chirac'))
        self.assertTrue(len(l) > 0)
@@ -41,3 +45,19 @@ class DailymotionTest(BackendTest):
        v = choice(l)
        self.backend.fillobj(v, ('url',))
        self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+
+    def test_kids_video(self):
+        """Look up the reference kids video and check that its URL resolves.
+
+        The first ten search results are fetched one by one; the test passes
+        on the first one whose title contains KIDS_VIDEO_TITLE and fails if
+        none does.
+        """
+        wanted_title = DailymotionTest.KIDS_VIDEO_TITLE
+        results = list(self.backend.search_videos(wanted_title))
+        self.assertTrue(len(results) > 0)
+        for candidate in results[:10]:
+            video = self.backend.get_video(candidate.id)
+            self.assertIsNotNone(video.title)
+            if wanted_title not in video.title:
+                continue
+            has_http_url = video.url and video.url.startswith('http://')
+            self.assertTrue(has_http_url, 'URL for video "%s" not found: %s' %
+                        (video.id, video.url))
+            return
+
+        self.fail("Can't find test video '%s' in kids.dailymotion.com video "
+                  "on dailymotion, maybe the test video should be changed."
+                  % wanted_title)