diff --git a/modules/dailymotion/browser.py b/modules/dailymotion/browser.py index f8448ca6..5d86aebd 100644 --- a/modules/dailymotion/browser.py +++ b/modules/dailymotion/browser.py @@ -22,7 +22,7 @@ from urllib import quote_plus from weboob.tools.browser import BaseBrowser from weboob.tools.browser.decorators import id2url -from .pages import IndexPage, VideoPage +from .pages import IndexPage, VideoPage, KidsVideoPage from .video import DailymotionVideo @@ -36,7 +36,8 @@ class DailymotionBrowser(BaseBrowser): r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/1': IndexPage, r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/(\w+/)?search/.*': IndexPage, r'http://[w\.]*dailymotion\.com/video/(?P.+)': VideoPage, - } + r'http://kids\.dailymotion\.com/(?P[^\/#]+)#(.*&)?video=(?P.+)': KidsVideoPage, + } @id2url(DailymotionVideo.id2url) def get_video(self, url, video=None): diff --git a/modules/dailymotion/pages.py b/modules/dailymotion/pages.py index fda40a40..4c9f5fd8 100644 --- a/modules/dailymotion/pages.py +++ b/modules/dailymotion/pages.py @@ -20,6 +20,9 @@ from weboob.tools.json import json import datetime import re +import urllib +import urlparse +import mechanize from weboob.capabilities import NotAvailable from weboob.capabilities.image import BaseImage @@ -30,7 +33,7 @@ from weboob.tools.browser import BasePage, BrokenPageError from .video import DailymotionVideo -__all__ = ['IndexPage', 'VideoPage'] +__all__ = ['IndexPage', 'VideoPage', 'KidsVideoPage'] class IndexPage(BasePage): @@ -86,6 +89,16 @@ class VideoPage(BasePage): if video is None: video = DailymotionVideo(self.group_dict['id']) + self.set_video_metadata(video) + self.set_video_url(video) + + video.set_empty_fields(NotAvailable) + + return video + + + def set_video_metadata(self, video): + head = self.parser.select(self.document.getroot(), 'head', 1) video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip() @@ -120,6 +133,9 @@ class VideoPage(BasePage): except BrokenPageError: video.description = u'' + + def set_video_url(self, video): + embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id) m = re.search('var info = ({.*?}),[^{"]', embed_page) @@ -136,8 +152,68 @@ class VideoPage(BasePage): else: raise BrokenPageError(u'Unable to extract video URL') - video.url = info[max_quality] + video.url = unicode(info[max_quality]) - video.set_empty_fields(NotAvailable) - return video +class KidsVideoPage(VideoPage): + + CONTROLLER_PAGE = 'http://kids.dailymotion.com/controller/Page_Kids_KidsUserHome?%s' + + def set_video_metadata(self, video): + + # The player html code with all the required information is loaded + # after the main page using javascript and a special XmlHttpRequest + # we emulate this behaviour + from_request = self.group_dict['from'] + + query = urllib.urlencode({ + 'from_request': from_request, + 'request': '/video/%s?get_video=1' % video.id + }) + + request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query) + # This header is mandatory to have the correct answer from dailymotion + request.add_header('X-Requested-With', 'XMLHttpRequest') + player_html = self.browser.readurl(request) + + try: + m = re.search(' 0) @@ -41,3 +45,19 @@ class DailymotionTest(BackendTest): v = choice(l) self.backend.fillobj(v, ('url',)) self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) + + def test_kids_video(self): + l = list(self.backend.search_videos(DailymotionTest.KIDS_VIDEO_TITLE)) + self.assertTrue(len(l) > 0) + for elt in l[:10]: + video_id = elt.id + video = self.backend.get_video(video_id) + self.assertIsNotNone(video.title) + if DailymotionTest.KIDS_VIDEO_TITLE in video.title: + self.assertTrue(video.url and video.url.startswith('http://'), 'URL for video "%s" not found: %s' % + (video.id, video.url)) + return + + self.fail("Can't find test video '%s' in kids.dailymotion.com video " + "on dailymotion, maybe the test video should be changed." + % DailymotionTest.KIDS_VIDEO_TITLE)