add support for videos available at kids.dailymotion.com

This commit is contained in:
Yann Rouillard 2014-03-10 19:53:57 +01:00 committed by Florent
commit 9010ffb025
3 changed files with 103 additions and 6 deletions

View file

@ -22,7 +22,7 @@ from urllib import quote_plus
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from .pages import IndexPage, VideoPage from .pages import IndexPage, VideoPage, KidsVideoPage
from .video import DailymotionVideo from .video import DailymotionVideo
@ -36,7 +36,8 @@ class DailymotionBrowser(BaseBrowser):
r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/1': IndexPage, r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/1': IndexPage,
r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/(\w+/)?search/.*': IndexPage, r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/(\w+/)?search/.*': IndexPage,
r'http://[w\.]*dailymotion\.com/video/(?P<id>.+)': VideoPage, r'http://[w\.]*dailymotion\.com/video/(?P<id>.+)': VideoPage,
} r'http://kids\.dailymotion\.com/(?P<from>[^\/#]+)#(.*&)?video=(?P<id>.+)': KidsVideoPage,
}
@id2url(DailymotionVideo.id2url) @id2url(DailymotionVideo.id2url)
def get_video(self, url, video=None): def get_video(self, url, video=None):

View file

@ -20,6 +20,9 @@
from weboob.tools.json import json from weboob.tools.json import json
import datetime import datetime
import re import re
import urllib
import urlparse
import mechanize
from weboob.capabilities import NotAvailable from weboob.capabilities import NotAvailable
from weboob.capabilities.image import BaseImage from weboob.capabilities.image import BaseImage
@ -30,7 +33,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
from .video import DailymotionVideo from .video import DailymotionVideo
__all__ = ['IndexPage', 'VideoPage'] __all__ = ['IndexPage', 'VideoPage', 'KidsVideoPage']
class IndexPage(BasePage): class IndexPage(BasePage):
@ -86,6 +89,16 @@ class VideoPage(BasePage):
if video is None: if video is None:
video = DailymotionVideo(self.group_dict['id']) video = DailymotionVideo(self.group_dict['id'])
self.set_video_metadata(video)
self.set_video_url(video)
video.set_empty_fields(NotAvailable)
return video
def set_video_metadata(self, video):
head = self.parser.select(self.document.getroot(), 'head', 1) head = self.parser.select(self.document.getroot(), 'head', 1)
video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip() video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip()
@ -120,6 +133,9 @@ class VideoPage(BasePage):
except BrokenPageError: except BrokenPageError:
video.description = u'' video.description = u''
def set_video_url(self, video):
embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id) embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id)
m = re.search('var info = ({.*?}),[^{"]', embed_page) m = re.search('var info = ({.*?}),[^{"]', embed_page)
@ -136,8 +152,68 @@ class VideoPage(BasePage):
else: else:
raise BrokenPageError(u'Unable to extract video URL') raise BrokenPageError(u'Unable to extract video URL')
video.url = info[max_quality] video.url = unicode(info[max_quality])
video.set_empty_fields(NotAvailable)
return video class KidsVideoPage(VideoPage):
CONTROLLER_PAGE = 'http://kids.dailymotion.com/controller/Page_Kids_KidsUserHome?%s'
def set_video_metadata(self, video):
    """Fill ``video`` with metadata extracted from the kids player HTML.

    The player markup is fetched from ``CONTROLLER_PAGE`` with an
    ``X-Requested-With: XMLHttpRequest`` header, the ``flashvars``
    parameter is parsed, and the title, author, description, thumbnail
    and duration are copied onto ``video``.

    Metadata extraction is best-effort: any ordinary error raised while
    parsing the player HTML is ignored, leaving the fields that were not
    yet assigned untouched. Errors raised by the controller request
    itself are not caught here.

    :param video: video object to fill; its ``id`` is used in the request
    """
    # The player html code with all the required information is loaded
    # after the main page using javascript and a special XmlHttpRequest;
    # we emulate this behaviour.
    from_request = self.group_dict['from']
    query = urllib.urlencode({
        'from_request': from_request,
        'request': '/video/%s?get_video=1' % video.id,
    })

    request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
    # This header is mandatory to have the correct answer from dailymotion
    request.add_header('X-Requested-With', 'XMLHttpRequest')
    player_html = self.browser.readurl(request)

    # The video parameters seem to be always located at the same place
    # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
    # [0]['layerList'][0]['param']['extraParams'])
    #
    # but to be more tolerant to future changes in the structure, we
    # prefer to look for the parameters everywhere in the structure.
    def find_video_params(data):
        """Return the first truthy ``['param']['extraParams']`` found
        while walking nested dicts and lists, or None."""
        if isinstance(data, dict):
            if 'param' in data and 'extraParams' in data['param']:
                return data['param']['extraParams']
            # list() so the check below does not depend on values()
            # returning a real list.
            data = list(data.values())

        if not isinstance(data, list):
            return None

        for item in data:
            ret = find_video_params(item)
            if ret:
                return ret

        return None

    try:
        m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"', player_html)
        flashvars = urlparse.parse_qs(m.group('flashvars'))
        info = json.loads(flashvars['sequence'][0])

        params = find_video_params(info['sequence'])

        video.title = unicode(params['videoTitle'])
        video.author = unicode(params['videoOwnerLogin'])
        video.description = unicode(params['videoDescription'])
        video.thumbnail = BaseImage(params['videoPreviewURL'])
        video.thumbnail.url = unicode(params['videoPreviewURL'])
        video.duration = datetime.timedelta(seconds=params['mediaDuration'])

    except Exception:
        # If anything goes wrong, we prefer to return normally, this will
        # allow video download to work even if we don't have the metadata.
        # Exception (rather than a bare except) keeps KeyboardInterrupt
        # and SystemExit propagating.
        pass

View file

@ -27,6 +27,10 @@ from random import choice
class DailymotionTest(BackendTest): class DailymotionTest(BackendTest):
BACKEND = 'dailymotion' BACKEND = 'dailymotion'
# Not easy to find a kids video which will always be there
# This might break in the future
KIDS_VIDEO_TITLE = 'Telmo et Tula'
def test_search(self): def test_search(self):
l = list(self.backend.search_videos('chirac')) l = list(self.backend.search_videos('chirac'))
self.assertTrue(len(l) > 0) self.assertTrue(len(l) > 0)
@ -41,3 +45,19 @@ class DailymotionTest(BackendTest):
v = choice(l) v = choice(l)
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_kids_video(self):
    """Look up the reference kids video and check that it gets a URL.

    Searches for KIDS_VIDEO_TITLE, fetches each of the first ten results
    by id, and succeeds as soon as one whose title contains the reference
    title has an ``http://`` URL. Fails if no such result is found.
    """
    wanted_title = DailymotionTest.KIDS_VIDEO_TITLE

    results = list(self.backend.search_videos(wanted_title))
    self.assertTrue(len(results) > 0)

    for candidate in results[:10]:
        video = self.backend.get_video(candidate.id)
        self.assertIsNotNone(video.title)

        # Skip results that are not the reference video.
        if wanted_title not in video.title:
            continue

        has_http_url = video.url and video.url.startswith('http://')
        self.assertTrue(has_http_url,
                        'URL for video "%s" not found: %s' % (video.id, video.url))
        return

    self.fail("Can't find test video '%s' in kids.dailymotion.com video "
              "on dailymotion, maybe the test video should be changed."
              % wanted_title)