add support for videos available at kids.dailymotion.com

This commit is contained in:
Yann Rouillard 2014-03-10 19:53:57 +01:00 committed by Florent
commit 9010ffb025
3 changed files with 103 additions and 6 deletions

View file

@ -20,6 +20,9 @@
from weboob.tools.json import json
import datetime
import re
import urllib
import urlparse
import mechanize
from weboob.capabilities import NotAvailable
from weboob.capabilities.image import BaseImage
@ -30,7 +33,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
from .video import DailymotionVideo
__all__ = ['IndexPage', 'VideoPage']
__all__ = ['IndexPage', 'VideoPage', 'KidsVideoPage']
class IndexPage(BasePage):
@ -86,6 +89,16 @@ class VideoPage(BasePage):
if video is None:
video = DailymotionVideo(self.group_dict['id'])
self.set_video_metadata(video)
self.set_video_url(video)
video.set_empty_fields(NotAvailable)
return video
def set_video_metadata(self, video):
head = self.parser.select(self.document.getroot(), 'head', 1)
video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip()
@ -120,6 +133,9 @@ class VideoPage(BasePage):
except BrokenPageError:
video.description = u''
def set_video_url(self, video):
embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id)
m = re.search('var info = ({.*?}),[^{"]', embed_page)
@ -136,8 +152,68 @@ class VideoPage(BasePage):
else:
raise BrokenPageError(u'Unable to extract video URL')
video.url = info[max_quality]
video.url = unicode(info[max_quality])
video.set_empty_fields(NotAvailable)
return video
class KidsVideoPage(VideoPage):
    """Video page handler for kids.dailymotion.com.

    Only the metadata extraction differs from ``VideoPage``: the player
    markup holding the metadata is fetched through a separate
    XMLHttpRequest-style call to ``CONTROLLER_PAGE``.
    """

    # URL template of the controller that returns the player HTML; the
    # placeholder receives the url-encoded query string.
    CONTROLLER_PAGE = 'http://kids.dailymotion.com/controller/Page_Kids_KidsUserHome?%s'

    def set_video_metadata(self, video):
        """Fill title, author, description, thumbnail and duration of *video*.

        The metadata is read from the ``sequence`` entry of the ``flashvars``
        parameter found in the player HTML. Extraction is best-effort: any
        parsing error is ignored so that the video can still be downloaded
        without its metadata. Errors raised while fetching the player HTML
        are not caught here.

        :param video: video object to update in place; its ``id`` attribute
                      is used to build the request.
        """
        # The player html code with all the required information is loaded
        # after the main page using javascript and a special XmlHttpRequest;
        # we emulate this behaviour.
        from_request = self.group_dict['from']

        query = urllib.urlencode({
            'from_request': from_request,
            'request': '/video/%s?get_video=1' % video.id
        })

        request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
        # This header is mandatory to have the correct answer from dailymotion
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        player_html = self.browser.readurl(request)

        try:
            m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"', player_html)
            flashvars = urlparse.parse_qs(m.group('flashvars'))
            info = json.loads(flashvars['sequence'][0])

            # The video parameters seem to be always located at the same place
            # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
            # [0]['layerList'][0]['param']['extraParams'])
            #
            # but to be more tolerant to future changes in the structure, we
            # prefer to look for the parameters everywhere in the structure
            def find_video_params(data):
                """Return the first non-empty ``param.extraParams`` found
                while walking nested dicts and lists depth-first, or None."""
                if isinstance(data, dict):
                    if 'param' in data and 'extraParams' in data['param']:
                        return data['param']['extraParams']
                    # Materialise the values so the list check below holds
                    # even where values() returns a view instead of a list.
                    data = list(data.values())

                if not isinstance(data, list):
                    return None

                for item in data:
                    ret = find_video_params(item)
                    if ret:
                        return ret

                return None

            params = find_video_params(info['sequence'])

            video.title = unicode(params['videoTitle'])
            video.author = unicode(params['videoOwnerLogin'])
            video.description = unicode(params['videoDescription'])
            video.thumbnail = BaseImage(params['videoPreviewURL'])
            video.thumbnail.url = unicode(params['videoPreviewURL'])
            video.duration = datetime.timedelta(seconds=params['mediaDuration'])

        except Exception:
            # If anything goes wrong, we prefer to return normally, this will
            # allow video download to work even if we don't have the metadata.
            # Only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate.
            pass