add support for videos available at kids.dailymotion.com
This commit is contained in:
parent
158e8c5fdc
commit
9010ffb025
3 changed files with 103 additions and 6 deletions
|
|
@ -22,7 +22,7 @@ from urllib import quote_plus
|
|||
from weboob.tools.browser import BaseBrowser
|
||||
from weboob.tools.browser.decorators import id2url
|
||||
|
||||
from .pages import IndexPage, VideoPage
|
||||
from .pages import IndexPage, VideoPage, KidsVideoPage
|
||||
from .video import DailymotionVideo
|
||||
|
||||
|
||||
|
|
@ -36,7 +36,8 @@ class DailymotionBrowser(BaseBrowser):
|
|||
r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/1': IndexPage,
|
||||
r'http://[w\.]*dailymotion\.com/[a-z\-]{2,5}/(\w+/)?search/.*': IndexPage,
|
||||
r'http://[w\.]*dailymotion\.com/video/(?P<id>.+)': VideoPage,
|
||||
}
|
||||
r'http://kids\.dailymotion\.com/(?P<from>[^\/#]+)#(.*&)?video=(?P<id>.+)': KidsVideoPage,
|
||||
}
|
||||
|
||||
@id2url(DailymotionVideo.id2url)
|
||||
def get_video(self, url, video=None):
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@
|
|||
from weboob.tools.json import json
|
||||
import datetime
|
||||
import re
|
||||
import urllib
|
||||
import urlparse
|
||||
import mechanize
|
||||
|
||||
from weboob.capabilities import NotAvailable
|
||||
from weboob.capabilities.image import BaseImage
|
||||
|
|
@ -30,7 +33,7 @@ from weboob.tools.browser import BasePage, BrokenPageError
|
|||
from .video import DailymotionVideo
|
||||
|
||||
|
||||
__all__ = ['IndexPage', 'VideoPage']
|
||||
__all__ = ['IndexPage', 'VideoPage', 'KidsVideoPage']
|
||||
|
||||
|
||||
class IndexPage(BasePage):
|
||||
|
|
@ -86,6 +89,16 @@ class VideoPage(BasePage):
|
|||
if video is None:
|
||||
video = DailymotionVideo(self.group_dict['id'])
|
||||
|
||||
self.set_video_metadata(video)
|
||||
self.set_video_url(video)
|
||||
|
||||
video.set_empty_fields(NotAvailable)
|
||||
|
||||
return video
|
||||
|
||||
|
||||
def set_video_metadata(self, video):
|
||||
|
||||
head = self.parser.select(self.document.getroot(), 'head', 1)
|
||||
|
||||
video.title = unicode(self.parser.select(head, 'meta[property="og:title"]', 1).get("content")).strip()
|
||||
|
|
@ -120,6 +133,9 @@ class VideoPage(BasePage):
|
|||
except BrokenPageError:
|
||||
video.description = u''
|
||||
|
||||
|
||||
def set_video_url(self, video):
|
||||
|
||||
embed_page = self.browser.readurl('http://www.dailymotion.com/embed/video/%s' % video.id)
|
||||
|
||||
m = re.search('var info = ({.*?}),[^{"]', embed_page)
|
||||
|
|
@ -136,8 +152,68 @@ class VideoPage(BasePage):
|
|||
else:
|
||||
raise BrokenPageError(u'Unable to extract video URL')
|
||||
|
||||
video.url = info[max_quality]
|
||||
video.url = unicode(info[max_quality])
|
||||
|
||||
video.set_empty_fields(NotAvailable)
|
||||
|
||||
return video
|
||||
class KidsVideoPage(VideoPage):
    """Page handler for videos hosted on kids.dailymotion.com.

    Overrides ``set_video_metadata`` because, on the kids site, the player
    markup holding the metadata is not part of the main page: it is fetched
    separately through ``CONTROLLER_PAGE``.
    """

    # Controller endpoint queried for the player markup; ``%s`` receives the
    # url-encoded query string built in ``set_video_metadata``.
    CONTROLLER_PAGE = 'http://kids.dailymotion.com/controller/Page_Kids_KidsUserHome?%s'

    def set_video_metadata(self, video):
        """Fill ``video`` with the metadata found in the kids player markup.

        Sets ``title``, ``author``, ``description``, ``thumbnail`` and
        ``duration`` on ``video``. Extraction is best effort: if the player
        markup cannot be parsed, the fields that were not yet assigned are
        left untouched and no exception is raised, so that the video can
        still be downloaded without its metadata.

        Errors raised by the HTTP request itself are not caught here.

        :param video: video object to fill; its ``id`` is used in the request
        """
        # The player html code with all the required information is loaded
        # after the main page using javascript and a special XMLHttpRequest;
        # we emulate this behaviour.
        from_request = self.group_dict['from']

        query = urllib.urlencode({
            'from_request': from_request,
            'request': '/video/%s?get_video=1' % video.id
        })

        request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
        # This header is mandatory to have the correct answer from dailymotion
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        player_html = self.browser.readurl(request)

        try:
            # If the markup has no flashvars parameter, ``m`` is None and the
            # attribute access below raises; this is handled by the except
            # clause like any other parsing failure.
            m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"', player_html)
            flashvars = urlparse.parse_qs(m.group('flashvars'))
            info = json.loads(flashvars['sequence'][0])

            # The video parameters seem to be always located at the same place
            # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
            #   [0]['layerList'][0]['param']['extraParams'])
            #
            # but to be more tolerant to future changes in the structure, we
            # prefer to look for the parameters everywhere in the structure

            def find_video_params(data):
                """Return the first non-empty ``['param']['extraParams']``
                value found by a depth-first walk of ``data``, or None."""
                if isinstance(data, dict):
                    if 'param' in data and 'extraParams' in data['param']:
                        return data['param']['extraParams']
                    # Materialise the values so the list check below holds
                    # whether ``values()`` returns a list or a view.
                    data = list(data.values())

                if not isinstance(data, list):
                    return None

                for item in data:
                    ret = find_video_params(item)
                    if ret:
                        return ret

                return None

            params = find_video_params(info['sequence'])

            video.title = unicode(params['videoTitle'])
            video.author = unicode(params['videoOwnerLogin'])
            video.description = unicode(params['videoDescription'])
            video.thumbnail = BaseImage(params['videoPreviewURL'])
            video.thumbnail.url = unicode(params['videoPreviewURL'])
            video.duration = datetime.timedelta(seconds=params['mediaDuration'])

        except Exception:
            # If anything goes wrong, we prefer to return normally, this will
            # allow video download to work even if we don't have the metadata.
            # ``Exception`` (rather than a bare except) keeps KeyboardInterrupt
            # and SystemExit propagating.
            pass
|
||||
|
|
|
|||
|
|
@ -27,6 +27,10 @@ from random import choice
|
|||
class DailymotionTest(BackendTest):
|
||||
BACKEND = 'dailymotion'
|
||||
|
||||
# Not easy to find a kids video which will always be there
|
||||
# This might break in the future
|
||||
KIDS_VIDEO_TITLE = 'Telmo et Tula'
|
||||
|
||||
def test_search(self):
|
||||
l = list(self.backend.search_videos('chirac'))
|
||||
self.assertTrue(len(l) > 0)
|
||||
|
|
@ -41,3 +45,19 @@ class DailymotionTest(BackendTest):
|
|||
v = choice(l)
|
||||
self.backend.fillobj(v, ('url',))
|
||||
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
|
||||
|
||||
def test_kids_video(self):
    """Check that a video from kids.dailymotion.com resolves to an URL.

    Searches for ``KIDS_VIDEO_TITLE``, fetches up to the first ten results
    and, on the first one whose title contains the expected text, checks
    that an ``http://`` URL was found. Fails if none of them matches.
    """
    wanted_title = DailymotionTest.KIDS_VIDEO_TITLE

    results = list(self.backend.search_videos(wanted_title))
    self.assertTrue(len(results) > 0)

    for candidate in results[:10]:
        video = self.backend.get_video(candidate.id)
        self.assertIsNotNone(video.title)

        # Skip results that are not the reference kids video.
        if wanted_title not in video.title:
            continue

        has_http_url = video.url and video.url.startswith('http://')
        message = 'URL for video "%s" not found: %s' % (video.id, video.url)
        self.assertTrue(has_http_url, message)
        return

    self.fail("Can't find test video '%s' in kids.dailymotion.com video "
              "on dailymotion, maybe the test video should be changed."
              % wanted_title)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue