From 1d0faa1af01df6222e5fc3b1493e2ffe404ce066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= Date: Thu, 14 Mar 2013 03:59:20 +0100 Subject: [PATCH] vimeo: Fix for no-embed videos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some videos like http://vimeo.com/61275290 have restrictions on embedding, which makes the usual JSON url fail. So we first try to parse the JSON data directly from the script element in the page. Signed-off-by: François Revol --- modules/vimeo/pages.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/modules/vimeo/pages.py b/modules/vimeo/pages.py index 5e02c078..6cf493f4 100644 --- a/modules/vimeo/pages.py +++ b/modules/vimeo/pages.py @@ -27,6 +27,7 @@ from urllib2 import HTTPError from weboob.tools.browser import BasePage from weboob.tools.json import json +import re import datetime from dateutil.parser import parse as parse_dt @@ -65,12 +66,24 @@ class VideoPage(BasePage): if len(obj) > 0: v.thumbnail = Thumbnail(unicode(obj[0].attrib['content'])) - # for the rest, use the JSON config descriptor - json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", "")) - data = json.load(json_data) + data = None + + # First try to find the JSON data in the page itself. 
+ # it's the only location in case the video is not allowed to be embedded + for script in self.parser.select(self.document.getroot(), 'script'): + m = re.match('.* = {config:({.*}),assets:.*', unicode(script.text), re.DOTALL) + if m: + data = json.loads(m.group(1)) + break + + # Else fall back to the API + if data is None: + # for the rest, use the JSON config descriptor + json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", "")) + data = json.load(json_data) + if data is None: raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id)) - #print data if v.title is None: v.title = unicode(data['video']['title'])