From 1d0faa1af01df6222e5fc3b1493e2ffe404ce066 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= <revol@free.fr>
Date: Thu, 14 Mar 2013 03:59:20 +0100
Subject: [PATCH] vimeo: Fix for no-embed videos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some videos, like http://vimeo.com/61275290, have restrictions
on embedding, which makes the usual JSON config URL fail.
So we first try to parse the JSON data directly from a
script element in the page, and fall back to that URL otherwise.

Signed-off-by: François Revol <revol@free.fr>
---
 modules/vimeo/pages.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/modules/vimeo/pages.py b/modules/vimeo/pages.py
index 5e02c078..6cf493f4 100644
--- a/modules/vimeo/pages.py
+++ b/modules/vimeo/pages.py
@@ -27,6 +27,7 @@ from urllib2 import HTTPError
 from weboob.tools.browser import BasePage
 from weboob.tools.json import json
 
+import re
 import datetime
 from dateutil.parser import parse as parse_dt
 
@@ -65,12 +66,24 @@ class VideoPage(BasePage):
         if len(obj) > 0:
             v.thumbnail = Thumbnail(unicode(obj[0].attrib['content']))
 
-        # for the rest, use the JSON config descriptor
-        json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
-        data = json.load(json_data)
+        data = None
+
+        # First try to find the JSON data in the page itself.
+        # It's the only location when the video is not allowed to be embedded.
+        for script in self.parser.select(self.document.getroot(), 'script'):
+            m = re.match('.* = {config:({.*}),assets:.*', unicode(script.text), re.DOTALL)
+            if m:
+                data = json.loads(m.group(1))
+                break
+
+        # Else fall back to the API
+        if data is None:
+            # for the rest, use the JSON config descriptor
+            json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
+            data = json.load(json_data)
+
         if data is None:
             raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id))
-        #print data
 
         if v.title is None:
             v.title = unicode(data['video']['title'])