From 8ef6e64dfefc4ba30b51c772eb9e66d7bca4ce71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Revol?= Date: Wed, 12 Sep 2012 00:17:41 +0200 Subject: [PATCH] Add a video module for vimeo.com MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The trick is to log ourselves in to validate the provided signature, then the video file redirect works. Since the redirected url is not checked for User-Agent unlike the redirect one, we disable redirects to get the actual location and use it instead. This allows running wget or vlc without faking their User-Agents. For now it only downloads sd videos. TODO: get the highest quality TODO: search, latest Signed-off-by: François Revol Signed-off-by: Romain Bignon --- modules/vimeo/__init__.py | 3 + modules/vimeo/backend.py | 82 +++++++++++++++++++++++ modules/vimeo/browser.py | 57 ++++++++++++++++ modules/vimeo/favicon.png | Bin 0 -> 1913 bytes modules/vimeo/favicon.xcf | Bin 0 -> 6228 bytes modules/vimeo/pages.py | 136 ++++++++++++++++++++++++++++++++++++++ modules/vimeo/test.py | 42 ++++++++++++ modules/vimeo/video.py | 36 ++++++++++ 8 files changed, 356 insertions(+) create mode 100644 modules/vimeo/__init__.py create mode 100644 modules/vimeo/backend.py create mode 100644 modules/vimeo/browser.py create mode 100644 modules/vimeo/favicon.png create mode 100644 modules/vimeo/favicon.xcf create mode 100644 modules/vimeo/pages.py create mode 100644 modules/vimeo/test.py create mode 100644 modules/vimeo/video.py diff --git a/modules/vimeo/__init__.py b/modules/vimeo/__init__.py new file mode 100644 index 00000000..18aef779 --- /dev/null +++ b/modules/vimeo/__init__.py @@ -0,0 +1,3 @@ +from .backend import VimeoBackend + +__all__ = ['VimeoBackend'] diff --git a/modules/vimeo/backend.py b/modules/vimeo/backend.py new file mode 100644 index 00000000..5c1de8e2 --- /dev/null +++ b/modules/vimeo/backend.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from __future__ import with_statement + +from weboob.capabilities.video import ICapVideo, BaseVideo +from weboob.tools.backend import BaseBackend +from weboob.capabilities.collection import ICapCollection, CollectionNotFound + +from .browser import VimeoBrowser +from .video import VimeoVideo + + +__all__ = ['VimeoBackend'] + + +class VimeoBackend(BaseBackend, ICapVideo, ICapCollection): + NAME = 'vimeo' + MAINTAINER = u'François Revol' + EMAIL = 'revol@free.fr' + VERSION = '0.d' + DESCRIPTION = 'Vimeo video streaming website' + LICENSE = 'AGPLv3+' + BROWSER = VimeoBrowser + + def get_video(self, _id): + with self.browser: + return self.browser.get_video(_id) + + SORTBY = ['relevance', 'rating', 'views', 'time'] + + # def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): + # with self.browser: + # return self.browser.search_videos(pattern, self.SORTBY[sortby]) + + def fill_video(self, video, fields): + if fields != ['thumbnail']: + # if we don't want only the thumbnail, we probably want also every fields + with self.browser: + video = self.browser.get_video(VimeoVideo.id2url(video.id), video) + if 'thumbnail' in fields and video.thumbnail: + with self.browser: + video.thumbnail.data = self.browser.readurl(video.thumbnail.url) + + return video + + def iter_resources(self, objs, split_path): + if BaseVideo in objs: + collection = self.get_collection(objs, split_path) + if collection.path_level == 0: + yield self.get_collection(objs, [u'latest']) + if collection.split_path == [u'latest']: + for video in self.browser.latest_videos(): + yield video + + def validate_collection(self, objs, collection): + if collection.path_level == 0: + return + if BaseVideo in objs and collection.split_path == [u'latest']: + collection.title = u'Latest Vimeo videos' + return + raise CollectionNotFound(collection.split_path) + + OBJECTS = {VimeoVideo: fill_video} diff --git a/modules/vimeo/browser.py b/modules/vimeo/browser.py new file mode 100644 index 00000000..36b0fd50 --- /dev/null +++ b/modules/vimeo/browser.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from weboob.tools.browser import BaseBrowser +from weboob.tools.browser.decorators import id2url + +#from .pages.index import IndexPage +from .pages import VideoPage +from .video import VimeoVideo + + +__all__ = ['VimeoBrowser'] + + +class VimeoBrowser(BaseBrowser): + DOMAIN = 'vimeo.com' + ENCODING = None + # USER_AGENT = BaseBrowser.USER_AGENTS['wget'] + # TODO: determine this dynamically, like: + # wget -d 127.0.0.1 -O /dev/null 2>&1 | grep '^User-Agent:' + #USER_AGENT = 'Wget/1.14 (linux-gnu)' + PAGES = {r'http://[w\.]*vimeo\.com/(?P\d+).*': VideoPage, + } + + @id2url(VimeoVideo.id2url) + def get_video(self, url, video=None): + self.location(url) + return self.page.get_video(video) + + # def search_videos(self, pattern, sortby): + # return None + # self.location(self.buildurl('http://vimeo.com/search%s' % q=pattern.encode('utf-8'))) + # assert self.is_on_page(IndexPage) + # return self.page.iter_videos() + + # def latest_videos(self): + # self.home() + # assert self.is_on_page(IndexPage) + # return self.page.iter_videos() diff --git a/modules/vimeo/favicon.png b/modules/vimeo/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..40d8665690c903c3f913410bf236868f0d22eb8d GIT binary patch literal 1913 zcmV-<2Zs2GP)4*RT010qNS#tmY3ljhU3ljkVnw%H_000McNliru+z1~F0}!k2*17-y02y>e zSad^gZEa<4bO1wgWnpw>WFU8GbZ8()Nlj2!fese{00!YnL_t(|+U=WbY*S?z$N%rS z^xlr$I^5QFH!co30Rcr4V+l{N2ErJDxv}-KTeq(3*86FDjvsn@TDH=ivTk(i-cRevyZ<@Q?>z7GzR&ZXmmr6> z`_~Qu3!t@cYaw#`lEq~BgMc85(ePYAki~BNOb|#h7@rLSDHh}J2Z0om@tGiyVl#d! z1PVpoNHMbto@kLPO3 zr(OTS>BhA&f8hYwRAInl<$76t*SdYO0X$KeS@*)wcwr5JCWCrT`mQi&GG6y^vBDYx zuV3nxNS=D8848-t3xGE+Cv=-jH27d+qjY^hrU%V?5aa>a{Nb#9so z`(ZBc9+1~rpl33a*I@tv8!J+A;O2Osu&2R%18CH%P@`2zzpsu?7CL~S3;-J0dOK=T z(_kPP!&P^I1LzM#5RAt1;sI)!U~5&0^!qQ@1fp}0wd8jtNC^672!s_X1(vb&j8ZR{ zE1_Z%{dk_od53#pEQRwC2XqXDU8a=#|LgW)AjB;c0JDz+d+LnPu#EKfzwLD`oMdRW z9#E%M;`y{3;Nr*x?0H`N%qaj@I0_&4V1bZ8z`o{MFydk<9E;;b_wd3?hwu8bdx1g0 z!l+`WUEkYKg@&}@@x5&%?<>D~0U#$1teD~j>&kT4W2uyW+kGLl+ufM!Hb(&D#DUbC zHmX>>wWbynm3W*u&*O{R{ou3w|Mu!qoP7FG?5r(Yr~nckpri=iZLz?hNDYaf`aS6J zWnWM1ZK#5lWzeWs7ajl)7lS99eco=+tHBcBfYpq;G$Wk;Fi!Ri&3KRBhsMz{=ELc} zk%wCk$QB68REqmAbr~8*H#fngOBv6_<2cso$hzb;X_R=!YJyH&C+JxkBjIRac>n-( z%NPdf#sk2~p5Ynid}lC%cDoy;N*1L`7K6b^;SB+~aUi{2bb0Zkb4*rWvo4V$yF(m8 zaiK5)+?6pEO0Xt^`0SPwAo9L#vS^inj0)641>lSEaLI7bu`q`t9k!fOU5n@ejz9qe zkh3E6ggG3%){Aj2E?ZN_P*QNU`wJXEca~w`jYRR@wLT1ma*N=}@t-dM%v=IE=NQ8| z$5>u9Eon8vH`kcZcBLoligCso&eHaHk+&rPO91An=@|-NZ(W7=*VjWyk%9rh%*$N_ z0dKE0V=xfG;TyI&sqYswfYy2=1VO;z4tqfvKW_le*hULB24n?rXnh^5Wr^L{e_USp zV{trQu7`rAaNaqFo{1oytuDoeay?uj4xip~-jmMO>y&u8p$c_66(|sRxfs5)jo^yg zH%#ky2cbA?`wBO4pBxXgcf&6$=6E;;CaOk=Xul?Nxj_1+Z zP=(!MAyuwcP5{hl%g(d5QGgJr#F~9D&f}MX1XYC?)mgEq2|>Vqs|gC4#^upTOh)5U zADW_2B35(K_1o10+8)E ziA9&lyTh@#lz3EYlw!Ugf*^|4h}VY2{k!H;EsW~KwL9J*^bCc)ja2}E4v!CaCW6!S zlbK_UnCp*6qTmDp&81o_7xg}nS7H;;OSb)0*Hzts8T0V z;w}-U9Ig8X!S-z}8Iz)9;lU{y%MwU8cKRdJTrYW}beB4Gy$}9aB42CLtFXDkfXiYK zlnNVZ4`>nrC$(aH>apBnrFm_s7BoeH=Xv}lel{mZYwFw29s>jcwVHbcOj;FaahxYT zz%066EY8Cz&Hy*kTs64?Ql?fSV%CSJ7b=Kcj?e!OH&phDROsOf00000NkvXXu0mjfLidEn literal 0 HcmV?d00001 diff --git a/modules/vimeo/favicon.xcf b/modules/vimeo/favicon.xcf new file mode 100644 index 0000000000000000000000000000000000000000..fb804d2abc9a33068c916d75fbab4a30218d65c5 GIT binary patch literal 6228 zcmeI0e{5S<6~~W%)OFw7rXy@)+EPx>J39kz@wGIYlTP+a3-!HZ(fd6)H{T zrh1rezk+F{J^0QQnhp;1hr8p+Sg@(m7vRv9V3~z>CgYi4JlWkF8K8YC8Xbsa<_AdY z|3)QpxhbE6FBb0W3zwgWT-Vsp+=5SATifl8&CP8aTQ|0_t$ev&7YQ838l0{Wlp7&I z2%f0;NTj-?-Ixw$WjRV z;8neOxM8>rO1mHv8O($d;h{*HdKizZc^z4>4nzM+vhR04PNtG|c!tqb67b$gIvx$) znM(8ow`RhLcz2~KG!TC<5^QR&?17SjLrV|2KRy^qaDBqbOdOWsf#8s{UWsrrwi8=1 zSQkx)qG>wZ9v+Bj3MpQjr&9%G91pT}yWC%2g^im<_LFcyb-O#ZjkXhgn04TTX~ zp~0a_!I@mZBU(V>j0^^2j#ZYX1=Rw1K``7m9pp65BSC}7mb#$< z8tf@lvg90A331G30lDNX?;VH%*z}HB9=FFztgMZ09ld#7mpsh+1PAVn7IqggKj)d0 z{Q*s(5i2rpCL=U(=7LcjXRRogpy6<5VP=iykI`TO$t>02?kqjQyN!o_t{^22A6S$k z&;|Q4G%WiTH_PH?S==m(k>&ptSz_l?JccH9AB#s1*WvKUhmKB8w(x+tUv(wQ;$wP( z%?I_DEGG0(HrA?snPrD+1Im1#>R*gGMqveexv%#-idP~ zpMHAg<=b%L{CoY+-Z(LKj|du`e0<`~9b~uc|Htv;-}F>797ETu?sq+;CLxFPDUsV} zPha1x_Dur*jJsR&epx>~z0P%1zg~spF4abPRL^VwK;pWnJ7t4vo20a`RGL+{%9SSA9JsTV8nU?|=N#Wr*hxu2x6~gvqOW7rx%L)V4;M>0CLrWxC7Mb_>dP zN^R@QsqL;x)_T>1wx#MFgcqw*IWtq)^XC1*e+c>$U$A+O{eCLf_M$Gu19#;A6w#4wCmakJGKT3xwM zrrL8(LViTRif}iBj1XBP6V0_^E`bz`oN?@oe5~ZZXeJ44r1cKrB%S-b+{AHOoc_)9 zw+u=ntWM$aQ@_5{vu$SNmXb3Y3H+_J@tZ;W6v6f(-#@Gu4VAtt;(oq z9w*sC@|x-oNWMz)Af2_)?h9nMk{tlce^2%NGKN$4MfyL9KbrC@k(qy&vGaMVeSqO~ z1aYjMXgAT_45zghRDa6Yc^}ckL>Po=B>E`PukeRez6tNa`x!dV()}4hIQ9Nnb$Sii zA*!uqIDLde`vybjx5(~eINgfi`PU_6U0^f2k-eP3e*vuwr#FHWo|on0wEH%L=PCFv zPFV*5>{l$3pVQM(-p@5MoIb>G+RqremEkn&Vs#b6>6pcEdK~)pKW|_-eHzZ}Q#Ui5 z?qWFoSG^NXaV&O}h3Qft8r~J}P9+=qlQG<49;NHu`fsq@-4*{Etc(RZtC3neR|mc+ zIP+H{4|c8&d_z_v%N4OY>$_?@LtQnUO?mh+Bo%6TB}(akv*#Ej<91B-U&-d%3@Le~cyKWr`d2*6hjeAdFZHELz+$7{2p z@tSPYcp$rB+{(6$`?Ia%zHHmLH@j)v194|JkGmiyJB}V$v^)3ia=RD0KG^tSYr#hV zzG~pJ7QU@4HVcyZkk&|(29T{=(xQH8RiCt}S2n2!;+DpSAF9$$tPENTtjG literal 0 HcmV?d00001 diff --git a/modules/vimeo/pages.py b/modules/vimeo/pages.py new file mode 100644 index 00000000..ebb28fa6 --- /dev/null +++ b/modules/vimeo/pages.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + +from weboob.tools.mech import ClientForm +ControlNotFoundError = ClientForm.ControlNotFoundError + +#HACK +from urllib2 import HTTPError + +from weboob.tools.browser import BasePage +from weboob.tools.browser import BrowserRetry +from weboob.tools.json import json + +from StringIO import StringIO +import re +import datetime +from dateutil.parser import parse as parse_dt + +from weboob.tools.capabilities.thumbnail import Thumbnail +from weboob.capabilities.base import NotAvailable +from weboob.tools.browser import BrokenPageError + +from .video import VimeoVideo + + + +__all__ = ['VideoPage'] + +class VideoPage(BasePage): + def get_video(self, video=None): + if video is None: + video = VimeoVideo(self.group_dict['id']) + self.set_details(video) + + video.set_empty_fields(NotAvailable) + return video + + def set_details(self, v): + # try to get as much from the page itself + obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]') + if len(obj) > 0: + v.title = unicode(obj[0].text) + + obj = self.parser.select(self.document.getroot(), 'meta[itemprop=dateCreated]') + if len(obj) > 0: + v.date = parse_dt(obj[0].attrib['content']) + + #obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]') + + obj = self.parser.select(self.document.getroot(), 'meta[itemprop=thumbnailUrl]') + if len(obj) > 0: + v.thumbnail = Thumbnail(unicode(obj[0].attrib['content'])) + + # for the rest, use the JSON config descriptor + json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", "")) + data = json.load(json_data) + if data is None: + raise BrokenPageError('Unable to get JSON config for id: %r' % v.id) + #print data + + if v.title is None: + v.title = unicode(data['video']['title']) + if v.thumbnail is None: + v.thumbnail = Thumbnail(unicode(data['video']['thumbnail'])) + v.duration = datetime.timedelta(seconds=int(data['video']['duration'])) + + # log ourself to the site to validate the signature + log_data = self.browser.openurl('http://%s/log/client' % ("player.vimeo.com"), 'request_signature=%s&video=true&h264=probably&vp8=probably&vp6=probably&flash=null&touch=false&screen_width=1920&screen_height=1080' % (data['request']['signature'])) + + # failed attempts ahead + + # try to get the filename and url from the SMIL descriptor + # smil_url = data['video']['smil']['url'] + # smil_url += "?sig=%s&time=%s" % (data['request']['signature'], data['request']['timestamp']) + # smil = self.browser.get_document(self.browser.openurl(smil_url)) + + # obj = self.parser.select(smil.getroot(), 'meta[name=httpBase]', 1) + # http_base = obj.attrib['content'] + # print http_base + # if http_base is None: + # raise BrokenPageError('Missing tag in smil file') + + # url = None + # br = 0 + # for obj in self.parser.select(smil.getroot(), 'video'): + # print 'BR:' + obj.attrib['system-bitrate'] + ' url: ' + obj.attrib['src'] + + # if int(obj.attrib['system-bitrate']) > br : + # url = obj.attrib['src'] + + # rtmp_base = 'rtmp://' + data['request']['cdn_url'] + '/' + + # not working yet... + + #url += "&time=%s&sig=%s" % (data['request']['timestamp'], data['request']['signature']) + #url = "%s/%s/%s" %(data['request']['timestamp'], data['request']['signature'], url) + #v.url = unicode(http_base + url) + #v.url = unicode("http://" + data['request']['cdn_url'] + "/" + url) + #v.url = unicode(rtmp_base + url) + + # TODO: determine quality from data[...]['files']['h264'] + v.url = unicode("http://player.vimeo.com/play_redirect?quality=sd&codecs=h264&clip_id=%d&time=%s&sig=%s&type=html5_desktop_local" % (int(v.id), data['request']['timestamp'] , data['request']['signature'])) + + # attempt to determine the redirected URL to pass it instead + # since the target server doesn't check for User-Agent, unlike + # for the source one. + # HACK: we use mechanize directly here for now... FIXME + self.browser.set_handle_redirect(False) + #@retry(BrowserHTTPError, tries=0) + #redir = self.browser.openurl(v.url, if_fail = 'raise') + try: + redir = self.browser.open_novisit(v.url) + except HTTPError, e: + if e.getcode() == 302 and hasattr(e, 'hdrs'): + #print e.hdrs['Location'] + v.url = unicode(e.hdrs['Location']) + + self.browser.set_handle_redirect(True) + diff --git a/modules/vimeo/test.py b/modules/vimeo/test.py new file mode 100644 index 00000000..d5b67f87 --- /dev/null +++ b/modules/vimeo/test.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Romain Bignon +# Copyright(C) 2012 François Revol +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from weboob.tools.test import BackendTest +#from weboob.capabilities.video import BaseVideo + + +class VimeoTest(BackendTest): + BACKEND = 'vimeo' + + # def test_search(self): + # l = list(self.backend.search_videos('haiku os')) + # self.assertTrue(len(l) > 0) + # v = l[0] + # self.backend.fillobj(v, ('url',)) + # self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) + # self.backend.browser.openurl(v.url) + + # def test_latest(self): + # l = list(self.backend.iter_resources([BaseVideo], [u'latest'])) + # self.assertTrue(len(l) > 0) + # v = l[0] + # self.backend.fillobj(v, ('url',)) + # self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) diff --git a/modules/vimeo/video.py b/modules/vimeo/video.py new file mode 100644 index 00000000..10dfd894 --- /dev/null +++ b/modules/vimeo/video.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2010-2011 Roger Philibert +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from weboob.capabilities.video import BaseVideo + +import re + +__all__ = ['VimeoVideo'] + + +class VimeoVideo(BaseVideo): + def __init__(self, *args, **kwargs): + BaseVideo.__init__(self, *args, **kwargs) + self.ext = u'mp4' + + @classmethod + def id2url(cls, _id): + return u'http://vimeo.com/%s' % _id +