diff --git a/modules/vimeo/__init__.py b/modules/vimeo/__init__.py
new file mode 100644
index 00000000..18aef779
--- /dev/null
+++ b/modules/vimeo/__init__.py
@@ -0,0 +1,3 @@
+from .backend import VimeoBackend
+
+__all__ = ['VimeoBackend']
diff --git a/modules/vimeo/backend.py b/modules/vimeo/backend.py
new file mode 100644
index 00000000..5c1de8e2
--- /dev/null
+++ b/modules/vimeo/backend.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo, BaseVideo
+from weboob.tools.backend import BaseBackend
+from weboob.capabilities.collection import ICapCollection, CollectionNotFound
+
+from .browser import VimeoBrowser
+from .video import VimeoVideo
+
+
+__all__ = ['VimeoBackend']
+
+
+class VimeoBackend(BaseBackend, ICapVideo, ICapCollection):
+    """Weboob backend for the Vimeo video streaming website."""
+
+    NAME = 'vimeo'
+    MAINTAINER = u'François Revol'
+    EMAIL = 'revol@free.fr'
+    VERSION = '0.d'
+    DESCRIPTION = 'Vimeo video streaming website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = VimeoBrowser
+
+    # Sort names for the search that is not implemented yet (see TODO below).
+    SORTBY = ['relevance', 'rating', 'views', 'time']
+
+    def get_video(self, _id):
+        """Return the video designated by ``_id``, as built by the browser."""
+        with self.browser:
+            return self.browser.get_video(_id)
+
+    # TODO: implement search_videos(); it is expected to map its ``sortby``
+    # index to a SORTBY name and delegate to the browser.
+
+    def fill_video(self, video, fields):
+        """Load ``fields`` of ``video`` and return the filled video.
+
+        Unless only the thumbnail is requested, the whole video is fetched
+        again; the thumbnail data is downloaded when ``fields`` asks for it.
+        """
+        # ``fields`` may be any sequence (e.g. a tuple), so normalise it
+        # before comparing it with a list.
+        if list(fields) != ['thumbnail']:
+            # if we don't want only the thumbnail, we probably want also every fields
+            with self.browser:
+                video = self.browser.get_video(VimeoVideo.id2url(video.id), video)
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    def _can_list_latest(self):
+        """Tell whether the browser class provides ``latest_videos()``."""
+        return hasattr(self.BROWSER, 'latest_videos')
+
+    def iter_resources(self, objs, split_path):
+        """Yield the collections and videos found at ``split_path``.
+
+        The ``latest`` collection is only offered when the browser can list
+        it, so that browsing does not end in an AttributeError.
+        """
+        if BaseVideo not in objs:
+            return
+        collection = self.get_collection(objs, split_path)
+        if collection.path_level == 0 and self._can_list_latest():
+            yield self.get_collection(objs, [u'latest'])
+        if collection.split_path == [u'latest'] and self._can_list_latest():
+            for video in self.browser.latest_videos():
+                yield video
+
+    def validate_collection(self, objs, collection):
+        """Accept the root collection and, when it can be listed, ``latest``.
+
+        Raises CollectionNotFound for every other path.
+        """
+        if collection.path_level == 0:
+            return
+        if (BaseVideo in objs and collection.split_path == [u'latest']
+                and self._can_list_latest()):
+            collection.title = u'Latest Vimeo videos'
+            return
+        raise CollectionNotFound(collection.split_path)
+
+    OBJECTS = {VimeoVideo: fill_video}
diff --git a/modules/vimeo/browser.py b/modules/vimeo/browser.py
new file mode
100644
index 00000000..36b0fd50
--- /dev/null
+++ b/modules/vimeo/browser.py
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+#from .pages.index import IndexPage
+from .pages import VideoPage
+from .video import VimeoVideo
+
+
+__all__ = ['VimeoBrowser']
+
+
+class VimeoBrowser(BaseBrowser):
+    """Browser for vimeo.com that currently only handles video pages."""
+
+    DOMAIN = 'vimeo.com'
+    ENCODING = None
+    # USER_AGENT = BaseBrowser.USER_AGENTS['wget']
+    # TODO: determine this dynamically, like:
+    # wget -d 127.0.0.1 -O /dev/null 2>&1 | grep '^User-Agent:'
+    #USER_AGENT = 'Wget/1.14 (linux-gnu)'
+
+    # The ``id`` group is read by VideoPage to build a video when none is given.
+    PAGES = {r'http://[w\.]*vimeo\.com/(?P<id>\d+).*': VideoPage,
+            }
+
+    @id2url(VimeoVideo.id2url)
+    def get_video(self, url, video=None):
+        """Open ``url`` and return the video built by the page reached.
+
+        ``video``, when given, is handed to the page's ``get_video()``.
+        """
+        self.location(url)
+        return self.page.get_video(video)
+
+    # TODO: search_videos(pattern, sortby) and latest_videos() still need an
+    # IndexPage parser before they can be implemented.
diff --git a/modules/vimeo/favicon.png b/modules/vimeo/favicon.png
new file mode 100644
index
00000000..40d86656
Binary files /dev/null and b/modules/vimeo/favicon.png differ
diff --git a/modules/vimeo/favicon.xcf b/modules/vimeo/favicon.xcf
new file mode 100644
index 00000000..fb804d2a
Binary files /dev/null and b/modules/vimeo/favicon.xcf differ
diff --git a/modules/vimeo/pages.py b/modules/vimeo/pages.py
new file mode 100644
index 00000000..ebb28fa6
--- /dev/null
+++ b/modules/vimeo/pages.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.tools.mech import ClientForm
+ControlNotFoundError = ClientForm.ControlNotFoundError
+
+#HACK
+from urllib2 import HTTPError
+
+from weboob.tools.browser import BasePage
+from weboob.tools.browser import BrowserRetry
+from weboob.tools.json import json
+
+from StringIO import StringIO
+import re
+import datetime
+from dateutil.parser import parse as parse_dt
+
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.browser import BrokenPageError
+
+from .video import VimeoVideo
+
+
+
+__all__ = ['VideoPage']
+
+class VideoPage(BasePage):
+    """Page of a single Vimeo video."""
+
+    def get_video(self, video=None):
+        """Return ``video`` with its details set from this page.
+
+        When ``video`` is None, a VimeoVideo is created from
+        ``self.group_dict['id']``. Fields left empty are then set to
+        NotAvailable.
+        """
+        if video is None:
+            video = VimeoVideo(self.group_dict['id'])
+        self.set_details(video)
+
+        video.set_empty_fields(NotAvailable)
+        return video
+
+    def set_details(self, v):
+        """Fill title, date, thumbnail, duration and url of the video ``v``.
+
+        Raises BrokenPageError when the player's JSON config is empty.
+        """
+        # try to get as much from the page itself
+        root = self.document.getroot()
+
+        found = self.parser.select(root, 'h1[itemprop=name]')
+        if len(found) > 0:
+            v.title = unicode(found[0].text)
+
+        found = self.parser.select(root, 'meta[itemprop=dateCreated]')
+        if len(found) > 0:
+            v.date = parse_dt(found[0].attrib['content'])
+
+        found = self.parser.select(root, 'meta[itemprop=thumbnailUrl]')
+        if len(found) > 0:
+            v.thumbnail = Thumbnail(unicode(found[0].attrib['content']))
+
+        # for the rest, use the JSON config descriptor
+        json_data = self.browser.openurl('http://%s/config/%s?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
+        data = json.load(json_data)
+        if data is None:
+            raise BrokenPageError('Unable to get JSON config for id: %r' % v.id)
+
+        # NOTE(review): unset fields may hold a sentinel rather than None, in
+        # which case these two fallbacks never run -- confirm against BaseVideo.
+        if v.title is None:
+            v.title = unicode(data['video']['title'])
+        if v.thumbnail is None:
+            v.thumbnail = Thumbnail(unicode(data['video']['thumbnail']))
+        v.duration = datetime.timedelta(seconds=int(data['video']['duration']))
+
+        # log ourself to the site to validate the signature
+        self.browser.openurl('http://%s/log/client' % ("player.vimeo.com"), 'request_signature=%s&video=true&h264=probably&vp8=probably&vp6=probably&flash=null&touch=false&screen_width=1920&screen_height=1080' % (data['request']['signature']))
+
+        # NOTE: getting the stream URL from the SMIL descriptor or the RTMP
+        # CDN was tried and is not working yet; play_redirect is used instead.
+        # TODO: determine quality from data[...]['files']['h264']
+        v.url = unicode("http://player.vimeo.com/play_redirect?quality=sd&codecs=h264&clip_id=%d&time=%s&sig=%s&type=html5_desktop_local" % (int(v.id), data['request']['timestamp'] , data['request']['signature']))
+
+        # attempt to determine the redirected URL to pass it instead
+        # since the target server doesn't check for User-Agent, unlike
+        # for the source one.
+        # HACK: we use mechanize directly here for now... FIXME
+        self.browser.set_handle_redirect(False)
+        try:
+            self.browser.open_novisit(v.url)
+        except HTTPError, e:
+            # best effort: on anything but a 302, v.url keeps the
+            # play_redirect address
+            if e.getcode() == 302 and hasattr(e, 'hdrs'):
+                v.url = unicode(e.hdrs['Location'])
+        finally:
+            # always restore redirect handling, even when the request
+            # raised something other than an HTTPError
+            self.browser.set_handle_redirect(True)
diff --git a/modules/vimeo/test.py b/modules/vimeo/test.py
new file mode 100644
index 00000000..d5b67f87
--- /dev/null
+++ b/modules/vimeo/test.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+# Copyright(C) 2012 François Revol
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+#from weboob.capabilities.video import BaseVideo
+
+
+class VimeoTest(BackendTest):
+    BACKEND = 'vimeo'
+
+    # def test_search(self):
+    #     l = list(self.backend.search_videos('haiku os'))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+    #     self.backend.browser.openurl(v.url)
+
+    # def test_latest(self):
+    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+    #     self.assertTrue(len(l) > 0)
+    #     v = l[0]
+    #     self.backend.fillobj(v, ('url',))
+    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
diff --git a/modules/vimeo/video.py b/modules/vimeo/video.py
new file mode 100644
index 00000000..10dfd894
--- /dev/null
+++ b/modules/vimeo/video.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+import re
+
+__all__ = ['VimeoVideo']
+
+
+class VimeoVideo(BaseVideo):
+    """Video hosted on Vimeo; its file extension is set to ``mp4``."""
+
+    def __init__(self, *args, **kwargs):
+        """Forward all arguments to BaseVideo, then set ``ext``."""
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = u'mp4'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the vimeo.com page URL for the video id ``_id``."""
+        page_url = u'http://vimeo.com/%s' % _id
+        return page_url