[vimeo] fix #1082 and adapt to browser2

This commit is contained in:
Bezleputh 2014-09-02 01:37:53 +02:00
commit b74ae37679
5 changed files with 116 additions and 166 deletions

View file

@ -19,15 +19,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import CapVideo, BaseVideo from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.tools.backend import BaseBackend from weboob.tools.backend import BaseBackend
from weboob.capabilities.collection import CapCollection, CollectionNotFound from weboob.capabilities.collection import CapCollection, CollectionNotFound
from .browser import VimeoBrowser from .browser import VimeoBrowser
from .video import VimeoVideo
import re
__all__ = ['VimeoBackend'] __all__ = ['VimeoBackend']
@ -41,27 +39,29 @@ class VimeoBackend(BaseBackend, CapVideo, CapCollection):
LICENSE = 'AGPLv3+' LICENSE = 'AGPLv3+'
BROWSER = VimeoBrowser BROWSER = VimeoBrowser
def get_video(self, _id):
with self.browser:
return self.browser.get_video(_id)
SORTBY = ['relevance', 'rating', 'views', 'time'] SORTBY = ['relevance', 'rating', 'views', 'time']
# def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False): def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
# with self.browser: return self.browser.search_videos(pattern, self.SORTBY[sortby])
# return self.browser.search_videos(pattern, self.SORTBY[sortby])
def get_video(self, _id):
    """Return the video designated by ``_id``.

    ``_id`` is first normalised with ``parse_id`` and the result is
    handed to the browser's ``get_video``.
    """
    video_id = self.parse_id(_id)
    return self.browser.get_video(video_id)
def fill_video(self, video, fields): def fill_video(self, video, fields):
if fields != ['thumbnail']: if fields != ['thumbnail']:
# if we don't want only the thumbnail, we probably want also every fields # if we don't want only the thumbnail, we probably want also every fields
with self.browser: video = self.browser.get_video(video.id, video)
video = self.browser.get_video(VimeoVideo.id2url(video.id), video)
if 'thumbnail' in fields and video.thumbnail: if 'thumbnail' in fields and video.thumbnail:
with self.browser: video.thumbnail.data = self.browser.open(video.thumbnail.url).content
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
return video return video
def parse_id(self, _id):
    """Return the vimeo identifier contained in ``_id``.

    ``_id`` may be a bare identifier or a vimeo.com URL (``http`` or
    ``https``, with or without a ``www.`` prefix). For a URL, everything
    following ``vimeo.com/`` is returned; any other value is returned
    unchanged.
    """
    # Raw string with escaped dots so only the literal host matches; the
    # optional "www." prevents such URLs from being passed on verbatim as
    # if they were identifiers.
    m = re.match(r'https?://(?:www\.)?vimeo\.com/(.*)', _id)
    if m:
        return m.group(1)
    return _id
def iter_resources(self, objs, split_path): def iter_resources(self, objs, split_path):
if BaseVideo in objs: if BaseVideo in objs:
collection = self.get_collection(objs, split_path) collection = self.get_collection(objs, split_path)
@ -79,4 +79,4 @@ class VimeoBackend(BaseBackend, CapVideo, CapCollection):
return return
raise CollectionNotFound(collection.split_path) raise CollectionNotFound(collection.split_path)
OBJECTS = {VimeoVideo: fill_video} OBJECTS = {BaseVideo: fill_video}

View file

@ -18,34 +18,33 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser2 import PagesBrowser, URL
from .pages import SearchPage, VideoPage, VideoJsonPage
from weboob.tools.browser import BaseBrowser import urllib
from weboob.tools.browser.decorators import id2url
#from .pages.index import IndexPage
from .pages import VideoPage
from .video import VimeoVideo
__all__ = ['VimeoBrowser'] __all__ = ['VimeoBrowser']
class VimeoBrowser(BaseBrowser): class VimeoBrowser(PagesBrowser):
DOMAIN = 'vimeo.com'
ENCODING = None
PAGES = {r'http://[w\.]*vimeo\.com/(?P<id>\d+).*': VideoPage,
}
@id2url(VimeoVideo.id2url) BASEURL = 'http://vimeo.com'
def get_video(self, url, video=None):
self.location(url)
return self.page.get_video(video)
# def search_videos(self, pattern, sortby): search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
# return None SearchPage)
# self.location(self.buildurl('http://vimeo.com/search%s' % q=pattern.encode('utf-8')))
# assert self.is_on_page(IndexPage) video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
# return self.page.iter_videos()
video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)
def get_video(self, _id, video=None):
    """Fetch the video ``_id`` and return it with its stream url filled.

    The ``video_page`` URL is visited first and its ``get_video`` is
    called with ``video``; the ``video_url`` URL is then opened for the
    same ``_id`` and its ``fill_url`` is applied to the resulting object.
    """
    html_page = self.video_page.go(_id=_id)
    video = html_page.get_video(video)
    config_page = self.video_url.open(_id=_id)
    return config_page.fill_url(obj=video)
def search_videos(self, pattern, sortby):
    """Search for ``pattern`` ordered by ``sortby``.

    ``pattern`` is encoded to UTF-8 and quoted with
    ``urllib.quote_plus`` before being placed in the ``search_page``
    URL; the request is made with ``page=1`` and the page's
    ``iter_videos`` result is returned.
    """
    utf8_pattern = pattern.encode('utf-8')
    quoted_pattern = urllib.quote_plus(utf8_pattern)
    results_page = self.search_page.go(pattern=quoted_pattern,
                                       sortby=sortby,
                                       page=1)
    return results_page.iter_videos()
# def latest_videos(self): # def latest_videos(self):
# self.home() # self.home()

View file

@ -17,95 +17,82 @@
# #
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import BaseImage
from weboob.tools.mech import ClientForm from weboob.tools.exceptions import ParseError
ControlNotFoundError = ClientForm.ControlNotFoundError from weboob.tools.browser2.elements import ItemElement, ListElement
from weboob.tools.browser2.page import HTMLPage, method, pagination, JsonPage
from weboob.tools.browser import BasePage from weboob.tools.browser2.filters import Attr, Regexp, Link, Env, CleanText, DateTime, Duration, Field
from weboob.tools.json import json
import re import re
import datetime
from dateutil.parser import parse as parse_dt
from weboob.capabilities.base import NotAvailable __all__ = ['VideoPage', 'SearchPage', 'VideoJsonPage']
from weboob.capabilities.image import BaseImage
from weboob.tools.browser import BrokenPageError
from .video import VimeoVideo
__all__ = ['VideoPage'] class VimeoDuration(Duration):
regexp = re.compile(r'(?P<hh>\d+)H(?P<mm>\d+)M(?P<ss>\d+)S')
class VideoPage(BasePage): class SearchPage(HTMLPage):
def get_video(self, video=None): @pagination
if video is None: @method
video = VimeoVideo(self.group_dict['id']) class iter_videos(ListElement):
self.set_details(video) item_xpath = '//div[@id="browse_content"]/ol/li'
video.set_empty_fields(NotAvailable) next_page = Link(u'//a[text()="Next"]')
return video
def set_details(self, v): class item(ItemElement):
# try to get as much from the page itself klass = BaseVideo
obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]')
if len(obj) > 0:
v.title = unicode(obj[0].text)
obj = self.parser.select(self.document.getroot(), 'meta[itemprop=dateCreated]') obj_id = Regexp(Attr('.', 'id'), 'clip_(.*)')
if len(obj) > 0: obj_title = Attr('./a', 'title')
v.date = parse_dt(obj[0].attrib['content'])
#obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]') def obj_thumbnail(self):
thumbnail = BaseImage(self.xpath('./a/img')[0].attrib['src'])
thumbnail.url = thumbnail.id
return thumbnail
obj = self.parser.select(self.document.getroot(), 'meta[itemprop=thumbnailUrl]')
if len(obj) > 0:
v.thumbnail = BaseImage(obj[0].attrib['content'])
v.thumbnail.url = v.thumbnail.id
data = None class VideoPage(HTMLPage):
@method
class get_video(ItemElement):
klass = BaseVideo
# First try to find the JSON data in the page itself. _balise = lambda x: '//div[@itemprop="video"]/meta[@itemprop="%s"]/@content' % x
# it's the only location in case the video is not allowed to be embeded
for script in self.parser.select(self.document.getroot(), 'script'):
m = re.match('.* = {config:({.*}),assets:.*', unicode(script.text), re.DOTALL)
if m:
data = json.loads(m.group(1))
break
# Else fall back to the API obj_id = Env('_id')
if data is None: obj_title = CleanText(_balise('name'))
# for the rest, use the JSON config descriptor obj_date = DateTime(CleanText(_balise('dateCreated')))
json_data = self.browser.openurl('http://%s/video/%s/config?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", "")) obj_duration = VimeoDuration(CleanText(_balise('duration')))
data = json.load(json_data) obj_description = CleanText(_balise('description'))
obj_author = CleanText('//div[@itemprop="author"]/meta[@itemprop="name"]/@content')
if data is None: def obj_thumbnail(self):
raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id)) thumbnail = BaseImage(CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el))
thumbnail.url = thumbnail.id
return thumbnail
if v.title is None:
v.title = unicode(data['video']['title'])
if v.thumbnail is None:
v.thumbnail = BaseImage(data['video']['thumbnail'])
v.thumbnail.url = v.thumbnail.id
v.author = data['video']['owner']['name']
v.duration = datetime.timedelta(seconds=int(data['video']['duration']))
# determine available codec and quality class VideoJsonPage(JsonPage):
# use highest quality possible @method
quality = 'sd' class fill_url(ItemElement):
codec = None klass = BaseVideo
if 'vp6' in data['request']['files']:
codec = 'vp6'
if 'vp8' in data['request']['files']:
codec = 'vp8'
if 'h264' in data['request']['files']:
codec = 'h264'
if not codec:
raise BrokenPageError('Unable to detect available codec for id: %r' % int(v.id))
if 'hd' in data['request']['files'][codec]: def obj_url(self):
quality = 'hd' quality = 'sd'
codec = None
data = self.el
if 'vp6' in data['request']['files']:
codec = 'vp6'
if 'vp8' in data['request']['files']:
codec = 'vp8'
if 'h264' in data['request']['files']:
codec = 'h264'
if not codec:
raise ParseError('Unable to detect available codec for id: %r' % int(Field('id')(self)))
if 'hd' in data['request']['files'][codec]:
quality = 'hd'
return data['request']['files'][codec][quality]['url']
v.url = data['request']['files'][codec][quality]['url'] obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*')
return v

View file

@ -18,21 +18,19 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
#from weboob.capabilities.video import BaseVideo import itertools
class VimeoTest(BackendTest): class VimeoTest(BackendTest):
BACKEND = 'vimeo' BACKEND = 'vimeo'
# def test_search(self): def test_search(self):
# l = list(self.backend.search_videos('haiku os')) l = list(itertools.islice(self.backend.search_videos('boobs'), 0, 20))
# self.assertTrue(len(l) > 0) self.assertTrue(len(l) > 0)
# v = l[0] v = l[0]
# self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
# self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
# self.backend.browser.openurl(v.url)
# def test_latest(self): # def test_latest(self):
# l = list(self.backend.iter_resources([BaseVideo], [u'latest'])) # l = list(self.backend.iter_resources([BaseVideo], [u'latest']))

View file

@ -1,34 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
__all__ = ['VimeoVideo']
class VimeoVideo(BaseVideo):
    """BaseVideo subclass whose ``ext`` attribute is preset to ``u'mp4'``."""

    def __init__(self, *args, **kwargs):
        """Forward every argument to ``BaseVideo.__init__``, then set ``ext``."""
        BaseVideo.__init__(self, *args, **kwargs)
        # Assigned after the base initialiser has run.
        self.ext = u'mp4'

    @classmethod
    def id2url(cls, _id):
        """Return the vimeo.com page URL built from the identifier ``_id``."""
        page_url = u'http://vimeo.com/%s' % _id
        return page_url