[vimeo] fix #1082 and adapt to browser2

This commit is contained in:
Bezleputh 2014-09-02 01:37:53 +02:00
commit b74ae37679
5 changed files with 116 additions and 166 deletions

View file

@ -19,15 +19,13 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.tools.backend import BaseBackend
from weboob.capabilities.collection import CapCollection, CollectionNotFound
from .browser import VimeoBrowser
from .video import VimeoVideo
import re
__all__ = ['VimeoBackend']
@ -41,27 +39,29 @@ class VimeoBackend(BaseBackend, CapVideo, CapCollection):
LICENSE = 'AGPLv3+'
BROWSER = VimeoBrowser
def get_video(self, _id):
    """Fetch the video identified by ``_id`` through the browser.

    The browser object is entered as a context manager for the duration
    of the request.
    """
    browser = self.browser
    with browser:
        return browser.get_video(_id)
SORTBY = ['relevance', 'rating', 'views', 'time']
# def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
# with self.browser:
# return self.browser.search_videos(pattern, self.SORTBY[sortby])
def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
    """Search videos matching ``pattern``.

    ``sortby`` is used as an index into ``self.SORTBY`` to pick the sort
    keyword handed to the browser. ``nsfw`` is accepted but not used here.
    """
    run_search = self.browser.search_videos
    sort_keyword = self.SORTBY[sortby]
    return run_search(pattern, sort_keyword)
def get_video(self, _id):
    """Return the video for ``_id``.

    The identifier is first normalised with ``parse_id`` and the result is
    handed to the browser.
    """
    fetch = self.browser.get_video
    video_id = self.parse_id(_id)
    return fetch(video_id)
def fill_video(self, video, fields):
if fields != ['thumbnail']:
# if more than just the thumbnail is requested, we probably want every field
with self.browser:
video = self.browser.get_video(VimeoVideo.id2url(video.id), video)
video = self.browser.get_video(video.id, video)
if 'thumbnail' in fields and video.thumbnail:
with self.browser:
video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
video.thumbnail.data = self.browser.open(video.thumbnail.url).content
return video
def parse_id(self, _id):
    """Return the bare video id for ``_id``.

    ``_id`` may already be an id, or an http(s) URL on vimeo.com (with or
    without the ``www.`` prefix). For a URL, everything after the host's
    trailing slash is returned; anything else is returned unchanged.
    """
    # Raw string with escaped dots: an unescaped '.' would match any
    # character, so look-alike hosts would be accepted as vimeo.com.
    m = re.match(r'https?://(?:www\.)?vimeo\.com/(.*)', _id)
    if m:
        return m.group(1)
    return _id
def iter_resources(self, objs, split_path):
if BaseVideo in objs:
collection = self.get_collection(objs, split_path)
@ -79,4 +79,4 @@ class VimeoBackend(BaseBackend, CapVideo, CapCollection):
return
raise CollectionNotFound(collection.split_path)
OBJECTS = {VimeoVideo: fill_video}
OBJECTS = {BaseVideo: fill_video}

View file

@ -18,34 +18,33 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser2 import PagesBrowser, URL
from .pages import SearchPage, VideoPage, VideoJsonPage
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url
#from .pages.index import IndexPage
from .pages import VideoPage
from .video import VimeoVideo
import urllib
__all__ = ['VimeoBrowser']
class VimeoBrowser(BaseBrowser):
DOMAIN = 'vimeo.com'
ENCODING = None
PAGES = {r'http://[w\.]*vimeo\.com/(?P<id>\d+).*': VideoPage,
}
class VimeoBrowser(PagesBrowser):
@id2url(VimeoVideo.id2url)
def get_video(self, url, video=None):
    """Navigate to ``url`` and let the loaded page build the video.

    ``video``, when given, is passed on to the page's ``get_video``.
    """
    self.location(url)
    current_page = self.page
    return current_page.get_video(video)
BASEURL = 'http://vimeo.com'
# def search_videos(self, pattern, sortby):
# return None
# self.location(self.buildurl('http://vimeo.com/search%s' % q=pattern.encode('utf-8')))
# assert self.is_on_page(IndexPage)
# return self.page.iter_videos()
search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
SearchPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)
def get_video(self, _id, video=None):
    """Build the video for ``_id`` from two requests.

    The ``video_page`` URL is visited first and its page fills the video;
    the ``video_url`` URL is then opened and its page's ``fill_url`` is
    called on that video. The result of ``fill_url`` is returned.
    """
    html_page = self.video_page.go(_id=_id)
    video = html_page.get_video(video)
    config_page = self.video_url.open(_id=_id)
    return config_page.fill_url(obj=video)
def search_videos(self, pattern, sortby):
    """Go to the first search results page and iterate over its videos.

    ``pattern`` is UTF-8 encoded and URL-quoted before being placed in the
    search URL; ``sortby`` is passed through as given.
    """
    go_to_search = self.search_page.go
    quoted_pattern = urllib.quote_plus(pattern.encode('utf-8'))
    results_page = go_to_search(pattern=quoted_pattern, sortby=sortby, page=1)
    return results_page.iter_videos()
# def latest_videos(self):
# self.home()

View file

@ -17,95 +17,82 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import BaseImage
from weboob.tools.mech import ClientForm
ControlNotFoundError = ClientForm.ControlNotFoundError
from weboob.tools.browser import BasePage
from weboob.tools.json import json
from weboob.tools.exceptions import ParseError
from weboob.tools.browser2.elements import ItemElement, ListElement
from weboob.tools.browser2.page import HTMLPage, method, pagination, JsonPage
from weboob.tools.browser2.filters import Attr, Regexp, Link, Env, CleanText, DateTime, Duration, Field
import re
import datetime
from dateutil.parser import parse as parse_dt
from weboob.capabilities.base import NotAvailable
from weboob.capabilities.image import BaseImage
from weboob.tools.browser import BrokenPageError
from .video import VimeoVideo
__all__ = ['VideoPage', 'SearchPage', 'VideoJsonPage']
__all__ = ['VideoPage']
# Duration filter whose pattern matches text of the form
# <hours>H<minutes>M<seconds>S, exposing the named groups hh, mm and ss.
# NOTE(review): presumably the Duration base class reads ``regexp`` to parse
# its input -- confirm against weboob.tools.browser2.filters.
class VimeoDuration(Duration):
regexp = re.compile(r'(?P<hh>\d+)H(?P<mm>\d+)M(?P<ss>\d+)S')
class VideoPage(BasePage):
def get_video(self, video=None):
if video is None:
video = VimeoVideo(self.group_dict['id'])
self.set_details(video)
class SearchPage(HTMLPage):
@pagination
@method
class iter_videos(ListElement):
item_xpath = '//div[@id="browse_content"]/ol/li'
video.set_empty_fields(NotAvailable)
return video
next_page = Link(u'//a[text()="Next"]')
def set_details(self, v):
# try to get as much from the page itself
obj = self.parser.select(self.document.getroot(), 'h1[itemprop=name]')
if len(obj) > 0:
v.title = unicode(obj[0].text)
class item(ItemElement):
klass = BaseVideo
obj = self.parser.select(self.document.getroot(), 'meta[itemprop=dateCreated]')
if len(obj) > 0:
v.date = parse_dt(obj[0].attrib['content'])
obj_id = Regexp(Attr('.', 'id'), 'clip_(.*)')
obj_title = Attr('./a', 'title')
#obj = self.parser.select(self.document.getroot(), 'meta[itemprop=duration]')
def obj_thumbnail(self):
thumbnail = BaseImage(self.xpath('./a/img')[0].attrib['src'])
thumbnail.url = thumbnail.id
return thumbnail
obj = self.parser.select(self.document.getroot(), 'meta[itemprop=thumbnailUrl]')
if len(obj) > 0:
v.thumbnail = BaseImage(obj[0].attrib['content'])
v.thumbnail.url = v.thumbnail.id
data = None
class VideoPage(HTMLPage):
@method
class get_video(ItemElement):
klass = BaseVideo
# First try to find the JSON data in the page itself.
# it's the only location when the video is not allowed to be embedded
for script in self.parser.select(self.document.getroot(), 'script'):
m = re.match('.* = {config:({.*}),assets:.*', unicode(script.text), re.DOTALL)
if m:
data = json.loads(m.group(1))
break
_balise = lambda x: '//div[@itemprop="video"]/meta[@itemprop="%s"]/@content' % x
# Else fall back to the API
if data is None:
# for the rest, use the JSON config descriptor
json_data = self.browser.openurl('http://%s/video/%s/config?type=%s&referrer=%s' % ("player.vimeo.com", int(v.id), "html5_desktop_local", ""))
data = json.load(json_data)
obj_id = Env('_id')
obj_title = CleanText(_balise('name'))
obj_date = DateTime(CleanText(_balise('dateCreated')))
obj_duration = VimeoDuration(CleanText(_balise('duration')))
obj_description = CleanText(_balise('description'))
obj_author = CleanText('//div[@itemprop="author"]/meta[@itemprop="name"]/@content')
if data is None:
raise BrokenPageError('Unable to get JSON config for id: %r' % int(v.id))
def obj_thumbnail(self):
thumbnail = BaseImage(CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el))
thumbnail.url = thumbnail.id
return thumbnail
if v.title is None:
v.title = unicode(data['video']['title'])
if v.thumbnail is None:
v.thumbnail = BaseImage(data['video']['thumbnail'])
v.thumbnail.url = v.thumbnail.id
v.author = data['video']['owner']['name']
v.duration = datetime.timedelta(seconds=int(data['video']['duration']))
# determine available codec and quality
# use highest quality possible
quality = 'sd'
codec = None
if 'vp6' in data['request']['files']:
codec = 'vp6'
if 'vp8' in data['request']['files']:
codec = 'vp8'
if 'h264' in data['request']['files']:
codec = 'h264'
if not codec:
raise BrokenPageError('Unable to detect available codec for id: %r' % int(v.id))
class VideoJsonPage(JsonPage):
@method
class fill_url(ItemElement):
klass = BaseVideo
if 'hd' in data['request']['files'][codec]:
quality = 'hd'
def obj_url(self):
    """Return the URL of the preferred stream listed in the JSON config.

    ``self.el`` is read as a mapping with the streams stored under
    ``['request']['files'][codec][quality]['url']``. Codecs are tried in
    order of preference (h264, then vp8, then vp6), and the 'hd' quality
    is chosen over 'sd' whenever the selected codec offers it.

    Raises ParseError when none of the known codecs is listed.
    """
    files = self.el['request']['files']

    codec = None
    for candidate in ('h264', 'vp8', 'vp6'):
        if candidate in files:
            codec = candidate
            break

    if not codec:
        # Report the id as-is: it is not guaranteed to be numeric, and
        # int() would raise ValueError here and hide the ParseError.
        raise ParseError('Unable to detect available codec for id: %r' % (Field('id')(self),))

    quality = 'hd' if 'hd' in files[codec] else 'sd'
    return files[codec][quality]['url']
v.url = data['request']['files'][codec][quality]['url']
return v
obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*')

View file

@ -18,21 +18,19 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
#from weboob.capabilities.video import BaseVideo
import itertools
class VimeoTest(BackendTest):
BACKEND = 'vimeo'
# def test_search(self):
# l = list(self.backend.search_videos('haiku os'))
# self.assertTrue(len(l) > 0)
# v = l[0]
# self.backend.fillobj(v, ('url',))
# self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
# self.backend.browser.openurl(v.url)
def test_search(self):
    """Search, then check the first result can be filled with an http URL.

    At most the first 20 search results are consumed.
    """
    results = self.backend.search_videos('boobs')
    videos = list(itertools.islice(results, 0, 20))
    self.assertTrue(len(videos) > 0)

    first = videos[0]
    self.backend.fillobj(first, ('url',))
    failure_message = 'URL for video "%s" not found: %s' % (first.id, first.url)
    self.assertTrue(first.url and first.url.startswith('http://'), failure_message)
# def test_latest(self):
# l = list(self.backend.iter_resources([BaseVideo], [u'latest']))

View file

@ -1,34 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Roger Philibert
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
__all__ = ['VimeoVideo']
class VimeoVideo(BaseVideo):
    """BaseVideo subclass whose ``ext`` is preset to ``mp4``."""

    def __init__(self, *args, **kwargs):
        """Forward every argument to BaseVideo, then set the extension."""
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = u'mp4'

    @classmethod
    def id2url(cls, _id):
        """Build the vimeo.com page URL for the given video id."""
        page_url = u'http://vimeo.com/%s' % _id
        return page_url