[vimeo] fix #1692: there is no "latest" category on Vimeo.

Allow browsing categories and channels.
This commit is contained in:
Bezleputh 2014-12-18 15:59:52 +01:00 committed by Florent
commit c31e95403e
4 changed files with 96 additions and 19 deletions

View file

@ -18,9 +18,10 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL from weboob.browser import PagesBrowser, URL
from weboob.browser.exceptions import HTTPNotFound from weboob.browser.exceptions import HTTPNotFound
from .pages import SearchPage, VideoPage, VideoJsonPage from .pages import SearchPage, VideoPage, VideoJsonPage, CategoriesPage, ChannelsPage
import urllib import urllib
@ -32,10 +33,14 @@ class VimeoBrowser(PagesBrowser):
BASEURL = 'http://vimeo.com' BASEURL = 'http://vimeo.com'
search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)', search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
r'channels/(?P<channel>.*)/videos/.*?',
r'categories/(?P<category>.*)/videos/.*?',
SearchPage) SearchPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage) categories_page = URL('categories', CategoriesPage)
channels_page = URL('channels', ChannelsPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage) video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)
def get_video(self, _id, video=None): def get_video(self, _id, video=None):
@ -50,7 +55,14 @@ class VimeoBrowser(PagesBrowser):
sortby=sortby, sortby=sortby,
page=1).iter_videos() page=1).iter_videos()
# def latest_videos(self): def get_categories(self):
# self.home() return self.categories_page.go().iter_categories()
# assert self.is_on_page(IndexPage)
# return self.page.iter_videos() def get_channels(self):
return self.channels_page.go().iter_channels()
def get_channel_videos(self, channel):
    """Iterate over the videos of a channel.

    :param channel: value passed as ``channel`` when opening ``search_page``
    :return: the result of ``iter_videos()`` on the page that was reached
    """
    page = self.search_page.go(channel=channel)
    return page.iter_videos()
def get_category_videos(self, category):
    """Iterate over the videos of a category.

    :param category: value passed as ``category`` when opening ``search_page``
    :return: the result of ``iter_videos()`` on the page that was reached
    """
    page = self.search_page.go(category=category)
    return page.iter_videos()

View file

@ -21,7 +21,7 @@
from weboob.capabilities.video import CapVideo, BaseVideo from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.tools.backend import Module from weboob.tools.backend import Module
from weboob.capabilities.collection import CapCollection, CollectionNotFound from weboob.capabilities.collection import CapCollection, CollectionNotFound, Collection
from .browser import VimeoBrowser from .browser import VimeoBrowser
@ -66,16 +66,30 @@ class VimeoModule(Module, CapVideo, CapCollection):
if BaseVideo in objs: if BaseVideo in objs:
collection = self.get_collection(objs, split_path) collection = self.get_collection(objs, split_path)
if collection.path_level == 0: if collection.path_level == 0:
yield self.get_collection(objs, [u'latest']) yield Collection([u'vimeo-categories'], u'Vimeo categories')
if collection.split_path == [u'latest']: yield Collection([u'vimeo-channels'], u'Vimeo channels')
for video in self.browser.latest_videos():
if collection.path_level == 1:
if collection.split_path == [u'vimeo-categories']:
for category in self.browser.get_categories():
yield category
if collection.split_path == [u'vimeo-channels']:
for channel in self.browser.get_channels():
yield channel
if collection.path_level == 2:
if collection.split_path[0] == u'vimeo-channels':
for video in self.browser.get_channel_videos(collection.split_path[1]):
yield video
if collection.split_path[0] == u'vimeo-categories':
for video in self.browser.get_category_videos(collection.split_path[1]):
yield video yield video
def validate_collection(self, objs, collection): def validate_collection(self, objs, collection):
if collection.path_level == 0: if collection.path_level == 0:
return return
if BaseVideo in objs and collection.split_path == [u'latest']: if BaseVideo in objs and (collection.split_path[0] == u'vimeo-categories' or
collection.title = u'Latest Vimeo videos' collection.split_path[0] == u'vimeo-channels'):
return return
raise CollectionNotFound(collection.split_path) raise CollectionNotFound(collection.split_path)

View file

@ -19,6 +19,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import BaseImage from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
from weboob.exceptions import ParseError from weboob.exceptions import ParseError
from weboob.browser.elements import ItemElement, ListElement, method from weboob.browser.elements import ItemElement, ListElement, method
@ -95,3 +96,41 @@ class VideoJsonPage(JsonPage):
return data['request']['files'][codec][quality]['url'] return data['request']['files'][codec][quality]['url']
obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*') obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*')
class CategoriesPage(HTMLPage):
    """HTML page from which the browsable Vimeo categories are extracted."""

    @method
    class iter_categories(ListElement):
        # Every matching <a> element becomes one Collection.
        item_xpath = '//div[@class="col_large"]/section/ul/li/a'

        class item(ItemElement):
            klass = Collection

            # The id is built from the link's href attribute, the title from
            # its <h2> child.
            obj_id = CleanText('./@href')
            obj_title = CleanText('./h2')

            def obj_split_path(self):
                """Return ``['vimeo-categories', <category>]``.

                ``<category>`` is the link's href with ``/categories/``
                replaced by an empty string.
                """
                slug = CleanText('./@href', replace=[('/categories/', '')])(self)
                return ['vimeo-categories', slug]
class ChannelsPage(HTMLPage):
    """HTML page from which the browsable Vimeo channels are extracted."""

    @pagination
    @method
    class iter_channels(ListElement):
        # Every <li> of the ordered list inside "browse_content" becomes one
        # Collection.
        item_xpath = '//div[@id="browse_content"]/ol/li'

        # Link found in the "pagination_next" entry; presumably what
        # @pagination follows to reach the next page -- confirm against the
        # decorator's implementation.
        next_page = Link('//li[@class="pagination_next"]/a')

        class item(ItemElement):
            klass = Collection

            obj_title = CleanText('div/a/div/p[@class="title"]')
            obj_id = CleanText('./@id')

            def obj_split_path(self):
                """Return ``['vimeo-channels', <channel>]``.

                ``<channel>`` is the href of the item's link with
                ``/channels/`` replaced by an empty string.
                """
                slug = CleanText('div/a/@href', replace=[('/channels/', '')])(self)
                return ['vimeo-channels', slug]

View file

@ -18,6 +18,7 @@
# You should have received a copy of the GNU Affero General Public License # You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.tools.test import BackendTest from weboob.tools.test import BackendTest
import itertools import itertools
@ -32,9 +33,20 @@ class VimeoTest(BackendTest):
self.backend.fillobj(v, ('url',)) self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
# def test_latest(self): def test_channels(self):
# l = list(self.backend.iter_resources([BaseVideo], [u'latest'])) l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20))
# self.assertTrue(len(l) > 0) self.assertTrue(len(l) > 0)
# v = l[0] l1 = list(itertools.islice(self.backend.iter_resources([BaseVideo], l[0].split_path), 0, 20))
# self.backend.fillobj(v, ('url',)) self.assertTrue(len(l1) > 0)
# self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) v = l1[0]
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_categories(self):
    """Browse the first listed category and check its first video's URL."""
    root_path = [u'vimeo-categories']
    categories = list(itertools.islice(self.backend.iter_resources([BaseVideo], root_path), 0, 20))
    self.assertTrue(len(categories) > 0)

    # Descend into the first category, again reading at most 20 entries.
    first_path = categories[0].split_path
    videos = list(itertools.islice(self.backend.iter_resources([BaseVideo], first_path), 0, 20))
    self.assertTrue(len(videos) > 0)

    v = videos[0]
    self.backend.fillobj(v, ('url',))
    self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))