[vimeo] Fix #1692: there is no "latest" category on Vimeo.

Allow browsing categories and channels.
This commit is contained in:
Bezleputh 2014-12-18 15:59:52 +01:00 committed by Florent
commit c31e95403e
4 changed files with 96 additions and 19 deletions

View file

@ -18,9 +18,10 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from weboob.browser.exceptions import HTTPNotFound
from .pages import SearchPage, VideoPage, VideoJsonPage
from .pages import SearchPage, VideoPage, VideoJsonPage, CategoriesPage, ChannelsPage
import urllib
@ -32,10 +33,14 @@ class VimeoBrowser(PagesBrowser):
BASEURL = 'http://vimeo.com'
search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
r'channels/(?P<channel>.*)/videos/.*?',
r'categories/(?P<category>.*)/videos/.*?',
SearchPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
categories_page = URL('categories', CategoriesPage)
channels_page = URL('channels', ChannelsPage)
video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)
def get_video(self, _id, video=None):
@ -50,7 +55,14 @@ class VimeoBrowser(PagesBrowser):
sortby=sortby,
page=1).iter_videos()
# def latest_videos(self):
# self.home()
# assert self.is_on_page(IndexPage)
# return self.page.iter_videos()
def get_categories(self):
    """Open the ``categories`` page and return the result of its ``iter_categories``."""
    listing = self.categories_page.go()
    return listing.iter_categories()
def get_channels(self):
    """Open the ``channels`` page and return the result of its ``iter_channels``."""
    listing = self.channels_page.go()
    return listing.iter_channels()
def get_channel_videos(self, channel):
    """Return the videos listed for a channel.

    ``channel`` is handed to ``search_page`` as its ``channel`` URL
    parameter; the page reached that way provides ``iter_videos``.
    """
    listing = self.search_page.go(channel=channel)
    return listing.iter_videos()
def get_category_videos(self, category):
    """Return the videos listed for a category.

    ``category`` is handed to ``search_page`` as its ``category`` URL
    parameter; the page reached that way provides ``iter_videos``.
    """
    listing = self.search_page.go(category=category)
    return listing.iter_videos()

View file

@ -21,7 +21,7 @@
from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.tools.backend import Module
from weboob.capabilities.collection import CapCollection, CollectionNotFound
from weboob.capabilities.collection import CapCollection, CollectionNotFound, Collection
from .browser import VimeoBrowser
@ -66,16 +66,30 @@ class VimeoModule(Module, CapVideo, CapCollection):
if BaseVideo in objs:
collection = self.get_collection(objs, split_path)
if collection.path_level == 0:
yield self.get_collection(objs, [u'latest'])
if collection.split_path == [u'latest']:
for video in self.browser.latest_videos():
yield video
yield Collection([u'vimeo-categories'], u'Vimeo categories')
yield Collection([u'vimeo-channels'], u'Vimeo channels')
if collection.path_level == 1:
if collection.split_path == [u'vimeo-categories']:
for category in self.browser.get_categories():
yield category
if collection.split_path == [u'vimeo-channels']:
for channel in self.browser.get_channels():
yield channel
if collection.path_level == 2:
if collection.split_path[0] == u'vimeo-channels':
for video in self.browser.get_channel_videos(collection.split_path[1]):
yield video
if collection.split_path[0] == u'vimeo-categories':
for video in self.browser.get_category_videos(collection.split_path[1]):
yield video
# Check that `collection` is a path this module serves; any path that is not
# accepted below ends in CollectionNotFound.
# NOTE(review): this span comes from a rendered diff with indentation and +/-
# markers stripped, so both sides of the hunk appear together. The `latest`
# check looks like the removed side and the vimeo-categories/vimeo-channels
# check like its replacement -- confirm against the real file.
def validate_collection(self, objs, collection):
# The root collection (path_level 0) needs no further validation.
if collection.path_level == 0:
return
# Presumably the pre-change branch: titles the former `latest` collection.
if BaseVideo in objs and collection.split_path == [u'latest']:
collection.title = u'Latest Vimeo videos'
# Only the first path component is inspected here; deeper components are not
# checked by this method.
if BaseVideo in objs and (collection.split_path[0] == u'vimeo-categories' or
collection.split_path[0] == u'vimeo-channels'):
return
# Nothing matched: report the requested path as unknown.
raise CollectionNotFound(collection.split_path)

View file

@ -19,6 +19,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
from weboob.exceptions import ParseError
from weboob.browser.elements import ItemElement, ListElement, method
@ -95,3 +96,41 @@ class VideoJsonPage(JsonPage):
return data['request']['files'][codec][quality]['url']
obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*')
class CategoriesPage(HTMLPage):
    """Categories listing page, exposing each category link as a ``Collection``."""

    @method
    class iter_categories(ListElement):
        # Each matched <a> element becomes one Collection item.
        item_xpath = '//div[@class="col_large"]/section/ul/li/a'

        class item(ItemElement):
            klass = Collection

            obj_id = CleanText('./@href')
            obj_title = CleanText('./h2')

            def obj_split_path(self):
                # Path is ['vimeo-categories', <href with '/categories/' removed>].
                slug = CleanText('./@href', replace=[('/categories/', '')])(self)
                return ['vimeo-categories', slug]
class ChannelsPage(HTMLPage):
    """Channels listing page, exposing each channel entry as a ``Collection``."""

    @pagination
    @method
    class iter_channels(ListElement):
        # Each matched <li> element becomes one Collection item.
        item_xpath = '//div[@id="browse_content"]/ol/li'
        # Link to the following listing page; presumably consumed by
        # @pagination -- confirm against the weboob browser docs.
        next_page = Link('//li[@class="pagination_next"]/a')

        class item(ItemElement):
            klass = Collection

            obj_title = CleanText('div/a/div/p[@class="title"]')
            obj_id = CleanText('./@id')

            def obj_split_path(self):
                # Path is ['vimeo-channels', <href with '/channels/' removed>].
                slug = CleanText('div/a/@href', replace=[('/channels/', '')])(self)
                return ['vimeo-channels', slug]

View file

@ -18,6 +18,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
from weboob.tools.test import BackendTest
import itertools
@ -32,9 +33,20 @@ class VimeoTest(BackendTest):
self.backend.fillobj(v, ('url',))
self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
# def test_latest(self):
# l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
# self.assertTrue(len(l) > 0)
# v = l[0]
# self.backend.fillobj(v, ('url',))
# self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
def test_channels(self):
    # Browse at most 20 channels, then at most 20 videos of the first
    # channel, and check that the first video's URL can be filled.
    channels = list(itertools.islice(
        self.backend.iter_resources([BaseVideo], [u'vimeo-channels']),
        0, 20))
    self.assertTrue(len(channels) > 0)

    videos = list(itertools.islice(
        self.backend.iter_resources([BaseVideo], channels[0].split_path),
        0, 20))
    self.assertTrue(len(videos) > 0)

    video = videos[0]
    self.backend.fillobj(video, ('url',))
    self.assertTrue(
        video.url and video.url.startswith('http://'),
        'URL for video "%s" not found: %s' % (video.id, video.url))
def test_categories(self):
    # Browse at most 20 categories, then at most 20 videos of the first
    # category, and check that the first video's URL can be filled.
    categories = list(itertools.islice(
        self.backend.iter_resources([BaseVideo], [u'vimeo-categories']),
        0, 20))
    self.assertTrue(len(categories) > 0)

    videos = list(itertools.islice(
        self.backend.iter_resources([BaseVideo], categories[0].split_path),
        0, 20))
    self.assertTrue(len(videos) > 0)

    video = videos[0]
    self.backend.fillobj(video, ('url',))
    self.assertTrue(
        video.url and video.url.startswith('http://'),
        'URL for video "%s" not found: %s' % (video.id, video.url))