[vimeo] fix #1692: there is no "latest" category on Vimeo.

Allow browsing categories and channels.
2014-12-18 15:59:52 +01:00 · 2014-12-18 15:59:52 +01:00 · c31e95403e
commit c31e95403e
parent d012e45527
4 changed files with 96 additions and 19 deletions
--- a/modules/vimeo/browser.py
+++ b/modules/vimeo/browser.py
@ -18,9 +18,10 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 from weboob.browser import PagesBrowser, URL
 from weboob.browser.exceptions import HTTPNotFound
-from .pages import SearchPage, VideoPage, VideoJsonPage
+from .pages import SearchPage, VideoPage, VideoJsonPage, CategoriesPage, ChannelsPage
 import urllib
@ -32,10 +33,14 @@ class VimeoBrowser(PagesBrowser):
    BASEURL = 'http://vimeo.com'
    search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
                      r'channels/(?P<channel>.*)/videos/.*?',
                      r'categories/(?P<category>.*)/videos/.*?',
                      SearchPage)
-    video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
+    categories_page = URL('categories', CategoriesPage)
    channels_page = URL('channels', ChannelsPage)
    video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
    video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)
    def get_video(self, _id, video=None):
@ -50,7 +55,14 @@ class VimeoBrowser(PagesBrowser):
                                   sortby=sortby,
                                   page=1).iter_videos()
-    # def latest_videos(self):
+    def get_categories(self):
-    #     self.home()
+        return self.categories_page.go().iter_categories()
-    #     assert self.is_on_page(IndexPage)
+
-    #     return self.page.iter_videos()
+    def get_channels(self):
        return self.channels_page.go().iter_channels()
    def get_channel_videos(self, channel):
        """Iterate over the videos of a channel.

        Navigates ``search_page`` with ``channel`` as its URL parameter and
        returns whatever the resulting page's ``iter_videos()`` yields.
        """
        channel_page = self.search_page.go(channel=channel)
        return channel_page.iter_videos()
    def get_category_videos(self, category):
        """Iterate over the videos of a category.

        Navigates ``search_page`` with ``category`` as its URL parameter and
        returns whatever the resulting page's ``iter_videos()`` yields.
        """
        category_page = self.search_page.go(category=category)
        return category_page.iter_videos()
--- a/modules/vimeo/module.py
+++ b/modules/vimeo/module.py
@ -21,7 +21,7 @@
 from weboob.capabilities.video import CapVideo, BaseVideo
 from weboob.tools.backend import Module
-from weboob.capabilities.collection import CapCollection, CollectionNotFound
+from weboob.capabilities.collection import CapCollection, CollectionNotFound, Collection
 from .browser import VimeoBrowser
@ -66,16 +66,30 @@ class VimeoModule(Module, CapVideo, CapCollection):
        if BaseVideo in objs:
            collection = self.get_collection(objs, split_path)
            if collection.path_level == 0:
-                yield self.get_collection(objs, [u'latest'])
+                yield Collection([u'vimeo-categories'], u'Vimeo categories')
-            if collection.split_path == [u'latest']:
+                yield Collection([u'vimeo-channels'], u'Vimeo channels')
-                for video in self.browser.latest_videos():
+
            if collection.path_level == 1:
                if collection.split_path == [u'vimeo-categories']:
                    for category in self.browser.get_categories():
                        yield category
                if collection.split_path == [u'vimeo-channels']:
                    for channel in self.browser.get_channels():
                        yield channel
            if collection.path_level == 2:
                if collection.split_path[0] == u'vimeo-channels':
                    for video in self.browser.get_channel_videos(collection.split_path[1]):
                        yield video
                if collection.split_path[0] == u'vimeo-categories':
                    for video in self.browser.get_category_videos(collection.split_path[1]):
                        yield video
    def validate_collection(self, objs, collection):
        if collection.path_level == 0:
            return
-        if BaseVideo in objs and collection.split_path == [u'latest']:
+        if BaseVideo in objs and (collection.split_path[0] == u'vimeo-categories' or
-            collection.title = u'Latest Vimeo videos'
+                                  collection.split_path[0] == u'vimeo-channels'):
            return
        raise CollectionNotFound(collection.split_path)
--- a/modules/vimeo/pages.py
+++ b/modules/vimeo/pages.py
@ -19,6 +19,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 from weboob.capabilities.video import BaseVideo
 from weboob.capabilities.image import BaseImage
 from weboob.capabilities.collection import Collection
 from weboob.exceptions import ParseError
 from weboob.browser.elements import ItemElement, ListElement, method
@ -95,3 +96,41 @@ class VideoJsonPage(JsonPage):
            return data['request']['files'][codec][quality]['url']
        obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*')
 class CategoriesPage(HTMLPage):
    """HTML page from which the list of Vimeo categories is scraped."""

    @method
    class iter_categories(ListElement):
        """Build one ``Collection`` per category link matched by ``item_xpath``."""

        # Each matched <a> element describes a single category.
        item_xpath = '//div[@class="col_large"]/section/ul/li/a'

        class item(ItemElement):
            klass = Collection

            # The id is the link target as-is; the title is the <h2> text.
            obj_id = CleanText('./@href')
            obj_title = CleanText('./h2')

            def obj_split_path(self):
                """Return ``['vimeo-categories', <category>]``.

                ``<category>`` is the link's href with the '/categories/'
                prefix removed.
                """
                slug = CleanText('./@href', replace=[('/categories/', '')])(self)
                return ['vimeo-categories', slug]
 class ChannelsPage(HTMLPage):
    """HTML page from which the list of Vimeo channels is scraped."""

    # NOTE(review): @pagination presumably follows ``next_page`` until no
    # link is found -- confirm against the weboob browser documentation.
    @pagination
    @method
    class iter_channels(ListElement):
        """Build one ``Collection`` per channel entry matched by ``item_xpath``."""

        # Each matched <li> element describes a single channel.
        item_xpath = '//div[@id="browse_content"]/ol/li'

        # Link to the following page of results.
        next_page = Link('//li[@class="pagination_next"]/a')

        class item(ItemElement):
            klass = Collection

            # The title comes from the nested <p class="title">; the id is
            # the <li>'s own ``id`` attribute.
            obj_title = CleanText('div/a/div/p[@class="title"]')
            obj_id = CleanText('./@id')

            def obj_split_path(self):
                """Return ``['vimeo-channels', <channel>]``.

                ``<channel>`` is the entry's link href with the '/channels/'
                prefix removed.
                """
                slug = CleanText('div/a/@href', replace=[('/channels/', '')])(self)
                return ['vimeo-channels', slug]
--- a/modules/vimeo/test.py
+++ b/modules/vimeo/test.py
@ -18,6 +18,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 from weboob.capabilities.video import BaseVideo
 from weboob.tools.test import BackendTest
 import itertools
@ -32,9 +33,20 @@ class VimeoTest(BackendTest):
        self.backend.fillobj(v, ('url',))
        self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
-    # def test_latest(self):
+    def test_channels(self):
-    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
+        l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20))
-    #     self.assertTrue(len(l) > 0)
+        self.assertTrue(len(l) > 0)
-    #     v = l[0]
+        l1 = list(itertools.islice(self.backend.iter_resources([BaseVideo], l[0].split_path), 0, 20))
-    #     self.backend.fillobj(v, ('url',))
+        self.assertTrue(len(l1) > 0)
-    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+        v = l1[0]
        self.backend.fillobj(v, ('url',))
        self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
    def test_categories(self):
        """Browse the category list, open the first one and resolve a video URL."""
        # At most 20 entries are pulled from each listing.
        category_listing = self.backend.iter_resources([BaseVideo], [u'vimeo-categories'])
        categories = list(itertools.islice(category_listing, 20))
        self.assertTrue(len(categories) > 0)

        first_category = categories[0]
        video_listing = self.backend.iter_resources([BaseVideo], first_category.split_path)
        videos = list(itertools.islice(video_listing, 20))
        self.assertTrue(len(videos) > 0)

        # The first video must be fillable with an http:// URL.
        video = videos[0]
        self.backend.fillobj(video, ('url',))
        self.assertTrue(video.url and video.url.startswith('http://'), 'URL for video "%s" not found: %s' % (video.id, video.url))