[vimeo] fix #1692: there is no latest category on Vimeo.

Allow browsing categories and channels.
2014-12-18 15:59:52 +01:00 · 2014-12-18 15:59:52 +01:00 · c31e95403e
commit c31e95403e
parent d012e45527
4 changed files with 96 additions and 19 deletions
--- a/modules/vimeo/browser.py
+++ b/modules/vimeo/browser.py
@ -18,9 +18,10 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

+
 from weboob.browser import PagesBrowser, URL
 from weboob.browser.exceptions import HTTPNotFound
-from .pages import SearchPage, VideoPage, VideoJsonPage
+from .pages import SearchPage, VideoPage, VideoJsonPage, CategoriesPage, ChannelsPage

 import urllib

@ -32,10 +33,14 @@ class VimeoBrowser(PagesBrowser):
    BASEURL = 'http://vimeo.com'

    search_page = URL(r'search/page:(?P<page>.*)/sort:(?P<sortby>.*)/format:thumbnail\?type=videos&q=(?P<pattern>.*)',
+                      r'channels/(?P<channel>.*)/videos/.*?',
+                      r'categories/(?P<category>.*)/videos/.*?',
                      SearchPage)

-    video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
+    categories_page = URL('categories', CategoriesPage)
+    channels_page = URL('channels', ChannelsPage)

+    video_url = URL(r'http://player.vimeo.com/video/(?P<_id>.*)/config', VideoJsonPage)
    video_page = URL('http://vimeo.com/(?P<_id>.*)', VideoPage)

    def get_video(self, _id, video=None):
@ -50,7 +55,14 @@ class VimeoBrowser(PagesBrowser):
                                   sortby=sortby,
                                   page=1).iter_videos()

-    # def latest_videos(self):
-    #     self.home()
-    #     assert self.is_on_page(IndexPage)
-    #     return self.page.iter_videos()
+    def get_categories(self):
+        return self.categories_page.go().iter_categories()
+
+    def get_channels(self):
+        return self.channels_page.go().iter_channels()
+
+    def get_channel_videos(self, channel):
+        return self.search_page.go(channel=channel).iter_videos()
+
+    def get_category_videos(self, category):
+        return self.search_page.go(category=category).iter_videos()
--- a/modules/vimeo/module.py
+++ b/modules/vimeo/module.py
@ -21,7 +21,7 @@

 from weboob.capabilities.video import CapVideo, BaseVideo
 from weboob.tools.backend import Module
-from weboob.capabilities.collection import CapCollection, CollectionNotFound
+from weboob.capabilities.collection import CapCollection, CollectionNotFound, Collection

 from .browser import VimeoBrowser

@ -66,16 +66,30 @@ class VimeoModule(Module, CapVideo, CapCollection):
        if BaseVideo in objs:
            collection = self.get_collection(objs, split_path)
            if collection.path_level == 0:
-                yield self.get_collection(objs, [u'latest'])
-            if collection.split_path == [u'latest']:
-                for video in self.browser.latest_videos():
-                    yield video
+                yield Collection([u'vimeo-categories'], u'Vimeo categories')
+                yield Collection([u'vimeo-channels'], u'Vimeo channels')
+
+            if collection.path_level == 1:
+                if collection.split_path == [u'vimeo-categories']:
+                    for category in self.browser.get_categories():
+                        yield category
+                if collection.split_path == [u'vimeo-channels']:
+                    for channel in self.browser.get_channels():
+                        yield channel
+
+            if collection.path_level == 2:
+                if collection.split_path[0] == u'vimeo-channels':
+                    for video in self.browser.get_channel_videos(collection.split_path[1]):
+                        yield video
+                if collection.split_path[0] == u'vimeo-categories':
+                    for video in self.browser.get_category_videos(collection.split_path[1]):
+                        yield video

    def validate_collection(self, objs, collection):
        if collection.path_level == 0:
            return
-        if BaseVideo in objs and collection.split_path == [u'latest']:
-            collection.title = u'Latest Vimeo videos'
+        if BaseVideo in objs and (collection.split_path[0] == u'vimeo-categories' or
+                                  collection.split_path[0] == u'vimeo-channels'):
            return
        raise CollectionNotFound(collection.split_path)

--- a/modules/vimeo/pages.py
+++ b/modules/vimeo/pages.py
@ -19,6 +19,7 @@
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.
 from weboob.capabilities.video import BaseVideo
 from weboob.capabilities.image import BaseImage
+from weboob.capabilities.collection import Collection

 from weboob.exceptions import ParseError
 from weboob.browser.elements import ItemElement, ListElement, method
@ -95,3 +96,41 @@ class VideoJsonPage(JsonPage):
            return data['request']['files'][codec][quality]['url']

        obj_ext = Regexp(Field('url'), '.*\.(.*?)\?.*')
+
+
+class CategoriesPage(HTMLPage):
+    @method
+    class iter_categories(ListElement):
+        item_xpath = '//div[@class="col_large"]/section/ul/li/a'
+
+        class item(ItemElement):
+            klass = Collection
+
+            obj_id = CleanText('./@href')
+            obj_title = CleanText('./h2')
+
+            def obj_split_path(self):
+                split_path = ['vimeo-categories']
+                category = CleanText('./@href', replace=[('/categories/', '')])(self)
+                split_path.append(category)
+                return split_path
+
+
+class ChannelsPage(HTMLPage):
+    @pagination
+    @method
+    class iter_channels(ListElement):
+        item_xpath = '//div[@id="browse_content"]/ol/li'
+        next_page = Link('//li[@class="pagination_next"]/a')
+
+        class item(ItemElement):
+            klass = Collection
+
+            obj_title = CleanText('div/a/div/p[@class="title"]')
+            obj_id = CleanText('./@id')
+
+            def obj_split_path(self):
+                split_path = ['vimeo-channels']
+                channel = CleanText('div/a/@href', replace=[('/channels/', '')])(self)
+                split_path.append(channel)
+                return split_path
--- a/modules/vimeo/test.py
+++ b/modules/vimeo/test.py
@ -18,6 +18,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with weboob. If not, see <http://www.gnu.org/licenses/>.

+from weboob.capabilities.video import BaseVideo
 from weboob.tools.test import BackendTest
 import itertools

@ -32,9 +33,20 @@ class VimeoTest(BackendTest):
        self.backend.fillobj(v, ('url',))
        self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))

-    # def test_latest(self):
-    #     l = list(self.backend.iter_resources([BaseVideo], [u'latest']))
-    #     self.assertTrue(len(l) > 0)
-    #     v = l[0]
-    #     self.backend.fillobj(v, ('url',))
-    #     self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+    def test_channels(self):
+        l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-channels']), 0, 20))
+        self.assertTrue(len(l) > 0)
+        l1 = list(itertools.islice(self.backend.iter_resources([BaseVideo], l[0].split_path), 0, 20))
+        self.assertTrue(len(l1) > 0)
+        v = l1[0]
+        self.backend.fillobj(v, ('url',))
+        self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))
+
+    def test_categories(self):
+        l = list(itertools.islice(self.backend.iter_resources([BaseVideo], [u'vimeo-categories']), 0, 20))
+        self.assertTrue(len(l) > 0)
+        l1 = list(itertools.islice(self.backend.iter_resources([BaseVideo], l[0].split_path), 0, 20))
+        self.assertTrue(len(l1) > 0)
+        v = l1[0]
+        self.backend.fillobj(v, ('url',))
+        self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url))