add rmll module

2015-01-15 23:27:52 +01:00 · 2015-01-15 23:27:52 +01:00 · b84a8ba2e8
commit b84a8ba2e8
parent 1216aaa965
8 changed files with 406 additions and 0 deletions
--- a/modules/rmll/TODO
+++ b/modules/rmll/TODO
@ -0,0 +1,7 @@
+Add the following pseudo channels:
+ most-viewed
+ most-commented
+
+count on latest
+
+add API_KEY
--- a/modules/rmll/init.py
+++ b/modules/rmll/init.py
@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .module import RmllModule
+
+__all__ = ['RmllModule']
--- a/modules/rmll/browser.py
+++ b/modules/rmll/browser.py
@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.browser import PagesBrowser, URL
+
+from .pages import RmllCollectionPage, RmllVideoPage, RmllChannelsPage, RmllSearchPage, RmllLatestPage
+
+__all__ = ['RmllBrowser']
+
+
+class RmllBrowser(PagesBrowser):
+    BASEURL = 'http://video.rmll.info'
+
+    index_page = URL(r'channels/content/(?P<id>.+)', RmllCollectionPage)
+    latest_page = URL(r'api/v2/latest/', RmllLatestPage)
+    video_page = URL(r'permalink/(?P<id>.+)/', RmllVideoPage)
+    channels_page = URL(r'api/v2/channels/content/\?parent_oid=(?P<oid>.*)', RmllChannelsPage)
+    search_page = URL(r'api/v2/search/\?search=(?P<pattern>.+)', RmllSearchPage)
+
+    def __init__(self, *args, **kwargs):
+        self.channels = None
+        PagesBrowser.__init__(self, *args, **kwargs)
+
+    @video_page.id2url
+    def get_video(self, url, video=None):
+        self.location(url)
+        assert self.video_page.is_here()
+        video = self.page.get_video(obj=video)
+        return video
+
+    def search_videos(self, pattern):
+        url = self.search_page.build(pattern=pattern)
+        self.location(url)
+        return self.page.iter_resources()
+
+    def get_latest_videos(self):
+        url = self.latest_page.build()
+        self.location(url)
+        assert self.latest_page.is_here()
+        return self.page.iter_resources()
+
+    def get_channel_videos(self, split_path):
+        oid = ''
+        if len(split_path) > 0:
+            oid = split_path[-1]
+        url = self.channels_page.build(oid=oid)
+        self.location(url)
+        assert self.channels_page.is_here()
+        for video in self.page.iter_resources(split_path):
+            yield video
+
--- a/modules/rmll/favicon.png
+++ b/modules/rmll/favicon.png
--- a/modules/rmll/module.py
+++ b/modules/rmll/module.py
@ -0,0 +1,73 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.video import CapVideo, BaseVideo
+from weboob.capabilities.collection import CapCollection, Collection
+from weboob.tools.backend import Module
+
+from .browser import RmllBrowser
+from .video import RmllVideo
+
+
+__all__ = ['RmllModule']
+
+
+class RmllModule(Module, CapVideo, CapCollection):
+    NAME = 'rmll'                          # The name of module
+    MAINTAINER = u'Guyou'                  # Name of maintainer of this module
+    EMAIL = 'guilhem.bonnefille@gmail.com' # Email address of the maintainer
+    VERSION = '1.1'                        # Version of weboob
+    DESCRIPTION = 'Videos from RMLL'       # Description of your module
+    LICENSE = 'AGPLv3+'                    # License of your module
+
+    BROWSER = RmllBrowser
+
+    def create_default_browser(self):
+        return self.create_browser()
+
+    def get_video(self, _id):
+        self.logger.debug("Getting video for %s", _id)
+        return self.browser.get_video(_id)
+
+    def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
+        return self.browser.search_videos(pattern)
+
+    def fill_video(self, video, fields):
+        self.logger.debug("Fill video %s for fields %s", video.id, fields)
+        if fields != ['thumbnail']:
+            # if we don't want only the thumbnail, we probably want also every fields
+            video = self.browser.get_video(video.id, video)
+        if 'thumbnail' in fields and video and video.thumbnail:
+            video.thumbnail.data = self.browser.open(video.thumbnail.url).content
+
+        return video
+
+    def iter_resources(self, objs, split_path):
+        if BaseVideo in objs:
+            if len(split_path) == 0:
+                # Add fake Collection
+                yield Collection(['latest'], u'Latest')
+            if len(split_path) == 1 and split_path[0] == 'latest':
+                for video in self.browser.get_latest_videos():
+                    yield video
+            else:
+                for content in self.browser.get_channel_videos(split_path):
+                    yield content
+
+    OBJECTS = {RmllVideo: fill_video}
--- a/modules/rmll/pages.py
+++ b/modules/rmll/pages.py
@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import re
+
+from weboob.browser.pages import HTMLPage, JsonPage
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.browser.filters.standard import Regexp, Format, CleanText, RegexpError, Duration, DateTime, Filter
+from weboob.browser.filters.html import Link, XPath, CleanHTML
+from weboob.browser.filters.json import Dict
+
+from weboob.capabilities import NotAvailable, NotLoaded
+from weboob.capabilities.image import BaseImage
+from weboob.capabilities.collection import Collection
+
+from .video import RmllVideo
+
+# Site root; NormalizeThumbnail prepends it to thumbnail paths that do not start with 'http'.
+BASE_URL = 'http://video.rmll.info'
+
+class NormalizeThumbnail(Filter):
+    def filter(self, thumbnail):
+        if not thumbnail.startswith('http'):
+            thumbnail = BASE_URL + thumbnail
+        if thumbnail == "http://rmll.ubicast.tv/statics/mediaserver/images/video_icon.png":
+            # This is the default: remove it as any frontend default should be better
+            thumbnail = None
+        return thumbnail
+
+
+class RmllDuration(Duration):
+    _regexp = re.compile(r'((?P<hh>\d+) h )?((?P<mm>\d+) m )?(?P<ss>\d+) s')
+    kwargs = {'hours': 'hh', 'minutes': 'mm', 'seconds': 'ss'}
+
+
+def create_video(metadata):
+    video = RmllVideo(metadata['oid'])
+
+    video.title = unicode(metadata['title'])
+    video.date = DateTime(Dict('creation'), default=NotLoaded)(metadata)
+    video.duration = RmllDuration(Dict('duration', default=''), default=NotLoaded)(metadata)
+    thumbnail = NormalizeThumbnail(Dict('thumb'))(metadata)
+    video.thumbnail = BaseImage(thumbnail)
+    video.thumbnail.url = video.thumbnail.id
+    video.url = NotLoaded
+
+    return video
+
+class RmllVideoPage(HTMLPage):
+    @method
+    class get_video(ItemElement):
+        klass = RmllVideo
+
+        obj_id = CleanHTML('/html/head/meta[@property="og:url"]/@content') & CleanText() & Regexp(pattern=r'.*/permalink/(.+)/$')
+        obj_title = Format(u'%s', CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText())
+        obj_description = Format(u'%s', CleanHTML('/html/head/meta[@property="og:description"]/@content') & CleanText())
+
+        def obj_thumbnail(self):
+            url = NormalizeThumbnail(CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
+            if url:
+                thumbnail = BaseImage(url)
+                thumbnail.url = thumbnail.id
+                return thumbnail
+
+        obj_duration = CleanText('/html/head/script[not(@src)]') & Regexp(pattern=r'media_duration: ([^,.]+),?.*,', default='') & Duration(default=NotAvailable)
+
+        def obj_url(self):
+            links = XPath('//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href')(self)
+            for link in links:
+                ext = str(link).split('.')[-1]
+                self.logger.debug("Link:%s Ext:%s", link, ext)
+                if ext in ['mp4', 'webm']:
+                    return unicode(link)
+
+class RmllCollectionPage(HTMLPage):
+
+    @method
+    class iter_videos(ListElement):
+        item_xpath = '//div[@class="item-entry type-video " or @class="item-entry type-vod "]'
+
+        class item(ItemElement):
+            klass = RmllVideo
+
+            obj_id = Link('a') & Regexp(pattern=r'.*/videos/(.+)/$')
+            obj_title = Format(u'%s', CleanHTML('a/span/span/span[@class="item-entry-title"]') & CleanText())
+            obj_url = NotLoaded
+            #obj_date = XPath('a/span/span/span[@class="item-entry-creation"]')
+
+            obj_duration = CleanText('a/span/span/span[@class="item-entry-duration"]') & RmllDuration()
+
+            def obj_thumbnail(self):
+                thumbnail = NormalizeThumbnail(CleanText('a/span[@class="item-entry-preview"]/img/@src'))(self)
+                if thumbnail:
+                    thumbnail = BaseImage(thumbnail)
+                    thumbnail.url = thumbnail.id
+                    return thumbnail
+
+class RmllChannelsPage(JsonPage):
+    def iter_resources(self, split_path):
+        if 'channels' in self.doc:
+            for metadata in self.doc['channels']:
+                collection = Collection(split_path+[metadata['oid']], metadata['title'])
+                yield collection
+
+        if 'videos' in self.doc:
+            for metadata in self.doc['videos']:
+                video = create_video(metadata)
+                yield video
+
+class RmllLatestPage(JsonPage):
+    def iter_resources(self):
+        for metadata in self.doc['items']:
+            if metadata['type'] == 'c':
+                collection = Collection([metadata['oid']], metadata['title'])
+                yield collection
+
+            if metadata['type'] == 'v':
+                video = create_video(metadata)
+                yield video
+
+class RmllSearchPage(JsonPage):
+    def iter_resources(self):
+        for metadata in self.doc['videos']:
+            video = create_video(metadata)
+            yield video
--- a/modules/rmll/test.py
+++ b/modules/rmll/test.py
@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+from weboob.capabilities.video import BaseVideo
+
+
+class RmllTest(BackendTest):
+    MODULE = 'rmll'
+
+    def test_video_search(self):
+        videos = self.backend.search_videos('test')
+        self.assertTrue(videos)
+        for video in videos:
+            self.assertTrue(video.id, 'ID for video not found')
+
+    def test_video_page(self):
+        for slug in ["v124f0bc409e704d92cf", "http://video.rmll.info/permalink/v124f0bc409e704d92cf/"]:
+            video = self.backend.browser.get_video(slug)
+            self.assertTrue(video.id, 'ID for video not found')
+            self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
+            self.assertTrue(video.thumbnail, 'Thumbnail for video "%s" not found' % (video.id))
+            self.assertTrue(video.title, 'Title for video "%s" not found' % (video.id))
+            #self.assertTrue(video.description, 'Description for video "%s" not found' % (video.id))
+            self.assertTrue(video.duration, 'Duration for video "%s" not found' % (video.id))
+            #help(video)
+
+    def test_video_fill(self):
+        slug = "v124f0bc409e704d92cf"
+        video = self.backend.browser.get_video(slug)
+        video = self.backend.fill_video(video, ["url"])
+        self.assertTrue(video)
+        self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
+
+    def test_browse(self):
+        for path in [[], ['latest']]:
+            videos = self.backend.iter_resources([BaseVideo],path)
+            self.assertTrue(videos)
+            for video in videos:
+                self.assertTrue(video.id, 'ID for video not found')
+
+    def test_missing_duration(self):
+        videos = self.backend.search_videos('weboob')
+        self.assertTrue(videos)
+        for video in videos:
+            self.assertTrue(video.id, 'ID for video not found')
+            video = self.backend.fill_video(video, ["$full"])
+
--- a/modules/rmll/video.py
+++ b/modules/rmll/video.py
@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Christophe Benz
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+
+class RmllVideo(BaseVideo):
+    @classmethod
+    def id2url(cls, _id):
+        if _id.startswith('http'):
+            return _id
+        else:
+            return 'http://video.rmll.info/permalink/%s/' % (_id)
+