add rmll module

This commit is contained in:
Guilhem Bonnefille 2015-01-15 23:27:52 +01:00 committed by Romain Bignon
commit b84a8ba2e8
8 changed files with 406 additions and 0 deletions

7
modules/rmll/TODO Normal file
View file

@ -0,0 +1,7 @@
Add the following pseudo channels:
most-viewed
most-commented
count on latest
add API_KEY

22
modules/rmll/__init__.py Normal file
View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2015 Guilhem Bonnefille
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .module import RmllModule
__all__ = ['RmllModule']

67
modules/rmll/browser.py Normal file
View file

@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2015 Guilhem Bonnefille
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.browser import PagesBrowser, URL
from .pages import RmllCollectionPage, RmllVideoPage, RmllChannelsPage, RmllSearchPage, RmllLatestPage
__all__ = ['RmllBrowser']
class RmllBrowser(PagesBrowser):
    """Browser for http://video.rmll.info.

    Each ``URL`` attribute below binds a URL pattern (relative to
    ``BASEURL``) to the page class used to parse it.
    """

    BASEURL = 'http://video.rmll.info'

    # NOTE: the declaration order is kept as is; it may matter for URL matching.
    index_page = URL(r'channels/content/(?P<id>.+)', RmllCollectionPage)
    latest_page = URL(r'api/v2/latest/', RmllLatestPage)
    video_page = URL(r'permalink/(?P<id>.+)/', RmllVideoPage)
    channels_page = URL(r'api/v2/channels/content/\?parent_oid=(?P<oid>.*)', RmllChannelsPage)
    search_page = URL(r'api/v2/search/\?search=(?P<pattern>.+)', RmllSearchPage)

    def __init__(self, *args, **kwargs):
        """Initialise ``channels`` to None, then delegate to PagesBrowser."""
        self.channels = None
        PagesBrowser.__init__(self, *args, **kwargs)

    @video_page.id2url
    def get_video(self, url, video=None):
        """Open ``url`` and return the video parsed from the video page.

        ``video`` is forwarded to the page as the object to fill.
        The ``id2url`` decorator presumably lets callers pass a bare id
        instead of a full URL -- TODO confirm against weboob's ``URL.id2url``.
        """
        self.location(url)
        assert self.video_page.is_here()
        return self.page.get_video(obj=video)

    def search_videos(self, pattern):
        """Query the search endpoint for ``pattern`` and return the page's resources."""
        self.location(self.search_page.build(pattern=pattern))
        return self.page.iter_resources()

    def get_latest_videos(self):
        """Load the "latest" endpoint and return the page's resources."""
        self.location(self.latest_page.build())
        assert self.latest_page.is_here()
        return self.page.iter_resources()

    def get_channel_videos(self, split_path):
        """Yield the resources of the channel designated by ``split_path``.

        The last path element is used as the parent ``oid``; an empty path
        sends an empty ``oid``.
        """
        parent_oid = split_path[-1] if len(split_path) else ''
        self.location(self.channels_page.build(oid=parent_oid))
        assert self.channels_page.is_here()
        for resource in self.page.iter_resources(split_path):
            yield resource

BIN
modules/rmll/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.6 KiB

73
modules/rmll/module.py Normal file
View file

@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2015 Guilhem Bonnefille
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.capabilities.collection import CapCollection, Collection
from weboob.tools.backend import Module
from .browser import RmllBrowser
from .video import RmllVideo
__all__ = ['RmllModule']
class RmllModule(Module, CapVideo, CapCollection):
    """Weboob module giving access to the RMLL videos.

    Implements the video capability (get/search/fill) and the collection
    capability (browsing by path).
    """

    # Module metadata.
    NAME = 'rmll'                           # module name
    MAINTAINER = u'Guyou'                   # maintainer name
    EMAIL = 'guilhem.bonnefille@gmail.com'  # maintainer e-mail address
    VERSION = '1.1'                         # weboob version
    DESCRIPTION = 'Videos from RMLL'        # module description
    LICENSE = 'AGPLv3+'                     # module license

    BROWSER = RmllBrowser

    def create_default_browser(self):
        """Return a browser built by ``create_browser`` without extra arguments."""
        return self.create_browser()

    def get_video(self, _id):
        """Return the video identified by ``_id``, as fetched by the browser."""
        self.logger.debug("Getting video for %s", _id)
        return self.browser.get_video(_id)

    def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
        """Return the videos matching ``pattern``.

        ``sortby`` and ``nsfw`` are accepted but not forwarded to the browser.
        """
        return self.browser.search_videos(pattern)

    def fill_video(self, video, fields):
        """Complete ``video`` for the requested ``fields`` and return it.

        Unless only the thumbnail is requested, the whole video is reloaded
        from its page. The thumbnail data is downloaded when it is requested
        and the video has a thumbnail.
        """
        self.logger.debug("Fill video %s for fields %s", video.id, fields)

        if fields != ['thumbnail']:
            # More than just the thumbnail is wanted: reload every field.
            video = self.browser.get_video(video.id, video)

        if 'thumbnail' in fields and video and video.thumbnail:
            response = self.browser.open(video.thumbnail.url)
            video.thumbnail.data = response.content

        return video

    def iter_resources(self, objs, split_path):
        """Yield the collections and videos found under ``split_path``.

        Nothing is yielded when ``BaseVideo`` is not among ``objs``.
        """
        if BaseVideo not in objs:
            return

        depth = len(split_path)
        if depth == 0:
            # Fake collection exposing the latest videos at the root.
            yield Collection(['latest'], u'Latest')

        if depth == 1 and split_path[0] == 'latest':
            resources = self.browser.get_latest_videos()
        else:
            resources = self.browser.get_channel_videos(split_path)

        for resource in resources:
            yield resource

    OBJECTS = {RmllVideo: fill_video}

141
modules/rmll/pages.py Normal file
View file

@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
import datetime
import re
from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import Regexp, Format, CleanText, RegexpError, Duration, DateTime, Filter
from weboob.browser.filters.html import Link, XPath, CleanHTML
from weboob.browser.filters.json import Dict
from weboob.capabilities import NotAvailable, NotLoaded
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection
from .video import RmllVideo
# Root of the site, used to make relative thumbnail paths absolute.
BASE_URL = 'http://video.rmll.info'


class NormalizeThumbnail(Filter):
    """Filter turning a raw thumbnail reference into an absolute URL.

    The result is ``None`` when there is no usable thumbnail: either the
    input is empty, or it designates the site's default placeholder icon.
    """

    def filter(self, thumbnail):
        """Return the absolute thumbnail URL, or ``None`` if there is none.

        :param thumbnail: absolute or site-relative thumbnail reference
        """
        if not thumbnail:
            # An empty value would otherwise be turned into the bare
            # BASE_URL, which is not an image; report "no thumbnail".
            return None
        if not thumbnail.startswith('http'):
            thumbnail = BASE_URL + thumbnail
        if thumbnail == "http://rmll.ubicast.tv/statics/mediaserver/images/video_icon.png":
            # This is the default: remove it as any frontend default should be better
            return None
        return thumbnail
class RmllDuration(Duration):
    """Duration filter for texts shaped like ``1 h 2 m 3 s``.

    In the pattern the hour and minute parts are optional and the second
    part is mandatory.
    """

    # Presumably maps timedelta keyword names to the regexp group names
    # consumed by the parent ``Duration`` filter -- TODO confirm.
    kwargs = {
        'hours': 'hh',
        'minutes': 'mm',
        'seconds': 'ss',
    }
    _regexp = re.compile(r'((?P<hh>\d+) h )?((?P<mm>\d+) m )?(?P<ss>\d+) s')
def create_video(metadata):
    """Build a partially loaded ``RmllVideo`` from one JSON metadata record.

    :param metadata: mapping read from the JSON API; the keys ``oid``,
        ``title``, ``creation``, ``duration`` and ``thumb`` are used
    :return: the new video; its ``url`` is left as ``NotLoaded``

    The thumbnail is only attached when a usable URL is available, so that
    no image object with a ``None`` URL is ever handed to callers (they
    would otherwise try to download it).
    """
    video = RmllVideo(metadata['oid'])
    video.title = unicode(metadata['title'])
    video.date = DateTime(Dict('creation'), default=NotLoaded)(metadata)
    video.duration = RmllDuration(Dict('duration', default=''), default=NotLoaded)(metadata)

    thumbnail_url = NormalizeThumbnail(Dict('thumb'))(metadata)
    if thumbnail_url:
        # The image id doubles as its URL.
        video.thumbnail = BaseImage(thumbnail_url)
        video.thumbnail.url = video.thumbnail.id

    video.url = NotLoaded
    return video
class RmllVideoPage(HTMLPage):
    """HTML permalink page describing a single video."""

    @method
    class get_video(ItemElement):
        # Fills an RmllVideo, mostly from the <meta> tags of the page head.
        klass = RmllVideo

        obj_id = CleanHTML('/html/head/meta[@property="og:url"]/@content') & CleanText() & Regexp(pattern=r'.*/permalink/(.+)/$')
        obj_title = Format(u'%s', CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText())
        obj_description = Format(u'%s', CleanHTML('/html/head/meta[@property="og:description"]/@content') & CleanText())

        def obj_thumbnail(self):
            """Return the ``og:image`` thumbnail, or None when there is no usable one."""
            url = NormalizeThumbnail(CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
            if url:
                thumbnail = BaseImage(url)
                # The image id doubles as its URL.
                thumbnail.url = thumbnail.id
                return thumbnail

        # The duration is read from the inline script of the page head.
        obj_duration = CleanText('/html/head/script[not(@src)]') & Regexp(pattern=r'media_duration: ([^,.]+),?.*,', default='') & Duration(default=NotAvailable)

        def obj_url(self):
            """Return the first shared link ending in ``mp4`` or ``webm``.

            None is returned implicitly when no such link exists.
            """
            links = XPath('//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href')(self)
            for link in links:
                # Stay in unicode: str() would raise UnicodeEncodeError on a
                # non-ASCII href under Python 2.
                link = unicode(link)
                ext = link.rsplit(u'.', 1)[-1]
                self.logger.debug("Link:%s Ext:%s", link, ext)
                if ext in ('mp4', 'webm'):
                    return link
class RmllCollectionPage(HTMLPage):
    """HTML channel page listing video entries."""

    @method
    class iter_videos(ListElement):
        # One item per "video" or "vod" entry of the listing.
        item_xpath = '//div[@class="item-entry type-video " or @class="item-entry type-vod "]'

        class item(ItemElement):
            klass = RmllVideo

            obj_id = Link('a') & Regexp(pattern=r'.*/videos/(.+)/$')
            obj_title = Format(
                u'%s',
                CleanHTML('a/span/span/span[@class="item-entry-title"]') & CleanText(),
            )
            # The media URL is not available from the listing.
            obj_url = NotLoaded
            # The creation date (span "item-entry-creation") is not extracted.
            obj_duration = CleanText('a/span/span/span[@class="item-entry-duration"]') & RmllDuration()

            def obj_thumbnail(self):
                """Return the preview image of the entry, or the falsy filter result."""
                source = NormalizeThumbnail(
                    CleanText('a/span[@class="item-entry-preview"]/img/@src')
                )(self)
                if not source:
                    return source
                image = BaseImage(source)
                # The image id doubles as its URL.
                image.url = image.id
                return image
class RmllChannelsPage(JsonPage):
    """JSON answer of the channel content API."""

    def iter_resources(self, split_path):
        """Yield the sub-channels as collections, then the videos.

        Each collection path is ``split_path`` extended with the channel oid.
        Either section is skipped when its key is absent from the document.
        """
        if 'channels' in self.doc:
            for channel in self.doc['channels']:
                yield Collection(split_path + [channel['oid']], channel['title'])

        if 'videos' in self.doc:
            for entry in self.doc['videos']:
                yield create_video(entry)
class RmllLatestPage(JsonPage):
    """JSON answer of the "latest" API."""

    def iter_resources(self):
        """Yield a collection for each item of type 'c' and a video for each of type 'v'.

        Items of any other type are ignored.
        """
        for entry in self.doc['items']:
            kind = entry['type']
            if kind == 'c':
                yield Collection([entry['oid']], entry['title'])
            elif kind == 'v':
                yield create_video(entry)
class RmllSearchPage(JsonPage):
    """JSON answer of the search API."""

    def iter_resources(self):
        """Yield one video per entry of the document's ``videos`` list."""
        for entry in self.doc['videos']:
            yield create_video(entry)

65
modules/rmll/test.py Normal file
View file

@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2015 Guilhem Bonnefille
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from weboob.capabilities.video import BaseVideo
class RmllTest(BackendTest):
    """Tests of the rmll backend, run through weboob's ``BackendTest``.

    Results are materialised into lists before being checked for
    non-emptiness: the backend returns generators, which are always truthy,
    so asserting on them directly can never fail.
    """

    MODULE = 'rmll'

    def test_video_search(self):
        """A search must return at least one video, each with an id."""
        videos = list(self.backend.search_videos('test'))
        self.assertTrue(videos)
        for video in videos:
            self.assertTrue(video.id, 'ID for video not found')

    def test_video_page(self):
        """A video can be fetched both by bare id and by permalink URL."""
        for slug in ["v124f0bc409e704d92cf", "http://video.rmll.info/permalink/v124f0bc409e704d92cf/"]:
            video = self.backend.browser.get_video(slug)
            self.assertTrue(video.id, 'ID for video not found')
            self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
            self.assertTrue(video.thumbnail, 'Thumbnail for video "%s" not found' % (video.id))
            self.assertTrue(video.title, 'Title for video "%s" not found' % (video.id))
            # NOTE: the description is not checked (its assertion was disabled).
            self.assertTrue(video.duration, 'Duration for video "%s" not found' % (video.id))

    def test_video_fill(self):
        """Filling the ``url`` field must leave the video with a URL."""
        slug = "v124f0bc409e704d92cf"
        video = self.backend.browser.get_video(slug)
        video = self.backend.fill_video(video, ["url"])
        self.assertTrue(video)
        self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))

    def test_browse(self):
        """Browsing the root and the ``latest`` path must return resources with an id."""
        for path in [[], ['latest']]:
            videos = list(self.backend.iter_resources([BaseVideo], path))
            self.assertTrue(videos)
            for video in videos:
                self.assertTrue(video.id, 'ID for video not found')

    def test_missing_duration(self):
        """Fully filling every search result must not raise."""
        videos = list(self.backend.search_videos('weboob'))
        self.assertTrue(videos)
        for video in videos:
            self.assertTrue(video.id, 'ID for video not found')
            self.backend.fill_video(video, ["$full"])

31
modules/rmll/video.py Normal file
View file

@ -0,0 +1,31 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
class RmllVideo(BaseVideo):
    """Video published on video.rmll.info."""

    @classmethod
    def id2url(cls, _id):
        """Return the permalink URL for ``_id``.

        A value already starting with ``http`` is returned unchanged.
        """
        if not _id.startswith('http'):
            return 'http://video.rmll.info/permalink/%s/' % _id
        return _id