add rmll module
This commit is contained in:
parent
1216aaa965
commit
b84a8ba2e8
8 changed files with 406 additions and 0 deletions
7
modules/rmll/TODO
Normal file
7
modules/rmll/TODO
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
Add the following pseudo channels:
|
||||
most-viewed
|
||||
most-commented
|
||||
|
||||
count on latest
|
||||
|
||||
add API_KEY
|
||||
22
modules/rmll/__init__.py
Normal file
22
modules/rmll/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2015 Guilhem Bonnefille
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from .module import RmllModule
|
||||
|
||||
__all__ = ['RmllModule']
|
||||
67
modules/rmll/browser.py
Normal file
67
modules/rmll/browser.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2015 Guilhem Bonnefille
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.browser import PagesBrowser, URL
|
||||
|
||||
from .pages import RmllCollectionPage, RmllVideoPage, RmllChannelsPage, RmllSearchPage, RmllLatestPage
|
||||
|
||||
__all__ = ['RmllBrowser']
|
||||
|
||||
|
||||
class RmllBrowser(PagesBrowser):
    """Browser for the RMLL video site.

    Declares the URL patterns used by the module (HTML pages and the
    ``api/v2`` JSON endpoints) and offers helpers to fetch one video,
    search videos, list the latest items and walk the channel tree.
    """

    BASEURL = 'http://video.rmll.info'

    # Each URL pattern is bound to the page class that handles its response.
    index_page = URL(r'channels/content/(?P<id>.+)', RmllCollectionPage)
    latest_page = URL(r'api/v2/latest/', RmllLatestPage)
    video_page = URL(r'permalink/(?P<id>.+)/', RmllVideoPage)
    channels_page = URL(r'api/v2/channels/content/\?parent_oid=(?P<oid>.*)', RmllChannelsPage)
    search_page = URL(r'api/v2/search/\?search=(?P<pattern>.+)', RmllSearchPage)

    def __init__(self, *args, **kwargs):
        """Initialise the browser; all arguments are forwarded to PagesBrowser."""
        # NOTE(review): ``channels`` is never read in the code visible here.
        self.channels = None
        PagesBrowser.__init__(self, *args, **kwargs)

    @video_page.id2url
    def get_video(self, url, video=None):
        """Open the page of a video and return the parsed video object.

        :param url: location of the video page (the module's tests call this
            with either a bare id or a full permalink; the ``id2url``
            decorator presumably normalises both -- confirm)
        :param video: optional existing object handed to the page parser
        :return: whatever ``RmllVideoPage.get_video`` produces
        """
        self.location(url)
        assert self.video_page.is_here()
        return self.page.get_video(obj=video)

    def search_videos(self, pattern):
        """Run a search for *pattern* and return the resources of the result page."""
        self.location(self.search_page.build(pattern=pattern))
        return self.page.iter_resources()

    def get_latest_videos(self):
        """Return the resources listed by the ``latest`` endpoint."""
        self.location(self.latest_page.build())
        assert self.latest_page.is_here()
        return self.page.iter_resources()

    def get_channel_videos(self, split_path):
        """Yield the content (sub-channels and videos) of a channel.

        :param split_path: path components of the channel; the last one is
            used as the parent oid, and an empty path queries with an empty oid
        """
        parent_oid = split_path[-1] if split_path else ''
        self.location(self.channels_page.build(oid=parent_oid))
        assert self.channels_page.is_here()
        for resource in self.page.iter_resources(split_path):
            yield resource
|
||||
|
||||
BIN
modules/rmll/favicon.png
Normal file
BIN
modules/rmll/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.6 KiB |
73
modules/rmll/module.py
Normal file
73
modules/rmll/module.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2015 Guilhem Bonnefille
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from weboob.capabilities.video import CapVideo, BaseVideo
|
||||
from weboob.capabilities.collection import CapCollection, Collection
|
||||
from weboob.tools.backend import Module
|
||||
|
||||
from .browser import RmllBrowser
|
||||
from .video import RmllVideo
|
||||
|
||||
|
||||
__all__ = ['RmllModule']
|
||||
|
||||
|
||||
class RmllModule(Module, CapVideo, CapCollection):
    """weboob module giving access to the videos published by RMLL.

    Implements the video capability (get / search / fill) and the
    collection capability (browsing channels plus a fake ``latest`` one).
    """

    NAME = 'rmll'  # Name of the module
    MAINTAINER = u'Guyou'  # Name of the module maintainer
    EMAIL = 'guilhem.bonnefille@gmail.com'  # Email address of the maintainer
    VERSION = '1.1'  # Version of weboob
    DESCRIPTION = 'Videos from RMLL'  # Description of the module
    LICENSE = 'AGPLv3+'  # License of the module

    BROWSER = RmllBrowser

    def create_default_browser(self):
        """Return a browser built by ``create_browser`` without extra arguments."""
        return self.create_browser()

    def get_video(self, _id):
        """Return the video identified by *_id*, as fetched by the browser."""
        self.logger.debug("Getting video for %s", _id)
        return self.browser.get_video(_id)

    def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
        """Search videos matching *pattern*.

        ``sortby`` and ``nsfw`` are accepted but not used by this method.
        """
        return self.browser.search_videos(pattern)

    def fill_video(self, video, fields):
        """Complete *video* with the requested *fields* and return it.

        Unless only the thumbnail is requested, the whole video page is
        fetched again; the thumbnail data is downloaded when requested and a
        thumbnail is set on the video.
        """
        self.logger.debug("Fill video %s for fields %s", video.id, fields)

        if fields != ['thumbnail']:
            # When more than just the thumbnail is wanted, every field is
            # probably wanted as well: reload the full video.
            video = self.browser.get_video(video.id, video)

        if 'thumbnail' in fields and video and video.thumbnail:
            video.thumbnail.data = self.browser.open(video.thumbnail.url).content

        return video

    def iter_resources(self, objs, split_path):
        """Yield the collections and videos found under *split_path*.

        Nothing is yielded unless ``BaseVideo`` is among *objs*. At the root
        an extra fake ``latest`` collection is listed before the channels.
        """
        if BaseVideo not in objs:
            return

        if not split_path:
            # Add fake Collection
            yield Collection(['latest'], u'Latest')

        if len(split_path) == 1 and split_path[0] == 'latest':
            resources = self.browser.get_latest_videos()
        else:
            resources = self.browser.get_channel_videos(split_path)

        for resource in resources:
            yield resource

    OBJECTS = {RmllVideo: fill_video}
|
||||
141
modules/rmll/pages.py
Normal file
141
modules/rmll/pages.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from weboob.browser.pages import HTMLPage, JsonPage
|
||||
from weboob.browser.elements import ItemElement, ListElement, method
|
||||
from weboob.browser.filters.standard import Regexp, Format, CleanText, RegexpError, Duration, DateTime, Filter
|
||||
from weboob.browser.filters.html import Link, XPath, CleanHTML
|
||||
from weboob.browser.filters.json import Dict
|
||||
|
||||
from weboob.capabilities import NotAvailable, NotLoaded
|
||||
from weboob.capabilities.image import BaseImage
|
||||
from weboob.capabilities.collection import Collection
|
||||
|
||||
from .video import RmllVideo
|
||||
|
||||
BASE_URL = 'http://video.rmll.info'
|
||||
|
||||
class NormalizeThumbnail(Filter):
    """Filter turning a raw thumbnail reference into an absolute URL.

    Returns ``None`` when there is no usable thumbnail: either the input is
    empty/missing, or it designates the site's default video icon.
    """

    def filter(self, thumbnail):
        """Normalise *thumbnail*.

        :param thumbnail: thumbnail location as extracted from the page or
            the JSON metadata; may be relative to ``BASE_URL``
        :return: absolute URL, or ``None`` when no real thumbnail exists
        """
        # An empty value must not be prefixed: it would otherwise become the
        # bare BASE_URL (a bogus thumbnail), and None has no startswith().
        if not thumbnail:
            return None

        if not thumbnail.startswith('http'):
            thumbnail = BASE_URL + thumbnail

        if thumbnail == "http://rmll.ubicast.tv/statics/mediaserver/images/video_icon.png":
            # This is the site default: drop it, since any default picture
            # supplied by a frontend should be better.
            return None

        return thumbnail
|
||||
|
||||
|
||||
class RmllDuration(Duration):
    """Duration filter for texts such as ``1 h 2 m 3 s``.

    The hour and minute parts are optional in the pattern; the seconds part
    is mandatory.
    """

    _regexp = re.compile(r'((?P<hh>\d+) h )?((?P<mm>\d+) m )?(?P<ss>\d+) s')

    # Maps duration components to the names of the regexp groups above
    # (presumably consumed by the parent Duration filter -- confirm).
    kwargs = {
        'hours': 'hh',
        'minutes': 'mm',
        'seconds': 'ss',
    }
|
||||
|
||||
|
||||
def create_video(metadata):
    """Build a :class:`RmllVideo` from one JSON metadata entry.

    :param metadata: mapping describing a video; the keys ``oid``, ``title``
        and ``thumb`` are read directly, ``creation`` and ``duration`` are
        read through filters that fall back to ``NotLoaded``
    :return: the video, with ``url`` left as ``NotLoaded``
    """
    video = RmllVideo(metadata['oid'])

    video.title = unicode(metadata['title'])
    video.date = DateTime(Dict('creation'), default=NotLoaded)(metadata)
    video.duration = RmllDuration(Dict('duration', default=''), default=NotLoaded)(metadata)

    # NormalizeThumbnail returns None for the site's default icon: do not
    # wrap that in a BaseImage, whose id/url would then be None.
    thumbnail_url = NormalizeThumbnail(Dict('thumb'))(metadata)
    if thumbnail_url:
        video.thumbnail = BaseImage(thumbnail_url)
        video.thumbnail.url = video.thumbnail.id
    else:
        video.thumbnail = NotAvailable

    video.url = NotLoaded

    return video
|
||||
|
||||
class RmllVideoPage(HTMLPage):
    """HTML page of a single video (``permalink/<id>/``)."""

    @method
    class get_video(ItemElement):
        """Extract a :class:`RmllVideo` from the page, mostly from <head> metadata."""

        klass = RmllVideo

        # The id is the last path component of the og:url permalink.
        obj_id = (
            CleanHTML('/html/head/meta[@property="og:url"]/@content')
            & CleanText()
            & Regexp(pattern=r'.*/permalink/(.+)/$')
        )
        obj_title = Format(
            u'%s',
            CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText(),
        )
        obj_description = Format(
            u'%s',
            CleanHTML('/html/head/meta[@property="og:description"]/@content') & CleanText(),
        )

        def obj_thumbnail(self):
            """Return the og:image as a BaseImage, or None when unusable."""
            image_url = NormalizeThumbnail(CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
            if not image_url:
                return None
            image = BaseImage(image_url)
            image.url = image.id
            return image

        # The duration is read from the ``media_duration`` value found in the
        # inline script of the page head.
        obj_duration = (
            CleanText('/html/head/script[not(@src)]')
            & Regexp(pattern=r'media_duration: ([^,.]+),?.*,', default='')
            & Duration(default=NotAvailable)
        )

        def obj_url(self):
            """Return the first shared link ending in ``mp4`` or ``webm``.

            Returns None (implicitly) when no such link exists.
            """
            hrefs = XPath('//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href')(self)
            for href in hrefs:
                extension = str(href).split('.')[-1]
                self.logger.debug("Link:%s Ext:%s", href, extension)
                if extension in ('mp4', 'webm'):
                    return unicode(href)
|
||||
|
||||
class RmllCollectionPage(HTMLPage):
    """HTML page listing the content of a channel (``channels/content/<id>``)."""

    @method
    class iter_videos(ListElement):
        """Iterate over the video entries of the listing."""

        item_xpath = '//div[@class="item-entry type-video " or @class="item-entry type-vod "]'

        class item(ItemElement):
            """One listing entry, turned into a partially loaded RmllVideo."""

            klass = RmllVideo

            obj_id = Link('a') & Regexp(pattern=r'.*/videos/(.+)/$')
            obj_title = Format(
                u'%s',
                CleanHTML('a/span/span/span[@class="item-entry-title"]') & CleanText(),
            )
            # The media URL is only known from the video page itself.
            obj_url = NotLoaded
            # TODO: the creation date is not extracted yet; it appears to live
            # in a/span/span/span[@class="item-entry-creation"].

            obj_duration = (
                CleanText('a/span/span/span[@class="item-entry-duration"]')
                & RmllDuration()
            )

            def obj_thumbnail(self):
                """Return the preview image as a BaseImage, or None when unusable."""
                image_url = NormalizeThumbnail(CleanText('a/span[@class="item-entry-preview"]/img/@src'))(self)
                if not image_url:
                    return None
                image = BaseImage(image_url)
                image.url = image.id
                return image
|
||||
|
||||
class RmllChannelsPage(JsonPage):
    """JSON answer of ``api/v2/channels/content/``."""

    def iter_resources(self, split_path):
        """Yield the sub-channels, then the videos, described by the document.

        :param split_path: path of the channel being listed; each sub-channel
            is yielded as a Collection whose path is *split_path* extended
            with the sub-channel oid
        """
        doc = self.doc

        if 'channels' in doc:
            for channel in doc['channels']:
                child_path = split_path + [channel['oid']]
                yield Collection(child_path, channel['title'])

        if 'videos' in doc:
            for entry in doc['videos']:
                yield create_video(entry)
|
||||
|
||||
class RmllLatestPage(JsonPage):
    """JSON answer of ``api/v2/latest/``."""

    def iter_resources(self):
        """Yield a Collection for each ``'c'`` item and a video for each ``'v'`` item.

        Items of any other type are skipped.
        """
        for entry in self.doc['items']:
            kind = entry['type']
            if kind == 'c':
                yield Collection([entry['oid']], entry['title'])
            elif kind == 'v':
                yield create_video(entry)
|
||||
|
||||
class RmllSearchPage(JsonPage):
    """JSON answer of ``api/v2/search/``."""

    def iter_resources(self):
        """Yield one video per entry of the ``videos`` list of the document."""
        for entry in self.doc['videos']:
            yield create_video(entry)
|
||||
65
modules/rmll/test.py
Normal file
65
modules/rmll/test.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2015 Guilhem Bonnefille
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.test import BackendTest
|
||||
from weboob.capabilities.video import BaseVideo
|
||||
|
||||
|
||||
class RmllTest(BackendTest):
    """Tests of the rmll module (searching, video page, filling, browsing).

    Iterables returned by the backend are materialised with ``list()``
    before being checked: asserting the truth of a generator always
    succeeds, so it would not detect an empty result.
    """

    MODULE = 'rmll'

    def test_video_search(self):
        """Searching must return at least one video, each with an id."""
        videos = list(self.backend.search_videos('test'))
        self.assertTrue(videos)
        for video in videos:
            self.assertTrue(video.id, 'ID for video not found')

    def test_video_page(self):
        """A video can be fetched from its id as well as from its permalink."""
        for slug in ["v124f0bc409e704d92cf", "http://video.rmll.info/permalink/v124f0bc409e704d92cf/"]:
            video = self.backend.browser.get_video(slug)
            self.assertTrue(video.id, 'ID for video not found')
            self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
            self.assertTrue(video.thumbnail, 'Thumbnail for video "%s" not found' % (video.id))
            self.assertTrue(video.title, 'Title for video "%s" not found' % (video.id))
            # The description is deliberately not checked here.
            self.assertTrue(video.duration, 'Duration for video "%s" not found' % (video.id))

    def test_video_fill(self):
        """Filling the ``url`` field must leave a video with a URL."""
        slug = "v124f0bc409e704d92cf"
        video = self.backend.browser.get_video(slug)
        video = self.backend.fill_video(video, ["url"])
        self.assertTrue(video)
        self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))

    def test_browse(self):
        """Both the root and the ``latest`` collection must list resources."""
        for path in [[], ['latest']]:
            videos = list(self.backend.iter_resources([BaseVideo], path))
            self.assertTrue(videos)
            for video in videos:
                self.assertTrue(video.id, 'ID for video not found')

    def test_missing_duration(self):
        """Fully filling every video of a search must not fail.

        Judging by its name, this targets videos that lack a duration.
        """
        videos = list(self.backend.search_videos('weboob'))
        self.assertTrue(videos)
        for video in videos:
            self.assertTrue(video.id, 'ID for video not found')
            self.backend.fill_video(video, ["$full"])
|
||||
|
||||
31
modules/rmll/video.py
Normal file
31
modules/rmll/video.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Christophe Benz
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.capabilities.video import BaseVideo
|
||||
|
||||
|
||||
class RmllVideo(BaseVideo):
    """Video hosted on video.rmll.info."""

    @classmethod
    def id2url(cls, _id):
        """Return the permalink of a video.

        :param _id: a video id, or something already starting with ``http``,
            which is returned untouched
        """
        if _id.startswith('http'):
            return _id
        return 'http://video.rmll.info/permalink/%s/' % (_id)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue