diff --git a/modules/rmll/TODO b/modules/rmll/TODO
new file mode 100644
index 00000000..5b2b12e1
--- /dev/null
+++ b/modules/rmll/TODO
@@ -0,0 +1,7 @@
+Add the following pseudo channels:
+    most-viewed
+    most-commented
+
+count on latest
+
+add API_KEY
diff --git a/modules/rmll/__init__.py b/modules/rmll/__init__.py
new file mode 100644
index 00000000..55777ddc
--- /dev/null
+++ b/modules/rmll/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .module import RmllModule
+
+__all__ = ['RmllModule']
diff --git a/modules/rmll/browser.py b/modules/rmll/browser.py
new file mode 100644
index 00000000..5478cbc6
--- /dev/null
+++ b/modules/rmll/browser.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.browser import PagesBrowser, URL
+
+from .pages import RmllCollectionPage, RmllVideoPage, RmllChannelsPage, RmllSearchPage, RmllLatestPage
+
+__all__ = ['RmllBrowser']
+
+
+class RmllBrowser(PagesBrowser):
+    """Browser for http://video.rmll.info: HTML pages and api/v2 endpoints."""
+    BASEURL = 'http://video.rmll.info'
+
+    # Named groups: 'pattern' and 'oid' are the keywords given to build() below; 'id' is assumed - TODO confirm
+    index_page = URL(r'channels/content/(?P<id>.+)', RmllCollectionPage)
+    latest_page = URL(r'api/v2/latest/', RmllLatestPage)
+    video_page = URL(r'permalink/(?P<id>.+)/', RmllVideoPage)
+    channels_page = URL(r'api/v2/channels/content/\?parent_oid=(?P<oid>.*)', RmllChannelsPage)
+    search_page = URL(r'api/v2/search/\?search=(?P<pattern>.+)', RmllSearchPage)
+
+    def __init__(self, *args, **kwargs):
+        self.channels = None
+        PagesBrowser.__init__(self, *args, **kwargs)
+
+    @video_page.id2url
+    def get_video(self, url, video=None):
+        """Load ``url`` and return the video read from the page (``video`` is passed on as ``obj``)."""
+        self.location(url)
+        assert self.video_page.is_here()
+        return self.page.get_video(obj=video)
+
+    def search_videos(self, pattern):
+        """Return the resources of the search page built for ``pattern``."""
+        self.location(self.search_page.build(pattern=pattern))
+        return self.page.iter_resources()
+
+    def get_latest_videos(self):
+        """Return the resources of the ``api/v2/latest/`` page."""
+        self.location(self.latest_page.build())
+        assert self.latest_page.is_here()
+        return self.page.iter_resources()
+
+    def get_channel_videos(self, split_path):
+        """Yield the resources of the channel named by the last item of ``split_path``."""
+        # An empty path sends an empty parent_oid.
+        oid = split_path[-1] if split_path else ''
+        self.location(self.channels_page.build(oid=oid))
+        assert self.channels_page.is_here()
+        for resource in self.page.iter_resources(split_path):
+            yield resource
diff --git a/modules/rmll/favicon.png b/modules/rmll/favicon.png
new file mode 100644
index 00000000..6c2519e2
Binary files /dev/null and b/modules/rmll/favicon.png differ
diff --git a/modules/rmll/module.py b/modules/rmll/module.py
new file mode 100644
index 00000000..9fda5553
--- /dev/null
+++ b/modules/rmll/module.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from weboob.capabilities.video import CapVideo, BaseVideo
+from weboob.capabilities.collection import CapCollection, Collection
+from weboob.tools.backend import Module
+
+from .browser import RmllBrowser
+from .video import RmllVideo
+
+
+__all__ = ['RmllModule']
+
+
+class RmllModule(Module, CapVideo, CapCollection):
+    """Weboob module giving access to the videos from RMLL."""
+
+    NAME = 'rmll'  # The name of module
+    MAINTAINER = u'Guyou'  # Name of maintainer of this module
+    EMAIL = 'guilhem.bonnefille@gmail.com'  # Email address of the maintainer
+    VERSION = '1.1'  # Version of weboob
+    DESCRIPTION = 'Videos from RMLL'  # Description of your module
+    LICENSE = 'AGPLv3+'  # License of your module
+
+    BROWSER = RmllBrowser
+
+    def create_default_browser(self):
+        """Return the result of ``create_browser()`` called without argument."""
+        return self.create_browser()
+
+    def get_video(self, _id):
+        """Return what the browser's ``get_video`` gives for ``_id``."""
+        self.logger.debug("Getting video for %s", _id)
+        return self.browser.get_video(_id)
+
+    def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
+        """Return the browser's search results for ``pattern``.
+
+        ``sortby`` and ``nsfw`` are accepted but not passed on to the browser.
+        """
+        return self.browser.search_videos(pattern)
+
+    def fill_video(self, video, fields):
+        """Complete ``video`` for the requested ``fields`` and return it.
+
+        The video page is fetched unless only the thumbnail is wanted; the
+        thumbnail data is downloaded when 'thumbnail' is among ``fields``.
+        """
+        self.logger.debug("Fill video %s for fields %s", video.id, fields)
+        if fields != ['thumbnail']:
+            # if we don't want only the thumbnail, we probably want also every fields
+            video = self.browser.get_video(video.id, video)
+        if 'thumbnail' in fields and video and video.thumbnail:
+            video.thumbnail.data = self.browser.open(video.thumbnail.url).content
+
+        return video
+
+    def iter_resources(self, objs, split_path):
+        """Yield the collections and videos found under ``split_path``.
+
+        Nothing is yielded unless ``BaseVideo`` is among ``objs``.
+        """
+        if BaseVideo not in objs:
+            return
+        if not split_path:
+            # Add fake Collection
+            yield Collection(['latest'], u'Latest')
+        if len(split_path) == 1 and split_path[0] == 'latest':
+            resources = self.browser.get_latest_videos()
+        else:
+            resources = self.browser.get_channel_videos(split_path)
+        for resource in resources:
+            yield resource
+
+    OBJECTS = {RmllVideo: fill_video}
diff --git a/modules/rmll/pages.py b/modules/rmll/pages.py
new file mode 100644
index 00000000..8ea71f2c
--- /dev/null
+++ b/modules/rmll/pages.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import re
+
+from weboob.browser.pages import HTMLPage, JsonPage
+from weboob.browser.elements import ItemElement, ListElement, method
+from weboob.browser.filters.standard import Regexp, Format, CleanText, RegexpError, Duration, DateTime, Filter
+from weboob.browser.filters.html import Link, XPath, CleanHTML
+from weboob.browser.filters.json import Dict
+
+from weboob.capabilities import NotAvailable, NotLoaded
+from weboob.capabilities.image import BaseImage
+from weboob.capabilities.collection import Collection
+
+from .video import RmllVideo
+
+BASE_URL = 'http://video.rmll.info'
+
+class NormalizeThumbnail(Filter):
+    """Make a thumbnail address absolute; return None when it is empty or the default icon."""
+    def filter(self, thumbnail):
+        if thumbnail and not thumbnail.startswith('http'):
+            thumbnail = BASE_URL + thumbnail
+        if not thumbnail or thumbnail == "http://rmll.ubicast.tv/statics/mediaserver/images/video_icon.png":
+            # This is the default: remove it as any frontend default should be better
+            thumbnail = None
+        return thumbnail
+
+
+class RmllDuration(Duration):
+    """Duration filter for texts like ``1 h 2 m 3 s``; the group names are those listed in ``kwargs``."""
+    _regexp = re.compile(r'((?P<hh>\d+) h )?((?P<mm>\d+) m )?(?P<ss>\d+) s')
+    kwargs = {'hours': 'hh', 'minutes': 'mm', 'seconds': 'ss'}
+
+
+def create_video(metadata):
+    """Build an RmllVideo from ``metadata``, one item (a mapping) of the JSON documents."""
+    video = RmllVideo(metadata['oid'])
+    video.title = unicode(metadata['title'])
+    video.date = DateTime(Dict('creation'), default=NotLoaded)(metadata)
+    video.duration = RmllDuration(Dict('duration', default=''), default=NotLoaded)(metadata)
+    thumbnail = NormalizeThumbnail(Dict('thumb'))(metadata)
+    if thumbnail:  # like obj_thumbnail below: no BaseImage around a missing address
+        video.thumbnail = BaseImage(thumbnail)
+        video.thumbnail.url = video.thumbnail.id
+    video.url = NotLoaded
+    return video
+
+class RmllVideoPage(HTMLPage):
+    """HTML page of one video; ``get_video`` reads it into an RmllVideo."""
+    @method
+    class get_video(ItemElement):
+        klass = RmllVideo
+
+        obj_id = CleanHTML('/html/head/meta[@property="og:url"]/@content') & CleanText() & Regexp(pattern=r'.*/permalink/(.+)/$')
+        obj_title = Format(u'%s', CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText())
+        obj_description = Format(u'%s', CleanHTML('/html/head/meta[@property="og:description"]/@content') & CleanText())
+
+        def obj_thumbnail(self):
+            url = NormalizeThumbnail(CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
+            if url:
+                thumbnail = BaseImage(url)
+                thumbnail.url = thumbnail.id
+                return thumbnail
+
+        obj_duration = CleanText('/html/head/script[not(@src)]') & Regexp(pattern=r'media_duration: ([^,.]+),?.*,', default='') & Duration(default=NotAvailable)
+
+        def obj_url(self):
+            """Return the first shared link ending in ``.mp4`` or ``.webm`` (None without one)."""
+            links = XPath('//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href')(self)
+            for link in links:
+                link = unicode(link)  # str() would raise on a non-ASCII address
+                ext = link.split('.')[-1]
+                self.logger.debug("Link:%s Ext:%s", link, ext)
+                if ext in ('mp4', 'webm'):
+                    return link
+
+class RmllCollectionPage(HTMLPage):
+    """HTML page listing videos (``item-entry`` blocks of type video or vod)."""
+    @method
+    class iter_videos(ListElement):
+        item_xpath = '//div[@class="item-entry type-video " or @class="item-entry type-vod "]'
+
+        class item(ItemElement):
+            klass = RmllVideo
+
+            obj_id = Link('a') & Regexp(pattern=r'.*/videos/(.+)/$')
+            obj_title = Format(u'%s', CleanHTML('a/span/span/span[@class="item-entry-title"]') & CleanText())
+            obj_url = NotLoaded
+            #obj_date = XPath('a/span/span/span[@class="item-entry-creation"]')
+            obj_duration = CleanText('a/span/span/span[@class="item-entry-duration"]') & RmllDuration()
+
+            def obj_thumbnail(self):
+                thumbnail = NormalizeThumbnail(CleanText('a/span[@class="item-entry-preview"]/img/@src'))(self)
+                if thumbnail:
+                    thumbnail = BaseImage(thumbnail)
+                    thumbnail.url = thumbnail.id
+                    return thumbnail
+
+class RmllChannelsPage(JsonPage):
+    """JSON document whose optional 'channels' and 'videos' lists are turned into resources."""
+    def iter_resources(self, split_path):
+        if 'channels' in self.doc:
+            for metadata in self.doc['channels']:
+                yield Collection(split_path+[metadata['oid']], metadata['title'])
+        if 'videos' in self.doc:
+            for metadata in self.doc['videos']:
+                yield create_video(metadata)
+
+class RmllLatestPage(JsonPage):
+    """JSON document whose 'items' are collections (type 'c') or videos (type 'v')."""
+    def iter_resources(self):
+        for metadata in self.doc['items']:
+            if metadata['type'] == 'c':
+                yield Collection([metadata['oid']], metadata['title'])
+            if metadata['type'] == 'v':
+                yield create_video(metadata)
+
+class RmllSearchPage(JsonPage):
+    """JSON document of a search; its 'videos' list is turned into RmllVideo objects."""
+    def iter_resources(self):
+        for metadata in self.doc['videos']:
+            yield create_video(metadata)
diff --git a/modules/rmll/test.py b/modules/rmll/test.py
new file mode 100644
index 00000000..22f65220
--- /dev/null
+++ b/modules/rmll/test.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2015 Guilhem Bonnefille
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+from weboob.capabilities.video import BaseVideo
+
+
+class RmllTest(BackendTest):
+    """Tests of the rmll backend, all run through ``self.backend``."""
+
+    MODULE = 'rmll'
+
+    def test_video_search(self):
+        videos = list(self.backend.search_videos('test'))  # list(): a bare generator is always true
+        self.assertTrue(videos)
+        for video in videos:
+            self.assertTrue(video.id, 'ID for video not found')
+
+    def test_video_page(self):
+        for slug in ["v124f0bc409e704d92cf", "http://video.rmll.info/permalink/v124f0bc409e704d92cf/"]:
+            video = self.backend.browser.get_video(slug)
+            self.assertTrue(video.id, 'ID for video not found')
+            self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
+            self.assertTrue(video.thumbnail, 'Thumbnail for video "%s" not found' % (video.id))
+            self.assertTrue(video.title, 'Title for video "%s" not found' % (video.id))
+            #self.assertTrue(video.description, 'Description for video "%s" not found' % (video.id))
+            self.assertTrue(video.duration, 'Duration for video "%s" not found' % (video.id))
+
+    def test_video_fill(self):
+        slug = "v124f0bc409e704d92cf"
+        video = self.backend.browser.get_video(slug)
+        video = self.backend.fill_video(video, ["url"])
+        self.assertTrue(video)
+        self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
+
+    def test_browse(self):
+        for path in [[], ['latest']]:
+            videos = list(self.backend.iter_resources([BaseVideo], path))
+            self.assertTrue(videos)
+            for video in videos:
+                self.assertTrue(video.id, 'ID for video not found')
+
+    def test_missing_duration(self):
+        videos = list(self.backend.search_videos('weboob'))
+        self.assertTrue(videos)
+        for video in videos:
+            self.assertTrue(video.id, 'ID for video not found')
+            self.backend.fill_video(video, ["$full"])  # only has to complete without raising
diff --git a/modules/rmll/video.py b/modules/rmll/video.py
new file mode 100644
index 00000000..afdf7ce2
--- /dev/null
+++ b/modules/rmll/video.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Christophe Benz
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+
+class RmllVideo(BaseVideo):
+    """Video of video.rmll.info, addressed by its permalink."""
+    @classmethod
+    def id2url(cls, _id):
+        """Return ``_id`` unchanged when it starts with 'http', else the permalink URL built from it."""
+        if _id.startswith('http'):
+            return _id
+        return 'http://video.rmll.info/permalink/%s/' % (_id)