From b84a8ba2e852906b054c9428b42d95e8cd0438a3 Mon Sep 17 00:00:00 2001 From: Guilhem Bonnefille Date: Thu, 15 Jan 2015 23:27:52 +0100 Subject: [PATCH] add rmll module --- modules/rmll/TODO | 7 ++ modules/rmll/__init__.py | 22 ++++++ modules/rmll/browser.py | 67 +++++++++++++++++++ modules/rmll/favicon.png | Bin 0 -> 4729 bytes modules/rmll/module.py | 73 ++++++++++++++++++++ modules/rmll/pages.py | 141 +++++++++++++++++++++++++++++++++++++++ modules/rmll/test.py | 65 ++++++++++++++++++ modules/rmll/video.py | 31 +++++++++ 8 files changed, 406 insertions(+) create mode 100644 modules/rmll/TODO create mode 100644 modules/rmll/__init__.py create mode 100644 modules/rmll/browser.py create mode 100644 modules/rmll/favicon.png create mode 100644 modules/rmll/module.py create mode 100644 modules/rmll/pages.py create mode 100644 modules/rmll/test.py create mode 100644 modules/rmll/video.py diff --git a/modules/rmll/TODO b/modules/rmll/TODO new file mode 100644 index 00000000..5b2b12e1 --- /dev/null +++ b/modules/rmll/TODO @@ -0,0 +1,7 @@ +Add following pseudo channels: + most-viewed + most-commented + +count on latest + +add API_KEY diff --git a/modules/rmll/__init__.py b/modules/rmll/__init__.py new file mode 100644 index 00000000..55777ddc --- /dev/null +++ b/modules/rmll/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2015 Guilhem Bonnefille +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from .module import RmllModule + +__all__ = ['RmllModule'] diff --git a/modules/rmll/browser.py b/modules/rmll/browser.py new file mode 100644 index 00000000..5478cbc6 --- /dev/null +++ b/modules/rmll/browser.py @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2015 Guilhem Bonnefille +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
from weboob.browser import PagesBrowser, URL

from .pages import RmllCollectionPage, RmllVideoPage, RmllChannelsPage, RmllSearchPage, RmllLatestPage

__all__ = ['RmllBrowser']


class RmllBrowser(PagesBrowser):
    """Browser for the RMLL video site (http://video.rmll.info).

    Binds each URL pattern to the page class that parses it and exposes
    video lookup, search, the "latest" listing and channel browsing.
    """
    BASEURL = 'http://video.rmll.info'

    # The named groups below had been lost from this text (the patterns read
    # "(?P.+)", which is not valid regex syntax).  `oid` and `pattern` are the
    # keyword names passed to build() further down; `id` is what the
    # `id2url` decorator on get_video is expected to fill in.
    # NOTE(review): index_page is not used by any visible code, so its group
    # name `id` is an assumption -- confirm against the upstream module.
    index_page = URL(r'channels/content/(?P<id>.+)', RmllCollectionPage)
    latest_page = URL(r'api/v2/latest/', RmllLatestPage)
    video_page = URL(r'permalink/(?P<id>.+)/', RmllVideoPage)
    channels_page = URL(r'api/v2/channels/content/\?parent_oid=(?P<oid>.*)', RmllChannelsPage)
    search_page = URL(r'api/v2/search/\?search=(?P<pattern>.+)', RmllSearchPage)

    def __init__(self, *args, **kwargs):
        """Forward all arguments to PagesBrowser after presetting `channels`."""
        # Not read anywhere in the visible code; kept because it is a public
        # attribute of the browser instance.
        self.channels = None
        PagesBrowser.__init__(self, *args, **kwargs)

    @video_page.id2url
    def get_video(self, url, video=None):
        """Load a video permalink page and return the parsed video.

        :param url: location of the video page.  Callers in this patch pass
            either a bare id or a full permalink URL; presumably the
            `id2url` decorator normalises both to a URL -- confirm against
            weboob's URL.id2url.
        :param video: optional existing video object to fill in place
        :returns: whatever the page's get_video() produces
        """
        self.location(url)
        assert self.video_page.is_here()
        return self.page.get_video(obj=video)

    def search_videos(self, pattern):
        """Query the search API for `pattern` and return the page's resources."""
        self.location(self.search_page.build(pattern=pattern))
        return self.page.iter_resources()

    def get_latest_videos(self):
        """Open the "latest" API endpoint and return the page's resources."""
        self.location(self.latest_page.build())
        assert self.latest_page.is_here()
        return self.page.iter_resources()

    def get_channel_videos(self, split_path):
        """Yield the content of the channel addressed by `split_path`.

        The last path element is used as the parent channel oid; an empty
        path sends an empty oid.
        """
        oid = split_path[-1] if split_path else ''
        self.location(self.channels_page.build(oid=oid))
        assert self.channels_page.is_here()
        for video in self.page.iter_resources(split_path):
            yield video


# ---------------------------------------------------------------------------
# Patch text that shared the collapsed source line with browser.py, kept
# verbatim (the binary payload continues on the following lines):
#   diff --git a/modules/rmll/favicon.png b/modules/rmll/favicon.png
#   new file mode 100644
#   index 0000000000000000000000000000000000000000..6c2519e20da8de2f7e2d9b9bf7e13edfb869cf02
#   GIT binary patch
#   literal 4729
#   zcmV-<5{B)GP)Px#24YJ`L;(K){{a7>y{D4^000SaNLh0L01{0A01{0B^OwY@00007bV*G`2i^w(
#   z4i+;a`kVHdTLTE^u36%t?u_ZZ9fHVe-@dFG-
#   z*cd<1J*$75wdJj?ee|%6EjrmVXYTu)y}z^8x1RgL?j3&#ECrSW8-OjqYTy;%d%#3S
#   z$SCaQ>rVv^3*P&b9qWLKkpfh*o3Gy!yjJh|M~tA6jG(zFAdJv4l7JeqAdNJj_#;q%
#   z1ZtP6*ltzLUEv;-1BU`90aJiGfY*k3jxyjZ;CSGt!1F-&{|BJTfTMxafHQytfjxoW
z0}pKHIcETu0v7`B1J43a0Y3-c`=kLi4wwa80esN_+u5dn4?E7Y;}zf*;2Geph;KHc zfSLgO1u)N~s4{RMKMsCz=RO#?7w80=# zKLB03(^}31o&!?va6tL5!_QkS5O$-|K&i9j{uo5xMSKD!dAzcn?{K_>PA@NY)$ zQ$L1z>|va^0QldWIfSpD0QTP%AYEcSGLWb%@JpZwjL$n!b6fSWF5J>yGZ3Ly1K$m9 zV44HgkJ!#+;84x>mB5Y0BcV^^UatbKwaIjcz%QoWg%O<~%h?E2G0Pm>k zIuTi0tnFAg*tv`njsR`|PK_89f6hMx&Nf4`Q-Qh!_>PgQdS=;wTw)&ci?IbYwN@b_ z1yvm{BFBixSJE_{BO+r(q(W6IJPVwP$YxdDtg2gqH$>zi5qVNo-%-_%3Wb86HG5GE zv^`86FAc1#1mOM5LZXd33#iW<5l;-jalhSaVZ`OYn@&wir4sXwU*gneqN>hP)h~<4 zTwotnMP%EhMMT59ubN#(RY{TrRV@KOQPrn_Ula<3vA)Y+)1|;wz~lj> z>k+f0TXqzvnZRSfk*4P{KjzzZY=}tFSP_{kB4?`VNshBb1XWephEyMrR8@f9&keBN z?-n~YNKqHhX%SU`<3g8ZA{NO!azBB|Xs6b1arkBk- ze#u*Qp8ZASE>-=!i1dCp2P_Aq^O2G?O&5yDg$;F`zX>G0U^H>1ac#J+_kg1YN+^b@ z;5opF5!5j*y2XLyoW|z1>I&d0RsB1$;qgfy&>T0Jx;-EwU!8aSlDC>#t7vYkrm??) zlRtcJ7OMJJ=4g)2QMXtY;d$VSVr@rvOM4BC^<6a7bviS@T0|~)U>OiTZ}rm%cR9Ne zb-?w_ZPgQk@4jj`I!eU994sQ=5|MJ}3x#(+ zN7bKftnXUi+*aMkN;zMsp|1075xGoN|HD@=&pJkF%Y9Eo<^fl&Fup>wCI_o?b6PX#njnmm69VB!CfIX6O#)5MdqYgP5+NVv1WthEGMQ*J;_9SEqYRs={Q;=SCdfe%u1TlE4}trHR7 z=NW43cu|_Bbxw*xu!^-E&T4HIkw%k*(7R?Jf;T+B+0b1yx`aUZd#hA_cygxoe0oS-`U?3Z6o+~B620roTjPfc9PWz=$@dehdB`+1~2srmRq2RXp$rc zipV|9ZPmxR1l<4x-(4sa&Q6k~#f30xBgQ#4w~91PzYwe;wfJsT#BfbEup|U(I$0Gq zME3(=Vc>yR0QJDwA!^)!YUsHW(}9Q6H2t$Y&MMY+(9&MRna91D0_TXxgSOy2g7?+w zVB=LkTe`y1$T6v^x5Pjnpk_wAaDcTTP$Dt`Yb6YGN;^1GRXtcl9&Bo@I=z>(a=7+WKQh_*k438xJ3TWk|;0d1W+a-*SECStSi=byx?UnW0JL@rM>2AkUd2>&vQTe zO)N>0y;XITCG6Z-Z$MQ9!ZOytdfx}?fSmbv`*BgBP|#AT^fWNd%PH;jQjZykvNTP5 z{^iNZTrziMJYlvCtc}10&27~RoTsW~eumU@OU4;c1U$TjtkWh;TuwHt-wV|ENZ8_v zm!;l>&g@0l>(Z_&B0k+&M|0bxSk)!;;Mu%+^F@`Fm7=O&2`#Zn>Ua?uW8CXY)Rvr> zbX>?$<6^mMc5$OtY@AlzmJ5>a!8f;6GykNQQ{Yw+S>;@i;Hwj?O5=|WKD`8E&k(4* z_QW1OA;WKITL;jPLI_lMg!Ku{<47LPGNx%-rm6)mHIu1H_Ikal zei;0;g8Yu24S7I?F2d+f3Q$RsIEk92s)u-m2j?>Yye~UuIz(I8hBsxnZmU9 zmax!J02Tok(7%&y3R%CwMu4+FyOgHZs)@iY780b}8|w|GNx=UA7vzEDlXy#e%}iB& zn5+v~y$q+H!7HiiACe^bFwmNPiSOWA?P`|(q?YAm5BCaE!-&WlRb8m6l_D}aNs`e? 
zlI*cF4iOn`$08BApjg}SYOFoTy=M;RG*$f`aE!Bbfm`5!dtX%-eQD0(l6THMfzJeF z?l5>cwEBsmw?5)1jW(UT!8PLb8jzv3njNBRMUj4#V?j_7-c|J_NFzDU5Ki_psK5Ol<3{v_waf~ zRnINfcDxoN@TS%(TxrWV=|13i%a&@G8}oV?O27-2CH2P)%bdjg$Hg(AHdul#R(+`j z4)R4lNs`5?`n*>{+Ah)#?&6PC^=pmwU9UPT=1WscdksmFICVQxM1G*EpY_fcQ#m&_ zsp?}vLa;#A7e6NAL#_bch`9l-${c@!Bv>0s*eJPhpBji)*-J;<_8URo#zIpnq*Qlh5j9c&yqE5tO+G5H7oOai&bIiKg^TR~s5my9p5|fi9@L%88Sl_icAn^*ac1yQLKg0PpS=*xK0kt0Z z3EO%aed|{N{JjIv<+B^>yM9X6^Bp>CgYvklz5<+ItnFCU(q5D6xH?V(?gx(Yi92>J z?_LP-qLZe;I241blP{Sard7* zJnTJik|ZK7EU8n~d#%<+%nb-#0sH865xJtFu5)E@jRU-!8%oV)8}%!Woeo&6_M8(>N?d=y9DL-a&wBWrT=Cj z|M`ao(!UNXEi}aMO(omWc@+8k^-@bu+y>kp=90o50`tH}nX8egYC%;cNs^v4O-n#2 zNs>~rwxcxIxy=0E3>@bf5F+7KYA(*>L*b?E9e)C}v29mniT>MC-EPZYnhFf*AH{+I zcsRNBPn(*3z~FnF-!o3yC6%QqW+_SoOw1i6bL1{G^&PVlu3u(W@?^5TD{B~dKe2o7 zHL_iwyL3$Shk zR}3r2HV<86l?-yyEHOS?VOHl&18}Vo>=w&ckGAuVHwm6>g^sCS4Q`U#4?niCr&XMvH4CFdlq?+vu(8iJTfy>X;%$~Ac2ruxd^hmhKrIIY zY&g=lnr%De58M?z^Kp#|%RU@Q*1CL@S+D)J^L(4E2-0O7^rA`9ySs{zcf~cz&6@3J zmTZ5^JRDB8gM5O8GNUYn>1NwsCUx248uR=&FmR{b$1?s8hW`FGl). 
from weboob.capabilities.video import CapVideo, BaseVideo
from weboob.capabilities.collection import CapCollection, Collection
from weboob.tools.backend import Module

from .browser import RmllBrowser
from .video import RmllVideo


__all__ = ['RmllModule']


class RmllModule(Module, CapVideo, CapCollection):
    """weboob module exposing RMLL videos through the video and collection
    capabilities; every operation is delegated to RmllBrowser."""

    # Module metadata.
    NAME = 'rmll'
    MAINTAINER = u'Guyou'
    EMAIL = 'guilhem.bonnefille@gmail.com'
    VERSION = '1.1'  # weboob version this module targets
    DESCRIPTION = 'Videos from RMLL'
    LICENSE = 'AGPLv3+'

    BROWSER = RmllBrowser

    def create_default_browser(self):
        """Return a browser built with no extra arguments."""
        return self.create_browser()

    def get_video(self, _id):
        """Return the video identified by `_id` as resolved by the browser."""
        self.logger.debug("Getting video for %s", _id)
        return self.browser.get_video(_id)

    def search_videos(self, pattern, sortby=CapVideo.SEARCH_RELEVANCE, nsfw=False):
        """Search videos matching `pattern`.

        `sortby` and `nsfw` are accepted but not forwarded to the browser.
        """
        return self.browser.search_videos(pattern)

    def fill_video(self, video, fields):
        """Complete `video` for the requested `fields` and return it.

        Unless the thumbnail is the only requested field, the whole video
        page is reloaded.  Thumbnail bytes are downloaded only when
        'thumbnail' is requested and the video has a thumbnail.
        """
        self.logger.debug("Fill video %s for fields %s", video.id, fields)
        thumbnail_only = fields == ['thumbnail']
        if not thumbnail_only:
            # Anything beyond the thumbnail most likely needs every field.
            video = self.browser.get_video(video.id, video)
        if 'thumbnail' in fields and video and video.thumbnail:
            response = self.browser.open(video.thumbnail.url)
            video.thumbnail.data = response.content

        return video

    def iter_resources(self, objs, split_path):
        """Yield collections and videos found under `split_path`.

        Nothing is produced unless BaseVideo is among `objs`.  At the root a
        synthetic 'latest' collection is emitted first; the path ['latest']
        lists the latest videos, any other path is browsed as a channel.
        """
        if BaseVideo not in objs:
            return
        if not split_path:
            # Fake collection giving access to the latest videos.
            yield Collection(['latest'], u'Latest')
        if len(split_path) == 1 and split_path[0] == 'latest':
            entries = self.browser.get_latest_videos()
        else:
            entries = self.browser.get_channel_videos(split_path)
        for entry in entries:
            yield entry

    OBJECTS = {RmllVideo: fill_video}


# ---------------------------------------------------------------------------
# Patch text that shared the collapsed source line with module.py, kept
# verbatim so the start of the next file stays identifiable:
#   diff --git a/modules/rmll/pages.py b/modules/rmll/pages.py
#   new file mode 100644
#   index 00000000..8ea71f2c
#   --- /dev/null
#   +++ b/modules/rmll/pages.py
#   @@ -0,0 +1,141 @@
#   +# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

import datetime
import re

from weboob.browser.pages import HTMLPage, JsonPage
from weboob.browser.elements import ItemElement, ListElement, method
from weboob.browser.filters.standard import Regexp, Format, CleanText, RegexpError, Duration, DateTime, Filter
from weboob.browser.filters.html import Link, XPath, CleanHTML
from weboob.browser.filters.json import Dict

from weboob.capabilities import NotAvailable, NotLoaded
from weboob.capabilities.image import BaseImage
from weboob.capabilities.collection import Collection

from .video import RmllVideo

BASE_URL = 'http://video.rmll.info'


class NormalizeThumbnail(Filter):
    """Filter turning a thumbnail reference into an absolute URL.

    Values not starting with 'http' are prefixed with BASE_URL.  The site's
    generic video icon is mapped to None.
    """

    def filter(self, thumbnail):
        if not thumbnail.startswith('http'):
            thumbnail = BASE_URL + thumbnail
        if thumbnail == "http://rmll.ubicast.tv/statics/mediaserver/images/video_icon.png":
            # This is the default: remove it as any frontend default should be better
            thumbnail = None
        return thumbnail


class RmllDuration(Duration):
    """Duration filter for strings shaped like '1 h 2 m 3 s'.

    The hour and minute parts are optional; the seconds part is required.
    """
    # The group names had been lost from this text (it read "(?P\d+)", which
    # is not valid regex syntax); they are restored from the `kwargs` mapping
    # below, which refers to the groups as 'hh', 'mm' and 'ss'.
    _regexp = re.compile(r'((?P<hh>\d+) h )?((?P<mm>\d+) m )?(?P<ss>\d+) s')
    kwargs = {'hours': 'hh', 'minutes': 'mm', 'seconds': 'ss'}


def create_video(metadata):
    """Build an RmllVideo from one JSON metadata entry.

    Reads the keys 'oid', 'title', 'creation', 'duration' and 'thumb'.
    The stream URL is left as NotLoaded.

    :param metadata: mapping describing one video
    :returns: the populated RmllVideo
    """
    video = RmllVideo(metadata['oid'])

    video.title = unicode(metadata['title'])
    video.date = DateTime(Dict('creation'), default=NotLoaded)(metadata)
    video.duration = RmllDuration(Dict('duration', default=''), default=NotLoaded)(metadata)

    thumbnail_url = NormalizeThumbnail(Dict('thumb'))(metadata)
    if thumbnail_url:
        video.thumbnail = BaseImage(thumbnail_url)
        video.thumbnail.url = video.thumbnail.id
    else:
        # NormalizeThumbnail returns None for the placeholder icon.  Do not
        # wrap None in a BaseImage, matching the `if url:` guards used by the
        # HTML page classes below.
        video.thumbnail = NotAvailable
    video.url = NotLoaded

    return video


class RmllVideoPage(HTMLPage):
    """Permalink page of a single video; data comes from <head> metadata."""

    @method
    class get_video(ItemElement):
        klass = RmllVideo

        obj_id = CleanHTML('/html/head/meta[@property="og:url"]/@content') & CleanText() & Regexp(pattern=r'.*/permalink/(.+)/$')
        obj_title = Format(u'%s', CleanHTML('/html/head/meta[@name="DC.title"]/@content') & CleanText())
        obj_description = Format(u'%s', CleanHTML('/html/head/meta[@property="og:description"]/@content') & CleanText())

        def obj_thumbnail(self):
            """Return a BaseImage for the og:image URL, or None when the
            normalised URL is empty."""
            url = NormalizeThumbnail(CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
            if url:
                thumbnail = BaseImage(url)
                thumbnail.url = thumbnail.id
                return thumbnail

        # The duration is taken from the `media_duration:` entry of the
        # inline script.
        obj_duration = CleanText('/html/head/script[not(@src)]') & Regexp(pattern=r'media_duration: ([^,.]+),?.*,', default='') & Duration(default=NotAvailable)

        def obj_url(self):
            """Return the first sharing link ending in .mp4 or .webm.

            Returns None when no such link exists.
            """
            links = XPath('//div[@id="tab_sharing_content"]/div/div/div[@class="paragraph"]/div[@class="share"]/a[@target="_blank"]/@href')(self)
            for link in links:
                # Work on unicode throughout: str() on a non-ASCII link
                # would raise under Python 2.
                link = unicode(link)
                ext = link.rsplit('.', 1)[-1]
                self.logger.debug("Link:%s Ext:%s", link, ext)
                if ext in ('mp4', 'webm'):
                    return link


class RmllCollectionPage(HTMLPage):
    """HTML channel page listing video entries."""

    @method
    class iter_videos(ListElement):
        item_xpath = '//div[@class="item-entry type-video " or @class="item-entry type-vod "]'

        class item(ItemElement):
            klass = RmllVideo

            obj_id = Link('a') & Regexp(pattern=r'.*/videos/(.+)/$')
            obj_title = Format(u'%s', CleanHTML('a/span/span/span[@class="item-entry-title"]') & CleanText())
            obj_url = NotLoaded
            #obj_date = XPath('a/span/span/span[@class="item-entry-creation"]')

            obj_duration = CleanText('a/span/span/span[@class="item-entry-duration"]') & RmllDuration()

            def obj_thumbnail(self):
                """Return a BaseImage for the preview image, or the falsy
                normalised value when there is none."""
                thumbnail = NormalizeThumbnail(CleanText('a/span[@class="item-entry-preview"]/img/@src'))(self)
                if thumbnail:
                    thumbnail = BaseImage(thumbnail)
                    thumbnail.url = thumbnail.id
                return thumbnail


class RmllChannelsPage(JsonPage):
    """JSON listing of a channel's sub-channels and videos."""

    def iter_resources(self, split_path):
        """Yield a Collection per sub-channel, then a video per entry.

        Both the 'channels' and 'videos' keys are optional.
        """
        if 'channels' in self.doc:
            for metadata in self.doc['channels']:
                yield Collection(split_path + [metadata['oid']], metadata['title'])

        if 'videos' in self.doc:
            for metadata in self.doc['videos']:
                yield create_video(metadata)


class RmllLatestPage(JsonPage):
    """JSON listing of the latest items."""

    def iter_resources(self):
        """Yield a Collection for items of type 'c' and a video for items of
        type 'v'; other types are skipped."""
        for metadata in self.doc['items']:
            kind = metadata['type']
            if kind == 'c':
                yield Collection([metadata['oid']], metadata['title'])
            elif kind == 'v':
                yield create_video(metadata)


class RmllSearchPage(JsonPage):
    """JSON search results."""

    def iter_resources(self):
        """Yield one video per search result."""
        # NOTE(review): 'videos' is treated as optional here, as
        # RmllChannelsPage does -- presumably the API omits it when nothing
        # matches; confirm against a real empty-result response.
        for metadata in self.doc.get('videos', []):
            yield create_video(metadata)


# ---------------------------------------------------------------------------
# Patch text that shared the collapsed source line with pages.py, kept
# verbatim so the start of the next file stays identifiable:
#   diff --git a/modules/rmll/test.py b/modules/rmll/test.py
#   new file mode 100644
#   index 00000000..22f65220
#   --- /dev/null
#   +++ b/modules/rmll/test.py
#   @@ -0,0 +1,65 @@
#   +# -*- coding: utf-8 -*-

# Copyright(C) 2015 Guilhem Bonnefille
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
from weboob.capabilities.video import BaseVideo


class RmllTest(BackendTest):
    """Backend tests for the 'rmll' module: search, video page, fill and
    browsing."""
    MODULE = 'rmll'

    def test_video_search(self):
        """Every search result for 'test' carries an id."""
        results = self.backend.search_videos('test')
        # NOTE(review): the browser returns an iterator here, so this check
        # passes even when nothing is found.
        self.assertTrue(results)
        for item in results:
            self.assertTrue(item.id, 'ID for video not found')

    def test_video_page(self):
        """A video can be fetched by bare id and by full permalink URL."""
        references = ("v124f0bc409e704d92cf",
                      "http://video.rmll.info/permalink/v124f0bc409e704d92cf/")
        for reference in references:
            video = self.backend.browser.get_video(reference)
            self.assertTrue(video.id, 'ID for video not found')
            self.assertTrue(video.url, 'URL for video "%s" not found' % (video.id))
            self.assertTrue(video.thumbnail, 'Thumbnail for video "%s" not found' % (video.id))
            self.assertTrue(video.title, 'Title for video "%s" not found' % (video.id))
            # The description check is disabled in the original test:
            #self.assertTrue(video.description, 'Description for video "%s" not found' % (video.id))
            self.assertTrue(video.duration, 'Duration for video "%s" not found' % (video.id))

    def test_video_fill(self):
        """Filling the 'url' field returns a video that has a URL."""
        fetched = self.backend.browser.get_video("v124f0bc409e704d92cf")
        filled = self.backend.fill_video(fetched, ["url"])
        self.assertTrue(filled)
        self.assertTrue(filled.url, 'URL for video "%s" not found' % (filled.id))

    def test_browse(self):
        """Root and 'latest' listings yield resources that carry an id."""
        for path in ([], ['latest']):
            resources = self.backend.iter_resources([BaseVideo], path)
            self.assertTrue(resources)
            for resource in resources:
                self.assertTrue(resource.id, 'ID for video not found')

    def test_missing_duration(self):
        """Fully filling each 'weboob' search result must not raise."""
        results = self.backend.search_videos('weboob')
        self.assertTrue(results)
        for item in results:
            self.assertTrue(item.id, 'ID for video not found')
            # Only the call matters; its result is not inspected.
            self.backend.fill_video(item, ["$full"])


# ---------------------------------------------------------------------------
# Patch text that shared the collapsed source line with test.py, kept
# verbatim so the start of the next file stays identifiable:
#   diff --git a/modules/rmll/video.py b/modules/rmll/video.py
#   new file mode 100644
#   index 00000000..afdf7ce2
#   --- /dev/null
#   +++ b/modules/rmll/video.py
#   @@ -0,0 +1,31 @@
#   +# -*- coding: utf-8 -*-

# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


from weboob.capabilities.video import BaseVideo


class RmllVideo(BaseVideo):
    """Video hosted on http://video.rmll.info."""

    @classmethod
    def id2url(cls, _id):
        """Return the permalink URL for `_id`.

        A value that already starts with 'http' is returned unchanged;
        anything else is inserted into the site's permalink URL.
        """
        if not _id.startswith('http'):
            return 'http://video.rmll.info/permalink/%s/' % (_id)
        return _id