diff --git a/modules/nolifetv/backend.py b/modules/nolifetv/backend.py index 44f71749..89f55361 100644 --- a/modules/nolifetv/backend.py +++ b/modules/nolifetv/backend.py @@ -18,20 +18,19 @@ # along with weboob. If not, see . - - from weboob.capabilities.video import ICapVideo, BaseVideo -from weboob.capabilities.collection import ICapCollection, CollectionNotFound +from weboob.capabilities.collection import ICapCollection, CollectionNotFound, Collection from weboob.tools.value import Value, ValueBackendPassword from weboob.tools.backend import BaseBackend, BackendConfig from .browser import NolifeTVBrowser from .video import NolifeTVVideo +import urllib, time +from hashlib import md5 __all__ = ['NolifeTVBackend'] - class NolifeTVBackend(BaseBackend, ICapVideo, ICapCollection): NAME = 'nolifetv' MAINTAINER = u'Romain Bignon' @@ -40,52 +39,88 @@ class NolifeTVBackend(BaseBackend, ICapVideo, ICapCollection): DESCRIPTION = 'NolifeTV French video streaming website' LICENSE = 'AGPLv3+' BROWSER = NolifeTVBrowser - CONFIG = BackendConfig(Value('username', label='Username', default=''), - ValueBackendPassword('password', label='Password', default='')) + CONFIG = BackendConfig(Value('username', label='Username', default=''), + ValueBackendPassword('password', label='Password', default=''), + Value('quality', label='Quality', + choices = { '1':'LQ', '2':'HQ', '3':'TV', '4':'720p', '5':'1080p' }, + default = '5' )) def create_default_browser(self): - username = self.config['username'].get() - if len(username) > 0: - password = self.config['password'].get() - else: - password = None - return self.create_browser(username, password) + return self.create_browser(self.config['username'].get(), self.config['password'].get()) + + def iter_resources(self, objs, split_path): + with self.browser: + if BaseVideo in objs: + collection = self.get_collection(objs, split_path) + if collection.path_level == 0: + yield Collection([u'theme'], u'Par theme') + yield Collection([u'type'], u'Par type') + yield Collection([u'latest'], u'Latest NolifeTV videos') + if collection.path_level == 1: + if split_path[0] == 'latest': + for vid in self.browser.get_latest(): + yield vid + else: + for cat in self.browser.iter_category(split_path[0]): + yield cat + if collection.path_level == 2: + for cat in self.browser.iter_family(split_path[0], split_path[1]): + yield cat + if collection.path_level == 3: + for cat in self.browser.iter_video(split_path[2]): + yield cat + + def validate_collection(self, objs, collection): + if BaseVideo in objs: + if collection.path_level == 0: + return + if collection.path_level == 1 and collection.split_path[0] in [u'theme', u'type', u'latest']: + return + if collection.path_level > 1: + return + raise CollectionNotFound(collection.split_path) def get_video(self, _id): with self.browser: - video = self.browser.get_video(_id) + return self.browser.get_video(_id) + + def fill_video(self, video, fields): + with self.browser: + self.browser.get_video(NolifeTVVideo.id2url(video.id), video) + + if 'thumbnail' in fields and video.thumbnail: + with self.browser: + video.thumbnail.data = self.browser.readurl(video.thumbnail.url) + + if 'url' in fields: + with self.browser: + video.url = self.get_url(video.id, self.config['quality'].get()) return video def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False): with self.browser: return self.browser.search_videos(pattern) - def fill_video(self, video, fields): - if fields != ['thumbnail']: - # if we don't want only the thumbnail, we probably want also every fields - with self.browser: - video = self.browser.get_video(NolifeTVVideo.id2url(video.id), video) - if 'thumbnail' in fields and video.thumbnail: - with self.browser: - video.thumbnail.data = self.browser.readurl(video.thumbnail.url) + OBJECTS = { NolifeTVVideo: fill_video } - return video + SALT = 'a53be1853770f0ebe0311d6993c7bcbe' - def iter_resources(self, objs, split_path): - if BaseVideo in objs: - collection = self.get_collection(objs, split_path) - if collection.path_level == 0: - yield self.get_collection(objs, [u'latest']) - if collection.split_path == [u'latest']: - for video in self.browser.latest_videos(): - yield video + def genkey(self): + # This website is really useful to get info: http://www.showmycode.com/ + timestamp = str(int(time.time())) + skey = md5(md5(timestamp).hexdigest() + self.SALT).hexdigest() + return skey, timestamp - def validate_collection(self, objs, collection): - if collection.path_level == 0: - return - if BaseVideo in objs and collection.split_path == [u'latest']: - collection.title = u'Latest NoLiveTV videos' - return - raise CollectionNotFound(collection.split_path) + def get_url(self, id, quality): + skey, timestamp = self.genkey() + self.browser.readurl('http://online.nolife-tv.com/_nlfplayer/api/api_player.php', + 'quality=%s&a=EML&skey=%s&id%%5Fnlshow=%s×tamp=%s' % (quality, skey, id, timestamp)) - OBJECTS = {NolifeTVVideo: fill_video} + skey, timestamp = self.genkey() + data = self.browser.readurl('http://online.nolife-tv.com/_nlfplayer/api/api_player.php', + 'quality=%s&a=UEM%%7CSEM%%7CMEM%%7CCH%%7CSWQ&skey=%s&id%%5Fnlshow=%s×tamp=%s' % (quality, skey, id, timestamp)) + values = dict([urllib.splitvalue(s) for s in data.split('&')]) + + if not 'url' in values: + return None + return unicode(values['url']) diff --git a/modules/nolifetv/browser.py b/modules/nolifetv/browser.py index 0c509a1c..5cd5ab9e 100644 --- a/modules/nolifetv/browser.py +++ b/modules/nolifetv/browser.py @@ -18,73 +18,90 @@ # along with weboob. If not, see . +from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword + import urllib -from weboob.tools.browser import BaseBrowser, BrowserIncorrectPassword from weboob.tools.browser.decorators import id2url - -from .pages.index import IndexPage -from .pages.video import VideoPage from .video import NolifeTVVideo - +from .pages import VideoPage, VideoListPage, FamilyPage, AboPage, LoginPage, HomePage __all__ = ['NolifeTVBrowser'] - class NolifeTVBrowser(BaseBrowser): - DOMAIN = 'online.nolife-tv.com' - ENCODING = 'utf-8' - PAGES = {r'http://online.nolife-tv.com/index.php\??': IndexPage, - r'http://online.nolife-tv.com/': IndexPage, - r'http://online.nolife-tv.com/do.php': IndexPage, - r'http://online.nolife-tv.com/emission-(?P[^/]+)/?.*': VideoPage} + USER_AGENT = BaseBrowser.USER_AGENTS['desktop_firefox'] + DOMAIN = 'mobile.nolife-tv.com' + PROTOCOL = 'http' + PAGES = { r'http://mobile.nolife-tv.com/online/familles-\w+/': FamilyPage, + r'http://mobile.nolife-tv.com/online/emission-(?P\d+)/': VideoPage, + 'http://mobile.nolife-tv.com/do.php': VideoListPage, + 'http://mobile.nolife-tv.com/online/': VideoListPage, + 'http://mobile.nolife-tv.com/abonnement/': AboPage, + 'http://mobile.nolife-tv.com/login': LoginPage, + 'http://mobile.nolife-tv.com/': HomePage, + } + AVAILABLE_VIDEOS = ['[Gratuit]'] def is_logged(self): - if self.password is None: - return True - - if not self.page: - return False - - l = self.page.document.xpath('//form[@name="login"]') - return len(l) == 0 + return not self.is_on_page(HomePage) or self.page.is_logged() def login(self): - if self.password is None: - return + if not self.is_on_page(LoginPage): + self.location('/login', no_login=True) - params = {'cookieuser': 1, - 'login': 1, - 'username': self.username, - 'password': self.password, - } + self.page.login(self.username, self.password) - self.readurl('http://online.nolife-tv.com/login', urllib.urlencode(params)) - - self.location('/', no_login=True) - - if not self.is_logged(): + if self.is_on_page(LoginPage): raise BrowserIncorrectPassword() + self.location('/abonnement/', no_login=True) + assert self.is_on_page(AboPage) + + self.AVAILABLE_VIDEOS = self.page.get_available_videos() + @id2url(NolifeTVVideo.id2url) def get_video(self, url, video=None): self.location(url) - assert self.is_on_page(VideoPage), 'Should be on video page.' + assert self.is_on_page(VideoPage) + return self.page.get_video(video) + def iter_family(self, type, sub): + self.location('/online/familles-%s/' % type) + assert self.is_on_page(FamilyPage) + + return self.page.iter_family(sub) + + def iter_category(self, type): + self.location('/online/familles-%s/' % type) + assert self.is_on_page(FamilyPage) + + return self.page.iter_category() + + def iter_video(self, family): + data = { 'a': 'ge', + 'famille': family, + 'emissions': 0 } + + while True: + self.location('/do.php', urllib.urlencode(data)) + assert self.is_on_page(VideoListPage) + + if self.page.is_list_empty(): + break + + for vid in self.page.iter_video(self.AVAILABLE_VIDEOS): + yield vid + data['emissions'] = data['emissions'] + 1 + + def get_latest(self): + return self.iter_video(0) + def search_videos(self, pattern): - data = {'a': 'search', - 'search': pattern.encode('utf-8'), - 'vu': 'all', - } - self.openurl('/do.php', urllib.urlencode(data)) - self.location('/do.php', 'a=em') + data = { 'search': pattern, + 'submit': 'Rechercher' } + self.location('/online/', urllib.urlencode(data)) + assert self.is_on_page(VideoListPage) - assert self.is_on_page(IndexPage) - return self.page.iter_videos() - - def latest_videos(self): - self.location('/do.php', 'a=em') - - assert self.is_on_page(IndexPage) - return self.page.iter_videos() + for vid in self.page.iter_video(self.AVAILABLE_VIDEOS): + yield vid diff --git a/modules/nolifetv/pages.py b/modules/nolifetv/pages.py new file mode 100644 index 00000000..aa1cd05f --- /dev/null +++ b/modules/nolifetv/pages.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see . + + +from weboob.capabilities.collection import Collection +from weboob.capabilities.image import BaseImage + +from weboob.tools.browser import BasePage + +import re +from datetime import datetime, timedelta + +from .video import NolifeTVVideo + +__all__ = ['VideoPage', 'VideoListPage', 'FamilyPage', 'AboPage', 'LoginPage', 'HomePage'] + +class VideoPage(BasePage): + def get_video(self, video): + if not video: + video = NolifeTVVideo(self.group_dict['id']) + + els = self.document.getroot().xpath('//div[@data-role="content"]') + if els and els[0] is not None: + h3 = els[0].find('h3') + if h3 is not None and h3.text: + video.title = unicode(h3.text) + + h4 = els[0].find('h4') + if h4 is not None and h4.text: + video.title = video.title + u' - ' + h4.text + + thumb = els[0].find('p/img') + if thumb is not None and thumb.get('src'): + video.thumbnail = BaseImage(thumb.attrib['src']) + video.thumbnail.url = video.thumbnail.id + + ps = els[0].findall('p') + if len(ps) > 4: + if ps[4].text: + video.description = ps[4].text + if ps[0].text and ps[0].text != u'∞': + video.date = datetime.strptime(ps[0].text, '%d/%m/%Y').date() + + for text in ps[2].xpath('.//text()'): + m = re.search(r'[^\d]*((\d+):)?(\d+)s?', text) + if m: + if m.group(2): + minutes = int(m.group(2)) + else: + minutes = 0 + video.duration = timedelta(minutes=minutes, + seconds=int(m.group(3))) + return video + +class VideoListPage(BasePage): + def is_list_empty(self): + return self.document.getroot() == None + + def iter_video(self, available_videos): + for el in self.document.getroot().xpath('//li/a'): + strongs = el.findall('p/strong') + if len(strongs) > 3 and strongs[0].text not in ['Autopromo', 'Annonce'] and strongs[1].text in available_videos: + m = re.search(r'emission-(\d+)', el.attrib['href']) + if m and m.group(1): + video = NolifeTVVideo(m.group(1)) + h3 = el.find('h3') + if h3 is not None and h3.text: + video.title = unicode(h3.text) + if strongs[3].text: + video.title = video.title + ' - ' + strongs[3].text + yield video + +class FamilyPage(BasePage): + def iter_category(self): + subs = list() + + for el in self.document.xpath('//ul/li[@data-role="list-divider"]'): + if not el.text in subs: + yield Collection([el.text], unicode(el.text)) + subs.append(el.text) + + def iter_family(self, sub): + for el in self.document.xpath('//ul/li[@data-role="list-divider"]'): + if el.text != sub: + continue + + while True: + el = el.getnext() + if el == None or el.get('data-role'): + break + h1 = el.find('.//h1') + id = h1.getparent().attrib['href'] + m = re.search(r'famille-(\d+)', id) + if m and m.group(1): + yield Collection([m.group(1)], unicode(h1.text)) + +class AboPage(BasePage): + def get_available_videos(self): + available = ['[Gratuit]'] + + for text in self.document.xpath('//div[@data-role="content"]/center/text()'): + if 'Soutien' in text: + available.append('[Archive]') + available.append('[Standard]') + if 'Standard' in text: + available.append('[Standard]') + + return available + + +class LoginPage(BasePage): + def login(self, username, password): + self.browser.select_form(name='login') + self.browser['username'] = str(username) + self.browser['password'] = str(password) + self.browser.submit() + +class HomePage(BasePage): + def is_logged(self): + return len(self.document.xpath('//a[@href="deconnexion/"]')) == 1 diff --git a/modules/nolifetv/pages/__init__.py b/modules/nolifetv/pages/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/nolifetv/pages/index.py b/modules/nolifetv/pages/index.py deleted file mode 100644 index aff1c9fa..00000000 --- a/modules/nolifetv/pages/index.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Romain Bignon -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from datetime import datetime -import re - -from weboob.tools.browser import BasePage, BrokenPageError -from weboob.capabilities.base import NotAvailable -from weboob.capabilities.image import BaseImage - -from ..video import NolifeTVVideo - - -__all__ = ['IndexPage'] - - -class IndexPage(BasePage): - def iter_videos(self): - for div in self.parser.select(self.document.getroot(), 'div.data_emissions ul li'): - m = re.match('id-(\d+)', div.attrib.get('class', '')) - if not m: - continue - - img = self.parser.select(div, 'div.screenshot a img', 1) - - video = NolifeTVVideo(m.group(1)) - video.title = unicode(img.attrib['alt']) - try: - video.description = unicode(self.parser.select(div, 'div.tooltip div.border-bottom p, div.infos div.border-bottom p')[-1].text) - except IndexError: - video.description = NotAvailable - - url = img.attrib['src'] - video.thumbnail = BaseImage(url) - video.thumbnail.url = video.thumbnail.id - try: - dparts = self.parser.select(div, 'span.date_emission', 1).text.strip().split('/') - hparts = self.parser.select(div, 'span.hour_emission', 1).text.strip().split('h') - video.date = datetime(int(dparts[-1]), int(dparts[-2]), int(dparts[-3]), - int(hparts[0]), int(hparts[1])) - except (BrokenPageError,ValueError): - video.date = NotAvailable - - video.set_empty_fields(NotAvailable, ('url',)) - - yield video diff --git a/modules/nolifetv/pages/video.py b/modules/nolifetv/pages/video.py deleted file mode 100644 index ffd437a2..00000000 --- a/modules/nolifetv/pages/video.py +++ /dev/null @@ -1,113 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright(C) 2011 Romain Bignon -# -# This file is part of weboob. -# -# weboob is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# weboob is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with weboob. If not, see . - - -from hashlib import md5 -import time -from dateutil.parser import parse as parse_dt -import urllib - -from weboob.capabilities.base import NotAvailable, UserError -from weboob.capabilities.image import BaseImage -from weboob.tools.browser import BasePage, BrokenPageError -from weboob.tools.misc import to_unicode - -from ..video import NolifeTVVideo - - -__all__ = ['VideoPage'] - - -class ForbiddenVideo(UserError): - pass - - -class VideoPage(BasePage): - def get_video(self, video=None): - _id = to_unicode(self.group_dict['id']) - if video is None: - video = NolifeTVVideo(_id) - - # Check if video is external. - try: - div = self.parser.select(self.document.getroot(), 'div#message_lien_ext', 1) - except BrokenPageError: - pass - else: - link = div.find('a').attrib['href'] - raise ForbiddenVideo('Video is only available here: %s' % link) - - meta = self.parser.select(self.document.getroot(), 'meta[property="og:title"]', 1) - try: - video.title = unicode(meta.attrib['content']) - except BrokenPageError: - video.title = NotAvailable - - meta = self.parser.select(self.document.getroot(), 'meta[property="og:description"]', 1) - try: - video.description = unicode(meta.attrib['content']) - except BrokenPageError: - video.description = NotAvailable - - meta = self.parser.select(self.document.getroot(), 'meta[property="og:image"]', 1) - try: - video.thumbnail = BaseImage(meta.attrib['content']) - video.thumbnail.url = video.thumbnail.id - except BrokenPageError: - video.thumbnail = NotAvailable - - try: - video.date = parse_dt(self.parser.select(div, 'div#infos_complementaires', 1).find('p').text.strip()) - except Exception: - video.date = NotAvailable - video.author = NotAvailable - video.duration = NotAvailable - video.rating = NotAvailable - video.rating_max = NotAvailable - - if not video.url: - skey, timestamp = self.genkey() - self.browser.readurl('http://online.nolife-tv.com/_nlfplayer/api/api_player.php', - 'skey=%s&a=MD5×tamp=%s' % (skey, timestamp)) - - skey, timestamp = self.genkey() - self.browser.readurl('http://online.nolife-tv.com/_nlfplayer/api/api_player.php', - 'a=EML&skey=%s&id%%5Fnlshow=%s×tamp=%s' % (skey, _id, timestamp)) - - skey, timestamp = self.genkey() - data = self.browser.readurl('http://online.nolife-tv.com/_nlfplayer/api/api_player.php', - 'quality=0&a=UEM%%7CSEM%%7CMEM%%7CCH%%7CSWQ&skey=%s&id%%5Fnlshow=%s×tamp=%s' % (skey, _id, timestamp)) - - values = dict([urllib.splitvalue(s) for s in data.split('&')]) - - if not 'url' in values: - raise ForbiddenVideo(values.get('message', 'Not available').decode('iso-8859-15')) - video.url = unicode(values['url']) - - video.set_empty_fields(NotAvailable) - - return video - - SALT = 'a53be1853770f0ebe0311d6993c7bcbe' - - def genkey(self): - # This website is really useful to get info: http://www.showmycode.com/ - timestamp = str(int(time.time())) - skey = md5(md5(timestamp).hexdigest() + self.SALT).hexdigest() - return skey, timestamp diff --git a/modules/nolifetv/test.py b/modules/nolifetv/test.py index 452c4b0f..8cc2adf5 100644 --- a/modules/nolifetv/test.py +++ b/modules/nolifetv/test.py @@ -21,32 +21,19 @@ from weboob.tools.test import BackendTest from weboob.capabilities.video import BaseVideo -from .pages.video import ForbiddenVideo - - class NolifeTVTest(BackendTest): BACKEND = 'nolifetv' def test_search(self): l = list(self.backend.search_videos('nolife')) self.assertTrue(len(l) > 0) - for v in l: - try: - self.backend.fillobj(v, ('url',)) - except ForbiddenVideo: - continue - else: - self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) - break + v = l[0] + self.backend.fillobj(v, ('url',)) + self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) def test_latest(self): l = list(self.backend.iter_resources([BaseVideo], [u'latest'])) assert len(l) > 0 - for v in l: - try: - self.backend.fillobj(v, ('url',)) - except ForbiddenVideo: - continue - else: - self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) - break + v = l[0] + self.backend.fillobj(v, ('url',)) + self.assertTrue(v.url and v.url.startswith('http://'), 'URL for video "%s" not found: %s' % (v.id, v.url)) diff --git a/modules/nolifetv/video.py b/modules/nolifetv/video.py index 088570fd..7b995b08 100644 --- a/modules/nolifetv/video.py +++ b/modules/nolifetv/video.py @@ -20,10 +20,8 @@ from weboob.capabilities.video import BaseVideo - __all__ = ['NolifeTVVideo'] - class NolifeTVVideo(BaseVideo): def __init__(self, *args, **kwargs): BaseVideo.__init__(self, *args, **kwargs) @@ -31,4 +29,4 @@ class NolifeTVVideo(BaseVideo): @classmethod def id2url(cls, _id): - return 'http://online.nolife-tv.com/emission-%s/' % _id + return u'http://mobile.nolife-tv.com/online/emission-%s/' % _id