From 2b23948da6d33687e3bb25aee7a4a6bdec176596 Mon Sep 17 00:00:00 2001 From: Romain Bignon Date: Fri, 30 Dec 2011 17:07:17 +0100 Subject: [PATCH] new backend nolifetv --- weboob/backends/nolifetv/__init__.py | 22 ++++++ weboob/backends/nolifetv/backend.py | 62 +++++++++++++++ weboob/backends/nolifetv/browser.py | 53 +++++++++++++ weboob/backends/nolifetv/pages/__init__.py | 0 weboob/backends/nolifetv/pages/index.py | 58 ++++++++++++++ weboob/backends/nolifetv/pages/video.py | 89 ++++++++++++++++++++++ weboob/backends/nolifetv/test.py | 37 +++++++++ weboob/backends/nolifetv/video.py | 34 +++++++++ 8 files changed, 355 insertions(+) create mode 100644 weboob/backends/nolifetv/__init__.py create mode 100644 weboob/backends/nolifetv/backend.py create mode 100644 weboob/backends/nolifetv/browser.py create mode 100644 weboob/backends/nolifetv/pages/__init__.py create mode 100644 weboob/backends/nolifetv/pages/index.py create mode 100644 weboob/backends/nolifetv/pages/video.py create mode 100644 weboob/backends/nolifetv/test.py create mode 100644 weboob/backends/nolifetv/video.py diff --git a/weboob/backends/nolifetv/__init__.py b/weboob/backends/nolifetv/__init__.py new file mode 100644 index 00000000..909ab782 --- /dev/null +++ b/weboob/backends/nolifetv/__init__.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. + +from .backend import NolifeTVBackend + +__all__ = ['NolifeTVBackend'] diff --git a/weboob/backends/nolifetv/backend.py b/weboob/backends/nolifetv/backend.py new file mode 100644 index 00000000..0865f8d6 --- /dev/null +++ b/weboob/backends/nolifetv/backend.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>. 
+ + +from __future__ import with_statement + +from weboob.capabilities.video import ICapVideo +from weboob.tools.backend import BaseBackend + +from .browser import NolifeTVBrowser +from .video import NolifeTVVideo + + +__all__ = ['NolifeTVBackend'] + + +class NolifeTVBackend(BaseBackend, ICapVideo): + NAME = 'nolifetv' + MAINTAINER = 'Romain Bignon' + EMAIL = 'romain@weboob.org' + VERSION = '0.a' + DESCRIPTION = 'NolifeTV videos website' + LICENSE = 'AGPLv3+' + BROWSER = NolifeTVBrowser + + def get_video(self, _id): + with self.browser: + video = self.browser.get_video(_id) + return video + + def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None): + with self.browser: + return self.browser.iter_search_results(pattern) + + def fill_video(self, video, fields): + if fields != ['thumbnail']: + # if we don't want only the thumbnail, we probably want also every fields + with self.browser: + video = self.browser.get_video(NolifeTVVideo.id2url(video.id), video) + if 'thumbnail' in fields and video.thumbnail: + with self.browser: + video.thumbnail.data = self.browser.readurl(video.thumbnail.url) + + return video + + OBJECTS = {NolifeTVVideo: fill_video} diff --git a/weboob/backends/nolifetv/browser.py b/weboob/backends/nolifetv/browser.py new file mode 100644 index 00000000..0369753b --- /dev/null +++ b/weboob/backends/nolifetv/browser.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Romain Bignon +# +# This file is part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
import urllib

from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url

from .pages.index import IndexPage
from .pages.video import VideoPage
from .video import NolifeTVVideo


__all__ = ['NolifeTVBrowser']


class NolifeTVBrowser(BaseBrowser):
    """Browser for online.nolife-tv.com: video listings and video pages."""

    DOMAIN = 'online.nolife-tv.com'
    ENCODING = None
    # URL pattern -> page class. The video pattern must expose a group named
    # "id": VideoPage reads it through ``group_dict['id']``. A bare "(?P.+)"
    # is not even a valid regular expression.
    PAGES = {r'http://online.nolife-tv.com/index.php\??': IndexPage,
             r'http://online.nolife-tv.com/': IndexPage,
             r'http://online.nolife-tv.com/index.php\?id=(?P<id>.+)': VideoPage}

    @id2url(NolifeTVVideo.id2url)
    def get_video(self, url, video=None):
        """Open ``url`` and return the video described by that page.

        :param url: location of the video page
        :param video: optional existing video object to fill
        :returns: whatever ``VideoPage.get_video`` returns
        """
        self.location(url)
        assert self.is_on_page(VideoPage), 'Should be on video page.'
        return self.page.get_video(video)

    def iter_search_results(self, pattern):
        """Return the videos listed for ``pattern``.

        With an empty or missing pattern the home page listing is used;
        otherwise the pattern is submitted to ``/index.php?`` as ``search=``.
        """
        if not pattern:
            self.home()
        else:
            query = 'search=%s' % urllib.quote_plus(pattern.encode('utf-8'))
            self.location('/index.php?', query)
        assert self.is_on_page(IndexPage), 'Should be on index page.'
        return self.page.iter_videos()
from dateutil.parser import parse as parse_dt
import re

from weboob.tools.browser import BasePage
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.capabilities.base import NotAvailable

from ..video import NolifeTVVideo


__all__ = ['IndexPage']


class IndexPage(BasePage):
    """Listing page (home or search results) made of ``div.ligne_video`` rows."""

    # Compiled once, as raw strings so that "\?" and "\d" are regex escapes
    # rather than (invalid) string escapes. Group 1 is the video id.
    _VIDEO_HREF_RE = re.compile(r'index\.php\?id=(\d+)')
    # Group 1 is the number embedded in the rating image file name.
    _RATING_IMG_RE = re.compile(r'.*view_level(\d+)\.gif')
    # Upper bound reported with every rating (value kept from the original code).
    _RATING_MAX = 21

    def iter_videos(self):
        """Yield one ``NolifeTVVideo`` per usable row of the listing.

        Rows without a link of the form ``index.php?id=<digits>`` are skipped.
        A date or rating that cannot be read is set to ``NotAvailable``.
        """
        rows = self.parser.select(self.document.getroot(), 'div.ligne_video')
        for row in rows:
            link = row.find('a')
            if link is None:
                # Row without any link: nothing to identify the video with.
                continue
            id_match = self._VIDEO_HREF_RE.match(link.attrib.get('href', ''))
            if not id_match:
                continue

            video = NolifeTVVideo(id_match.group(1))
            # NOTE(review): the third argument (1) presumably asks select()
            # for exactly one matching element - confirm against the parser.
            video.title = self.parser.select(row, 'span.span_title', 1).text
            video.description = self.parser.select(row, 'span.span_description', 1).text
            screen = self.parser.select(row, 'div.screen_video', 1)
            video.thumbnail = Thumbnail(screen.find('img').attrib['src'])

            try:
                date_el = self.parser.select(row, 'div.infos_video span.span_title', 1)
                video.date = parse_dt(date_el.text.strip())
            except Exception:
                # Best effort: a missing or unparsable date must not drop the row.
                video.date = NotAvailable

            video.rating = video.rating_max = NotAvailable
            rating_imgs = self.parser.select(row, 'span.description img')
            if rating_imgs:
                rating_match = self._RATING_IMG_RE.match(rating_imgs[0].attrib.get('src', ''))
                if rating_match:
                    video.rating = int(rating_match.group(1))
                    video.rating_max = self._RATING_MAX

            yield video
from dateutil.parser import parse as parse_dt
import urllib

from weboob.capabilities.base import NotAvailable
from weboob.tools.capabilities.thumbnail import Thumbnail
from weboob.tools.browser import BasePage
from weboob.tools.misc import to_unicode

from ..video import NolifeTVVideo


__all__ = ['VideoPage']


class ForbiddenVideo(Exception):
    """Raised when the player API answers without a stream URL."""


class VideoPage(BasePage):
    """Page of a single video (``index.php?id=...``)."""

    # Endpoint, Referer template and key used for the two player API calls.
    _PLAYER_API = 'http://online.nolife-tv.com/_newplayer/api/api_player.php'
    _PLAYER_REFERER = 'http://online.nolife-tv.com/_newplayer/nolifeplayer_flash10.swf?idvideo=%s&autostart=0'
    _PLAYER_SKEY = 'skey=9fJhXtl%5D%7CFR%3FN%7D%5B%3A%5Fd%22%5F'

    def get_video(self, video=None):
        """Fill (or create) a video from this page.

        :param video: existing video object to complete; when None a new
            ``NolifeTVVideo`` is created from the ``id`` group of the URL
        :returns: the filled video
        :raises ForbiddenVideo: when the player API returns no stream URL
        """
        _id = to_unicode(self.group_dict['id'])
        if video is None:
            video = NolifeTVVideo(_id)

        infos = self.parser.select(self.document.getroot(), 'div#informations_video', 1)
        video.title = self.parser.select(infos, 'div#ligne_titre_big', 1).text
        video.description = self.parser.select(infos, 'div#ligne_titre_small', 1).text
        video.thumbnail = Thumbnail(self.parser.select(infos, 'div#icone_video img', 1).attrib['src'])
        try:
            extra = self.parser.select(infos, 'div#infos_complementaires', 1)
            video.date = parse_dt(extra.find('p').text.strip())
        except Exception:
            # Best effort: a missing or unparsable date is not fatal.
            video.date = NotAvailable
        video.author = NotAvailable
        video.duration = NotAvailable
        video.rating = NotAvailable
        video.rating_max = NotAvailable

        if not video.url:
            video.url = self._fetch_stream_url(_id)

        return video

    def _player_request(self, _id, params):
        """Build a player API request carrying the key and the player Referer."""
        return self.browser.request_class(self._PLAYER_API,
                                          self._PLAYER_SKEY + params,
                                          {'Referer': self._PLAYER_REFERER % _id})

    def _fetch_stream_url(self, _id):
        """Ask the player API for the stream URL of video ``_id``.

        :raises ForbiddenVideo: when the answer carries no ``url`` field
        """
        # The "connect" call is made first, then the actual query, in the
        # same order as the original code.
        self.browser.openurl(self._player_request(_id, '&connect=1&a=US'))
        data = self.browser.readurl(
            self._player_request(_id, '&a=UEM%7CSEM&quality=0&id%5Fnlshow=' + _id))

        # The answer is an "&"-separated list of key=value pairs.
        values = dict([urllib.splitvalue(s) for s in data.split('&')])
        if 'url' not in values:
            # Fall back to a generic message rather than failing with a
            # KeyError/AttributeError when the API gives no "message" either.
            message = values.get('message') or 'no stream URL returned by the player API'
            raise ForbiddenVideo(message.decode('iso-8859-15'))
        return values['url']
from weboob.tools.test import BackendTest
from .pages.video import ForbiddenVideo


class NolifeTVTest(BackendTest):
    """Functional test of the nolifetv backend."""

    BACKEND = 'nolifetv'

    def test_nolife(self):
        """Search for 'nolife' and check the URL of the first fillable video.

        Videos whose filling raises ``ForbiddenVideo`` are skipped; the check
        stops after the first video that could be filled.
        """
        videos = list(self.backend.iter_search_results('nolife'))
        self.assertTrue(len(videos) > 0)
        for video in videos:
            try:
                self.backend.fillobj(video, ('url',))
            except ForbiddenVideo:
                # Try the next result.
                continue
            self.assertTrue(video.url and video.url.startswith('http://'),
                            'URL for video "%s" not found: %s' % (video.id, video.url))
            break
from weboob.capabilities.video import BaseVideo


__all__ = ['NolifeTVVideo']


class NolifeTVVideo(BaseVideo):
    """Video object of the nolifetv backend; its ``ext`` is always 'mp4'."""

    def __init__(self, *args, **kwargs):
        """Initialise the base video with the given arguments, then set ``ext``."""
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = 'mp4'

    @classmethod
    def id2url(cls, _id):
        """Return the URL of the page of the video whose id is ``_id``."""
        page_url = 'http://online.nolife-tv.com/index.php?id=%s' % _id
        return page_url