From c45cddc3372c6201deed40c187992cbe413bb103 Mon Sep 17 00:00:00 2001 From: Lord Date: Mon, 12 Mar 2012 10:54:37 +0100 Subject: [PATCH] add module cappedtv Signed-off-by: Lord Signed-off-by: Romain Bignon --- modules/cappedtv/__init__.py | 5 ++ modules/cappedtv/backend.py | 47 ++++++++++++++ modules/cappedtv/browser.py | 116 +++++++++++++++++++++++++++++++++++ modules/cappedtv/favicon.png | Bin 0 -> 721 bytes modules/cappedtv/test.py | 27 ++++++++ 5 files changed, 195 insertions(+) create mode 100644 modules/cappedtv/__init__.py create mode 100644 modules/cappedtv/backend.py create mode 100644 modules/cappedtv/browser.py create mode 100755 modules/cappedtv/favicon.png create mode 100644 modules/cappedtv/test.py diff --git a/modules/cappedtv/__init__.py b/modules/cappedtv/__init__.py new file mode 100644 index 00000000..3030b9c1 --- /dev/null +++ b/modules/cappedtv/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- + +from .backend import CappedBackend + +__all__ = ['CappedBackend'] diff --git a/modules/cappedtv/backend.py b/modules/cappedtv/backend.py new file mode 100644 index 00000000..88158614 --- /dev/null +++ b/modules/cappedtv/backend.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2012 Lord +# +# This module is free software. It comes without any warranty, to +# the extent permitted by applicable law. You can redistribute it +# and/or modify it under the terms of the Do What The Fuck You Want +# To Public License, Version 2, as published by Sam Hocevar. See +# http://sam.zoy.org/wtfpl/COPYING for more details. 
from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend
from .browser import CappedBrowser, CappedVideo


__all__ = ['CappedBackend']


class CappedBackend(BaseBackend, ICapVideo):
    """Video backend for the capped.tv demoscene website.

    Every request is delegated to ``self.browser``, always from inside a
    ``with self.browser:`` block.
    """

    NAME = 'cappedtv'
    MAINTAINER = 'Lord'
    EMAIL = 'lord@lordtoniok.com'
    VERSION = '0.b'
    DESCRIPTION = 'Capped.tv demoscene website'
    LICENSE = 'WTFPLv2'
    BROWSER = CappedBrowser

    def get_video(self, _id):
        """Return the video that the browser builds for *_id*."""
        with self.browser:
            found = self.browser.get_video(_id)
        return found

    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=None, max_results=None):
        """Return the browser's search results for *pattern*.

        Only *pattern* is forwarded; *sortby*, *nsfw* and *max_results* are
        accepted but not used by this backend.
        """
        with self.browser:
            results = self.browser.iter_search_results(pattern)
        return results

    def fill_video(self, video, fields):
        """Complete *video* with the requested *fields* and return it.

        Unless the thumbnail is the only requested field, the video is
        reloaded through the browser from its page URL.  When the thumbnail
        is requested and the video has one, its data is downloaded.
        """
        thumbnail_only = (fields == ['thumbnail'])
        if not thumbnail_only:
            with self.browser:
                page_url = CappedVideo.id2url(video.id)
                video = self.browser.get_video(page_url, video)

        if 'thumbnail' in fields and video.thumbnail:
            with self.browser:
                raw = self.browser.readurl(video.thumbnail.url)
                video.thumbnail.data = raw

        return video

    # Maps an object class to the method used to fill its instances.
    OBJECTS = {CappedVideo: fill_video}


# ---------------------------------------------------------------------------
# modules/cappedtv/browser.py (new file)
# ---------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# Copyright(C) 2012 Lord
#
# This module is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
import datetime
import urllib

from weboob.capabilities.base import NotAvailable
from weboob.tools.misc import to_unicode
from weboob.tools.browser import BasePage
from weboob.tools.browser import BrokenPageError
from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url
from weboob.capabilities.video import BaseVideo
from weboob.tools.ordereddict import OrderedDict


__all__ = ['CappedBrowser']


def _parse_duration(text):
    """Convert a ``[[H:]M:]S`` string into a ``datetime.timedelta``.

    Raises BrokenPageError when *text* has more than three ``:``-separated
    fields or when a field is not an integer.
    """
    fields = text.split(':')
    if not 1 <= len(fields) <= 3:
        raise BrokenPageError('Unable to parse duration %r' % text)
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        raise BrokenPageError('Unable to parse duration %r' % text)
    # Left-pad with zeros so that missing hours/minutes default to 0.
    hours, minutes, seconds = [0] * (3 - len(numbers)) + numbers
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)


class CappedVideo(BaseVideo):
    """A capped.tv video: never flagged NSFW, always an mp4 file."""

    def __init__(self, *args, **kwargs):
        BaseVideo.__init__(self, *args, **kwargs)
        self.nsfw = False
        self.ext = 'mp4'

    @classmethod
    def id2url(cls, _id):
        """Return the capped.tv page URL for the video *_id*."""
        return 'http://capped.tv/%s' % _id


class IndexPage(BasePage):
    """Parser for the listing pages (home, sorted listings, search)."""

    def iter_videos(self):
        """Yield one CappedVideo per ``div.vidBackdrop`` entry of the page.

        Raises BrokenPageError when the duration of an entry cannot be
        parsed.
        """
        root = self.document.getroot()
        for entry in self.parser.select(root, 'div.vidBackdrop '):
            href = self.parser.select(entry, 'a', 1).attrib['href']
            # The id is the href minus its first two characters
            # (presumably a './' prefix -- TODO confirm against the site).
            _id = href[2:]
            title = self.parser.select(entry, 'div.vidTitle a', 1).text
            author = self.parser.select(entry, 'div.vidAuthor a', 1).text
            thumbnail_url = 'http://cdn.capped.tv/pre/%s.png' % _id

            # The info text starts with seven characters of padding (tabs
            # and spaces according to the original author); the duration is
            # the first space-separated token after it.
            info = self.parser.select(entry, 'div.vidInfo', 1)
            duration_text = info.text[7:].split(' ')[0]

            yield CappedVideo(_id=_id,
                              title=title,
                              author=author,
                              thumbnail_url=thumbnail_url,
                              duration=_parse_duration(duration_text))


class VideoPage(BasePage):
    """Parser for the page of a single video."""

    def get_video(self, video=None):
        """Fill *video* (or a new CappedVideo) from this page and return it.

        The id comes from the ``id`` group of the URL pattern that matched
        this page, the title from the ``<title>`` element.
        """
        _id = to_unicode(self.group_dict['id'])
        if video is None:
            video = CappedVideo(_id)

        title_element = self.parser.select(self.document.getroot(), 'title', 1)
        video.title = to_unicode(title_element.text.strip())

        # Video pages carry no duration (only listing pages do).  Keep a
        # duration that was already parsed from a listing instead of
        # overwriting it when an existing video is refreshed.
        known_duration = getattr(video, 'duration', None)
        if not isinstance(known_duration, datetime.timedelta):
            video.duration = NotAvailable

        video.url = 'http://cdn.capped.tv/vhq/%s.mp4' % _id
        return video


class CappedBrowser(BaseBrowser):
    """Browser for capped.tv: listing/search pages and video pages."""

    DOMAIN = 'capped.tv'
    PROTOCOL = 'http'
    ENCODING = None
    # Order matters: the listing URLs must be tried before the catch-all
    # video pattern.  The named groups use the ``(?P<name>...)`` syntax;
    # ``id`` is the group VideoPage.get_video reads.
    PAGES = OrderedDict((
        # Accept the root URL with or without a trailing slash.
        (r'http://capped\.tv/?', IndexPage),
        (r'http://capped\.tv/newest', IndexPage),
        (r'http://capped\.tv/mostviews', IndexPage),
        (r'http://capped\.tv/leastviews', IndexPage),
        (r'http://capped\.tv/monthtop', IndexPage),
        (r'http://capped\.tv/monthbottom', IndexPage),
        (r'http://capped\.tv/alpha', IndexPage),
        (r'http://capped\.tv/ahpla', IndexPage),
        (r'http://capped\.tv/search\?s\=(?P<pattern>.+)', IndexPage),
        (r'http://capped\.tv/(?P<id>.+)', VideoPage),
    ))

    @id2url(CappedVideo.id2url)
    def get_video(self, url, video=None):
        """Open *url* and return the video parsed from the resulting page.

        *video*, when given, is filled in place instead of creating a new
        object.
        """
        self.location(url)
        assert self.is_on_page(VideoPage), 'Should be on video page.'
        return self.page.get_video(video)

    def iter_search_results(self, pattern):
        """Return an iterator over the videos matching *pattern*.

        An empty *pattern* lists the home page instead of searching.
        """
        if not pattern:
            self.home()
        else:
            query = urllib.quote_plus(pattern.encode('utf-8'))
            self.location('/search?s=%s' % query)
        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()


# ---------------------------------------------------------------------------
# Remainder of the patch, kept for reference only:
#   * modules/cappedtv/favicon.png -- binary hunk, not reproducible as text.
#   * modules/cappedtv/test.py -- only the tail of the hunk survives:
#
#         v = l[0]
#         self.backend.fillobj(v, ('url',))
#         self.assertTrue(v.url and v.url.startwith('http://'), 'URL for video "%s" not found' % (v.id, v.url))
#         self.backend.browser.openurl(v.url)
#
#     NOTE(review): ``startwith`` should be ``startswith`` and the message
#     has a single ``%s`` for a two-element tuple; fix both when test.py is
#     restored.
# ---------------------------------------------------------------------------