diff --git a/modules/cappedtv/__init__.py b/modules/cappedtv/__init__.py
new file mode 100644
index 00000000..3030b9c1
--- /dev/null
+++ b/modules/cappedtv/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+from .backend import CappedBackend
+
+__all__ = ['CappedBackend']
diff --git a/modules/cappedtv/backend.py b/modules/cappedtv/backend.py
new file mode 100644
index 00000000..88158614
--- /dev/null
+++ b/modules/cappedtv/backend.py
@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lord
+#
+# This module is free software. It comes without any warranty, to
+# the extent permitted by applicable law. You can redistribute it
+# and/or modify it under the terms of the Do What The Fuck You Want
+# To Public License, Version 2, as published by Sam Hocevar. See
+# http://sam.zoy.org/wtfpl/COPYING for more details.
+
+
+from weboob.capabilities.video import ICapVideo
+from weboob.tools.backend import BaseBackend
+from .browser import CappedBrowser, CappedVideo
+
+
+__all__ = ['CappedBackend']
+
+
+class CappedBackend(BaseBackend, ICapVideo):
+    """Video backend that delegates every request to CappedBrowser."""
+
+    NAME = 'cappedtv'
+    MAINTAINER = 'Lord'
+    EMAIL = 'lord@lordtoniok.com'
+    VERSION = '0.b'
+    DESCRIPTION = 'Capped.tv demoscene website'
+    LICENSE = 'WTFPLv2'
+    BROWSER = CappedBrowser
+
+    def get_video(self, _id):
+        """Return the video identified by _id, as built by the browser."""
+        with self.browser:
+            return self.browser.get_video(_id)
+
+    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=None, max_results=None):
+        """Return the browser's search results for pattern.
+
+        Only pattern is forwarded to the browser; sortby, nsfw and
+        max_results are accepted but not used here.
+        """
+        with self.browser:
+            return self.browser.iter_search_results(pattern)
+
+    def fill_video(self, video, fields):
+        """Load the requested fields of video and return it.
+
+        The video page is fetched unless the thumbnail is the only field
+        requested; the thumbnail data is downloaded only when 'thumbnail'
+        is requested and the video has a thumbnail.
+        """
+        if fields != ['thumbnail']:
+            with self.browser:
+                video = self.browser.get_video(CappedVideo.id2url(video.id), video)
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    # Associates CappedVideo with fill_video (presumably used by fillobj to pick the filler).
+    OBJECTS = {CappedVideo: fill_video}
diff --git a/modules/cappedtv/browser.py b/modules/cappedtv/browser.py
new file mode 100644
index 00000000..32df6aac
--- /dev/null
+++ b/modules/cappedtv/browser.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lord
+#
+# This module is free software. It comes without any warranty, to
+# the extent permitted by applicable law. You can redistribute it
+# and/or modify it under the terms of the Do What The Fuck You Want
+# To Public License, Version 2, as published by Sam Hocevar. See
+# http://sam.zoy.org/wtfpl/COPYING for more details.
+
+
+import urllib
+import datetime
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.misc import to_unicode
+from weboob.tools.browser import BasePage
+from weboob.tools.browser import BrokenPageError
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+from weboob.capabilities.video import BaseVideo
+from weboob.tools.ordereddict import OrderedDict
+
+
+__all__ = ['CappedBrowser']
+
+
+class CappedVideo(BaseVideo):
+    """BaseVideo whose nsfw flag is always False and whose ext is 'mp4'."""
+
+    def __init__(self, *args, **kwargs):
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.nsfw = False
+        self.ext = 'mp4'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the capped.tv page URL for the video id _id."""
+        return 'http://capped.tv/%s' % _id
+
+
+# parser for search pages
+class IndexPage(BasePage):
+    """Listing page (home, sorted listings, search results)."""
+
+    def iter_videos(self):
+        """Yield one CappedVideo per 'div.vidBackdrop' entry of the page.
+
+        Raises BrokenPageError when an entry's duration does not have
+        one to three colon-separated fields.
+        """
+        # Extracting metadata from the results page
+        vidbackdrop_list = self.parser.select(self.document.getroot(), 'div.vidBackdrop ')
+        for vidbackdrop in vidbackdrop_list:
+            url = self.parser.select(vidbackdrop, 'a', 1).attrib['href']
+            # the id is the href minus its first two characters
+            _id = url[2:]
+            title = self.parser.select(vidbackdrop, 'div.vidTitle a', 1).text
+            author = self.parser.select(vidbackdrop, 'div.vidAuthor a', 1).text
+            thumbnail_url = 'http://cdn.capped.tv/pre/%s.png' % _id
+            # we get the description field
+            duration_tmp = self.parser.select(vidbackdrop, 'div.vidInfo', 1)
+            # we skip the first seven characters (leading tabs and spaces)
+            duration_tmp2 = duration_tmp.text[7:]
+            # we remove all fields except the time
+            duration_tmp3 = duration_tmp2.split(' ')[0]
+            # we transform it into a timedelta
+            parts = duration_tmp3.split(':')
+            if len(parts) == 1:
+                hours = minutes = 0
+                seconds = parts[0]
+            elif len(parts) == 2:
+                hours = 0
+                minutes, seconds = parts
+            elif len(parts) == 3:
+                hours, minutes, seconds = parts
+            else:
+                # report the offending text rather than the element object
+                raise BrokenPageError('Unable to parse duration %r' % duration_tmp3)
+
+            duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))
+            yield CappedVideo(_id=_id, title=title, author=author,
+                              thumbnail_url=thumbnail_url, duration=duration)
+
+
+# parser for the video page
+class VideoPage(BasePage):
+    """Page of a single video."""
+
+    def get_video(self, video=None):
+        """Fill video (or a new CappedVideo) from this page and return it.
+
+        The id is read from self.group_dict['id'], which is expected to hold
+        the 'id' named group of the URL pattern registered for this page.
+        """
+        _id = to_unicode(self.group_dict['id'])
+        if video is None:
+            video = CappedVideo(_id)
+        title_tmp = self.parser.select(self.document.getroot(), 'title', 1)
+        video.title = to_unicode(title_tmp.text.strip())
+
+        # Video pages don't have duration information (only results pages)
+        video.duration = NotAvailable
+        video.url = 'http://cdn.capped.tv/vhq/%s.mp4' % _id
+        return video
+
+
+class CappedBrowser(BaseBrowser):
+    """Browser for capped.tv listing, search and video pages."""
+
+    DOMAIN = 'capped.tv'
+    PROTOCOL = 'http'
+    ENCODING = None
+    # 'id' is the group VideoPage.get_video reads from group_dict. The generic
+    # video pattern is listed last, presumably so listing URLs match first.
+    PAGES = OrderedDict((
+        (r'http://capped\.tv', IndexPage),
+        (r'http://capped\.tv/newest', IndexPage),
+        (r'http://capped\.tv/mostviews', IndexPage),
+        (r'http://capped\.tv/leastviews', IndexPage),
+        (r'http://capped\.tv/monthtop', IndexPage),
+        (r'http://capped\.tv/monthbottom', IndexPage),
+        (r'http://capped\.tv/alpha', IndexPage),
+        (r'http://capped\.tv/ahpla', IndexPage),
+        (r'http://capped\.tv/search\?s\=(?P<pattern>.+)', IndexPage),
+        (r'http://capped\.tv/(?P<id>.+)', VideoPage),
+    ))
+
+    @id2url(CappedVideo.id2url)
+    def get_video(self, url, video=None):
+        """Open url and return the video parsed from the video page."""
+        self.location(url)
+        assert self.is_on_page(VideoPage), 'Should be on video page.'
+        return self.page.get_video(video)
+
+    def iter_search_results(self, pattern):
+        """Return the videos listed for pattern, or the home page's when empty."""
+        if not pattern:
+            self.home()
+        else:
+            self.location('/search?s=%s' % (urllib.quote_plus(pattern.encode('utf-8'))))
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
diff --git a/modules/cappedtv/favicon.png b/modules/cappedtv/favicon.png
new file mode 100755
index 00000000..d37866d5
Binary files /dev/null and b/modules/cappedtv/favicon.png differ
diff --git a/modules/cappedtv/test.py b/modules/cappedtv/test.py
new file mode 100644
index 00000000..5bd4e30e
--- /dev/null
+++ b/modules/cappedtv/test.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2012 Lord
+#
+# This module is free software. It comes without any warranty, to
+# the extent permitted by applicable law. You can redistribute it
+# and/or modify it under the terms of the Do What The Fuck You Want
+# To Public License, Version 2, as published by Sam Hocevar. See
+# http://sam.zoy.org/wtfpl/COPYING for more details.
+
+
+from weboob.tools.test import BackendTest
+
+
+__all__ = ['CappedTest']
+
+
+class CappedTest(BackendTest):
+    """Smoke test of the cappedtv backend."""
+
+    BACKEND = 'cappedtv'
+
+    def test_capped(self):
+        """Search, fill the first result's URL, then open that URL."""
+        results = list(self.backend.iter_search_results('kewlers'))
+        self.assertTrue(len(results) > 0)
+        video = results[0]
+        self.backend.fillobj(video, ('url',))
+        self.assertTrue(video.url and video.url.startswith('http://'), 'URL for video "%s" not found: %s' % (video.id, video.url))
+        self.backend.browser.openurl(video.url)