diff --git a/modules/trictractv/__init__.py b/modules/trictractv/__init__.py new file mode 100644 index 00000000..ee002cbd --- /dev/null +++ b/modules/trictractv/__init__.py @@ -0,0 +1,3 @@ +from .backend import TricTracTVBackend + +__all__ = ['TricTracTVBackend'] diff --git a/modules/trictractv/backend.py b/modules/trictractv/backend.py new file mode 100644 index 00000000..7dad975e --- /dev/null +++ b/modules/trictractv/backend.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +# Copyright(C) 2011 Romain Bignon +# Copyright(C) 2012 Benjamin Drieu +# +# This file is *not yet* part of weboob. +# +# weboob is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# weboob is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with weboob. If not, see <http://www.gnu.org/licenses/>.
class TricTracTVBackend(BaseBackend, ICapVideo):
    """Video backend for the TricTrac.tv website.

    Every operation is delegated to ``self.browser`` and performed inside a
    ``with self.browser:`` block.
    """

    # Backend metadata.
    NAME = 'trictractv'
    MAINTAINER = 'Benjamin Drieu'
    EMAIL = 'benjamin@drieu.org'
    VERSION = '0.c'
    DESCRIPTION = u'TricTrac.tv video website'
    LICENSE = 'AGPLv3+'
    # Browser class for this backend (presumably what BaseBackend uses to
    # build ``self.browser`` -- confirm against BaseBackend).
    BROWSER = TricTracTVBrowser

    def get_video(self, _id):
        """Return whatever the browser's ``get_video`` yields for ``_id``."""
        with self.browser:
            found = self.browser.get_video(_id)
            return found

    def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        """Return the browser's search results for ``pattern``.

        Only ``pattern`` is forwarded to the browser; ``sortby``, ``nsfw``
        and ``max_results`` are accepted but not used by this method.
        """
        with self.browser:
            results = self.browser.search_videos(pattern)
            return results

    def fill_video(self, video, fields):
        """Complete ``video`` according to the requested ``fields``.

        Unless ``fields`` is exactly ``['thumbnail']``, the video is fetched
        again through the browser.  When ``'thumbnail'`` is requested and
        the video has a thumbnail, the thumbnail data is downloaded as well.
        The (possibly replaced) video object is returned.
        """
        needs_full_fetch = fields != ['thumbnail']
        if needs_full_fetch:
            # Anything beyond the thumbnail alone: reload every field.
            with self.browser:
                page_url = TricTracTVVideo.id2url(video.id)
                video = self.browser.get_video(page_url, video)

        if 'thumbnail' in fields:
            if video.thumbnail:
                with self.browser:
                    video.thumbnail.data = self.browser.readurl(video.thumbnail.url)

        return video

    # Object class -> method used to fill objects of that class.
    OBJECTS = {TricTracTVVideo: fill_video}
class TricTracTVBrowser(BaseBrowser):
    """Browser for trictrac.tv: video lookup and listing search."""

    DOMAIN = 'trictrac.tv'
    ENCODING = 'ISO-8859-1'
    # URL pattern -> page class (presumably used by BaseBrowser to pick the
    # page object for the current location -- confirm).
    PAGES = {r'http://[w\.]*trictrac.tv/': IndexPage,
             r'http://[w\.]*trictrac.tv/home/listing.php.*': IndexPage,
             r'http://[w\.]*trictrac.tv/video-(.+)': VideoPage,
            }

    @id2url(TricTracTVVideo.id2url)
    def get_video(self, url, video=None):
        """Load the video page at ``url`` and return the filled video.

        :param url: location of the video page.
        :param video: existing object to fill; when ``None`` a new
            TricTracTVVideo is created from the ID reported by the page.
        :returns: the video.  Its fields are only filled when the page
            exposes an info URL; otherwise it is returned as is.
        """
        self.location(url)
        assert self.is_on_page(VideoPage)

        _id = self.page.get_id()
        if video is None:
            video = TricTracTVVideo(_id)

        infourl = self.page.get_info_url()
        if infourl is not None:
            # openurl() is used instead of location() so that self.page
            # still refers to the video page while parse_info() runs.
            self.parse_info(self.openurl(infourl).read(), video)

        return video

    def home(self):
        """Go to the listing page with ``%`` as the search word."""
        self.location(self.buildurl('http://www.trictrac.tv/home/listing.php', mot='%'))

    def search_videos(self, pattern):
        """Return the videos listed for ``pattern``.

        An empty or ``None`` pattern falls back to :meth:`home`.
        """
        if not pattern:
            self.home()
        else:
            self.location(self.buildurl('http://www.trictrac.tv/home/listing.php', mot=pattern.encode('utf-8')))

        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()

    def parse_info(self, data, video):
        """Fill ``video`` from the info payload and the current page.

        :param data: text fetched from the info URL; it must contain a
            ``fichier=<name>&`` item giving the media file name.
        :param video: object to fill; it is also returned.
        :raises BrokenPageError: when ``data`` has no ``fichier`` item
            (previously this surfaced as an AttributeError on ``None``).
        """
        m = re.match('.*fichier=(.*?)&', data)
        if m is None:
            # Local import: this module does not import the name at top level.
            from weboob.tools.browser import BrokenPageError
            raise BrokenPageError('Unable to find the media file name ("fichier") in the video information')
        video.url = unicode(r'http://src.povcon.net/videos/%s' % m.group(1))

        video.description = self.page.get_descriptif()
        video.duration = self.page.get_duration()
        video.title = self.page.get_title()
        video.date = self.page.get_date()
        video.rating = self.page.get_rating()
        video.rating_max = 5

        return video
class IndexPage(BasePage):
    """Listing page containing video entries."""

    def iter_videos(self):
        """Yield one TricTracTVVideo per usable entry of the listing.

        Each ``li#contentsearch`` element provides the title, the link to
        the video page (the ID is what follows ``/video-``), the thumbnail
        image and the rating, counted as the number of ``.etoile_on``
        elements.  Entries whose link does not start with ``/video-`` are
        logged and skipped.
        """
        # Function-scope import so the block does not depend on a
        # module-level import of logging.
        import logging
        logger = logging.getLogger(__name__)

        for div in self.parser.select(self.document.getroot(), 'li#contentsearch'):
            title = self.parser.select(div, '#titlesearch span', 1)

            a = self.parser.select(div, 'a', 1)
            url = a.attrib['href']
            m = re.match('/video-(.*)', url)
            if not m:
                # Was a bare debugging print; report through logging instead.
                logger.warning('Skipping listing entry with unexpected link: %s', url)
                continue
            _id = m.group(1)
            video = TricTracTVVideo(_id)
            video.title = unicode(title.text)

            thumbnail_src = self.parser.select(div, 'img', 1).attrib['src']
            stars = self.parser.select(div, '.etoile_on')
            video.rating = len(stars)
            video.rating_max = 5

            video.thumbnail = Thumbnail(unicode('http://www.trictrac.tv/%s' % thumbnail_src))

            yield video


class VideoPage(BasePage):
    """Page of a single video."""

    def on_loaded(self):
        """Raise an Exception carrying the text of the first ``p.alert``, if any."""
        p = self.parser.select(self.document.getroot(), 'p.alert')
        if len(p) > 0:
            raise Exception(p[0].text)

    def get_info_url(self):
        """Return the URL describing the media of this video, or None.

        The media number is read from the ``flashvars`` parameter of the
        ``#Content_Video object`` element, whose value must start with
        ``varplaymedia=<digits>``.  None is returned when the element or a
        matching parameter is missing.
        """
        try:
            div = self.parser.select(self.document.getroot(), '#Content_Video object', 1)
        except BrokenPageError:
            return None

        for param in self.parser.select(div, 'param', None):
            if param.get('name') == 'flashvars':
                m = re.match('varplaymedia=([0-9]*)', param.attrib['value'])
                if m:
                    return r'http://www.trictrac.tv/swf/listelement.php?idfile=%s' % m.group(1)
        return None

    def get_title(self):
        """Return the text of the ``title`` element, or None if not found."""
        try:
            title = self.parser.select(self.document.getroot(), 'title', 1)
        except BrokenPageError:
            return None
        else:
            return title.text

    def get_descriptif(self):
        """Return the text of ``.video_descriptif p``, or None if not found."""
        try:
            descriptif = self.parser.select(self.document.getroot(), '.video_descriptif p', 1)
        except BrokenPageError:
            return None
        else:
            return descriptif.text

    def get_duration(self):
        """Return the video duration as a timedelta, or None.

        The duration is read from the third ``div`` under
        ``div#video_detail``.  The text after the first colon must be
        ``MM:SS`` or ``H:MM:SS``.  None is returned when the element is
        missing or its text does not have that form.
        """
        try:
            details = self.parser.select(self.document.getroot(), 'div#video_detail div')
        except BrokenPageError:
            return None

        if len(details) < 3:
            return None
        text = details[2].text
        if not text:
            return None

        colon = text.find(':')
        if colon == -1:
            return None

        # Split what follows the first colon.  The previous code sliced
        # *from* the colon, so the first field was always empty: the hours
        # branch could never run and H:MM:SS values failed to unpack.
        parts = text[colon + 1:].split(':')
        if len(parts) not in (2, 3):
            return None
        try:
            numbers = [int(part) for part in parts]
        except ValueError:
            return None
        if len(numbers) == 2:
            numbers.insert(0, 0)  # no hours field
        hours, minutes, seconds = numbers
        return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)

    def get_date(self):
        """Return the date parsed from ``div#video_detail div.date``, or None.

        Only the part of the text starting at the last ``'le '`` is parsed.
        """
        try:
            date = self.parser.select(self.document.getroot(), 'div#video_detail div.date', 1)
        except BrokenPageError:
            return None
        else:
            string = date.text
            string = string[string.rfind('le '):]
            # NOTE(review): %b depends on the process locale; confirm the
            # month abbreviations used by the site parse under it.
            return datetime.datetime.strptime(string, 'le %d %b %Y, %H:%M:%S')

    def get_rating(self):
        """Return the number of ``#video_info .etoile_on`` elements."""
        try:
            stars = self.parser.select(self.document.getroot(), '#video_info .etoile_on')
        except BrokenPageError:
            return None
        else:
            return len(stars)

    def get_id(self):
        """Return the first entry of ``self.groups``.

        Presumably the group captured by the URL pattern of this page --
        confirm where ``groups`` is set.
        """
        return self.groups[0]
class TricTracTVTest(BackendTest):
    """Test case for the trictractv backend."""

    BACKEND = 'trictractv'

    def test_trictractv(self):
        """Search for 'TricTrac' and check the first result gets an http URL."""
        videos = list(self.backend.search_videos('TricTrac'))
        self.assertTrue(len(videos) > 0)

        first = videos[0]
        self.backend.fillobj(first, ('url',))

        has_http_url = first.url and first.url.startswith('http://')
        failure = 'URL for video "%s" not found: %s' % (first.id, first.url)
        self.assertTrue(has_http_url, failure)
class TricTracTVVideo(BaseVideo):
    """Video object for trictrac.tv; ``ext`` is always set to ``flv``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to BaseVideo, then set the extension."""
        BaseVideo.__init__(self, *args, **kwargs)
        self.ext = u'flv'

    @classmethod
    def id2url(cls, _id):
        """Return the video page URL for ``_id``."""
        page_url = 'http://www.trictrac.tv/video-%s' % _id
        return page_url