Add module TricTrac.tv

Signed-off-by: Benjamin Drieu <bdrieu@april.org>
Signed-off-by: Romain Bignon <romain@symlink.me>
2012-05-03 18:20:39 +02:00 · 2012-05-03 18:20:39 +02:00 · c20bb4e4c0
commit c20bb4e4c0
parent c498d41aef
6 changed files with 332 additions and 0 deletions
--- a/modules/trictractv/__init__.py
+++ b/modules/trictractv/__init__.py
@ -0,0 +1,3 @@
+from .backend import TricTracTVBackend
+
+__all__ = ['TricTracTVBackend']
--- a/modules/trictractv/backend.py
+++ b/modules/trictractv/backend.py
@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+# Copyright(C) 2012  Benjamin Drieu
+#
+# This file is *not yet* part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo
+from weboob.tools.backend import BaseBackend
+
+from .browser import TricTracTVBrowser
+from .video import TricTracTVVideo
+
+
+__all__ = ['TricTracTVBackend']
+
+
+class TricTracTVBackend(BaseBackend, ICapVideo):
+    """Video backend for the TricTrac.tv website.
+
+    Each operation is forwarded to the browser, which is always entered as
+    a context manager around the call.
+    """
+
+    NAME = 'trictractv'
+    MAINTAINER = 'Benjamin Drieu'
+    EMAIL = 'benjamin@drieu.org'
+    VERSION = '0.c'
+    DESCRIPTION = u'TricTrac.tv video website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = TricTracTVBrowser
+
+    def get_video(self, _id):
+        """Return whatever the browser's get_video() gives for ``_id``."""
+        with self.browser:
+            found = self.browser.get_video(_id)
+            return found
+
+    def search_videos(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+        """Return the browser's search results for ``pattern``.
+
+        Only ``pattern`` is forwarded; ``sortby``, ``nsfw`` and
+        ``max_results`` are accepted but not used here.
+        """
+        with self.browser:
+            results = self.browser.search_videos(pattern)
+            return results
+
+    def fill_video(self, video, fields):
+        """Fill ``video`` according to the requested ``fields``.
+
+        Unless ``fields`` is exactly ``['thumbnail']``, the video is
+        (re)loaded through the browser. Afterwards, when 'thumbnail' is
+        requested and the video has a thumbnail, its data is downloaded.
+
+        :returns: the filled video
+        """
+        # Anything other than "thumbnail only" is taken as a request for
+        # every field, so the whole video page is fetched.
+        needs_details = fields != ['thumbnail']
+        if needs_details:
+            page_url = TricTracTVVideo.id2url(video.id)
+            with self.browser:
+                video = self.browser.get_video(page_url, video)
+
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                thumbnail_data = self.browser.readurl(video.thumbnail.url)
+                video.thumbnail.data = thumbnail_data
+
+        return video
+
+    # Maps the video class to the method used to fill its instances.
+    OBJECTS = {TricTracTVVideo: fill_video}
--- a/modules/trictractv/browser.py
+++ b/modules/trictractv/browser.py
@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011-2012  Romain Bignon, Laurent Bachelier, Benjamin Drieu
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import re
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+from .pages import IndexPage, VideoPage
+from .video import TricTracTVVideo
+
+
+__all__ = ['TricTracTVBrowser']
+
+
+class TricTracTVBrowser(BaseBrowser):
+    """Browser for trictrac.tv: loads video pages and the video listing."""
+
+    DOMAIN = 'trictrac.tv'
+    ENCODING = 'ISO-8859-1'
+    # Maps URL patterns to the page class that handles them.
+    PAGES = {r'http://[w\.]*trictrac.tv/': IndexPage,
+             r'http://[w\.]*trictrac.tv/home/listing.php.*': IndexPage,
+             r'http://[w\.]*trictrac.tv/video-(.+)': VideoPage,
+            }
+
+    @id2url(TricTracTVVideo.id2url)
+    def get_video(self, url, video=None):
+        """Load a video page and return the matching video object.
+
+        :param url: location of the video page
+        :param video: existing video to fill; when None, a new
+                      TricTracTVVideo is created from the page id
+        :returns: the video; its details are only filled when the page
+                  exposes an info URL
+        """
+        self.location(url)
+        assert self.is_on_page(VideoPage)
+
+        _id = self.page.get_id()
+        if video is None:
+            video = TricTracTVVideo(_id)
+
+        infourl = self.page.get_info_url()
+        if infourl is not None:
+            self.parse_info(self.openurl(infourl).read(), video)
+
+        return video
+
+    def home(self):
+        """Go to the listing page with '%' as the search word."""
+        self.location(self.buildurl('http://www.trictrac.tv/home/listing.php', mot='%'))
+
+    def search_videos(self, pattern):
+        """Return the videos listed for ``pattern``.
+
+        An empty or None pattern falls back to home().
+
+        :returns: the result of the index page's iter_videos()
+        """
+        if not pattern:
+            self.home()
+        else:
+            # Only unicode objects are encoded: calling encode() on a byte
+            # string makes Python 2 decode it as ASCII first, which fails
+            # on non-ASCII input.
+            if isinstance(pattern, unicode):
+                pattern = pattern.encode('utf-8')
+            self.location(self.buildurl('http://www.trictrac.tv/home/listing.php', mot=pattern))
+
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
+
+    def parse_info(self, data, video):
+        """Fill ``video`` from the info data and from the current page.
+
+        :param data: content read from the info URL; the file name is taken
+                     from its ``fichier=...&`` parameter
+        :param video: video object to fill
+        :returns: the same video object
+        :raises BrokenPageError: when ``data`` holds no ``fichier`` parameter
+        """
+        # Imported locally so this method does not depend on the module's
+        # import list.
+        from weboob.tools.browser import BrokenPageError
+
+        m = re.match('.*fichier=(.*?)&', data)
+        if m is None:
+            # Fail with an explicit error rather than an AttributeError
+            # raised by calling group() on None.
+            raise BrokenPageError('Unable to find the video file name in the info data')
+        video.url = unicode(r'http://src.povcon.net/videos/%s' % m.group(1))
+
+        # The remaining fields are read from self.page (get_video() calls
+        # this method after loading the video page).
+        video.description = self.page.get_descriptif()
+        video.duration = self.page.get_duration()
+        video.title = self.page.get_title()
+        video.date = self.page.get_date()
+        video.rating = self.page.get_rating()
+        video.rating_max = 5
+
+        return video
--- a/modules/trictractv/pages.py
+++ b/modules/trictractv/pages.py
@ -0,0 +1,124 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011-2012  Romain Bignon, Laurent Bachelier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import re
+
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.tools.browser import BasePage, BrokenPageError
+
+
+from .video import TricTracTVVideo
+
+
+__all__ = ['IndexPage', 'VideoPage']
+
+
+class IndexPage(BasePage):
+    """Listing page from which video entries are extracted."""
+
+    def iter_videos(self):
+        """Yield a TricTracTVVideo for every ``li#contentsearch`` entry.
+
+        Each video gets its id (from the entry's ``/video-...`` link), its
+        title, a rating (number of ``.etoile_on`` elements, out of 5) and a
+        thumbnail. Entries whose link does not match are reported with a
+        print and skipped.
+        """
+        root = self.document.getroot()
+        for entry in self.parser.select(root, 'li#contentsearch'):
+            title_node = self.parser.select(entry, '#titlesearch span', 1)
+
+            link = self.parser.select(entry, 'a', 1)
+            href = link.attrib['href']
+            matched = re.match('/video-(.*)', href)
+            if matched is None:
+                print ':( %s' % href
+                continue
+
+            video = TricTracTVVideo(matched.group(1))
+            video.title = unicode(title_node.text)
+
+            thumb_src = self.parser.select(entry, 'img', 1).attrib['src']
+            lit_stars = self.parser.select(entry, '.etoile_on')
+            video.rating = len(lit_stars)
+            video.rating_max = 5
+
+            thumb_url = unicode('http://www.trictrac.tv/%s' % thumb_src)
+            video.thumbnail = Thumbnail(thumb_url)
+
+            yield video
+
+
+class VideoPage(BasePage):
+    """Page of a single video.
+
+    Most getters return None when the selector raises BrokenPageError for
+    the element they look for.
+    """
+
+    def on_loaded(self):
+        """Raise an Exception carrying the text of the first ``p.alert``, if any."""
+        p = self.parser.select(self.document.getroot(), 'p.alert')
+        if len(p) > 0:
+            raise Exception(p[0].text)
+
+    def get_info_url(self):
+        """Return the ``listelement.php`` URL for the embedded player.
+
+        The file id is read from the ``varplaymedia=<digits>`` prefix of the
+        player's ``flashvars`` parameter.
+
+        :returns: the URL, or None when the player object or a matching
+                  ``flashvars`` value is not found
+        """
+        try:
+            div = self.parser.select(self.document.getroot(), '#Content_Video object', 1)
+        except BrokenPageError:
+            return None
+        else:
+            for param in self.parser.select(div, 'param', None):
+                if param.get('name') == 'flashvars':
+                    m = re.match('varplaymedia=([0-9]*)', param.attrib['value'])
+                    if m:
+                        return r'http://www.trictrac.tv/swf/listelement.php?idfile=%s' % m.group(1)
+            return None
+
+    def get_title(self):
+        """Return the text of the ``title`` element, or None if unavailable."""
+        try:
+            title = self.parser.select(self.document.getroot(), 'title', 1)
+        except BrokenPageError:
+            return None
+        else:
+            return title.text
+
+    def get_descriptif(self):
+        """Return the text of ``.video_descriptif p``, or None if unavailable."""
+        try:
+            descriptif = self.parser.select(self.document.getroot(), '.video_descriptif p', 1)
+        except BrokenPageError:
+            return None
+        else:
+            return descriptif.text
+
+    def get_duration(self):
+        """Return the video duration as a datetime.timedelta.
+
+        The duration is read from the third ``div#video_detail div``
+        element. Whatever follows the first colon of its text is parsed as
+        ``MM:SS`` or ``H:MM:SS``.
+        NOTE(review): assumes the text starts with a label ending in a
+        colon -- confirm against the live page.
+
+        :returns: the duration, or None when the element is missing
+        :raises ValueError: when the text does not have the expected shape
+        """
+        try:
+            details = self.parser.select(self.document.getroot(), 'div#video_detail div')
+        except BrokenPageError:
+            return None
+
+        # The duration is the third block; a shorter list or an empty block
+        # means no duration rather than an IndexError/AttributeError.
+        if len(details) < 3 or details[2].text is None:
+            return None
+
+        text = details[2].text
+        # Drop everything up to and including the first colon, then split
+        # the remainder into its numeric components.
+        parts = text[text.find(':') + 1:].split(':')
+        if len(parts) == 2:
+            # No hours component: MM:SS
+            parts.insert(0, '0')
+        if len(parts) != 3:
+            raise ValueError('Unexpected duration format: %r' % text)
+
+        hours, minutes, seconds = [int(part) for part in parts]
+        return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
+
+    def get_date(self):
+        """Return the date of ``div#video_detail div.date`` as a datetime.
+
+        The text is parsed from its last ``'le '`` occurrence with the
+        format ``'le %d %b %Y, %H:%M:%S'``.
+
+        :returns: the parsed datetime, or None when the element is missing
+        """
+        try:
+            date = self.parser.select(self.document.getroot(), 'div#video_detail div.date', 1)
+        except BrokenPageError:
+            return None
+        else:
+            string = date.text
+            string = string [ string.rfind('le ') : ]
+            # NOTE(review): %b depends on the process locale; month names
+            # in another language would make strptime raise ValueError --
+            # confirm what the site actually emits.
+            return datetime.datetime.strptime(string, 'le %d %b %Y, %H:%M:%S')
+
+    def get_rating(self):
+        """Return the number of ``#video_info .etoile_on`` elements."""
+        try:
+            stars = self.parser.select(self.document.getroot(), '#video_info .etoile_on')
+        except BrokenPageError:
+            return None
+        else:
+            return len(stars)
+
+    def get_id(self):
+        """Return the first entry of ``self.groups``.
+
+        NOTE(review): presumably the group captured from the page URL --
+        confirm against the browser's page patterns.
+        """
+        return self.groups[0]
--- a/modules/trictractv/test.py
+++ b/modules/trictractv/test.py
@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011-2012  Romain Bignon, Laurent Bachelier
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+class TricTracTVTest(BackendTest):
+    """Test of the 'trictractv' backend."""
+
+    BACKEND = 'trictractv'
+
+    def test_trictractv(self):
+        """Search for 'TricTrac' and check the first result gets an http URL."""
+        results = list(self.backend.search_videos('TricTrac'))
+        self.assertTrue(len(results) > 0)
+
+        first = results[0]
+        self.backend.fillobj(first, ('url',))
+        failure_message = 'URL for video "%s" not found: %s' % (first.id, first.url)
+        self.assertTrue(first.url and first.url.startswith('http://'), failure_message)
--- a/modules/trictractv/video.py
+++ b/modules/trictractv/video.py
@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+
+__all__ = ['TricTracTVVideo']
+
+
+class TricTracTVVideo(BaseVideo):
+    """Video from TricTrac.tv; every instance gets the ``flv`` extension."""
+
+    def __init__(self, *args, **kwargs):
+        """Initialise the base video with the given arguments, then set ``ext``."""
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = u'flv'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the trictrac.tv page URL for the video id ``_id``."""
+        page_url = 'http://www.trictrac.tv/video-%s' % _id
+        return page_url