new backend pluzz (closes #426)

2011-06-25 12:58:23 +02:00 · 2011-06-25 12:58:23 +02:00 · 73ec029b43
commit 73ec029b43
parent aa6772a32b
6 changed files with 269 additions and 0 deletions
--- a/weboob/backends/pluzz/__init__.py
+++ b/weboob/backends/pluzz/__init__.py
@ -0,0 +1,3 @@
+from .backend import PluzzBackend
+
+# Names exported by a star-import of this package.
+__all__ = ['PluzzBackend']
--- a/weboob/backends/pluzz/backend.py
+++ b/weboob/backends/pluzz/backend.py
@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo
+from weboob.tools.backend import BaseBackend
+
+from .browser import PluzzBrowser
+from .video import PluzzVideo
+
+
+# Names exported by a star-import of this module.
+__all__ = ['PluzzBackend']
+
+
+class PluzzBackend(BaseBackend, ICapVideo):
+    """
+    Video backend for the pluzz website.
+
+    Every operation is delegated to the PluzzBrowser instance, which is
+    entered as a context manager around each browser call.
+    """
+
+    NAME = 'pluzz'
+    MAINTAINER = 'Romain Bignon'
+    EMAIL = 'romain@weboob.org'
+    VERSION = '0.9'
+    DESCRIPTION = 'France Television video website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = PluzzBrowser
+
+    def get_video(self, _id):
+        """Return the video the browser builds for ``_id``."""
+        with self.browser:
+            found = self.browser.get_video(_id)
+            return found
+
+    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+        """
+        Return the browser's search results for ``pattern``.
+
+        Only ``pattern`` is forwarded to the browser; ``sortby``, ``nsfw``
+        and ``max_results`` are accepted but not used here.
+        """
+        with self.browser:
+            results = self.browser.iter_search_results(pattern)
+            return results
+
+    def fill_video(self, video, fields):
+        """
+        Complete ``video`` with the requested ``fields`` and return it.
+
+        Unless ``fields`` is exactly ``['thumbnail']``, the whole video is
+        fetched again from its page. The thumbnail data is downloaded when
+        'thumbnail' is requested and the video has a thumbnail.
+        """
+        needs_full_fetch = fields != ['thumbnail']
+        if needs_full_fetch:
+            # Anything beyond the thumbnail: reload every field at once.
+            with self.browser:
+                page_url = PluzzVideo.id2url(video.id)
+                video = self.browser.get_video(page_url, video)
+
+        wants_thumbnail = 'thumbnail' in fields
+        if wants_thumbnail and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    # Associates the object class with the method that fills it
+    # (presumably looked up by the base backend's fillobj -- to confirm).
+    OBJECTS = {PluzzVideo: fill_video}
--- a/weboob/backends/pluzz/browser.py
+++ b/weboob/backends/pluzz/browser.py
@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from urllib import quote_plus
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+from .pages import IndexPage, VideoPage, MetaVideoPage
+from .video import PluzzVideo
+
+
+# Names exported by a star-import of this module.
+__all__ = ['PluzzBrowser']
+
+
+class PluzzBrowser(BaseBrowser):
+    """
+    Browser for pluzz.fr.
+
+    PAGES associates URL patterns with the page classes that parse them;
+    get_video() and iter_search_results() drive the navigation.
+    """
+
+    DOMAIN = 'pluzz.fr'
+    ENCODING = None
+    # Dots in host and file names are escaped so they only match a literal
+    # '.', like the francetelevisions pattern below.
+    PAGES = {r'http://[w\.]*pluzz\.fr/?': IndexPage,
+             r'http://[w\.]*pluzz\.fr/recherche\.html.*': IndexPage,
+             r'http://[w\.]*pluzz\.fr/[-\w]+/.*': IndexPage,
+             # The negative lookahead keeps the search page out of VideoPage.
+             r'http://[w\.]*pluzz\.fr/((?!recherche).+)\.html': VideoPage,
+             r'http://info\.francetelevisions\.fr/\?id-video=.*': MetaVideoPage,
+            }
+
+    @id2url(PluzzVideo.id2url)
+    def get_video(self, url, video=None):
+        """
+        Load the video page at ``url``, then its metadata page, and return
+        the video built from that metadata.
+
+        ``video``, when given, is passed to MetaVideoPage.get_video() so an
+        existing object is filled instead of a new one being created.
+        Raises AssertionError if either location does not land on the
+        expected page class.
+        """
+        self.location(url)
+        assert self.is_on_page(VideoPage)
+
+        # Read both values from the video page before moving to the
+        # metadata URL (self.page presumably changes on location()).
+        metaurl = self.page.get_meta_url()
+        video_id = self.page.get_id()
+
+        self.location(metaurl)
+        assert self.is_on_page(MetaVideoPage)
+
+        return self.page.get_video(video_id, video)
+
+    def iter_search_results(self, pattern):
+        """
+        Return the videos listed for ``pattern``.
+
+        With an empty ``pattern`` the home page is used instead of the
+        search page. Raises AssertionError if the resulting page is not an
+        IndexPage.
+        """
+        if not pattern:
+            self.home()
+        else:
+            self.location(self.buildurl('recherche.html', q=pattern))
+
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
--- a/weboob/backends/pluzz/pages.py
+++ b/weboob/backends/pluzz/pages.py
@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+import datetime
+import urllib
+import re
+
+from weboob.tools.capabilities.thumbnail import Thumbnail
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.misc import html2text
+from weboob.tools.browser import BasePage, BrokenPageError
+
+
+from .video import PluzzVideo
+
+
+# All three page classes defined in this module are exported;
+# MetaVideoPage is imported by name from the browser module.
+__all__ = ['IndexPage', 'VideoPage', 'MetaVideoPage']
+
+
+class IndexPage(BasePage):
+    """Page listing several videos as ``li.vignette`` entries."""
+
+    def iter_videos(self):
+        """
+        Yield a PluzzVideo for each ``li.vignette`` entry of the page.
+
+        Entries whose link is not of the form
+        ``http://www.pluzz.fr/<id>.html`` are skipped. Each yielded video
+        has its id, title and thumbnail set; its date is set only when the
+        entry's ``p.date`` text starts with a day/month/year triple.
+        """
+        for item in self.parser.select(self.document.getroot(), 'li.vignette'):
+            # The same link provides both the id (from href) and the title.
+            link = self.parser.select(item, 'h4 a', 1)
+            id_match = re.match(r'http://www\.pluzz\.fr/([^/]+)\.html', link.attrib['href'])
+            if not id_match:
+                # Not a video link: skip it without writing to stdout.
+                continue
+
+            video = PluzzVideo(id_match.group(1))
+            video.title = link.text
+
+            date_match = re.match(r'(\d+)/(\d+)/(\d+)', self.parser.select(item, 'p.date', 1).text)
+            if date_match:
+                day, month, year = [int(part) for part in date_match.groups()]
+                video.date = datetime.datetime(year, month, day)
+
+            thumbnail_path = self.parser.select(item, 'img.illustration', 1).attrib['src']
+            video.thumbnail = Thumbnail('http://www.pluzz.fr/%s' % thumbnail_path)
+
+            yield video
+
+class VideoPage(BasePage):
+    """Page of a single video, giving access to its metadata URL and id."""
+
+    def get_meta_url(self):
+        """Return the ``href`` of the ``a#current_video`` link."""
+        root = self.document.getroot()
+        link = self.parser.select(root, 'a#current_video', 1)
+        href = link.attrib['href']
+        return href
+
+    def get_id(self):
+        """
+        Return the first element of ``self.groups``.
+
+        (Presumably the group captured by the URL pattern that selected
+        this page -- to confirm against the browser's PAGES.)
+        """
+        return self.groups[0]
+
+class MetaVideoPage(BasePage):
+    """Page whose ``<meta>`` tags carry the details of a video."""
+
+    def get_meta(self, name):
+        """Return the ``content`` attribute of the ``<meta>`` tag named ``name``."""
+        root = self.document.getroot()
+        selector = 'meta[name=%s]' % name
+        tag = self.parser.select(root, selector, 1)
+        return tag.attrib['content']
+
+    def get_video(self, id, video=None):
+        """
+        Fill a video from the page's meta tags and return it.
+
+        A new PluzzVideo is created from ``id`` when ``video`` is None;
+        otherwise the given object is updated in place. Title, URL,
+        description and duration are set, in that order.
+        """
+        if video is None:
+            video = PluzzVideo(id)
+
+        video.title = self.get_meta('vignette-titre-court')
+
+        stream_path = self.get_meta('urls-url-video')
+        video.url = 'mms://videozones.francetv.fr/%s' % stream_path
+
+        video.description = self.get_meta('description')
+
+        # The duration meta is split on ':' into exactly three numbers.
+        raw_duration = self.get_meta('vignette-duree')
+        hours, minutes, seconds = [int(part) for part in raw_duration.split(':')]
+        video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
+
+        return video
--- a/weboob/backends/pluzz/test.py
+++ b/weboob/backends/pluzz/test.py
@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+class PluzzTest(BackendTest):
+    # Functional test of the 'pluzz' backend: search, then fill the URL.
+    BACKEND = 'pluzz'
+
+    def test_pluzz(self):
+        # The search for 'jt' must return at least one video.
+        results = list(self.backend.iter_search_results('jt'))
+        self.assertTrue(len(results) > 0)
+
+        # Filling the first result must yield an mms:// stream URL.
+        first = results[0]
+        self.backend.fillobj(first, ('url',))
+        has_stream_url = first.url and first.url.startswith('mms://')
+        self.assertTrue(has_stream_url, 'URL for video "%s" not found: %s' % (first.id, first.url))
--- a/weboob/backends/pluzz/video.py
+++ b/weboob/backends/pluzz/video.py
@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2011  Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+
+# Names exported by a star-import of this module.
+__all__ = ['PluzzVideo']
+
+
+class PluzzVideo(BaseVideo):
+    """Video from pluzz.fr; its ``ext`` attribute is always 'wmv'."""
+
+    def __init__(self, *args, **kwargs):
+        # All arguments go unchanged to BaseVideo; only the extension is
+        # set on top of that.
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.ext = 'wmv'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the pluzz.fr page URL for the video id ``_id``."""
+        page_url = 'http://www.pluzz.fr/%s.html' % _id
+        return page_url