support repositories to manage backends (closes #747)

2012-01-03 12:10:21 +01:00 · 2012-01-03 12:10:21 +01:00 · 14a7a1d362
commit 14a7a1d362
parent ef16a5b726
410 changed files with 1079 additions and 297 deletions
--- a/modules/youjizz/__init__.py
+++ b/modules/youjizz/__init__.py
@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+from .backend import YoujizzBackend
+
+__all__ = ['YoujizzBackend']
--- a/modules/youjizz/backend.py
+++ b/modules/youjizz/backend.py
@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from __future__ import with_statement
+
+from weboob.capabilities.video import ICapVideo
+from weboob.tools.backend import BaseBackend
+
+from .browser import YoujizzBrowser
+from .video import YoujizzVideo
+
+
+__all__ = ['YoujizzBackend']
+
+
+class YoujizzBackend(BaseBackend, ICapVideo):
+    """Video backend for the Youjizz website.
+
+    Every call into the browser is made inside a ``with self.browser`` block.
+    """
+
+    NAME = 'youjizz'
+    MAINTAINER = 'Roger Philibert'
+    EMAIL = 'roger.philibert@gmail.com'
+    VERSION = '0.a'
+    DESCRIPTION = 'Youjizz videos website'
+    LICENSE = 'AGPLv3+'
+    BROWSER = YoujizzBrowser
+
+    def get_video(self, _id):
+        """Return the video object the browser builds for ``_id``."""
+        with self.browser:
+            video = self.browser.get_video(_id)
+        return video
+
+    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
+        """Search for videos matching ``pattern``.
+
+        ``sortby`` and ``max_results`` are accepted but not used by this
+        method.
+
+        Returns an empty set when ``nsfw`` is false, otherwise whatever the
+        browser's ``iter_search_results`` returns for ``pattern``.
+        """
+        if not nsfw:
+            # Results are only returned when NSFW content was explicitly
+            # requested.
+            return set()
+        with self.browser:
+            return self.browser.iter_search_results(pattern)
+
+    def fill_video(self, video, fields):
+        """Fill the requested ``fields`` of ``video`` and return it.
+
+        ``fields`` may be any iterable of field names (list, tuple, ...).
+        """
+        # Normalise to a list: a tuple such as ('thumbnail',) never compares
+        # equal to ['thumbnail'], which would trigger a useless page fetch
+        # for a thumbnail-only request.
+        fields = list(fields)
+        if fields != ['thumbnail']:
+            # if we don't want only the thumbnail, we probably want also every fields
+            with self.browser:
+                video = self.browser.get_video(YoujizzVideo.id2url(video.id), video)
+        if 'thumbnail' in fields and video.thumbnail:
+            with self.browser:
+                video.thumbnail.data = self.browser.readurl(video.thumbnail.url)
+
+        return video
+
+    # Maps an object class to the method able to fill it
+    # (presumably consumed by the framework's fillobj -- confirm in BaseBackend).
+    OBJECTS = {YoujizzVideo: fill_video}
--- a/modules/youjizz/browser.py
+++ b/modules/youjizz/browser.py
@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import urllib
+
+from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser.decorators import id2url
+
+from .pages.index import IndexPage
+from .pages.video import VideoPage
+from .video import YoujizzVideo
+
+
+__all__ = ['YoujizzBrowser']
+
+
+class YoujizzBrowser(BaseBrowser):
+    """Browser for youjizz.com, mapping site URLs to page classes."""
+
+    DOMAIN = 'youjizz.com'
+    ENCODING = None
+    # URL pattern -> page class handling it.
+    PAGES = {
+        r'http://.*youjizz\.com/?': IndexPage,
+        r'http://.*youjizz\.com/index.php': IndexPage,
+        r'http://.*youjizz\.com/search/(?P<pattern>.+)\.html': IndexPage,
+        r'http://.*youjizz\.com/videos/(?P<id>.+)\.html': VideoPage,
+    }
+
+    @id2url(YoujizzVideo.id2url)
+    def get_video(self, url, video=None):
+        """Open ``url`` and return the video parsed from the resulting page.
+
+        ``video`` is handed over unchanged to the page's ``get_video``.
+        """
+        self.location(url)
+        assert self.is_on_page(VideoPage), 'Should be on video page.'
+        return self.page.get_video(video)
+
+    def iter_search_results(self, pattern):
+        """Return the videos listed for ``pattern``.
+
+        With an empty ``pattern`` the home page is loaded instead of a
+        search page.
+        """
+        if pattern:
+            quoted = urllib.quote_plus(pattern.encode('utf-8'))
+            self.location('/search/%s-1.html' % quoted)
+        else:
+            self.home()
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
--- a/modules/youjizz/favicon.png
+++ b/modules/youjizz/favicon.png
--- a/modules/youjizz/pages/__init__.py
+++ b/modules/youjizz/pages/__init__.py
--- a/modules/youjizz/pages/index.py
+++ b/modules/youjizz/pages/index.py
@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import datetime
+import re
+
+from weboob.tools.browser import BasePage
+from weboob.tools.browser import BrokenPageError
+
+from ..video import YoujizzVideo
+
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    """Page listing videos as thumbnails."""
+
+    def iter_videos(self):
+        """Yield one ``YoujizzVideo`` per ``span#miniatura`` element.
+
+        Raises ``BrokenPageError`` when a duration text is neither ``N/A``
+        nor contains a ``:`` separator.
+        """
+        root = self.document.getroot()
+        for thumb in self.parser.select(root, 'span#miniatura'):
+            link = self.parser.select(thumb, 'a', 1)
+            # Keep only what sits between '/videos/' and '.html' as the id.
+            video_id = re.sub(r'/videos/(.+)\.html', r'\1', link.attrib['href'])
+
+            thumbnail_url = thumb.find('.//img').attrib['src']
+
+            title = self.parser.select(thumb, 'span#title1', 1).text.strip()
+
+            time_el = self.parser.select(thumb, 'span.thumbtime span', 1)
+            # A ';' separator is treated exactly like ':'.
+            time_txt = time_el.text.strip().replace(';', ':')
+            if time_txt == 'N/A':
+                duration = datetime.timedelta(minutes=0, seconds=0)
+            elif ':' not in time_txt:
+                raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
+            else:
+                minutes, seconds = (int(part) for part in time_txt.split(':'))
+                duration = datetime.timedelta(minutes=minutes, seconds=seconds)
+
+            yield YoujizzVideo(video_id,
+                               title=title,
+                               duration=duration,
+                               thumbnail_url=thumbnail_url,
+                               )
--- a/modules/youjizz/pages/video.py
+++ b/modules/youjizz/pages/video.py
@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+import datetime
+import lxml.html
+import re
+
+from weboob.capabilities.base import NotAvailable
+from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.tools.misc import to_unicode
+
+from ..video import YoujizzVideo
+
+
+__all__ = ['VideoPage']
+
+
+class VideoPage(BasePage):
+    """Page of a single video."""
+
+    def get_video(self, video=None):
+        """Fill ``video`` (or a new ``YoujizzVideo``) from this page.
+
+        Sets the title, the duration and the URL of the video file, then
+        returns the video.
+
+        Raises ``BrokenPageError`` when the runtime cannot be located, or
+        when zero or several candidate file URLs are found.
+        """
+        video_id = to_unicode(self.group_dict['id'])
+        if video is None:
+            video = YoujizzVideo(video_id)
+
+        root = self.document.getroot()
+        video.title = to_unicode(self.parser.select(root, 'title', 1).text.strip())
+
+        # youjizz HTML is crap, we must parse it with regexps
+        html = lxml.html.tostring(self.document.getroot())
+
+        runtime = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)<br.*>', html)
+        if not runtime:
+            raise BrokenPageError('Unable to retrieve video duration')
+        runtime_txt = runtime.group(1).strip()
+        if runtime_txt == 'Unknown':
+            video.duration = NotAvailable
+        else:
+            minutes, seconds = (int(part) for part in to_unicode(runtime_txt).split(':'))
+            video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
+
+        # Exactly one .flv URL is expected; anything else is a broken page.
+        candidates = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv)[\?"]', html)
+        if len(candidates) == 0:
+            raise BrokenPageError('Video URL not found')
+        if len(candidates) > 1:
+            raise BrokenPageError('Many video file URL found')
+        video.url = candidates[0]
+
+        return video
+
--- a/modules/youjizz/test.py
+++ b/modules/youjizz/test.py
@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Romain Bignon
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.tools.test import BackendTest
+
+class YoujizzTest(BackendTest):
+    """Checks of the youjizz backend: search and URL filling."""
+
+    BACKEND = 'youjizz'
+
+    def test_youjizz(self):
+        """Search without and with NSFW, then fill the URL of the first hit."""
+        # Without the NSFW flag nothing may be returned.
+        safe_results = self.backend.iter_search_results('anus', nsfw=False)
+        self.assertTrue(len(safe_results) == 0)
+
+        results = list(self.backend.iter_search_results('sex', nsfw=True))
+        self.assertTrue(len(results) > 0)
+
+        first = results[0]
+        self.backend.fillobj(first, ('url',))
+        message = 'URL for video "%s" not found: %s' % (first.id, first.url)
+        self.assertTrue(first.url and first.url.startswith('http://'), message)
--- a/modules/youjizz/video.py
+++ b/modules/youjizz/video.py
@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+# Copyright(C) 2010-2011 Roger Philibert
+#
+# This file is part of weboob.
+#
+# weboob is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# weboob is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with weboob. If not, see <http://www.gnu.org/licenses/>.
+
+
+from weboob.capabilities.video import BaseVideo
+
+
+__all__ = ['YoujizzVideo']
+
+
+class YoujizzVideo(BaseVideo):
+    """Video object for Youjizz: always flagged NSFW, with 'flv' extension."""
+
+    def __init__(self, *args, **kwargs):
+        """Forward all arguments to ``BaseVideo``, then set ``nsfw`` and ``ext``."""
+        BaseVideo.__init__(self, *args, **kwargs)
+        self.nsfw = True
+        self.ext = 'flv'
+
+    @classmethod
+    def id2url(cls, _id):
+        """Return the video page URL built from ``_id``."""
+        url_template = 'http://www.youjizz.com/videos/%s.html'
+        return url_template % _id