implement search for youjizz

2010-04-20 23:51:58 +02:00 · 2010-04-20 23:51:58 +02:00 · 3dfef07141
commit 3dfef07141
parent e79e4bd65c
3 changed files with 98 additions and 8 deletions
--- a/weboob/backends/youjizz/backend.py
+++ b/weboob/backends/youjizz/backend.py
@ -32,22 +32,30 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider):
    LICENSE = 'GPLv3'
    CONFIG = {}
-    browser = None
+    _browser = None
-    def need_browser(func):
+    def __getattr__(self, name):
        """
        Lazily provide the ``browser`` attribute.

        Python calls __getattr__ only when normal attribute lookup fails.
        For the name 'browser', a YoujizzBrowser is created on first use,
        cached in ``self._browser`` and returned; later lookups reuse the
        cached instance.  Any other missing name raises AttributeError.
        """
        if name == 'browser':
            if not self._browser:
                self._browser = YoujizzBrowser()
            return self._browser
        raise AttributeError, name
    def check_url(func):
        """
        Decorator that filters calls on their first positional argument.

        In the added (+) version of the check, the wrapped method returns
        None without calling ``func`` when that argument is a str/unicode
        that is not all digits and does not contain 'youjizz.com'; any
        other call is forwarded to ``func`` unchanged.
        """
        def inner(self, *args, **kwargs):
            # NOTE(review): self.browser already resolves to a YoujizzBrowser
            # through __getattr__, so this guard looks like a leftover -- confirm.
            if not self.browser:
                self.browser = YoujizzBrowser()
            # Raises IndexError when the method is called without positional
            # arguments.
            url = args[0]
-            if u'youjizz.com' not in url:
+            if isinstance(url, (str,unicode)) and not url.isdigit() and u'youjizz.com' not in url:
                return None
            return func(self, *args, **kwargs)
        return inner
-    @need_browser
+    @check_url
    def get_video(self, _id):
        """
        Delegate to the browser's get_video() and return its result.

        ``_id`` is passed through to the browser untouched.
        """
        browser = self.browser
        return browser.get_video(_id)
-    @need_browser
+    @check_url
    def iter_page_urls(self, mozaic_url):
        """
        Delegate to the browser's iter_page_urls() and return its result.

        ``mozaic_url`` is passed through to the browser untouched.
        """
        browser = self.browser
        return browser.iter_page_urls(mozaic_url)
    def iter_search_results(self, pattern=None, sortby=None):
        """
        Return the browser's search results for ``pattern``.

        ``sortby`` is accepted but is not forwarded to the browser.
        """
        search = self.browser.iter_search_results
        return search(pattern)
--- a/weboob/backends/youjizz/browser.py
+++ b/weboob/backends/youjizz/browser.py
@ -20,9 +20,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 from logging import error
 import re
 import urllib
 from weboob.tools.browser import BaseBrowser
 from .pages.index import IndexPage
 from .pages.video import VideoPage
@ -30,7 +32,15 @@ __all__ = ['YoujizzBrowser']
 class YoujizzBrowser(BaseBrowser):
-    PAGES = {r'http://.*youjizz\.com/videos/.+\.html': VideoPage}
+    DOMAIN = 'youjizz.com'
    PROTOCOL = 'http'
    # URL regular expression -> page class.  The site root and the search
    # result listings use IndexPage, video pages use VideoPage.
    # NOTE(review): the first pattern has no end anchor; depending on how
    # BaseBrowser matches these patterns (not visible here) it may also match
    # video and search URLs -- confirm.
    PAGES = {r'http://.*youjizz\.com/?': IndexPage,
             r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
             r'http://.*youjizz\.com/search/.+\.html': IndexPage,
            }
    def id2url(self, _id):
        """
        Build the video page URL for the video id ``_id``.
        """
        url_template = 'http://www.youjizz.com/videos/%s.html'
        return url_template % _id
    def get_video(self, url):
        self.location(url)
@ -38,3 +48,12 @@ class YoujizzBrowser(BaseBrowser):
    def iter_page_urls(self, mozaic_url):
        """
        Placeholder: always raises NotImplementedError.
        """
        raise NotImplementedError
    def iter_search_results(self, pattern):
        """
        Return the videos listed for a search pattern.

        With an empty ``pattern`` the home page is loaded; otherwise the
        first page of search results for ``pattern`` is requested.  The
        return value is whatever the resulting page's iter_videos() returns.

        Raises AssertionError if the browser is not on an IndexPage after
        the request.
        """
        if not pattern:
            self.home()
        else:
            if isinstance(pattern, unicode):
                # Python 2's urllib.quote_plus() raises KeyError on unicode
                # strings holding non-ASCII characters, so quote the encoded
                # bytes instead.
                # NOTE(review): assumes the site expects UTF-8 -- confirm.
                pattern = pattern.encode('utf-8')
            self.location('/search/%s-1.html' % urllib.quote_plus(pattern))

        # Raised explicitly rather than with `assert` so that the check is
        # not stripped when Python runs with -O.
        if not self.is_on_page(IndexPage):
            raise AssertionError('search did not lead to an IndexPage')
        return self.page.iter_videos()
--- a/weboob/backends/youjizz/pages/index.py
+++ b/weboob/backends/youjizz/pages/index.py
@ -0,0 +1,63 @@
 # -*- coding: utf-8 -*-
 """
 Copyright(C) 2010  Roger Philibert
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, version 3 of the License.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 """
 import re
 from weboob.capabilities.video import Video
 from weboob.tools.browser import BasePage
 __all__ = ['IndexPage']
 class IndexPage(BasePage):
    """
    Page listing video thumbnails (used for the home page and for search
    results).
    """

    @staticmethod
    def _parse_duration(text):
        """
        Convert a 'MM:SS' (or 'HH:MM:SS') string into a number of seconds.

        Raises ValueError if a component is not an integer.
        """
        seconds = 0
        for part in text.strip().split(':'):
            seconds = seconds * 60 + int(part)
        return seconds

    def iter_videos(self):
        """
        Yield a Video for each ``span#miniatura`` thumbnail of the page.

        Thumbnails without a link are skipped.  The title is None when the
        thumbnail has no (non-empty) ``span#title1``; the duration is 0 when
        it has no ``span.thumbtime`` holding a time.
        """
        for span in self.document.getroot().cssselect("span#miniatura"):
            a = span.find('.//a')
            if a is None:
                continue

            url = a.attrib['href']
            # Keep only the part between '/videos/' and '.html'.
            _id = re.sub(r'/videos/(.+)\.html', r'\1', url)

            preview_url = span.find('.//img').attrib['src']

            # cssselect() returns a list (possibly empty), never None, so
            # emptiness is what has to be tested before indexing.
            title1 = span.cssselect('span#title1')
            if title1 and title1[0].text:
                title = title1[0].text.strip()
            else:
                title = None

            duration = 0
            thumbtime = span.cssselect('span.thumbtime')
            if thumbtime:
                time_span = thumbtime[0].find('span')
                if time_span is not None and time_span.text:
                    duration = self._parse_duration(time_span.text)

            yield Video(_id,
                        title=title,
                        page_url=self.browser.id2url(_id),
                        duration=duration,
                        preview_url=preview_url,
                        nsfw=True)