implement search for youjizz

2010-04-20 23:51:58 +02:00 · 2010-04-20 23:51:58 +02:00 · 3dfef07141
commit 3dfef07141
parent e79e4bd65c
3 changed files with 98 additions and 8 deletions
--- a/weboob/backends/youjizz/backend.py
+++ b/weboob/backends/youjizz/backend.py
@@ -32,22 +32,30 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider):
    LICENSE = 'GPLv3'

    CONFIG = {}
-    browser = None
+    _browser = None

-    def need_browser(func):
+    def __getattr__(self, name):
+        """Create the YoujizzBrowser lazily, on first access to 'browser'."""
+        if name != 'browser':
+            raise AttributeError(name)
+        self._browser = self._browser or YoujizzBrowser()
+        return self._browser
+
+    def check_url(func):
        def inner(self, *args, **kwargs):
-            if not self.browser:
-                self.browser = YoujizzBrowser()
            url = args[0]
-            if u'youjizz.com' not in url:
+            if isinstance(url, (str,unicode)) and not url.isdigit() and u'youjizz.com' not in url:
                return None
            return func(self, *args, **kwargs)
        return inner

-    @need_browser
+    @check_url
    def get_video(self, _id):
        return self.browser.get_video(_id)

-    @need_browser
+    @check_url
    def iter_page_urls(self, mozaic_url):
        return self.browser.iter_page_urls(mozaic_url)
+
+    def iter_search_results(self, pattern=None, sortby=None):
+        return self.browser.iter_search_results(pattern)  # NOTE: sortby is accepted but not forwarded
--- a/weboob/backends/youjizz/browser.py
+++ b/weboob/backends/youjizz/browser.py
@@ -20,9 +20,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 from logging import error
 import re
+import urllib

 from weboob.tools.browser import BaseBrowser

+from .pages.index import IndexPage
 from .pages.video import VideoPage


@@ -30,7 +32,15 @@ __all__ = ['YoujizzBrowser']


 class YoujizzBrowser(BaseBrowser):
-    PAGES = {r'http://.*youjizz\.com/videos/.+\.html': VideoPage}
+    DOMAIN = 'youjizz.com'
+    PROTOCOL = 'http'
+    PAGES = {r'http://.*youjizz\.com/?': IndexPage,
+             r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
+             r'http://.*youjizz\.com/search/.+\.html': IndexPage,
+            }
+
+    def id2url(self, _id):
+        return 'http://www.youjizz.com/videos/%s.html' % _id  # video page URL for this id

    def get_video(self, url):
        self.location(url)
@@ -38,3 +48,12 @@ class YoujizzBrowser(BaseBrowser):

    def iter_page_urls(self, mozaic_url):
        raise NotImplementedError()
+
+    def iter_search_results(self, pattern):
+        """Return videos of the home page, or of a search if pattern is set."""
+        if pattern:
+            self.location('/search/%s-1.html' % (urllib.quote_plus(pattern)))
+        else:
+            self.home()
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
--- a/weboob/backends/youjizz/pages/index.py
+++ b/weboob/backends/youjizz/pages/index.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+"""
+Copyright(C) 2010  Roger Philibert
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""
+
+import re
+
+from weboob.capabilities.video import Video
+from weboob.tools.browser import BasePage
+
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    """Video listing page: the site home page or a search results page."""

+    def iter_videos(self):
+        """Yield a Video for each thumbnail block that contains a link."""
+        for span in self.document.getroot().cssselect("span#miniatura"):
+            a = span.find('.//a')
+            if a is None:
+                continue
+            url = a.attrib['href']
+            # Keep only the part between '/videos/' and '.html' as the id.
+            _id = re.sub(r'/videos/(.+)\.html', r'\1', url)

+            preview_url = span.find('.//img').attrib['src']

+            # cssselect() returns a list, never None: test emptiness so a
+            # missing title or duration no longer raises IndexError.
+            title1 = span.cssselect('span#title1')
+            title = title1[0].text.strip() if title1 else None

+            duration = 0
+            thumbtime = span.cssselect('span.thumbtime')
+            if thumbtime:
+                # The duration text has the form "minutes:seconds".
+                time_span = thumbtime[0].find('span')
+                minutes, seconds = time_span.text.strip().split(':')
+                duration = 60 * int(minutes) + int(seconds)

+            yield Video(_id,
+                        title=title,
+                        page_url=self.browser.id2url(_id),
+                        duration=duration,
+                        preview_url=preview_url,
+                        nsfw=True)