class YoujizzBackend(BaseBackend, ICapVideoProvider):
    """Video backend for youjizz.com backed by a lazily created YoujizzBrowser.

    ``self.browser`` is not a stored attribute: it is produced on demand by
    ``__getattr__`` and cached in ``_browser``.
    """
    # NOTE(review): the patch hunk this class was reconstructed from starts at
    # LICENSE; any class attributes declared before it are not shown.
    LICENSE = 'GPLv3'

    CONFIG = {}
    _browser = None

    def __getattr__(self, name):
        """Build the browser the first time ``self.browser`` is read.

        Python calls ``__getattr__`` only after normal attribute lookup has
        failed, so this handles ``browser`` and rejects everything else.

        Raises:
            AttributeError: for any name other than ``'browser'``.
        """
        if name != 'browser':
            # Call form of raise: valid on both Python 2 and Python 3.
            raise AttributeError(name)
        if not self._browser:
            self._browser = YoujizzBrowser()
        return self._browser

    def check_url(func):
        """Decorator that skips ``func`` when its argument is a foreign URL.

        It is applied inside the class body, so ``func`` is the plain
        function. The wrapper returns ``None`` without calling ``func`` when
        the first argument is a string that is neither all digits nor
        contains ``youjizz.com``. Digit-only strings are let through
        (presumably bare video ids -- confirm with callers).
        """
        from functools import wraps

        @wraps(func)
        def inner(self, *args, **kwargs):
            # The decorated methods take a single argument; accept it whether
            # it was passed positionally or by keyword instead of failing with
            # IndexError on args[0].
            if args:
                url = args[0]
            elif kwargs:
                url = next(iter(kwargs.values()))
            else:
                url = None
            if (isinstance(url, (str, unicode)) and not url.isdigit()
                    and u'youjizz.com' not in url):
                return None
            return func(self, *args, **kwargs)
        return inner

    @check_url
    def get_video(self, _id):
        """Return whatever the browser's ``get_video`` returns for ``_id``."""
        return self.browser.get_video(_id)

    @check_url
    def iter_page_urls(self, mozaic_url):
        """Delegate to the browser's ``iter_page_urls`` for ``mozaic_url``."""
        return self.browser.iter_page_urls(mozaic_url)

    def iter_search_results(self, pattern=None, sortby=None):
        """Return the browser's search results for ``pattern``.

        ``sortby`` is accepted but is not forwarded to the browser, so it
        currently has no effect.
        """
        return self.browser.iter_search_results(pattern)
from logging import error
import re
import urllib

from weboob.tools.browser import BaseBrowser

from .pages.index import IndexPage
from .pages.video import VideoPage


__all__ = ['YoujizzBrowser']


class YoujizzBrowser(BaseBrowser):
    """Browser for youjizz.com: home page, search listings and video pages."""

    DOMAIN = 'youjizz.com'
    PROTOCOL = 'http'
    # URL regular expression -> page class. How BaseBrowser matches these
    # (anchored or not) is not visible here; if the match is unanchored the
    # first pattern also matches video and search URLs -- TODO confirm.
    PAGES = {r'http://.*youjizz\.com/?': IndexPage,
             r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
             r'http://.*youjizz\.com/search/.+\.html': IndexPage,
            }

    def id2url(self, _id):
        """Return the video page URL for the video id ``_id``."""
        return 'http://www.youjizz.com/videos/%s.html' % _id

    def get_video(self, url):
        self.location(url)
        # NOTE(review): the patch context stops after the line above. Any
        # statements that follow it in the real method are not shown in the
        # diff and must be kept as they are.

    def iter_page_urls(self, mozaic_url):
        """Not supported by this browser.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()

    def iter_search_results(self, pattern):
        """Return the videos listed for ``pattern``.

        With an empty or missing pattern ``self.home()`` is called instead of
        a search (presumably loading the site's home page -- confirm in
        BaseBrowser). The result is whatever ``IndexPage.iter_videos()``
        returns for the page reached.

        Raises:
            AssertionError: if the page reached is not an IndexPage.
        """
        if not pattern:
            self.home()
        else:
            # Python 2's urllib.quote_plus raises KeyError on non-ASCII
            # unicode input, so hand it UTF-8 encoded bytes instead.
            if isinstance(pattern, unicode):
                pattern = pattern.encode('utf-8')
            self.location('/search/%s-1.html' % (urllib.quote_plus(pattern)))

        assert self.is_on_page(IndexPage)
        return self.page.iter_videos()
import re

from weboob.capabilities.video import Video
from weboob.tools.browser import BasePage


__all__ = ['IndexPage']


_VIDEO_HREF = re.compile(r'/videos/(.+)\.html')


def _video_id(url):
    """Return the id embedded in a ``/videos/<id>.html`` link.

    The link is returned unchanged when it does not have that shape.
    """
    match = _VIDEO_HREF.search(url)
    if match is None:
        return url
    return match.group(1)


def _parse_duration(text):
    """Convert a colon-separated time label such as ``'12:34'`` to seconds.

    Each field is worth sixty times the one to its right, so ``'M:S'`` and
    ``'H:M:S'`` both work. Returns 0 for an empty or non-numeric label so one
    odd thumbnail cannot abort the whole listing.
    """
    if not text:
        return 0
    seconds = 0
    try:
        for field in text.strip().split(':'):
            seconds = seconds * 60 + int(field)
    except ValueError:
        return 0
    return seconds


class IndexPage(BasePage):
    """Listing page (home page or search results) made of video thumbnails."""

    def iter_videos(self):
        """Yield one ``Video`` per ``span#miniatura`` block in the document.

        Blocks without a link are skipped. A missing title, preview image or
        duration label leaves that field as ``None`` / ``None`` / ``0``
        respectively instead of raising.
        """
        for span in self.document.getroot().cssselect("span#miniatura"):
            link = span.find('.//a')
            if link is None:
                continue
            href = link.attrib.get('href')
            if not href:
                continue
            _id = _video_id(href)

            img = span.find('.//img')
            preview_url = img.attrib.get('src') if img is not None else None

            # cssselect() returns a (possibly empty) list, never None.
            title = None
            title_nodes = span.cssselect('span#title1')
            if title_nodes and title_nodes[0].text:
                title = title_nodes[0].text.strip()

            duration = 0
            time_nodes = span.cssselect('span.thumbtime')
            if time_nodes:
                time_span = time_nodes[0].find('span')
                if time_span is not None:
                    duration = _parse_duration(time_span.text)

            yield Video(_id,
                        title=title,
                        page_url=self.browser.id2url(_id),
                        duration=duration,
                        preview_url=preview_url,
                        nsfw=True)