implement search for youjizz

This commit is contained in:
Roger Philibert 2010-04-20 23:51:58 +02:00
commit 3dfef07141
3 changed files with 98 additions and 8 deletions

View file

@ -32,22 +32,30 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider):
LICENSE = 'GPLv3' LICENSE = 'GPLv3'
CONFIG = {} CONFIG = {}
browser = None _browser = None
def need_browser(func): def __getattr__(self, name):
if name == 'browser':
if not self._browser:
self._browser = YoujizzBrowser()
return self._browser
raise AttributeError, name
def check_url(func):
def inner(self, *args, **kwargs): def inner(self, *args, **kwargs):
if not self.browser:
self.browser = YoujizzBrowser()
url = args[0] url = args[0]
if u'youjizz.com' not in url: if isinstance(url, (str,unicode)) and not url.isdigit() and u'youjizz.com' not in url:
return None return None
return func(self, *args, **kwargs) return func(self, *args, **kwargs)
return inner return inner
@need_browser @check_url
def get_video(self, _id): def get_video(self, _id):
return self.browser.get_video(_id) return self.browser.get_video(_id)
@need_browser @check_url
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
return self.browser.iter_page_urls(mozaic_url) return self.browser.iter_page_urls(mozaic_url)
def iter_search_results(self, pattern=None, sortby=None):
    """Delegate a video search to the lazily created browser.

    pattern -- search terms, handed to the browser unchanged.
    sortby  -- accepted as part of the signature but not forwarded
               to the browser.

    Returns whatever the browser's iter_search_results() returns.
    """
    browser = self.browser
    return browser.iter_search_results(pattern)

View file

@ -20,9 +20,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
from logging import error from logging import error
import re import re
import urllib
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from .pages.index import IndexPage
from .pages.video import VideoPage from .pages.video import VideoPage
@ -30,7 +32,15 @@ __all__ = ['YoujizzBrowser']
class YoujizzBrowser(BaseBrowser): class YoujizzBrowser(BaseBrowser):
PAGES = {r'http://.*youjizz\.com/videos/.+\.html': VideoPage} DOMAIN = 'youjizz.com'
PROTOCOL = 'http'
PAGES = {r'http://.*youjizz\.com/?': IndexPage,
r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
r'http://.*youjizz\.com/search/.+\.html': IndexPage,
}
def id2url(self, _id):
    """Build the absolute video page URL for the given video id."""
    video_url = 'http://www.youjizz.com/videos/%s.html' % _id
    return video_url
def get_video(self, url): def get_video(self, url):
self.location(url) self.location(url)
@ -38,3 +48,12 @@ class YoujizzBrowser(BaseBrowser):
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
raise NotImplementedError() raise NotImplementedError()
def iter_search_results(self, pattern):
    """Open the listing matching `pattern` and iterate over its videos.

    With a non-empty pattern, the first page of search results is
    loaded (the pattern is URL-quoted into the path); otherwise the
    home page is loaded. The loaded page is then asserted to be an
    IndexPage and its iter_videos() result is returned.
    """
    if pattern:
        search_path = '/search/%s-1.html' % (urllib.quote_plus(pattern))
        self.location(search_path)
    else:
        self.home()

    # Sanity check: both the home page and search pages map to IndexPage.
    assert self.is_on_page(IndexPage)
    return self.page.iter_videos()

View file

@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
"""
Copyright(C) 2010 Roger Philibert
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
import re
from weboob.capabilities.video import Video
from weboob.tools.browser import BasePage
__all__ = ['IndexPage']
class IndexPage(BasePage):
    """Listing page (home page or search results) made of video thumbnails."""

    def iter_videos(self):
        """Yield one Video per thumbnail block found in the document.

        Each ``span#miniatura`` element is inspected:

        * the first link gives the video URL, from which the id is
          extracted by stripping the ``/videos/`` prefix and ``.html``
          suffix (the URL is used unchanged if it does not match);
        * the first image gives the preview URL;
        * ``span#title1`` gives the title (None when absent);
        * ``span.thumbtime`` holds a nested span with a ``MM:SS`` text,
          converted to a duration in seconds (0 when absent).

        Thumbnail blocks without any link are skipped.
        """
        for span in self.document.getroot().cssselect("span#miniatura"):
            a = span.find('.//a')
            if a is None:
                continue

            url = a.attrib['href']
            _id = re.sub(r'/videos/(.+)\.html', r'\1', url)

            preview_url = span.find('.//img').attrib['src']

            # cssselect() returns a list (possibly empty), never None, so
            # emptiness is what must be tested before indexing.
            title1 = span.cssselect('span#title1')
            if title1:
                title = title1[0].text.strip()
            else:
                title = None

            duration = 0
            thumbtime = span.cssselect('span.thumbtime')
            if thumbtime:
                time_span = thumbtime[0].find('span')
                # find() returns None when there is no nested span; keep
                # the default duration in that case.
                if time_span is not None:
                    minutes, seconds = time_span.text.strip().split(':')
                    duration = 60 * int(minutes) + int(seconds)

            yield Video(_id,
                        title=title,
                        page_url=self.browser.id2url(_id),
                        duration=duration,
                        preview_url=preview_url,
                        nsfw=True)