diff --git a/modules/youjizz/backend.py b/modules/youjizz/backend.py
index 14e06ed2..21981e4a 100644
--- a/modules/youjizz/backend.py
+++ b/modules/youjizz/backend.py
@@ -41,15 +41,13 @@ class YoujizzBackend(BaseBackend, ICapVideo, ICapCollection):
BROWSER = YoujizzBrowser
def get_video(self, _id):
- with self.browser:
- video = self.browser.get_video(_id)
+ video = self.browser.get_video(_id)
return video
def search_videos(self, pattern, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False):
if not nsfw:
return set()
- with self.browser:
- return self.browser.search_videos(pattern)
+ return self.browser.search_videos(pattern)
def fill_video(self, video, fields):
if fields != ['thumbnail']:
diff --git a/modules/youjizz/browser.py b/modules/youjizz/browser.py
index cb6ce374..d2ccebaf 100644
--- a/modules/youjizz/browser.py
+++ b/modules/youjizz/browser.py
@@ -18,9 +18,7 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
-import urllib
-
-from weboob.tools.browser import BaseBrowser
+from weboob.tools.browser2 import PagesBrowser, URL
from weboob.tools.browser.decorators import id2url
from .pages.index import IndexPage
@@ -31,27 +29,28 @@ from .video import YoujizzVideo
__all__ = ['YoujizzBrowser']
-class YoujizzBrowser(BaseBrowser):
- DOMAIN = 'youjizz.com'
- ENCODING = None
- PAGES = {r'http://.*youjizz\.com/?': IndexPage,
- r'http://.*youjizz\.com/index.php': IndexPage,
- r'http://.*youjizz\.com/search/(?P<pattern>.+)\.html': IndexPage,
- r'http://.*youjizz\.com/videos/(?P<id>.+)\.html': VideoPage,
- }
+class YoujizzBrowser(PagesBrowser):
+ BASEURL = 'http://www.youjizz.com'
+
+ index = URL(r'/?(index.php)?$', IndexPage)
+ search = URL(r'/search/(?P<pattern>.+)-(?P<pagenum>\d+).html', IndexPage)
+ video = URL(r'/videos/(?P<id>.*).html', VideoPage)
@id2url(YoujizzVideo.id2url)
def get_video(self, url, video=None):
self.location(url)
- assert self.is_on_page(VideoPage), 'Should be on video page.'
+ assert self.video.is_here()
+
return self.page.get_video(video)
def search_videos(self, pattern):
- self.location('/search/%s-1.html' % (urllib.quote_plus(pattern.encode('utf-8'))))
- assert self.is_on_page(IndexPage)
- return self.page.iter_videos()
+ self.search.go(pattern=pattern, pagenum=1)
+ assert self.search.is_here()
+
+ return self.pagination(lambda: self.page.iter_videos())
def latest_videos(self):
- self.home()
- assert self.is_on_page(IndexPage)
- return self.page.iter_videos()
+ self.index.go()
+ assert self.index.is_here()
+
+ return self.pagination(lambda: self.page.iter_videos())
diff --git a/modules/youjizz/pages/index.py b/modules/youjizz/pages/index.py
index 95800c3a..70658c3a 100644
--- a/modules/youjizz/pages/index.py
+++ b/modules/youjizz/pages/index.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2010-2012 Roger Philibert
+# Copyright(C) 2010-2014 Roger Philibert
#
# This file is part of weboob.
#
@@ -21,9 +21,10 @@
import datetime
import re
-from weboob.tools.browser import BasePage, BrokenPageError
+from weboob.tools.browser2 import HTMLPage
+from weboob.tools.browser2.page import ListElement, method, ItemElement
+from weboob.tools.browser2.filters import Filter, Link, CleanText
from weboob.capabilities.image import BaseImage
-from weboob.tools.misc import to_unicode
from ..video import YoujizzVideo
@@ -31,35 +32,42 @@ from ..video import YoujizzVideo
__all__ = ['IndexPage']
-class IndexPage(BasePage):
- def iter_videos(self):
- span_list = self.parser.select(self.document.getroot(), 'span#miniatura')
- for span in span_list:
- a = self.parser.select(span, 'a', 1)
- url = a.attrib['href']
- _id = re.sub(r'/videos/(.+)\.html', r'\1', url)
+class IndexPage(HTMLPage):
+ @method
+ class iter_videos(ListElement):
+ item_xpath = '//span[@id="miniatura"]'
- video = YoujizzVideo(_id)
+ next_page = Link(u'//a[text()="Next »"]')
- video.thumbnail = BaseImage(span.find('.//img').attrib['data-original'])
- video.thumbnail.url = video.thumbnail.id
+ class item(ItemElement):
+ klass = YoujizzVideo
- title_el = self.parser.select(span, 'span#title1', 1)
- video.title = to_unicode(title_el.text.strip())
+ class Id(Filter):
+ def filter(self, link):
+ return re.sub(r'/videos/(.+)\.html', r'\1', link)
- time_span = self.parser.select(span, 'span.thumbtime span', 1)
- time_txt = time_span.text.strip().replace(';', ':')
- hours, minutes, seconds = 0, 0, 0
- if ':' in time_txt:
- t = time_txt.split(':')
- t.reverse()
- seconds = int(t[0])
- minutes = int(t[1])
- if len(t) == 3:
- hours = int(t[2])
- elif time_txt != 'N/A':
- raise BrokenPageError('Unable to parse the video duration: %s' % time_txt)
+ class Duration(Filter):
+ def filter(self, txt):
+ time_txt = txt.replace(';', ':')
+ hours, minutes, seconds = 0, 0, 0
+ if ':' in time_txt:
+ t = time_txt.split(':')
+ t.reverse()
+ seconds = int(t[0])
+ minutes = int(t[1])
+ if len(t) == 3:
+ hours = int(t[2])
+ elif time_txt != 'N/A':
+ raise ValueError('Unable to parse the video duration: %s' % time_txt)
- video.duration = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
+ return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)
- yield video
+
+ obj_id = Id(Link('.//a'))
+ obj_title = CleanText('.//span[@id="title1"]')
+ obj_duration = Duration(CleanText('.//span[@class="thumbtime"]//span'))
+
+ def obj_thumbnail(self):
+ thumbnail = BaseImage(self.xpath('.//img')[0].attrib['data-original'])
+ thumbnail.url = thumbnail.id
+ return thumbnail
diff --git a/modules/youjizz/pages/video.py b/modules/youjizz/pages/video.py
index e227af25..4ded1ccb 100644
--- a/modules/youjizz/pages/video.py
+++ b/modules/youjizz/pages/video.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Copyright(C) 2010-2011 Roger Philibert
+# Copyright(C) 2010-2014 Roger Philibert
#
# This file is part of weboob.
#
@@ -19,11 +19,12 @@
import datetime
-import lxml.html
import re
+from weboob.tools.browser2 import HTMLPage
+from weboob.tools.browser2.page import method, ItemElement
+from weboob.tools.browser2.filters import CleanText, Env
from weboob.capabilities.base import NotAvailable
-from weboob.tools.browser import BasePage, BrokenPageError
from weboob.tools.misc import to_unicode
from ..video import YoujizzVideo
@@ -32,36 +33,36 @@ from ..video import YoujizzVideo
__all__ = ['VideoPage']
-class VideoPage(BasePage):
- def get_video(self, video=None):
- _id = to_unicode(self.group_dict['id'])
- if video is None:
- video = YoujizzVideo(_id)
- title_el = self.parser.select(self.document.getroot(), 'title', 1)
- video.title = to_unicode(title_el.text.strip())
+class VideoPage(HTMLPage):
+ @method
+ class get_video(ItemElement):
+ klass = YoujizzVideo
- # youjizz HTML is crap, we must parse it with regexps
- data = lxml.html.tostring(self.document.getroot())
- m = re.search(r'.*?Runtime.*? (.+?)', data)
- if m:
- txt = m.group(1).strip()
- if txt == 'Unknown':
- video.duration = NotAvailable
+ obj_id = Env('id')
+ obj_title = CleanText('//title')
+
+ def obj_duration(self):
+ # youjizz HTML is crap, we must parse it with regexps
+ m = re.search(r'.*?Runtime.*? (.+?)', self.page.response.text)
+ if m:
+ txt = m.group(1).strip()
+ if txt == 'Unknown':
+ return NotAvailable
+ else:
+ minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
+ return datetime.timedelta(minutes=minutes, seconds=seconds)
else:
- minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
- video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
- else:
- raise BrokenPageError('Unable to retrieve video duration')
+ raise ValueError('Unable to retrieve video duration')
- real_id = int(_id.split('-')[-1])
- data = self.browser.readurl('http://www.youjizz.com/videos/embed/%s' % real_id)
+ def obj_url(self):
+ real_id = int(self.env['id'].split('-')[-1])
+ response = self.page.browser.open('http://www.youjizz.com/videos/embed/%s' % real_id)
+ data = response.text
- video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
- if len(video_file_urls) == 0:
- raise BrokenPageError('Video URL not found')
- elif len(video_file_urls) > 1:
- raise BrokenPageError('Many video file URL found')
- else:
- video.url = to_unicode(video_file_urls[0])
-
- return video
+ video_file_urls = re.findall(r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
+ if len(video_file_urls) == 0:
+ raise ValueError('Video URL not found')
+ elif len(video_file_urls) > 1:
+ raise ValueError('Many video file URL found')
+ else:
+ return to_unicode(video_file_urls[0])