From 7ccf4a01eb1a87afc620db7cebab140da7585b7b Mon Sep 17 00:00:00 2001
From: Christophe Benz
Date: Mon, 12 Jul 2010 03:12:53 +0200
Subject: [PATCH] handle required fields

---
Review notes (placed after the "---" separator, so they are not part of the
commit message):

- Purpose of the patch: every iter_search_results() touched here gains an
  optional `required_fields` argument.  In the youjizz and youporn browsers,
  when a search result is missing one of the required fields, the video page
  is fetched through get_video(video.id, video=video) to complete the object;
  if fields are still missing afterwards an Exception is raised.  The ina
  backend and the ICapVideo capability only accept the new argument.
- Fix applied during review: YoupornBrowser.get_video() returned
  `video.page.video` in the `video is None` branch, which dereferences None
  and raises AttributeError.  It now returns `self.page.video`, matching the
  removed implementation and the `else` branch.  The post-image blob id on
  the youporn/browser.py "index" line predates this one-line fix.
- Line structure and indentation were restored from a whitespace-collapsed
  copy of this patch; context-line whitespace should be checked against the
  target tree before applying (or apply with --ignore-whitespace).

 weboob/backends/ina/backend.py     |  2 +-
 weboob/backends/youjizz/browser.py | 12 ++++++----
 weboob/backends/youporn/backend.py |  4 ++--
 weboob/backends/youporn/browser.py | 35 ++++++++++++++++++++----------
 weboob/capabilities/video.py       |  5 +++--
 5 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/weboob/backends/ina/backend.py b/weboob/backends/ina/backend.py
index 0b6563aa..63c24c9b 100644
--- a/weboob/backends/ina/backend.py
+++ b/weboob/backends/ina/backend.py
@@ -40,6 +40,6 @@ class InaBackend(BaseBackend, ICapVideo):
     def get_video(self, _id):
         return self.browser.get_video(_id)
 
-    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False):
+    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, required_fields=None):
         debug(u'backend ina: iter_search_results is not implemented')
         return []
diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py
index 65fdc05b..bd6a43e6 100644
--- a/weboob/backends/youjizz/browser.py
+++ b/weboob/backends/youjizz/browser.py
@@ -17,9 +17,9 @@
 
 
 import datetime
+import logging
 import re
 import urllib
-from logging import warning
 
 from weboob.tools.browser import BaseBrowser, BrowserUnavailable
 from weboob.tools.browser.decorators import check_domain, id2url
@@ -54,7 +54,7 @@ class YoujizzBrowser(BaseBrowser):
                 return None
             else:
                 if len(video_file_urls) > 1:
-                    warning('Many video file URL found for given URL: %s' % video_file_urls)
+                    logging.warning('Many video file URL found for given URL: %s' % video_file_urls)
                 return video_file_urls[0]
         m = re.search(r'http://.*youjizz\.com/videos/(.+)\.html', url)
         _id = unicode(m.group(1)) if m else None
@@ -84,7 +84,11 @@ class YoujizzBrowser(BaseBrowser):
 
         for video in self.page.iter_videos():
             if required_fields is not None:
-                required_fields_missing = set(required_fields) - set(iter_fields(video))
-                if required_fields_missing:
+                missing_required_fields = set(required_fields) - set(k for k, v in iter_fields(video) if v)
+                if missing_required_fields:
+                    logging.debug(u'Completing missing required fields: %s' % missing_required_fields)
                     self.get_video(video.id, video=video)
+                    missing_required_fields = set(required_fields) - set(k for k, v in iter_fields(video) if v)
+                    if missing_required_fields:
+                        raise Exception(u'Could not load all required fields. Missing: %s' % missing_required_fields)
             yield video
diff --git a/weboob/backends/youporn/backend.py b/weboob/backends/youporn/backend.py
index 31ecbc0d..5988e4bc 100644
--- a/weboob/backends/youporn/backend.py
+++ b/weboob/backends/youporn/backend.py
@@ -39,10 +39,10 @@ class YoupornBackend(BaseBackend, ICapVideo):
         return self.browser.get_video(_id)
 
     SORTBY = ['relevance', 'rating', 'views', 'time']
-    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False):
+    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, required_fields=None):
         if not nsfw:
             return iter(set())
-        return self.browser.iter_search_results(pattern, self.SORTBY[sortby])
+        return self.browser.iter_search_results(pattern, self.SORTBY[sortby], required_fields=required_fields)
 
     def iter_page_urls(self, mozaic_url):
         raise NotImplementedError()
diff --git a/weboob/backends/youporn/browser.py b/weboob/backends/youporn/browser.py
index 78f3f017..0b292eb1 100644
--- a/weboob/backends/youporn/browser.py
+++ b/weboob/backends/youporn/browser.py
@@ -16,8 +16,11 @@
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 
+import logging
+
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.browser.decorators import id2url
+from weboob.tools.misc import iter_fields
 
 from .pages.index import IndexPage
 from .pages.video import VideoPage
@@ -36,20 +39,30 @@ class YoupornBrowser(BaseBrowser):
              'http://[w\.]*youporngay\.com:80/watch/.+': VideoPage,
             }
 
-    def __init__(self):
-        # Disallow arguments
-        BaseBrowser.__init__(self)
+    @id2url(YoupornVideo.id2url)
+    def get_video(self, url, video=None):
+        self.location(url)
+        if video is None:
+            return self.page.video
+        else:
+            for k, v in iter_fields(self.page.video):
+                if v and getattr(video, k) != v:
+                    setattr(video, k, v)
+            return video
 
-    def iter_search_results(self, pattern, sortby):
+    def iter_search_results(self, pattern, sortby, required_fields=None):
         if not pattern:
             self.home()
         else:
             self.location(self.buildurl('/search/%s' % sortby, query=pattern))
 
-        assert self.is_on_page(IndexPage)
-        return self.page.iter_videos()
-
-    @id2url(YoupornVideo.id2url)
-    def get_video(self, url):
-        self.location(url)
-        return self.page.video
+        for video in self.page.iter_videos():
+            if required_fields is not None:
+                missing_required_fields = set(required_fields) - set(k for k, v in iter_fields(video) if v)
+                if missing_required_fields:
+                    logging.debug(u'Completing missing required fields: %s' % missing_required_fields)
+                    self.get_video(video.id, video=video)
+                    missing_required_fields = set(required_fields) - set(k for k, v in iter_fields(video) if v)
+                    if missing_required_fields:
+                        raise Exception(u'Could not load all required fields. Missing: %s' % missing_required_fields)
+            yield video
diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py
index fd1acaff..600e8ebf 100644
--- a/weboob/capabilities/video.py
+++ b/weboob/capabilities/video.py
@@ -55,14 +55,15 @@ class ICapVideo(ICap):
      SEARCH_VIEWS,
      SEARCH_DATE) = range(4)
 
-    def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False):
+    def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, required_fields=None):
         """
         Iter results of a search on a pattern. Note that if pattern is None,
         it get the latest videos.
 
         @param pattern  [str] pattern to search on
         @param sortby  [enum] sort by...
-        @param pattern  [bool] include non-suitable for work videos if True
+        @param nsfw  [bool] include non-suitable for work videos if True
+        @param required_fields  [tuple] fields to load even if it takes many HTTP requests
         """
         raise NotImplementedError()
 