handle required fields

This commit is contained in:
Christophe Benz 2010-07-12 03:12:53 +02:00
commit 7ccf4a01eb
5 changed files with 38 additions and 20 deletions

View file

@ -40,6 +40,6 @@ class InaBackend(BaseBackend, ICapVideo):
def get_video(self, _id):
    """
    Fetch a single video by delegating to the backend's browser.

    @param _id  identifier forwarded unchanged to browser.get_video
    @return whatever self.browser.get_video returns for that identifier
    """
    return self.browser.get_video(_id)
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, required_fields=None):
    """
    Search is not implemented for this backend.

    Every argument is accepted only to match the ICapVideo signature and is
    ignored; a debug message is logged and no result is produced.

    @param pattern  [str] ignored
    @param sortby  [enum] ignored
    @param nsfw  [bool] ignored
    @param required_fields  [tuple] ignored
    @return an empty list
    """
    debug(u'backend ina: iter_search_results is not implemented')
    return []

View file

@ -17,9 +17,9 @@
import datetime import datetime
import logging
import re import re
import urllib import urllib
from logging import warning
from weboob.tools.browser import BaseBrowser, BrowserUnavailable from weboob.tools.browser import BaseBrowser, BrowserUnavailable
from weboob.tools.browser.decorators import check_domain, id2url from weboob.tools.browser.decorators import check_domain, id2url
@ -54,7 +54,7 @@ class YoujizzBrowser(BaseBrowser):
return None return None
else: else:
if len(video_file_urls) > 1: if len(video_file_urls) > 1:
warning('Many video file URL found for given URL: %s' % video_file_urls) logging.warning('Many video file URL found for given URL: %s' % video_file_urls)
return video_file_urls[0] return video_file_urls[0]
m = re.search(r'http://.*youjizz\.com/videos/(.+)\.html', url) m = re.search(r'http://.*youjizz\.com/videos/(.+)\.html', url)
_id = unicode(m.group(1)) if m else None _id = unicode(m.group(1)) if m else None
@ -84,7 +84,11 @@ class YoujizzBrowser(BaseBrowser):
for video in self.page.iter_videos(): for video in self.page.iter_videos():
if required_fields is not None: if required_fields is not None:
required_fields_missing = set(required_fields) - set(iter_fields(video)) missing_required_fields = set(required_fields) - set(k for k, v in iter_fields(video) if v)
if required_fields_missing: if missing_required_fields:
logging.debug(u'Completing missing required fields: %s' % missing_required_fields)
self.get_video(video.id, video=video) self.get_video(video.id, video=video)
missing_required_fields = set(required_fields) - set(k for k, v in iter_fields(video) if v)
if missing_required_fields:
raise Exception(u'Could not load all required fields. Missing: %s' % missing_required_fields)
yield video yield video

View file

@ -39,10 +39,10 @@ class YoupornBackend(BaseBackend, ICapVideo):
return self.browser.get_video(_id) return self.browser.get_video(_id)
SORTBY = ['relevance', 'rating', 'views', 'time'] SORTBY = ['relevance', 'rating', 'views', 'time']
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, required_fields=None):
    """
    Search videos through the browser, but only when NSFW results are allowed.

    @param pattern  [str] pattern forwarded to the browser search
    @param sortby  [enum] index into self.SORTBY; the matching string is
                   what the browser receives
    @param nsfw  [bool] when false, nothing is searched and an empty
                 iterator is returned
    @param required_fields  [tuple] forwarded to the browser as-is
    @return an iterator over the browser's search results (empty if not nsfw)
    """
    if not nsfw:
        # Refuse to search at all unless the caller opted in.
        return iter(set())
    return self.browser.iter_search_results(pattern, self.SORTBY[sortby], required_fields=required_fields)
def iter_page_urls(self, mozaic_url):
    """
    Not implemented for this backend.

    @param mozaic_url  unused
    @raise NotImplementedError always
    """
    raise NotImplementedError()

View file

@ -16,8 +16,11 @@
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
import logging
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.browser.decorators import id2url from weboob.tools.browser.decorators import id2url
from weboob.tools.misc import iter_fields
from .pages.index import IndexPage from .pages.index import IndexPage
from .pages.video import VideoPage from .pages.video import VideoPage
@ -36,20 +39,30 @@ class YoupornBrowser(BaseBrowser):
'http://[w\.]*youporngay\.com:80/watch/.+': VideoPage, 'http://[w\.]*youporngay\.com:80/watch/.+': VideoPage,
} }
@id2url(YoupornVideo.id2url)
def get_video(self, url, video=None):
    """
    Load a video page and return its video, optionally completing an
    existing video object instead of returning a fresh one.

    NOTE(review): the id2url decorator presumably lets callers pass a video
    id in place of a URL (iter_search_results calls this with video.id) --
    confirm against weboob.tools.browser.decorators.

    @param url  location of the video page to open
    @param video  existing video object to complete in place, or None
    @return the video parsed from the page when video is None, otherwise
            the given video object updated with the page's values
    """
    self.location(url)
    if video is None:
        # Nothing to merge into: hand back the video parsed from the page.
        # (This must read self.page; dereferencing the None argument raised
        # AttributeError.)
        return self.page.video
    # Copy over only truthy values that differ, so data already present on
    # the given object is never overwritten by an empty value from the page.
    for k, v in iter_fields(self.page.video):
        if v and getattr(video, k) != v:
            setattr(video, k, v)
    return video
def iter_search_results(self, pattern, sortby, required_fields=None):
    """
    Yield the videos listed on the home page or on a search results page.

    @param pattern  [str] search pattern; when empty or None the home page
                    is listed instead of running a search
    @param sortby  [str] sort key inserted into the '/search/<sortby>' URL
    @param required_fields  [tuple] names of fields that must hold a truthy
                            value on every yielded video; missing ones are
                            completed by loading the video's own page.
                            None disables the check.
    @raise Exception if some required fields are still missing after the
           video page has been loaded
    """
    def missing_fields(video):
        # A field counts as loaded only when its value is truthy.
        loaded = set(k for k, v in iter_fields(video) if v)
        return set(required_fields) - loaded

    if not pattern:
        self.home()
    else:
        self.location(self.buildurl('/search/%s' % sortby, query=pattern))
    assert self.is_on_page(IndexPage)

    # The iterator is taken from the index page once, before any call to
    # get_video() below moves the browser to another page.
    for video in self.page.iter_videos():
        if required_fields is not None:
            missing = missing_fields(video)
            if missing:
                logging.debug(u'Completing missing required fields: %s', missing)
                self.get_video(video.id, video=video)
                # Re-check: the video page may not provide everything either.
                missing = missing_fields(video)
                if missing:
                    raise Exception(u'Could not load all required fields. Missing: %s' % missing)
        yield video

View file

@ -55,14 +55,15 @@ class ICapVideo(ICap):
SEARCH_VIEWS, SEARCH_VIEWS,
SEARCH_DATE) = range(4) SEARCH_DATE) = range(4)
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False, required_fields=None):
    """
    Iterate over the results of a search on a pattern. If pattern is None,
    the latest videos are returned instead.

    This base definition only declares the interface; backends such as
    InaBackend and YoupornBackend provide the implementation.

    @param pattern  [str] pattern to search on
    @param sortby  [enum] sort by...
    @param nsfw  [bool] include non-suitable for work videos if True
    @param required_fields  [tuple] fields to load even if it takes many HTTP requests
    @raise NotImplementedError always
    """
    raise NotImplementedError()