From 3dfef071410dc5e16b65f78a0a8ca74018f36b59 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:51:58 +0200 Subject: [PATCH 1/9] implement search for youjizz --- weboob/backends/youjizz/backend.py | 22 ++++++--- weboob/backends/youjizz/browser.py | 21 ++++++++- weboob/backends/youjizz/pages/index.py | 63 ++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 8 deletions(-) create mode 100644 weboob/backends/youjizz/pages/index.py diff --git a/weboob/backends/youjizz/backend.py b/weboob/backends/youjizz/backend.py index 9764c982..603aeebe 100644 --- a/weboob/backends/youjizz/backend.py +++ b/weboob/backends/youjizz/backend.py @@ -32,22 +32,30 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider): LICENSE = 'GPLv3' CONFIG = {} - browser = None + _browser = None - def need_browser(func): + def __getattr__(self, name): + if name == 'browser': + if not self._browser: + self._browser = YoujizzBrowser() + return self._browser + raise AttributeError, name + + def check_url(func): def inner(self, *args, **kwargs): - if not self.browser: - self.browser = YoujizzBrowser() url = args[0] - if u'youjizz.com' not in url: + if isinstance(url, (str,unicode)) and not url.isdigit() and u'youjizz.com' not in url: return None return func(self, *args, **kwargs) return inner - @need_browser + @check_url def get_video(self, _id): return self.browser.get_video(_id) - @need_browser + @check_url def iter_page_urls(self, mozaic_url): return self.browser.iter_page_urls(mozaic_url) + + def iter_search_results(self, pattern=None, sortby=None): + return self.browser.iter_search_results(pattern) diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py index bd8e1f34..4a485b3c 100644 --- a/weboob/backends/youjizz/browser.py +++ b/weboob/backends/youjizz/browser.py @@ -20,9 +20,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. from logging import error import re +import urllib from weboob.tools.browser import BaseBrowser +from .pages.index import IndexPage from .pages.video import VideoPage @@ -30,7 +32,15 @@ __all__ = ['YoujizzBrowser'] class YoujizzBrowser(BaseBrowser): - PAGES = {r'http://.*youjizz\.com/videos/.+\.html': VideoPage} + DOMAIN = 'youjizz.com' + PROTOCOL = 'http' + PAGES = {r'http://.*youjizz\.com/?': IndexPage, + r'http://.*youjizz\.com/videos/.+\.html': VideoPage, + r'http://.*youjizz\.com/search/.+\.html': IndexPage, + } + + def id2url(self, _id): + return 'http://www.youjizz.com/videos/%s.html' % _id def get_video(self, url): self.location(url) @@ -38,3 +48,12 @@ class YoujizzBrowser(BaseBrowser): def iter_page_urls(self, mozaic_url): raise NotImplementedError() + + def iter_search_results(self, pattern): + if not pattern: + self.home() + else: + self.location('/search/%s-1.html' % (urllib.quote_plus(pattern))) + + assert self.is_on_page(IndexPage) + return self.page.iter_videos() diff --git a/weboob/backends/youjizz/pages/index.py b/weboob/backends/youjizz/pages/index.py new file mode 100644 index 00000000..4842318c --- /dev/null +++ b/weboob/backends/youjizz/pages/index.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +""" +Copyright(C) 2010 Roger Philibert + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +""" + +import re + +from weboob.capabilities.video import Video +from weboob.tools.browser import BasePage + + +__all__ = ['IndexPage'] + + +class IndexPage(BasePage): + def iter_videos(self): + span_list = self.document.getroot().cssselect("span#miniatura") + if not span_list: + return + + for span in span_list: + a = span.find('.//a') + if a is None: + continue + url = a.attrib['href'] + _id = re.sub(r'/videos/(.+)\.html', r'\1', url) + + preview_url = span.find('.//img').attrib['src'] + + title1 = span.cssselect('span#title1') + if title1 is None: + title = None + else: + title = title1[0].text.strip() + + duration = 0 + thumbtime = span.cssselect('span.thumbtime') + if thumbtime is not None: + time_span = thumbtime[0].find('span') + minutes, seconds = time_span.text.strip().split(':') + duration = 60 * int(minutes) + int(seconds) + + yield Video(_id, + title=title, + page_url=self.browser.id2url(_id), + duration=duration, + preview_url=preview_url, + nsfw=True) From e29f6b9a723234bb6265532d2392534d02437180 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:52:28 +0200 Subject: [PATCH 2/9] add strip() to parsed elements --- weboob/backends/youporn/pages/index.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/weboob/backends/youporn/pages/index.py b/weboob/backends/youporn/pages/index.py index c76bf490..699c8e56 100644 --- a/weboob/backends/youporn/pages/index.py +++ b/weboob/backends/youporn/pages/index.py @@ -43,14 +43,14 @@ class IndexPage(PornPage): url = a.attrib['href'] _id = url[len('/watch/'):] _id = _id[:_id.find('/')] - title = a.text + title = a.text.strip() duration = 0 div = li.cssselect('div[class=duration_views]') if div: h2 = div[0].find('h2') - duration = 60 * int(h2.text) - duration += int(h2.find('span').tail) + duration = 60 * int(h2.text.strip()) + duration += int(h2.find('span').tail.strip()) rating = 0 rating_max = 0 @@ -59,6 +59,7 @@ class IndexPage(PornPage): p = div[0].find('p') rating = float(p.text.strip()) rating_max = float(p.find('span').text.strip()[2:]) + yield Video(int(_id), title, rating=rating, From 0bcf72c08f36b03f74b8b0d4c9152ff652fc8489 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:53:33 +0200 Subject: [PATCH 3/9] code clean --- weboob/backends/youporn/pages/index.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/weboob/backends/youporn/pages/index.py b/weboob/backends/youporn/pages/index.py index 699c8e56..d8369d3d 100644 --- a/weboob/backends/youporn/pages/index.py +++ b/weboob/backends/youporn/pages/index.py @@ -18,9 +18,14 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ -from .base import PornPage from weboob.capabilities.video import Video +from .base import PornPage + + +__all__ = ['IndexPage'] + + class IndexPage(PornPage): def iter_videos(self): uls = self.document.getroot().cssselect("ul[class=clearfix]") @@ -61,7 +66,7 @@ class IndexPage(PornPage): rating_max = float(p.find('span').text.strip()[2:]) yield Video(int(_id), - title, + title=title, rating=rating, rating_max=rating_max, duration=duration, From 4cc065e05fb6825afd0fcba771d90a9d1200cbb5 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:53:56 +0200 Subject: [PATCH 4/9] show page url in videoob search results --- weboob/backends/youporn/pages/index.py | 1 + weboob/capabilities/video.py | 4 +++- weboob/frontends/videoob/application.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/weboob/backends/youporn/pages/index.py b/weboob/backends/youporn/pages/index.py index d8369d3d..011ca3bb 100644 --- a/weboob/backends/youporn/pages/index.py +++ b/weboob/backends/youporn/pages/index.py @@ -67,6 +67,7 @@ class IndexPage(PornPage): yield Video(int(_id), title=title, + page_url=self.browser.id2url(_id), rating=rating, rating_max=rating_max, duration=duration, diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index fd902f48..2383b545 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -25,10 +25,12 @@ __all__ = ['ICapVideoProvider', 'Video'] class Video(object): - def __init__(self, _id, title=u'', url=u'', author=u'', duration=0, date=None, rating=0, rating_max=0, preview_url=None, nsfw=False): + def __init__(self, _id, title=u'', url=u'', page_url=u'', author=u'', duration=0, date=None, + rating=0, rating_max=0, preview_url=None, nsfw=False): self.id = _id self.title = title self.url = url + self.page_url = page_url self.author = author self.duration = duration self.date = date diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index 8390d1e1..287ef94a 100644 --- a/weboob/frontends/videoob/application.py +++ b/weboob/frontends/videoob/application.py @@ -63,6 +63,7 @@ class Videoob(ConsoleApplication): results['BEFORE'] = u'Search pattern: %s' % pattern else: results['BEFORE'] = u'Last videos' + results['HEADER'] = ('ID', 'Title', 'Page URL') for backend in self.weboob.iter_backends(): try: iterator = backend.iter_search_results(pattern) @@ -71,8 +72,7 @@ class Videoob(ConsoleApplication): else: rows = [] for video in iterator: - rows.append(('ID', video.id)) - rows.append(('Title', video.title)) + rows.append((video.id, video.title, video.page_url)) results[backend.name] = rows return results From e8be6f77943f1469b6d619321a0fd7574142a03f Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:54:27 +0200 Subject: [PATCH 5/9] improve text formatter --- weboob/tools/application/console.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/weboob/tools/application/console.py b/weboob/tools/application/console.py index 42b18a4f..c68a3aa2 100644 --- a/weboob/tools/application/console.py +++ b/weboob/tools/application/console.py @@ -79,11 +79,8 @@ class TextFormatter(object): if header[i]: formatted_cols.append(u'%s: %s' % (header[i], col)) else: - formatted_cols.append(col) - if len(formatted_cols) == 2: - formatted += u'%s: %s\n' % (formatted_cols[0], formatted_cols[1]) - else: - formatted += u'%s\n' % u' '.join(formatted_cols) + formatted_cols.append(unicode(col)) + formatted += u'%s\n' % u' '.join(formatted_cols) return unicode(formatted).strip() From e7e0d9cc59f2624a441f6edc838a6eb238901314 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:54:58 +0200 Subject: [PATCH 6/9] nice display if formatter could not be loaded --- weboob/tools/application/console.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/weboob/tools/application/console.py b/weboob/tools/application/console.py index c68a3aa2..c34903f2 100644 --- a/weboob/tools/application/console.py +++ b/weboob/tools/application/console.py @@ -18,10 +18,12 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ +import logging import sys, tty, termios import re from inspect import getargspec from functools import partial + from weboob.modules import BackendsConfig from .base import BaseApplication @@ -169,7 +171,10 @@ class ConsoleApplication(BaseApplication): output_format = self.default_output_format else: output_format = 'table' - print formatters[output_format].format(command_result) + try: + print formatters[output_format].format(command_result) + except ImportError, e: + logging.error(u'Could not use formatter "%s". Error: %s' % (output_format, e.message)) return 0 elif isinstance(command_result, int): return command_result From 3c75656825ff00b16950ceb666840289c8dfff2d Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:55:23 +0200 Subject: [PATCH 7/9] fix bug with multiple arguments for command --- weboob/tools/application/console.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/weboob/tools/application/console.py b/weboob/tools/application/console.py index c34903f2..6487eddb 100644 --- a/weboob/tools/application/console.py +++ b/weboob/tools/application/console.py @@ -156,7 +156,10 @@ class ConsoleApplication(BaseApplication): if defaults: nb_min_args -= len(defaults) - if len(args) < nb_min_args or len(args) > nb_max_args and not varargs: + if len(args) > nb_max_args and not varargs: + sys.stderr.write("Command '%s' takes at most %d arguments.\n" % (command, nb_max_args)) + return + elif len(args) < nb_min_args: if varargs or defaults: sys.stderr.write("Command '%s' takes at least %d arguments.\n" % (command, nb_min_args)) else: From 02c6ae851cd99f4194d9a94613d613b1d8b79af8 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Tue, 20 Apr 2010 23:58:27 +0200 Subject: [PATCH 8/9] fix pyflakes --- weboob/backends/youjizz/browser.py | 2 -- weboob/backends/youjizz/pages/video.py | 1 + weboob/backends/youtube/browser.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py index 4a485b3c..ce0a60ba 100644 --- a/weboob/backends/youjizz/browser.py +++ b/weboob/backends/youjizz/browser.py @@ -18,8 +18,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ -from logging import error -import re import urllib from weboob.tools.browser import BaseBrowser diff --git a/weboob/backends/youjizz/pages/video.py b/weboob/backends/youjizz/pages/video.py index 87796dc3..582c5977 100644 --- a/weboob/backends/youjizz/pages/video.py +++ b/weboob/backends/youjizz/pages/video.py @@ -18,6 +18,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ +from logging import error, warning import re from weboob.capabilities.video import Video diff --git a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py index f4527f71..bf038b5e 100644 --- a/weboob/backends/youtube/browser.py +++ b/weboob/backends/youtube/browser.py @@ -22,7 +22,6 @@ import urllib import re from weboob.tools.browser import BaseBrowser -from weboob.tools.parsers.lxmlparser import LxmlHtmlParser from .pages import VideoPage, ResultsPage From f89ad92bed70ae6daa00fd4b47704d34802c01b1 Mon Sep 17 00:00:00 2001 From: Roger Philibert Date: Wed, 21 Apr 2010 00:00:17 +0200 Subject: [PATCH 9/9] remove useless print --- weboob/backends/youjizz/pages/video.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weboob/backends/youjizz/pages/video.py b/weboob/backends/youjizz/pages/video.py index 582c5977..638f0b8a 100644 --- a/weboob/backends/youjizz/pages/video.py +++ b/weboob/backends/youjizz/pages/video.py @@ -54,6 +54,5 @@ class VideoPage(BasePage): div = self.document.getroot().cssselect('#video_text')[0] results['title'] = unicode(div.find('h2').text).strip() minutes, seconds = [int(v) for v in [e for e in div.cssselect('strong') if e.text.startswith('Runtime')][0].tail.split(':')] - print minutes, seconds results['duration'] = minutes * 60 + seconds return results