diff --git a/weboob/backends/youjizz/backend.py b/weboob/backends/youjizz/backend.py
index 9764c982..603aeebe 100644
--- a/weboob/backends/youjizz/backend.py
+++ b/weboob/backends/youjizz/backend.py
@@ -32,22 +32,43 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider):
     LICENSE = 'GPLv3'
 
     CONFIG = {}
-    browser = None
+    # Cache for the lazily created browser; see __getattr__.
+    _browser = None
 
-    def need_browser(func):
+    def __getattr__(self, name):
+        """Create the YoujizzBrowser the first time ``browser`` is read.
+
+        Python calls __getattr__ only when normal lookup fails, so any
+        other missing attribute still raises AttributeError.
+        """
+        if name == 'browser':
+            if not self._browser:
+                self._browser = YoujizzBrowser()
+            return self._browser
+        raise AttributeError(name)
+
+    def check_url(func):
+        """Decorator returning None, without calling func, for foreign URLs.
+
+        Only a non-numeric string that lacks 'youjizz.com' is rejected;
+        anything else (numeric id, non-string) is passed through to func.
+        """
         def inner(self, *args, **kwargs):
-            if not self.browser:
-                self.browser = YoujizzBrowser()
-            url = args[0]
-            if u'youjizz.com' not in url:
+            # The id may be passed by keyword, leaving args empty.
+            url = args[0] if args else None
+            if isinstance(url, basestring) and not url.isdigit() and u'youjizz.com' not in url:
                 return None
             return func(self, *args, **kwargs)
         return inner
 
-    @need_browser
+    @check_url
     def get_video(self, _id):
         return self.browser.get_video(_id)
 
-    @need_browser
+    @check_url
     def iter_page_urls(self, mozaic_url):
         return self.browser.iter_page_urls(mozaic_url)
+
+    def iter_search_results(self, pattern=None, sortby=None):
+        # NOTE: sortby is accepted but not forwarded to the browser.
+        return self.browser.iter_search_results(pattern)
diff --git a/weboob/backends/youjizz/browser.py b/weboob/backends/youjizz/browser.py
index bd8e1f34..ce0a60ba 100644
--- a/weboob/backends/youjizz/browser.py
+++ b/weboob/backends/youjizz/browser.py
@@ -18,11 +18,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
 """
 
-from logging import error
-import re
+import urllib
 
 from weboob.tools.browser import BaseBrowser
 
+from .pages.index import IndexPage
 from .pages.video import VideoPage
 
 
@@ -30,7 +30,16 @@ __all__ = ['YoujizzBrowser']
 
 
 class YoujizzBrowser(BaseBrowser):
-    PAGES = {r'http://.*youjizz\.com/videos/.+\.html': VideoPage}
+    DOMAIN = 'youjizz.com'
+    PROTOCOL = 'http'
+    PAGES = {r'http://.*youjizz\.com/?': IndexPage,
+             r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
+             r'http://.*youjizz\.com/search/.+\.html': IndexPage,
+            }
+
+    def id2url(self, _id):
+        """Return the video page URL for the given video id."""
+        return 'http://www.youjizz.com/videos/%s.html' % _id
 
     def get_video(self, url):
         self.location(url)
@@ -38,3 +47,20 @@ class YoujizzBrowser(BaseBrowser):
 
     def iter_page_urls(self, mozaic_url):
         raise NotImplementedError()
+
+    def iter_search_results(self, pattern):
+        """Return ``self.page.iter_videos()`` for the search on pattern.
+
+        An empty pattern calls self.home() instead of searching.
+        """
+        if not pattern:
+            self.home()
+        else:
+            # urllib.quote_plus() raises KeyError on non-ASCII unicode,
+            # so encode to UTF-8 bytes before quoting.
+            if isinstance(pattern, unicode):
+                pattern = pattern.encode('utf-8')
+            self.location('/search/%s-1.html' % urllib.quote_plus(pattern))
+
+        assert self.is_on_page(IndexPage)
+        return self.page.iter_videos()
diff --git a/weboob/backends/youjizz/pages/index.py b/weboob/backends/youjizz/pages/index.py
new file mode 100644
index 00000000..4842318c
--- /dev/null
+++ b/weboob/backends/youjizz/pages/index.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+"""
+Copyright(C) 2010 Roger Philibert
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""
+
+import re
+
+from weboob.capabilities.video import Video
+from weboob.tools.browser import BasePage
+
+
+__all__ = ['IndexPage']
+
+
+class IndexPage(BasePage):
+    """Video listing page (YoujizzBrowser maps home and search URLs here)."""
+    def iter_videos(self):
+        """Yield a Video for every ``span#miniatura`` thumbnail.
+
+        Thumbnails without a link are skipped. A missing title, preview
+        image or duration yields None, None or 0 instead of raising.
+        """
+        for span in self.document.getroot().cssselect("span#miniatura"):
+            a = span.find('.//a')
+            if a is None:
+                continue
+            _id = re.sub(r'/videos/(.+)\.html', r'\1', a.attrib['href'])
+
+            # cssselect() returns a list, never None; find() may return None.
+            img = span.find('.//img')
+            preview_url = img.attrib.get('src') if img is not None else None
+
+            title1 = span.cssselect('span#title1')
+            title = title1[0].text.strip() if title1 and title1[0].text else None
+
+            duration = 0
+            thumbtime = span.cssselect('span.thumbtime')
+            time_span = thumbtime[0].find('span') if thumbtime else None
+            if time_span is not None and time_span.text:
+                minutes, seconds = time_span.text.strip().split(':')
+                duration = 60 * int(minutes) + int(seconds)
+
+            yield Video(_id,
+                        title=title,
+                        page_url=self.browser.id2url(_id),
+                        duration=duration,
+                        preview_url=preview_url,
+                        nsfw=True)
diff --git a/weboob/backends/youjizz/pages/video.py b/weboob/backends/youjizz/pages/video.py
index 87796dc3..638f0b8a 100644
--- a/weboob/backends/youjizz/pages/video.py
+++ b/weboob/backends/youjizz/pages/video.py
@@ -18,6 +18,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" +from logging import error, warning import re from weboob.capabilities.video import Video @@ -53,6 +54,5 @@ class VideoPage(BasePage): div = self.document.getroot().cssselect('#video_text')[0] results['title'] = unicode(div.find('h2').text).strip() minutes, seconds = [int(v) for v in [e for e in div.cssselect('strong') if e.text.startswith('Runtime')][0].tail.split(':')] - print minutes, seconds results['duration'] = minutes * 60 + seconds return results diff --git a/weboob/backends/youporn/pages/index.py b/weboob/backends/youporn/pages/index.py index c76bf490..011ca3bb 100644 --- a/weboob/backends/youporn/pages/index.py +++ b/weboob/backends/youporn/pages/index.py @@ -18,9 +18,14 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. """ -from .base import PornPage from weboob.capabilities.video import Video +from .base import PornPage + + +__all__ = ['IndexPage'] + + class IndexPage(PornPage): def iter_videos(self): uls = self.document.getroot().cssselect("ul[class=clearfix]") @@ -43,14 +48,14 @@ class IndexPage(PornPage): url = a.attrib['href'] _id = url[len('/watch/'):] _id = _id[:_id.find('/')] - title = a.text + title = a.text.strip() duration = 0 div = li.cssselect('div[class=duration_views]') if div: h2 = div[0].find('h2') - duration = 60 * int(h2.text) - duration += int(h2.find('span').tail) + duration = 60 * int(h2.text.strip()) + duration += int(h2.find('span').tail.strip()) rating = 0 rating_max = 0 @@ -59,8 +64,10 @@ class IndexPage(PornPage): p = div[0].find('p') rating = float(p.text.strip()) rating_max = float(p.find('span').text.strip()[2:]) + yield Video(int(_id), - title, + title=title, + page_url=self.browser.id2url(_id), rating=rating, rating_max=rating_max, duration=duration, diff --git a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py index f4527f71..bf038b5e 100644 --- a/weboob/backends/youtube/browser.py +++ b/weboob/backends/youtube/browser.py @@ -22,7 +22,6 @@ import urllib import re from 
weboob.tools.browser import BaseBrowser -from weboob.tools.parsers.lxmlparser import LxmlHtmlParser from .pages import VideoPage, ResultsPage diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index fd902f48..2383b545 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -25,10 +25,12 @@ __all__ = ['ICapVideoProvider', 'Video'] class Video(object): - def __init__(self, _id, title=u'', url=u'', author=u'', duration=0, date=None, rating=0, rating_max=0, preview_url=None, nsfw=False): + def __init__(self, _id, title=u'', url=u'', page_url=u'', author=u'', duration=0, date=None, + rating=0, rating_max=0, preview_url=None, nsfw=False): self.id = _id self.title = title self.url = url + self.page_url = page_url self.author = author self.duration = duration self.date = date diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index 7bce21f8..d7b83b0f 100644 --- a/weboob/frontends/videoob/application.py +++ b/weboob/frontends/videoob/application.py @@ -65,6 +65,7 @@ class Videoob(ConsoleApplication): results['BEFORE'] = u'Search pattern: %s' % pattern else: results['BEFORE'] = u'Last videos' + results['HEADER'] = ('ID', 'Title', 'Page URL') for backend in self.weboob.iter_backends(): try: iterator = backend.iter_search_results(pattern) @@ -73,8 +74,7 @@ class Videoob(ConsoleApplication): else: rows = [] for video in iterator: - rows.append(('ID', video.id)) - rows.append(('Title', video.title)) + rows.append((video.id, video.title, video.page_url)) results[backend.name] = rows return results diff --git a/weboob/tools/application/console.py b/weboob/tools/application/console.py index 952b80a2..abc5bc06 100644 --- a/weboob/tools/application/console.py +++ b/weboob/tools/application/console.py @@ -18,10 +18,12 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
""" +import logging import sys, tty, termios import re from inspect import getargspec from functools import partial + from weboob.modules import BackendsConfig from .base import BaseApplication @@ -79,11 +81,8 @@ class TextFormatter(object): if header[i]: formatted_cols.append(u'%s: %s' % (header[i], col)) else: - formatted_cols.append(col) - if len(formatted_cols) == 2: - formatted += u'%s: %s\n' % (formatted_cols[0], formatted_cols[1]) - else: - formatted += u'%s\n' % u' '.join(formatted_cols) + formatted_cols.append(unicode(col)) + formatted += u'%s\n' % u' '.join(formatted_cols) return unicode(formatted).strip() @@ -172,7 +171,10 @@ class ConsoleApplication(BaseApplication): if defaults: nb_min_args -= len(defaults) - if len(args) < nb_min_args or len(args) > nb_max_args and not varargs: + if len(args) > nb_max_args and not varargs: + sys.stderr.write("Command '%s' takes at most %d arguments.\n" % (command, nb_max_args)) + return 1 + if len(args) < nb_min_args: if varargs or defaults: sys.stderr.write("Command '%s' takes at least %d arguments.\n" % (command, nb_min_args)) else: @@ -190,7 +192,10 @@ class ConsoleApplication(BaseApplication): output_format = self.default_output_format else: output_format = 'table' - print formatters[output_format].format(command_result) + try: + print formatters[output_format].format(command_result) + except ImportError, e: + logging.error(u'Could not use formatter "%s". Error: %s' % (output_format, e.message)) return 0 elif isinstance(command_result, int): return command_result