Merge branch 'master' of ssh+git://git.symlink.me/var/git/pub/cbenz/weboob

Conflicts:
	weboob/tools/application/console.py
This commit is contained in:
Romain Bignon 2010-04-21 00:23:11 +02:00
commit 71e104595a
9 changed files with 128 additions and 27 deletions

View file

@ -32,22 +32,30 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider):
LICENSE = 'GPLv3' LICENSE = 'GPLv3'
CONFIG = {} CONFIG = {}
browser = None _browser = None
def need_browser(func): def __getattr__(self, name):
if name == 'browser':
if not self._browser:
self._browser = YoujizzBrowser()
return self._browser
raise AttributeError, name
def check_url(func):
def inner(self, *args, **kwargs): def inner(self, *args, **kwargs):
if not self.browser:
self.browser = YoujizzBrowser()
url = args[0] url = args[0]
if u'youjizz.com' not in url: if isinstance(url, (str,unicode)) and not url.isdigit() and u'youjizz.com' not in url:
return None return None
return func(self, *args, **kwargs) return func(self, *args, **kwargs)
return inner return inner
@need_browser @check_url
def get_video(self, _id): def get_video(self, _id):
return self.browser.get_video(_id) return self.browser.get_video(_id)
@need_browser @check_url
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
return self.browser.iter_page_urls(mozaic_url) return self.browser.iter_page_urls(mozaic_url)
def iter_search_results(self, pattern=None, sortby=None):
return self.browser.iter_search_results(pattern)

View file

@ -18,11 +18,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from logging import error import urllib
import re
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from .pages.index import IndexPage
from .pages.video import VideoPage from .pages.video import VideoPage
@ -30,7 +30,15 @@ __all__ = ['YoujizzBrowser']
class YoujizzBrowser(BaseBrowser): class YoujizzBrowser(BaseBrowser):
PAGES = {r'http://.*youjizz\.com/videos/.+\.html': VideoPage} DOMAIN = 'youjizz.com'
PROTOCOL = 'http'
PAGES = {r'http://.*youjizz\.com/?': IndexPage,
r'http://.*youjizz\.com/videos/.+\.html': VideoPage,
r'http://.*youjizz\.com/search/.+\.html': IndexPage,
}
def id2url(self, _id):
return 'http://www.youjizz.com/videos/%s.html' % _id
def get_video(self, url): def get_video(self, url):
self.location(url) self.location(url)
@ -38,3 +46,12 @@ class YoujizzBrowser(BaseBrowser):
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
raise NotImplementedError() raise NotImplementedError()
def iter_search_results(self, pattern):
if not pattern:
self.home()
else:
self.location('/search/%s-1.html' % (urllib.quote_plus(pattern)))
assert self.is_on_page(IndexPage)
return self.page.iter_videos()

View file

@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
"""
Copyright(C) 2010 Roger Philibert
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
import re
from weboob.capabilities.video import Video
from weboob.tools.browser import BasePage
__all__ = ['IndexPage']
class IndexPage(BasePage):
    """Listing page whose ``span#miniatura`` elements each describe a video."""

    # Captures the identifier part of a link such as '/videos/<id>.html'.
    _VIDEO_HREF_RE = re.compile(r'/videos/(.+)\.html')

    @staticmethod
    def _parse_duration(text):
        """Convert a colon-separated time string into a number of seconds.

        Each colon-separated field is weighted by a factor of 60 relative to
        the next one, so 'MM:SS' and 'H:MM:SS' are both accepted.

        Returns 0 (the same value used when no duration is displayed) if
        *text* is empty or contains a non-integer field.
        """
        if not text:
            return 0
        total = 0
        try:
            for field in text.strip().split(':'):
                total = 60 * total + int(field)
        except ValueError:
            return 0
        return total

    def iter_videos(self):
        """Yield a Video for every thumbnail found in the document.

        Thumbnails without a link (no <a>, or an <a> without href) are
        skipped. Optional pieces of markup that are absent fall back to
        neutral values: title and preview_url become None, duration 0.
        """
        for span in self.document.getroot().cssselect("span#miniatura"):
            a = span.find('.//a')
            if a is None:
                continue

            url = a.get('href')
            if not url:
                continue

            # search() rather than sub(): with an absolute URL or a trailing
            # query string, sub() would leave the surrounding text glued to
            # the identifier. Unrecognised links keep the raw href as id.
            match = self._VIDEO_HREF_RE.search(url)
            _id = match.group(1) if match else url

            img = span.find('.//img')
            preview_url = img.get('src') if img is not None else None

            # cssselect() returns a (possibly empty) list, never None, so
            # emptiness is what has to be tested before indexing.
            title = None
            title_nodes = span.cssselect('span#title1')
            if title_nodes and title_nodes[0].text:
                title = title_nodes[0].text.strip()

            duration = 0
            thumbtime = span.cssselect('span.thumbtime')
            if thumbtime:
                time_span = thumbtime[0].find('span')
                if time_span is not None:
                    duration = self._parse_duration(time_span.text)

            yield Video(_id,
                        title=title,
                        page_url=self.browser.id2url(_id),
                        duration=duration,
                        preview_url=preview_url,
                        nsfw=True)

View file

@ -18,6 +18,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from logging import error, warning
import re import re
from weboob.capabilities.video import Video from weboob.capabilities.video import Video
@ -53,6 +54,5 @@ class VideoPage(BasePage):
div = self.document.getroot().cssselect('#video_text')[0] div = self.document.getroot().cssselect('#video_text')[0]
results['title'] = unicode(div.find('h2').text).strip() results['title'] = unicode(div.find('h2').text).strip()
minutes, seconds = [int(v) for v in [e for e in div.cssselect('strong') if e.text.startswith('Runtime')][0].tail.split(':')] minutes, seconds = [int(v) for v in [e for e in div.cssselect('strong') if e.text.startswith('Runtime')][0].tail.split(':')]
print minutes, seconds
results['duration'] = minutes * 60 + seconds results['duration'] = minutes * 60 + seconds
return results return results

View file

@ -18,9 +18,14 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
from .base import PornPage
from weboob.capabilities.video import Video from weboob.capabilities.video import Video
from .base import PornPage
__all__ = ['IndexPage']
class IndexPage(PornPage): class IndexPage(PornPage):
def iter_videos(self): def iter_videos(self):
uls = self.document.getroot().cssselect("ul[class=clearfix]") uls = self.document.getroot().cssselect("ul[class=clearfix]")
@ -43,14 +48,14 @@ class IndexPage(PornPage):
url = a.attrib['href'] url = a.attrib['href']
_id = url[len('/watch/'):] _id = url[len('/watch/'):]
_id = _id[:_id.find('/')] _id = _id[:_id.find('/')]
title = a.text title = a.text.strip()
duration = 0 duration = 0
div = li.cssselect('div[class=duration_views]') div = li.cssselect('div[class=duration_views]')
if div: if div:
h2 = div[0].find('h2') h2 = div[0].find('h2')
duration = 60 * int(h2.text) duration = 60 * int(h2.text.strip())
duration += int(h2.find('span').tail) duration += int(h2.find('span').tail.strip())
rating = 0 rating = 0
rating_max = 0 rating_max = 0
@ -59,8 +64,10 @@ class IndexPage(PornPage):
p = div[0].find('p') p = div[0].find('p')
rating = float(p.text.strip()) rating = float(p.text.strip())
rating_max = float(p.find('span').text.strip()[2:]) rating_max = float(p.find('span').text.strip()[2:])
yield Video(int(_id), yield Video(int(_id),
title, title=title,
page_url=self.browser.id2url(_id),
rating=rating, rating=rating,
rating_max=rating_max, rating_max=rating_max,
duration=duration, duration=duration,

View file

@ -22,7 +22,6 @@ import urllib
import re import re
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from weboob.tools.parsers.lxmlparser import LxmlHtmlParser
from .pages import VideoPage, ResultsPage from .pages import VideoPage, ResultsPage

View file

@ -25,10 +25,12 @@ __all__ = ['ICapVideoProvider', 'Video']
class Video(object): class Video(object):
def __init__(self, _id, title=u'', url=u'', author=u'', duration=0, date=None, rating=0, rating_max=0, preview_url=None, nsfw=False): def __init__(self, _id, title=u'', url=u'', page_url=u'', author=u'', duration=0, date=None,
rating=0, rating_max=0, preview_url=None, nsfw=False):
self.id = _id self.id = _id
self.title = title self.title = title
self.url = url self.url = url
self.page_url = page_url
self.author = author self.author = author
self.duration = duration self.duration = duration
self.date = date self.date = date

View file

@ -65,6 +65,7 @@ class Videoob(ConsoleApplication):
results['BEFORE'] = u'Search pattern: %s' % pattern results['BEFORE'] = u'Search pattern: %s' % pattern
else: else:
results['BEFORE'] = u'Last videos' results['BEFORE'] = u'Last videos'
results['HEADER'] = ('ID', 'Title', 'Page URL')
for backend in self.weboob.iter_backends(): for backend in self.weboob.iter_backends():
try: try:
iterator = backend.iter_search_results(pattern) iterator = backend.iter_search_results(pattern)
@ -73,8 +74,7 @@ class Videoob(ConsoleApplication):
else: else:
rows = [] rows = []
for video in iterator: for video in iterator:
rows.append(('ID', video.id)) rows.append((video.id, video.title, video.page_url))
rows.append(('Title', video.title))
results[backend.name] = rows results[backend.name] = rows
return results return results

View file

@ -18,10 +18,12 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
import logging
import sys, tty, termios import sys, tty, termios
import re import re
from inspect import getargspec from inspect import getargspec
from functools import partial from functools import partial
from weboob.modules import BackendsConfig from weboob.modules import BackendsConfig
from .base import BaseApplication from .base import BaseApplication
@ -79,11 +81,8 @@ class TextFormatter(object):
if header[i]: if header[i]:
formatted_cols.append(u'%s: %s' % (header[i], col)) formatted_cols.append(u'%s: %s' % (header[i], col))
else: else:
formatted_cols.append(col) formatted_cols.append(unicode(col))
if len(formatted_cols) == 2: formatted += u'%s\n' % u' '.join(formatted_cols)
formatted += u'%s: %s\n' % (formatted_cols[0], formatted_cols[1])
else:
formatted += u'%s\n' % u' '.join(formatted_cols)
return unicode(formatted).strip() return unicode(formatted).strip()
@ -172,7 +171,10 @@ class ConsoleApplication(BaseApplication):
if defaults: if defaults:
nb_min_args -= len(defaults) nb_min_args -= len(defaults)
if len(args) < nb_min_args or len(args) > nb_max_args and not varargs: if len(args) > nb_max_args and not varargs:
sys.stderr.write("Command '%s' takes at most %d arguments.\n" % (command, nb_max_args))
return 1
if len(args) < nb_min_args:
if varargs or defaults: if varargs or defaults:
sys.stderr.write("Command '%s' takes at least %d arguments.\n" % (command, nb_min_args)) sys.stderr.write("Command '%s' takes at least %d arguments.\n" % (command, nb_min_args))
else: else:
@ -190,7 +192,10 @@ class ConsoleApplication(BaseApplication):
output_format = self.default_output_format output_format = self.default_output_format
else: else:
output_format = 'table' output_format = 'table'
print formatters[output_format].format(command_result) try:
print formatters[output_format].format(command_result)
except ImportError, e:
logging.error(u'Could not use formatter "%s". Error: %s' % (output_format, e.message))
return 0 return 0
elif isinstance(command_result, int): elif isinstance(command_result, int):
return command_result return command_result