Merge branch 'master' of ssh+git://git.symlink.me/var/git/pub/cbenz/weboob

Conflicts:
	weboob/backends/youtube/backend.py
	weboob/backends/youtube/browser.py
	weboob/backends/youtube/pages/results.py
	weboob/backends/youtube/pages/video.py
This commit is contained in:
Romain Bignon 2010-04-27 09:18:16 +02:00
commit 66fa086574
13 changed files with 65 additions and 130 deletions

View file

@ -4,6 +4,14 @@ Weboob installation
Like any setuptools package, Weboob can be installed in normal mode, Like any setuptools package, Weboob can be installed in normal mode,
or in development mode. or in development mode.
Debian note
-----------
When using Debian, it is advised to use the Debian-packaged Python libraries rather than the PyPI ones.
To achieve this, please install the following packages before installing Weboob:
* python-gdata
normal mode normal mode
----------- -----------

View file

@ -39,6 +39,6 @@ setup(
packages=find_packages(exclude=['ez_setup']), packages=find_packages(exclude=['ez_setup']),
scripts=[os.path.join('scripts', script) for script in os.listdir('scripts')], scripts=[os.path.join('scripts', script) for script in os.listdir('scripts')],
install_requires=[ install_requires=[
# 'pyyaml', 'gdata',
] ]
) )

View file

@ -57,5 +57,7 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider):
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
return self.browser.iter_page_urls(mozaic_url) return self.browser.iter_page_urls(mozaic_url)
def iter_search_results(self, pattern=None, sortby=None): def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False):
if not nsfw:
return iter(set())
return self.browser.iter_search_results(pattern) return self.browser.iter_search_results(pattern)

View file

@ -57,7 +57,6 @@ class IndexPage(BasePage):
yield Video(_id, yield Video(_id,
title=title, title=title,
page_url=self.browser.id2url(_id),
duration=duration, duration=duration,
preview_url=preview_url, preview_url=preview_url,
nsfw=True) nsfw=True)

View file

@ -54,7 +54,9 @@ class YoupornBackend(BaseBackend, ICapVideoProvider):
return self.browser.get_video(_id) return self.browser.get_video(_id)
SORTBY = ['relevance', 'rating', 'views', 'time'] SORTBY = ['relevance', 'rating', 'views', 'time']
def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE): def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False):
if not nsfw:
return iter(set())
return self.browser.iter_search_results(pattern, self.SORTBY[sortby]) return self.browser.iter_search_results(pattern, self.SORTBY[sortby])
@need_url @need_url

View file

@ -67,7 +67,6 @@ class IndexPage(PornPage):
yield Video(int(_id), yield Video(int(_id),
title=title, title=title,
page_url=self.browser.id2url(_id),
rating=rating, rating=rating,
rating_max=rating_max, rating_max=rating_max,
duration=duration, duration=duration,

View file

@ -18,10 +18,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
import logging
import re import re
from weboob.backend import BaseBackend from weboob.backend import BaseBackend
from weboob.capabilities.video import ICapVideoProvider from weboob.capabilities.video import ICapVideoProvider, Video
from .browser import YoutubeBrowser from .browser import YoutubeBrowser
@ -43,22 +44,32 @@ class YoutubeBackend(BaseBackend, ICapVideoProvider):
return self._browser return self._browser
raise AttributeError, name raise AttributeError, name
def need_url(func):
def inner(self, *args, **kwargs):
url = args[0]
if (u'youtube.com' not in url) and not re.match('^[\w-]+$', url):
return None
return func(self, *args, **kwargs)
return inner
@need_url
def get_video(self, _id): def get_video(self, _id):
return self.browser.get_video(_id) return self.browser.get_video(_id)
SORTBY = ['', 'video_avg_rating', 'video_view_count', 'video_date_uploaded'] def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False):
def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE): try:
return self.browser.iter_search_results(pattern, self.SORTBY[sortby]) import gdata.youtube.service
except ImportError:
logging.warning('Youtube backend search feature requires python-gdata package.')
return
yt_service = gdata.youtube.service.YouTubeService()
query = gdata.youtube.service.YouTubeVideoQuery()
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
query.racy = 'include' if nsfw else 'exclude'
if pattern:
query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split())
feed = yt_service.YouTubeQuery(query)
for entry in feed.entry:
if entry.media.name:
author = entry.media.name.text.decode('utf-8').strip()
else:
author = None
yield Video(entry.id.text.split('/')[-1].decode('utf-8'),
title=entry.media.title.text.decode('utf-8').strip(),
author=author,
duration=int(entry.media.duration.seconds.decode('utf-8').strip()),
preview_url=entry.media.thumbnail[0].url.decode('utf-8').strip())
@need_url
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
raise NotImplementedError() raise NotImplementedError()

View file

@ -19,35 +19,20 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
""" """
import urllib import urllib
import re
from weboob.tools.browser import BaseBrowser from weboob.tools.browser import BaseBrowser
from .pages import VideoPage, ResultsPage from .pages import VideoPage
__all__ = ['YoutubeBrowser'] __all__ = ['YoutubeBrowser']
class YoutubeBrowser(BaseBrowser): class YoutubeBrowser(BaseBrowser):
PAGES = {'.*youtube\.com/watch\?v=(.+)': VideoPage, PAGES = {'.*youtube\.com/watch\?v=(.+)': VideoPage,
'.*youtube\.com/results\?.*': ResultsPage,
} }
def iter_search_results(self, pattern, sortby): def id2url(self, _id):
if not pattern: return _id if 'youtube.com' in _id else 'http://www.youtube.com/watch?v=%s' % _id
self.home()
else:
if sortby:
sortby = '&search_sort=%s' % sortby
self.location('http://www.youtube.com/results?search_type=videos&search_query=%s%s' % (urllib.quote_plus(pattern), sortby))
assert self.is_on_page(ResultsPage)
return self.page.iter_videos()
def get_video(self, _id): def get_video(self, _id):
if re.match('^[\w-]+$', _id): self.location(self.id2url(_id))
url = 'http://www.youtube.com/watch?v=%s' % _id
else:
url = _id
self.location(url)
return self.page.video return self.page.video

View file

@ -1,66 +0,0 @@
# -*- coding: utf-8 -*-
"""
Copyright(C) 2010 Romain Bignon
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, version 3 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
import re
from weboob.tools.browser import BasePage
from weboob.capabilities.video import Video
class ResultsPage(BasePage):
WATCH_RE = re.compile('/watch\?v=([\w-]+)')
def iter_videos(self):
for div in self.document.getroot().cssselect("div[class^=video-entry]"):
a = div.find('a')
if a is None:
print 'wtf'
continue
_id = ''
m = self.WATCH_RE.match(a.attrib['href'])
if m:
_id = m.group(1)
title = a.find('span').find('img').attrib['alt']
preview_url = a.find('span').find('img').attrib['src']
if preview_url.endswith('.gif'):
preview_url = a.find('span').find('img').attrib['thumb']
vtime = a.find('span').find('span')
duration = 0
if not vtime is None:
vtime = vtime.find('span').text.split(':')
if len(vtime) > 0:
duration += int(vtime[-1])
if len(vtime) > 1:
duration += 60 * int(vtime[-2])
if len(vtime) > 3:
duration += 3600 * int(vtime[-3])
if len(vtime) > 4:
print 'WTF'
author = ''
author_div = div.cssselect('span[class=video-username]')
if author_div:
author = author_div[0].find('a').text.strip()
yield Video(_id,
title,
author=author,
duration=duration,
preview_url=preview_url)

View file

@ -25,7 +25,7 @@ from weboob.tools.browser import BasePage
from weboob.capabilities.video import Video from weboob.capabilities.video import Video
class VideoPage(BasePage): class VideoPage(BasePage):
URL_REGEX = re.compile(r"https?://[w\.]*youtube.com/watch\?v=([\w-]+)") URL_REGEX = re.compile(r"https?://[w\.]*youtube.com/watch\?v=(.+)")
VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)') VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)')
def on_loaded(self): def on_loaded(self):

View file

@ -25,20 +25,23 @@ __all__ = ['ICapVideoProvider', 'Video']
class Video(object): class Video(object):
def __init__(self, _id, title=u'', url=u'', page_url=u'', author=u'', duration=0, date=None, def __init__(self, _id, title=None, url=None, author=None, duration=0, date=None,
rating=0, rating_max=0, preview_url=None, nsfw=False): rating=0.0, rating_max=0.0, preview_url=None, nsfw=False):
self.id = _id self.id = _id
self.title = title self.title = title
self.url = url self.url = url
self.page_url = page_url
self.author = author self.author = author
self.duration = duration self.duration = int(duration)
self.date = date self.date = date
self.rating = rating self.rating = float(rating)
self.rating_max = rating_max self.rating_max = float(rating_max)
self.preview_url = preview_url self.preview_url = preview_url
self.nsfw = nsfw self.nsfw = nsfw
@property
def formatted_duration(self):
return '%d:%02d:%02d' % (self.duration / 3600, (self.duration % 3600 / 60), self.duration % 60)
class ICapVideoProvider(ICap): class ICapVideoProvider(ICap):
def iter_page_urls(self, mozaic_url): def iter_page_urls(self, mozaic_url):
raise NotImplementedError() raise NotImplementedError()
@ -48,13 +51,14 @@ class ICapVideoProvider(ICap):
SEARCH_VIEWS, SEARCH_VIEWS,
SEARCH_DATE) = range(4) SEARCH_DATE) = range(4)
def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE): def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False):
""" """
Iter results of a search on a pattern. Note that if pattern is None, Iter results of a search on a pattern. Note that if pattern is None,
it get the latest videos. it get the latest videos.
@param pattern [str] pattern to search on @param pattern [str] pattern to search on
@param sortby [enum] sort by... @param sortby [enum] sort by...
@param nsfw [bool] include not-safe-for-work (NSFW) videos if True
""" """
raise NotImplementedError() raise NotImplementedError()

View file

@ -27,6 +27,10 @@ class Videoob(ConsoleApplication):
COPYRIGHT = 'Copyright(C) 2010 Christophe Benz, Romain Bignon' COPYRIGHT = 'Copyright(C) 2010 Christophe Benz, Romain Bignon'
CONFIG = {} CONFIG = {}
def __init__(self):
ConsoleApplication.__init__(self)
self._parser.add_option('--nsfw', action='store_true', help='enable non-suitable for work videos')
def main(self, argv): def main(self, argv):
self.load_modules(ICapVideoProvider) self.load_modules(ICapVideoProvider)
return self.process_command(*argv[1:]) return self.process_command(*argv[1:])
@ -67,21 +71,6 @@ class Videoob(ConsoleApplication):
results['BEFORE'] = u'Last videos' results['BEFORE'] = u'Last videos'
results['HEADER'] = ('ID', 'Title', 'Duration') results['HEADER'] = ('ID', 'Title', 'Duration')
for backend in self.weboob.iter_backends(): for backend in self.weboob.iter_backends():
try: results[backend.name] = [(video.id, video.title, video.formatted_duration) for video in
iterator = backend.iter_search_results(pattern) backend.iter_search_results(pattern=pattern, nsfw=self.options.nsfw)]
except NotImplementedError:
continue
else:
rows = []
for video in iterator:
rows.append((video.id, video.title, '%d:%02d:%02d' % (video.duration/3600, (video.duration%3600/60), video.duration%60)))
results[backend.name] = rows
return results return results
@ConsoleApplication.command('Get video file URL from page URL')
def command_file_url(self, url):
for backend in self.weboob.iter_backends():
video = backend.get_video(url)
if video:
print video.url
break

View file

@ -132,7 +132,9 @@ class BaseApplication(object):
return set() return set()
@classmethod @classmethod
def run(klass, args=sys.argv): def run(klass, args=None):
if args is None:
args = [arg.decode(sys.stdin.encoding) for arg in sys.argv]
app = klass() app = klass()
app.options, args = app._parser.parse_args(args) app.options, args = app._parser.parse_args(args)