From ce83fb011f8bf1d653d6f481f3238a66b0c68b49 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:19:55 +0200 Subject: [PATCH 1/9] bugfix #264: use gdata API for youtube --- setup.py | 2 +- weboob/backends/youtube/backend.py | 32 +++++++----- weboob/backends/youtube/browser.py | 23 ++------- weboob/backends/youtube/pages/results.py | 66 ------------------------ weboob/backends/youtube/pages/video.py | 2 +- 5 files changed, 25 insertions(+), 100 deletions(-) delete mode 100644 weboob/backends/youtube/pages/results.py diff --git a/setup.py b/setup.py index 70957bbc..23face2c 100755 --- a/setup.py +++ b/setup.py @@ -39,6 +39,6 @@ setup( packages=find_packages(exclude=['ez_setup']), scripts=[os.path.join('scripts', script) for script in os.listdir('scripts')], install_requires=[ - # 'pyyaml', + 'gdata', ] ) diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index c96d3246..5c89dd65 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -21,7 +21,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
import re from weboob.backend import BaseBackend -from weboob.capabilities.video import ICapVideoProvider +from weboob.capabilities.video import ICapVideoProvider, Video from .browser import YoutubeBrowser @@ -43,22 +43,28 @@ class YoutubeBackend(BaseBackend, ICapVideoProvider): return self._browser raise AttributeError, name - def need_url(func): - def inner(self, *args, **kwargs): - url = args[0] - if (u'youtube.com' not in url) and not re.match('^\w+$', url): - return None - return func(self, *args, **kwargs) - return inner - - @need_url def get_video(self, _id): return self.browser.get_video(_id) - SORTBY = ['', 'video_avg_rating', 'video_view_count', 'video_date_uploaded'] def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE): - return self.browser.iter_search_results(pattern, self.SORTBY[sortby]) + import gdata.youtube.service + yt_service = gdata.youtube.service.YouTubeService() + query = gdata.youtube.service.YouTubeVideoQuery() + query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby] + query.racy = 'include' + if pattern: + query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split()) + feed = yt_service.YouTubeQuery(query) + for entry in feed.entry: + if entry.media.name: + author = entry.media.name.text.decode('utf-8').strip() + else: + author = None + yield Video(entry.id.text.split('/')[-1].decode('utf-8'), + title=entry.media.title.text.decode('utf-8').strip(), + author=author, + duration=int(entry.media.duration.seconds.decode('utf-8').strip()), + preview_url=entry.media.thumbnail[0].url.decode('utf-8').strip()) - @need_url def iter_page_urls(self, mozaic_url): raise NotImplementedError() diff --git a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py index bf038b5e..fb6afec7 100644 --- a/weboob/backends/youtube/browser.py +++ b/weboob/backends/youtube/browser.py @@ -19,35 +19,20 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, 
MA 02111-1307, USA. """ import urllib -import re from weboob.tools.browser import BaseBrowser -from .pages import VideoPage, ResultsPage +from .pages import VideoPage __all__ = ['YoutubeBrowser'] class YoutubeBrowser(BaseBrowser): PAGES = {'.*youtube\.com/watch\?v=(.+)': VideoPage, - '.*youtube\.com/results\?.*': ResultsPage, } - def iter_search_results(self, pattern, sortby): - if not pattern: - self.home() - else: - if sortby: - sortby = '&search_sort=%s' % sortby - self.location('http://www.youtube.com/results?search_type=videos&search_query=%s%s' % (urllib.quote_plus(pattern), sortby)) - - assert self.is_on_page(ResultsPage) - return self.page.iter_videos() + def id2url(self, _id): + return _id if 'youtube.com' in _id else 'http://www.youtube.com/watch?v=%s' % _id def get_video(self, _id): - if re.match('^\w+$', _id): - url = 'http://www.youtube.com/watch?v=%s' % _id - else: - url = _id - - self.location(url) + self.location(self.id2url(_id)) return self.page.video diff --git a/weboob/backends/youtube/pages/results.py b/weboob/backends/youtube/pages/results.py deleted file mode 100644 index 63fc0cca..00000000 --- a/weboob/backends/youtube/pages/results.py +++ /dev/null @@ -1,66 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -Copyright(C) 2010 Romain Bignon - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, version 3 of the License. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -""" - -import re - -from weboob.tools.browser import BasePage -from weboob.capabilities.video import Video - -class ResultsPage(BasePage): - WATCH_RE = re.compile('/watch\?v=(\w+)') - def iter_videos(self): - for div in self.document.getroot().cssselect("div[class^=video-entry]"): - a = div.find('a') - if a is None: - print 'wtf' - continue - - _id = '' - m = self.WATCH_RE.match(a.attrib['href']) - if m: - _id = m.group(1) - - title = a.find('span').find('img').attrib['alt'] - preview_url = a.find('span').find('img').attrib['src'] - if preview_url.endswith('.gif'): - preview_url = a.find('span').find('img').attrib['thumb'] - - vtime = a.find('span').find('span') - duration = 0 - if not vtime is None: - vtime = vtime.find('span').text.split(':') - if len(vtime) > 0: - duration += int(vtime[-1]) - if len(vtime) > 1: - duration += 60 * int(vtime[-2]) - if len(vtime) > 3: - duration += 3600 * int(vtime[-3]) - if len(vtime) > 4: - print 'WTF' - - author = '' - author_div = div.cssselect('span[class=video-username]') - if author_div: - author = author_div[0].find('a').text.strip() - yield Video(_id, - title, - author=author, - duration=duration, - preview_url=preview_url) diff --git a/weboob/backends/youtube/pages/video.py b/weboob/backends/youtube/pages/video.py index 29a16654..7cf97cdd 100644 --- a/weboob/backends/youtube/pages/video.py +++ b/weboob/backends/youtube/pages/video.py @@ -25,7 +25,7 @@ from weboob.tools.browser import BasePage from weboob.capabilities.video import Video class VideoPage(BasePage): - URL_REGEX = re.compile(r"https?://[w\.]*youtube.com/watch\?v=(\w+)") + URL_REGEX = re.compile(r"https?://[w\.]*youtube.com/watch\?v=(.+)") VIDEO_SIGNATURE_REGEX = re.compile(r'&t=([^ ,&]*)') def on_loaded(self): From eaff02715f647d80992cca919e64fdbb17ebe553 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:20:03 +0200 Subject: [PATCH 2/9] Add Debian note to INSTALL --- INSTALL | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/INSTALL b/INSTALL index 6ef5e043..668f88e2 100644 --- a/INSTALL +++ b/INSTALL @@ -4,6 +4,14 @@ Weboob installation Like any setuptools package, Weboob can be installed in normal mode, or in development mode. +Debian note +----------- + +When using Debian, it is advised to use Python Debian packages, and not the PyPI ones. + +To achieve this, please install the following packages before installing Weboob: +* python-gdata + normal mode ----------- From 9d7d8692ba810ed09f0fd4711c9c2fd6701ba456 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:21:21 +0200 Subject: [PATCH 3/9] add nsfw parameter for video search --- weboob/backends/youjizz/backend.py | 4 +++- weboob/backends/youporn/backend.py | 4 +++- weboob/backends/youtube/backend.py | 4 ++-- weboob/capabilities/video.py | 3 ++- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/weboob/backends/youjizz/backend.py b/weboob/backends/youjizz/backend.py index 603aeebe..b55fd89f 100644 --- a/weboob/backends/youjizz/backend.py +++ b/weboob/backends/youjizz/backend.py @@ -57,5 +57,7 @@ class YoujizzBackend(BaseBackend, ICapVideoProvider): def iter_page_urls(self, mozaic_url): return self.browser.iter_page_urls(mozaic_url) - def iter_search_results(self, pattern=None, sortby=None): + def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False): + if not nsfw: + return iter(set()) return self.browser.iter_search_results(pattern) diff --git a/weboob/backends/youporn/backend.py b/weboob/backends/youporn/backend.py index 1f2029ae..fa50c909 100644 --- a/weboob/backends/youporn/backend.py +++ b/weboob/backends/youporn/backend.py @@ -54,7 +54,9 @@ class YoupornBackend(BaseBackend, ICapVideoProvider): return self.browser.get_video(_id) SORTBY = ['relevance', 'rating', 'views', 'time'] - def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE): + def iter_search_results(self, pattern=None, 
sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False): + if not nsfw: + return iter(set()) return self.browser.iter_search_results(pattern, self.SORTBY[sortby]) @need_url diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index 5c89dd65..ce8145e3 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -46,12 +46,12 @@ class YoutubeBackend(BaseBackend, ICapVideoProvider): def get_video(self, _id): return self.browser.get_video(_id) - def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE): + def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False): import gdata.youtube.service yt_service = gdata.youtube.service.YouTubeService() query = gdata.youtube.service.YouTubeVideoQuery() query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby] - query.racy = 'include' + query.racy = 'include' if nsfw else 'exclude' if pattern: query.categories.extend('/%s' % search_term.lower().encode('utf-8') for search_term in pattern.split()) feed = yt_service.YouTubeQuery(query) diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index 2383b545..dff5f963 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -48,13 +48,14 @@ class ICapVideoProvider(ICap): SEARCH_VIEWS, SEARCH_DATE) = range(4) - def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE): + def iter_search_results(self, pattern=None, sortby=SEARCH_RELEVANCE, nsfw=False): """ Iter results of a search on a pattern. Note that if pattern is None, it get the latest videos. @param pattern [str] pattern to search on @param sortby [enum] sort by... 
+ @param nsfw [bool] include not-safe-for-work videos if True """ raise NotImplementedError() From 9fe80530529d4d358aaa93782a5243f3cfaa61f0 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:21:58 +0200 Subject: [PATCH 4/9] simplify search command code --- weboob/capabilities/video.py | 3 +++ weboob/frontends/videoob/application.py | 11 ++--------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index dff5f963..dbdb59ea 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -39,6 +39,9 @@ class Video(object): self.preview_url = preview_url self.nsfw = nsfw + @property + def formatted_duration(self): + return '%d:%02d:%02d' % (self.duration / 3600, (self.duration % 3600 / 60), self.duration % 60) class ICapVideoProvider(ICap): def iter_page_urls(self, mozaic_url): raise NotImplementedError() diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index a0859061..367b7d8b 100644 --- a/weboob/frontends/videoob/application.py +++ b/weboob/frontends/videoob/application.py @@ -67,15 +67,8 @@ class Videoob(ConsoleApplication): results['BEFORE'] = u'Last videos' results['HEADER'] = ('ID', 'Title', 'Duration') for backend in self.weboob.iter_backends(): - try: - iterator = backend.iter_search_results(pattern) - except NotImplementedError: - continue - else: - rows = [] - for video in iterator: - rows.append((video.id, video.title, '%d:%02d:%02d' % (video.duration/3600, (video.duration%3600/60), video.duration%60))) - results[backend.name] = rows + results[backend.name] = [(video.id, video.title, video.formatted_duration) for video in + backend.iter_search_results(pattern=pattern)] return results @ConsoleApplication.command('Get video file URL from page URL') From bfc22586692c8abd5b76282a01bd61a48deac30b Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:22:24 +0200 Subject: [PATCH 5/9] 
decode sys.argv --- weboob/tools/application/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/weboob/tools/application/base.py b/weboob/tools/application/base.py index 843138bc..0b8f67f1 100644 --- a/weboob/tools/application/base.py +++ b/weboob/tools/application/base.py @@ -132,7 +132,9 @@ class BaseApplication(object): return set() @classmethod - def run(klass, args=sys.argv): + def run(klass, args=None): + if args is None: + args = [arg.decode(sys.stdin.encoding) for arg in sys.argv] app = klass() app.options, args = app._parser.parse_args(args) From 7996c1e05a8993aeee9b115e0ba92245d7dd08be Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:22:45 +0200 Subject: [PATCH 6/9] remove page_url attribute, better check types for numbers --- weboob/backends/youjizz/pages/index.py | 1 - weboob/backends/youporn/pages/index.py | 1 - weboob/capabilities/video.py | 12 ++++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/weboob/backends/youjizz/pages/index.py b/weboob/backends/youjizz/pages/index.py index 4842318c..1f2aa018 100644 --- a/weboob/backends/youjizz/pages/index.py +++ b/weboob/backends/youjizz/pages/index.py @@ -57,7 +57,6 @@ class IndexPage(BasePage): yield Video(_id, title=title, - page_url=self.browser.id2url(_id), duration=duration, preview_url=preview_url, nsfw=True) diff --git a/weboob/backends/youporn/pages/index.py b/weboob/backends/youporn/pages/index.py index 011ca3bb..d8369d3d 100644 --- a/weboob/backends/youporn/pages/index.py +++ b/weboob/backends/youporn/pages/index.py @@ -67,7 +67,6 @@ class IndexPage(PornPage): yield Video(int(_id), title=title, - page_url=self.browser.id2url(_id), rating=rating, rating_max=rating_max, duration=duration, diff --git a/weboob/capabilities/video.py b/weboob/capabilities/video.py index dbdb59ea..44e7dd74 100644 --- a/weboob/capabilities/video.py +++ b/weboob/capabilities/video.py @@ -25,23 +25,23 @@ __all__ = ['ICapVideoProvider', 'Video'] class 
Video(object): - def __init__(self, _id, title=u'', url=u'', page_url=u'', author=u'', duration=0, date=None, - rating=0, rating_max=0, preview_url=None, nsfw=False): + def __init__(self, _id, title=None, url=None, author=None, duration=0, date=None, + rating=0.0, rating_max=0.0, preview_url=None, nsfw=False): self.id = _id self.title = title self.url = url - self.page_url = page_url self.author = author - self.duration = duration + self.duration = int(duration) self.date = date - self.rating = rating - self.rating_max = rating_max + self.rating = float(rating) + self.rating_max = float(rating_max) self.preview_url = preview_url self.nsfw = nsfw @property def formatted_duration(self): return '%d:%02d:%02d' % (self.duration / 3600, (self.duration % 3600 / 60), self.duration % 60) + class ICapVideoProvider(ICap): def iter_page_urls(self, mozaic_url): raise NotImplementedError() From 9d229be8b139b288cd4e87af2d76736bbb0760e4 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:46:15 +0200 Subject: [PATCH 7/9] display friendly message if dependency is missing --- weboob/backends/youtube/backend.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py index ce8145e3..9df3af98 100644 --- a/weboob/backends/youtube/backend.py +++ b/weboob/backends/youtube/backend.py @@ -18,6 +18,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
""" +import logging import re from weboob.backend import BaseBackend @@ -47,7 +48,11 @@ class YoutubeBackend(BaseBackend, ICapVideoProvider): return self.browser.get_video(_id) def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE, nsfw=False): - import gdata.youtube.service + try: + import gdata.youtube.service + except ImportError: + logging.warning('Youtube backend search feature requires python-gdata package.') + return yt_service = gdata.youtube.service.YouTubeService() query = gdata.youtube.service.YouTubeVideoQuery() query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby] From 04820fd782201030ec44415791c5d9214b060857 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:46:32 +0200 Subject: [PATCH 8/9] removed unused command --- weboob/frontends/videoob/application.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index 367b7d8b..7f3ac13f 100644 --- a/weboob/frontends/videoob/application.py +++ b/weboob/frontends/videoob/application.py @@ -70,11 +70,3 @@ class Videoob(ConsoleApplication): results[backend.name] = [(video.id, video.title, video.formatted_duration) for video in backend.iter_search_results(pattern=pattern)] return results - - @ConsoleApplication.command('Get video file URL from page URL') - def command_file_url(self, url): - for backend in self.weboob.iter_backends(): - video = backend.get_video(url) - if video: - print video.url - break From f7a46ad70d260dbaa76d94cef6ce4f427db20ae0 Mon Sep 17 00:00:00 2001 From: Christophe Benz Date: Mon, 26 Apr 2010 19:47:01 +0200 Subject: [PATCH 9/9] add --nsfw option to videoob --- weboob/frontends/videoob/application.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/weboob/frontends/videoob/application.py b/weboob/frontends/videoob/application.py index 7f3ac13f..798c9ae9 100644 --- a/weboob/frontends/videoob/application.py 
+++ b/weboob/frontends/videoob/application.py @@ -27,6 +27,10 @@ class Videoob(ConsoleApplication): COPYRIGHT = 'Copyright(C) 2010 Christophe Benz, Romain Bignon' CONFIG = {} + def __init__(self): + ConsoleApplication.__init__(self) + self._parser.add_option('--nsfw', action='store_true', help='enable non-suitable for work videos') + def main(self, argv): self.load_modules(ICapVideoProvider) return self.process_command(*argv[1:]) @@ -68,5 +72,5 @@ class Videoob(ConsoleApplication): results['HEADER'] = ('ID', 'Title', 'Duration') for backend in self.weboob.iter_backends(): results[backend.name] = [(video.id, video.title, video.formatted_duration) for video in - backend.iter_search_results(pattern=pattern)] + backend.iter_search_results(pattern=pattern, nsfw=self.options.nsfw)] return results