diff --git a/weboob/backends/youtube/backend.py b/weboob/backends/youtube/backend.py
index 3156e68e..e938d95d 100644
--- a/weboob/backends/youtube/backend.py
+++ b/weboob/backends/youtube/backend.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 """
-Copyright(C) 2010 Christophe Benz
+Copyright(C) 2010 Christophe Benz, Romain Bignon
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -32,26 +32,46 @@ class YoutubeBackend(BaseBackend, ICapVideoProvider):
     LICENSE = 'GPLv3'
 
     CONFIG = {}
-    browser = None
+    _browser = None
 
-    def need_browser(func):
+    def __getattr__(self, name):
+        """Create the YoutubeBrowser on first access to the 'browser' attribute."""
+        # __getattr__ is only invoked when the normal attribute lookup fails.
+        if name == 'browser':
+            if not self._browser:
+                self._browser = YoutubeBrowser()
+            return self._browser
+        raise AttributeError(name)
+
+    def need_url(func):
+        """Decorator skipping calls whose first argument cannot be a youtube URL."""
         def inner(self, *args, **kwargs):
-            if not self.browser:
-                self.browser = YoutubeBrowser()
             url = args[0]
-            if u'youtube.com' not in url:
+            # Text that is neither all digits nor a youtube.com URL yields None.
+            if isinstance(url, basestring) and not url.isdigit() and u'youtube.com' not in url:
                 return None
             return func(self, *args, **kwargs)
         return inner
 
-    @need_browser
+    @need_url
+    def get_video(self, _id):
+        """Return whatever the browser's get_video() gives for _id."""
+        return self.browser.get_video(_id)
+
+    # Sort keys handed to the browser, indexed by the sortby argument.
+    SORTBY = ['', 'video_avg_rating', 'video_view_count', 'video_date_uploaded']
+    def iter_search_results(self, pattern=None, sortby=ICapVideoProvider.SEARCH_RELEVANCE):
+        """Return the browser's search results for pattern, sorted per SORTBY[sortby]."""
+        return self.browser.iter_search_results(pattern, self.SORTBY[sortby])
+
+    @need_url
     def iter_page_urls(self, mozaic_url):
         raise NotImplementedError()
 
-    @need_browser
+    @need_url
     def get_video_title(self, page_url):
         return self.browser.get_video_title(page_url)
 
-    @need_browser
+    @need_url
     def get_video_url(self, page_url):
         return self.browser.get_video_url(page_url)
diff --git a/weboob/backends/youtube/browser.py b/weboob/backends/youtube/browser.py
index 2192c550..9fcefc76 100644
--- a/weboob/backends/youtube/browser.py
+++ b/weboob/backends/youtube/browser.py
@@ -18,23 +18,37 @@ Foundation, Inc., 59 Temple Place - 
Suite 330, Boston, MA 02111-1307, USA.
 
 """
 
+import urllib
 import re
 
 from weboob.tools.browser import BaseBrowser
 from weboob.tools.parsers.lxmlparser import LxmlHtmlParser
 
-from .pages import VideoPage
+from .pages import VideoPage, ResultsPage
 
 __all__ = ['YoutubeBrowser']
 
 class YoutubeBrowser(BaseBrowser):
+    PAGES = {r'.*youtube\.com/watch\?v=(.+)': VideoPage,
+             r'.*youtube\.com/results\?.*': ResultsPage,
+            }
     video_signature_regex = re.compile(r'&t=([^ ,&]*)')
 
     def __init__(self, *args, **kwargs):
         kwargs['parser'] = LxmlHtmlParser()
-        self.PAGES = {r'.*youtube\.com/watch\?v=(.+)': VideoPage}
         BaseBrowser.__init__(self, *args, **kwargs)
 
+    def iter_search_results(self, pattern, sortby):
+        """Return the videos found for pattern (home page when pattern is empty)."""
+        if not pattern:
+            self.home()  # NOTE(review): must land on a ResultsPage for the assert below - confirm
+        else:
+            sort = '&search_sort=%s' % sortby if sortby else ''  # a None sortby adds nothing
+            self.location('http://www.youtube.com/results?search_type=videos&search_query=%s%s' % (urllib.quote_plus(pattern), sort))
+
+        assert self.is_on_page(ResultsPage)
+        return self.page.iter_videos()
+
     def get_video_title(self, page_url):
         self.location(page_url)
         return self.page.title
diff --git a/weboob/backends/youtube/pages/__init__.py b/weboob/backends/youtube/pages/__init__.py
index 7d739457..0c43f46d 100644
--- a/weboob/backends/youtube/pages/__init__.py
+++ b/weboob/backends/youtube/pages/__init__.py
@@ -19,3 +19,4 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 """
 
 from .video import VideoPage
+from .results import ResultsPage
diff --git a/weboob/backends/youtube/pages/results.py b/weboob/backends/youtube/pages/results.py
new file mode 100644
index 00000000..62abd8f7
--- /dev/null
+++ b/weboob/backends/youtube/pages/results.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+
+"""
+Copyright(C) 2010 Romain Bignon
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3 of the License.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+"""
+
+import re
+
+from weboob.tools.browser import BasePage
+from weboob.capabilities.video import Video
+
+class ResultsPage(BasePage):
+    """Youtube results page: exposes each listed entry as a Video."""
+
+    # The '?' is escaped so it matches the literal query separator;
+    # '-' is allowed because video ids may contain it.
+    WATCH_RE = re.compile(r'/watch\?v=([\w-]+)')
+
+    def iter_videos(self):
+        """Yield a Video (id, title, author, duration, preview) per entry."""
+        for div in self.document.getroot().cssselect("div[class^=video-entry]"):
+            a = div.find('a')
+            if a is None:
+                continue  # no link: nothing to build a Video from
+
+            m = self.WATCH_RE.match(a.attrib.get('href', ''))
+            _id = m.group(1) if m else ''
+
+            img = a.find('span').find('img')
+            title = img.attrib['alt']
+            preview_url = img.attrib['src']
+            # when src is a .gif, the 'thumb' attribute is used instead
+            if preview_url.endswith('.gif'):
+                preview_url = img.attrib['thumb']
+
+            duration = 0
+            vtime = a.find('span').find('span')
+            if vtime is not None:
+                # "[[H:]M:]S": weight the fields from the right, seconds first
+                fields = vtime.find('span').text.split(':')
+                for weight, field in zip((1, 60, 3600), reversed(fields)):
+                    duration += weight * int(field)
+
+            author = ''
+            author_div = div.cssselect('span[class=video-username]')
+            if author_div:
+                author = author_div[0].find('a').text.strip()
+            yield Video(_id,
+                        title,
+                        author=author,
+                        duration=duration,
+                        preview_url=preview_url)