support repositories to manage backends (closes #747)
This commit is contained in:
parent
ef16a5b726
commit
14a7a1d362
410 changed files with 1079 additions and 297 deletions
22
modules/youtube/__init__.py
Normal file
22
modules/youtube/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Christophe Benz
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from .backend import YoutubeBackend
|
||||
|
||||
__all__ = ['YoutubeBackend']
|
||||
149
modules/youtube/backend.py
Normal file
149
modules/youtube/backend.py
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from __future__ import with_statement
|
||||
|
||||
import datetime
|
||||
import gdata.youtube.service
|
||||
import re
|
||||
import urllib
|
||||
|
||||
from weboob.capabilities.video import ICapVideo
|
||||
from weboob.tools.backend import BaseBackend, BackendConfig
|
||||
from weboob.tools.misc import to_unicode
|
||||
from weboob.tools.value import ValueBackendPassword, Value
|
||||
|
||||
from .browser import YoutubeBrowser
|
||||
from .video import YoutubeVideo
|
||||
|
||||
|
||||
__all__ = ['YoutubeBackend']
|
||||
|
||||
|
||||
class YoutubeBackend(BaseBackend, ICapVideo):
|
||||
NAME = 'youtube'
|
||||
MAINTAINER = 'Christophe Benz'
|
||||
EMAIL = 'christophe.benz@gmail.com'
|
||||
VERSION = '0.a'
|
||||
DESCRIPTION = 'Youtube videos website'
|
||||
LICENSE = 'AGPLv3+'
|
||||
BROWSER = YoutubeBrowser
|
||||
CONFIG = BackendConfig(Value('username', label='Email address', default=''),
|
||||
ValueBackendPassword('password', label='Password', default=''))
|
||||
|
||||
URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/(?:watch\?v=|v/)|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)([^\?&]+)')
|
||||
|
||||
def create_default_browser(self):
|
||||
password = None
|
||||
username = self.config['username'].get()
|
||||
if len(username) > 0:
|
||||
password = self.config['password'].get()
|
||||
return self.create_browser(username, password)
|
||||
|
||||
def _entry2video(self, entry):
|
||||
"""
|
||||
Parse an entry returned by gdata and return a Video object.
|
||||
"""
|
||||
video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
|
||||
title=to_unicode(entry.media.title.text.strip()),
|
||||
duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
|
||||
thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
|
||||
)
|
||||
video.author = entry.author[0].name.text.strip()
|
||||
if entry.media.name:
|
||||
video.author = to_unicode(entry.media.name.text.strip())
|
||||
return video
|
||||
|
||||
def _set_video_url(self, video):
|
||||
"""
|
||||
In the case of a download, if the user-chosen format is not
|
||||
available, the next available format will be used.
|
||||
Much of the code for this method is borrowed from youtubeservice.py of Cutetube
|
||||
http://maemo.org/packages/view/cutetube/.
|
||||
"""
|
||||
if video.url:
|
||||
return
|
||||
|
||||
player_url = YoutubeVideo.id2url(video.id)
|
||||
with self.browser:
|
||||
url, ext = self.browser.get_video_url(player_url)
|
||||
|
||||
video.url = url
|
||||
video.ext = ext
|
||||
|
||||
def get_video(self, _id):
|
||||
m = self.URL_RE.match(_id)
|
||||
if m:
|
||||
_id = m.group(1)
|
||||
|
||||
yt_service = gdata.youtube.service.YouTubeService()
|
||||
try:
|
||||
entry = yt_service.GetYouTubeVideoEntry(video_id=_id)
|
||||
except gdata.service.Error, e:
|
||||
if e.args[0]['status'] == 400:
|
||||
return None
|
||||
raise
|
||||
|
||||
video = self._entry2video(entry)
|
||||
self._set_video_url(video)
|
||||
return video
|
||||
|
||||
def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
|
||||
YOUTUBE_MAX_RESULTS = 50
|
||||
YOUTUBE_MAX_START_INDEX = 1000
|
||||
yt_service = gdata.youtube.service.YouTubeService()
|
||||
|
||||
start_index = 1
|
||||
nb_yielded = 0
|
||||
while True:
|
||||
query = gdata.youtube.service.YouTubeVideoQuery()
|
||||
if pattern is not None:
|
||||
if isinstance(pattern, unicode):
|
||||
pattern = pattern.encode('utf-8')
|
||||
query.vq = pattern
|
||||
query.orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
|
||||
query.racy = 'include' if nsfw else 'exclude'
|
||||
|
||||
if max_results is None or max_results > YOUTUBE_MAX_RESULTS:
|
||||
query_max_results = YOUTUBE_MAX_RESULTS
|
||||
else:
|
||||
query_max_results = max_results
|
||||
query.max_results = query_max_results
|
||||
|
||||
if start_index > YOUTUBE_MAX_START_INDEX:
|
||||
return
|
||||
query.start_index = start_index
|
||||
start_index += query_max_results
|
||||
|
||||
feed = yt_service.YouTubeQuery(query)
|
||||
for entry in feed.entry:
|
||||
yield self._entry2video(entry)
|
||||
nb_yielded += 1
|
||||
if nb_yielded == max_results:
|
||||
return
|
||||
|
||||
def fill_video(self, video, fields):
|
||||
if 'thumbnail' in fields:
|
||||
video.thumbnail.data = urllib.urlopen(video.thumbnail.url).read()
|
||||
if 'url' in fields:
|
||||
self._set_video_url(video)
|
||||
return video
|
||||
|
||||
OBJECTS = {YoutubeVideo: fill_video}
|
||||
55
modules/youtube/browser.py
Normal file
55
modules/youtube/browser.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BaseBrowser
|
||||
|
||||
from .pages import BaseYoutubePage, VideoPage, ForbiddenVideoPage, \
|
||||
VerifyAgePage, VerifyControversyPage, \
|
||||
LoginPage, LoginRedirectPage
|
||||
|
||||
|
||||
__all__ = ['YoutubeBrowser']
|
||||
|
||||
|
||||
class YoutubeBrowser(BaseBrowser):
    """
    Browser for the Youtube website, used to log in and to extract the
    stream URL from a video page.
    """
    DOMAIN = u'youtube.com'
    ENCODING = None
    # URL regex -> page class (presumably resolved by BaseBrowser when a
    # location is loaded -- confirm against weboob.tools.browser).
    PAGES = {r'https?://.*youtube\.com/': BaseYoutubePage,
             r'https?://.*youtube\.com/watch\?v=(?P<id>.+)': VideoPage,
             r'https?://.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
             r'https?://.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
             r'https?://.*youtube\.com/verify_controversy\?next_url(?P<next_url>.+)': VerifyControversyPage,
             r'https?://accounts\.youtube\.com/accounts/SetSID.*': LoginRedirectPage,
             # Dots are escaped so that only the literal host matches.
             r'https?://www\.google\.com/accounts/ServiceLogin.*': LoginPage,
            }

    def is_logged(self):
        """
        Tell whether the session is considered logged in.

        Returns True when the current page is not a BaseYoutubePage, or when
        that page reports a logged-in user.
        """
        logged = not self.is_on_page(BaseYoutubePage) or self.page.is_logged()
        return logged

    def login(self):
        """
        Open the Google service login page and submit the credentials
        stored on the browser (self.username / self.password).
        """
        # The last query parameter is "ltmpl"; it must be introduced by a
        # plain "&", not by an HTML-decoded "<".
        self.location('https://www.google.com/accounts/ServiceLogin?uilel=3&service=youtube&passive=true&continue=http%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26nomobiletemp%3D1%26hl%3Den_US%26next%3D%252F&hl=en_US&ltmpl=sso')
        self.page.login(self.username, self.password)

    def get_video_url(self, player_url):
        """
        Load the player page and return what VideoPage.get_video_url()
        returns for it (a (url, extension) pair).

        Raises AssertionError when the loaded page is not a VideoPage.
        """
        self.location(player_url)

        assert self.is_on_page(VideoPage)
        return self.page.get_video_url()
|
||||
BIN
modules/youtube/favicon.png
Normal file
BIN
modules/youtube/favicon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.8 KiB |
130
modules/youtube/pages.py
Normal file
130
modules/youtube/pages.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
try:
|
||||
import json
|
||||
except ImportError:
|
||||
import simplejson as json
|
||||
|
||||
import urllib
|
||||
|
||||
from weboob.tools.browser import BasePage, BrokenPageError, BrowserIncorrectPassword
|
||||
|
||||
|
||||
# Public names of this module. BaseYoutubePage is listed because the
# browser module imports it from here.
__all__ = ['LoginPage', 'LoginRedirectPage', 'ForbiddenVideo', 'ForbiddenVideoPage',
           'VerifyAgePage', 'VerifyControversyPage', 'VideoPage', 'BaseYoutubePage']
|
||||
|
||||
|
||||
class LoginPage(BasePage):
    """Page holding the 'gaia_loginform' sign-in form."""

    def on_loaded(self):
        """
        Collect the text of every 'div.errormsg' element and raise
        BrowserIncorrectPassword with them joined when there is at least one.
        """
        messages = [node.text.encode('utf-8').strip()
                    for node in self.parser.select(self.document.getroot(), 'div.errormsg')]

        if messages:
            raise BrowserIncorrectPassword(', '.join(messages))

    def login(self, username, password):
        """Fill the login form with the given credentials and submit it."""
        def is_login_form(form):
            return form.attrs.get('id', '') == 'gaia_loginform'

        browser = self.browser
        browser.select_form(predicate=is_login_form)
        browser['Email'] = username
        browser['Passwd'] = password
        browser.submit()
|
||||
|
||||
class LoginRedirectPage(BasePage):
    """Page class with no behaviour of its own beyond BasePage."""
|
||||
|
||||
|
||||
class ForbiddenVideo(Exception):
    """Raised by the page classes when a video cannot be accessed."""
|
||||
|
||||
|
||||
class BaseYoutubePage(BasePage):
    """Common base of the Youtube pages, providing the login check."""

    def is_logged(self):
        """
        Return True when exactly one 'span#masthead-user-expander' element
        can be selected in the document, False when the selection raises
        BrokenPageError.
        """
        try:
            self.parser.select(self.document.getroot(), 'span#masthead-user-expander', 1)
        except BrokenPageError:
            return False
        return True
|
||||
|
||||
class ForbiddenVideoPage(BaseYoutubePage):
    """Page whose loading always ends with a ForbiddenVideo error."""

    def on_loaded(self):
        """Raise ForbiddenVideo with the text of the '.yt-alert-content' element."""
        alert = self.parser.select(self.document.getroot(), '.yt-alert-content', 1)
        message = alert.text.strip()
        raise ForbiddenVideo(message)
|
||||
|
||||
|
||||
class VerifyAgePage(BaseYoutubePage):
    """Age confirmation page."""

    def on_loaded(self):
        """
        Submit the 'confirm-age-form' form when logged in; raise
        ForbiddenVideo otherwise.
        """
        if not self.is_logged():
            raise ForbiddenVideo('This video or group may contain content that is inappropriate for some users')

        def is_confirm_form(form):
            return form.attrs.get('id', '') == 'confirm-age-form'

        browser = self.browser
        browser.select_form(predicate=is_confirm_form)
        browser.submit()
|
||||
|
||||
class VerifyControversyPage(BaseYoutubePage):
    """Controversial-content confirmation page."""

    def on_loaded(self):
        """Submit the form whose action contains 'verify_controversy'."""
        def is_controversy_form(form):
            return 'verify_controversy' in form.attrs.get('action', '')

        browser = self.browser
        browser.select_form(predicate=is_controversy_form)
        browser.submit()
|
||||
|
||||
class VideoPage(BaseYoutubePage):
    """Video watch page, from which the stream URL is extracted."""

    # Format codes (itag values) in the order they are tried.
    AVAILABLE_FORMATS = [38, 37, 45, 22, 43, 35, 34, 18, 6, 5, 17, 13]
    # File extension per format code; codes absent from this table are
    # given the 'flv' extension by get_video_url().
    FORMAT_EXTENSIONS = {
        13: '3gp',
        17: 'mp4',
        18: 'mp4',
        22: 'mp4',
        37: 'mp4',
        38: 'video', # You actually don't know if this will be MOV, AVI or whatever
        43: 'webm',
        45: 'webm',
    }

    def get_video_url(self, format=38):
        """
        Find the stream URL of the video.

        format: preferred format code. The search starts at this code in
                AVAILABLE_FORMATS and continues with the following ones; a
                code that is not in AVAILABLE_FORMATS makes the search start
                from the first entry.

        Returns a (url, extension) tuple.
        Raises BrokenPageError when no usable format is found in the page.
        """
        formats = {}
        marker = "'PLAYER_CONFIG': "
        for script in self.parser.select(self.document.getroot(), 'script'):
            text = script.text
            if not text:
                continue

            pos = text.find(marker)
            if pos < 0:
                continue

            # The JSON value runs from the marker to the end of the line, or
            # to the end of the script when there is no trailing newline.
            start = pos + len(marker)
            end = text.find('\n', start)
            if end < 0:
                end = len(text)
            config = json.loads(text[start:end])

            for part in config['args']['url_encoded_fmt_stream_map'].split('&'):
                # Skip parts that are not key=value pairs instead of failing.
                key, sep, value = part.partition('=')
                if not sep or key != 'itag' or 'url' not in value:
                    continue

                # Once unquoted, the value looks like "<code>,url=<stream url>";
                # split on the first separator only so the URL stays intact.
                fmt, sep, url = urllib.unquote(value).partition(',url=')
                if not sep:
                    continue
                formats[int(fmt)] = url

        # choose the better format to use.
        try:
            first = self.AVAILABLE_FORMATS.index(format)
        except ValueError:
            first = 0
        for candidate in self.AVAILABLE_FORMATS[first:]:
            if candidate in formats:
                return formats[candidate], self.FORMAT_EXTENSIONS.get(candidate, 'flv')

        raise BrokenPageError('Unable to find file URL')
|
||||
33
modules/youtube/test.py
Normal file
33
modules/youtube/test.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Romain Bignon
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.test import BackendTest
|
||||
|
||||
class YoutubeTest(BackendTest):
    """Functional test of the 'youtube' backend."""
    BACKEND = 'youtube'

    def test_youtube(self):
        """
        Search for 'lol', resolve the URL of the first result, fetch the
        same video again through its short URL and open the stream URL.
        """
        results = list(self.backend.iter_search_results('lol'))
        self.assertTrue(len(results) > 0)

        video = results[0]
        self.backend.fillobj(video, ('url',))
        self.assertTrue(video.url and video.url.startswith('http://'), 'URL for video "%s" not found: %s' % (video.id, video.url))

        assert self.backend.get_video(video.shorturl)
        self.backend.browser.openurl(video.url)
|
||||
35
modules/youtube/video.py
Normal file
35
modules/youtube/video.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright(C) 2010-2011 Christophe Benz
|
||||
#
|
||||
# This file is part of weboob.
|
||||
#
|
||||
# weboob is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# weboob is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.capabilities.video import BaseVideo
|
||||
|
||||
|
||||
__all__ = ['YoutubeVideo']
|
||||
|
||||
|
||||
class YoutubeVideo(BaseVideo):
    """BaseVideo subclass adding Youtube URL helpers."""

    @classmethod
    def id2url(cls, _id):
        """Return the watch-page URL built from the video ID."""
        watch_url = 'http://www.youtube.com/watch?v=%s' % _id
        return watch_url

    def _get_shorturl(self):
        """Return the youtu.be URL built from self.id."""
        short_url = 'http://youtu.be/%s' % self.id
        return short_url

    # Read-only attribute exposing the short URL.
    shorturl = property(_get_shorturl)
|
||||
Loading…
Add table
Add a link
Reference in a new issue