support repositories to manage backends (closes #747)

This commit is contained in:
Romain Bignon 2012-01-03 12:10:21 +01:00
commit 14a7a1d362
410 changed files with 1079 additions and 297 deletions

View file

@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from .backend import YoutubeBackend
__all__ = ['YoutubeBackend']

149
modules/youtube/backend.py Normal file
View file

@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from __future__ import with_statement
import datetime
import gdata.youtube.service
import re
import urllib
from weboob.capabilities.video import ICapVideo
from weboob.tools.backend import BaseBackend, BackendConfig
from weboob.tools.misc import to_unicode
from weboob.tools.value import ValueBackendPassword, Value
from .browser import YoutubeBrowser
from .video import YoutubeVideo
__all__ = ['YoutubeBackend']
class YoutubeBackend(BaseBackend, ICapVideo):
    """
    Backend for YouTube: video lookup by ID or URL, search, and
    resolution of the direct stream URL through the browser.
    """
    NAME = 'youtube'
    MAINTAINER = 'Christophe Benz'
    EMAIL = 'christophe.benz@gmail.com'
    VERSION = '0.a'
    DESCRIPTION = 'Youtube videos website'
    LICENSE = 'AGPLv3+'
    BROWSER = YoutubeBrowser
    CONFIG = BackendConfig(Value('username', label='Email address', default=''),
                           ValueBackendPassword('password', label='Password', default=''))

    # URLs accepted by get_video(); group 1 captures the video ID.
    URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/(?:watch\?v=|v/)|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)([^\?&]+)')

    def create_default_browser(self):
        """
        Create the browser from the configured credentials.

        The password is only read when a username is configured; otherwise
        the browser is created with None as password.
        """
        password = None
        username = self.config['username'].get()
        if len(username) > 0:
            password = self.config['password'].get()
        return self.create_browser(username, password)

    def _entry2video(self, entry):
        """
        Parse an entry returned by gdata and return a Video object.

        The author is taken from the entry's first author, and replaced by
        the media name when the entry has one.
        """
        video = YoutubeVideo(to_unicode(entry.id.text.split('/')[-1].strip()),
                             title=to_unicode(entry.media.title.text.strip()),
                             duration=to_unicode(datetime.timedelta(seconds=int(entry.media.duration.seconds.strip()))),
                             thumbnail_url=to_unicode(entry.media.thumbnail[0].url.strip()),
                             )
        # Converted like every other text field of the video.
        video.author = to_unicode(entry.author[0].name.text.strip())
        if entry.media.name:
            video.author = to_unicode(entry.media.name.text.strip())
        return video

    def _set_video_url(self, video):
        """
        Set video.url and video.ext from the video's player page, unless
        video.url is already set.

        In the case of a download, if the user-chosen format is not
        available, the next available format will be used.
        Much of the code for this method is borrowed from youtubeservice.py of Cutetube
        http://maemo.org/packages/view/cutetube/.
        """
        if video.url:
            return
        player_url = YoutubeVideo.id2url(video.id)
        with self.browser:
            url, ext = self.browser.get_video_url(player_url)
        video.url = url
        video.ext = ext

    def get_video(self, _id):
        """
        Return the video designated by _id with its stream URL resolved.

        _id is either a bare video ID or a URL matched by URL_RE. None is
        returned when gdata reports status 400; any other gdata error is
        re-raised.
        """
        # Local import: the name is only needed by the error path below.
        import sys

        m = self.URL_RE.match(_id)
        if m:
            _id = m.group(1)
        yt_service = gdata.youtube.service.YouTubeService()
        try:
            entry = yt_service.GetYouTubeVideoEntry(video_id=_id)
        except gdata.service.Error:
            # sys.exc_info() is the one spelling that every Python version
            # parses: "except X as e" is unknown to Python 2.5 (which the
            # with_statement __future__ import of this module targets) and
            # "except X, e" is rejected by Python 3.
            e = sys.exc_info()[1]
            if e.args[0]['status'] == 400:
                return None
            raise
        video = self._entry2video(entry)
        self._set_video_url(video)
        return video

    def iter_search_results(self, pattern=None, sortby=ICapVideo.SEARCH_RELEVANCE, nsfw=False, max_results=None):
        """
        Iterate over the videos returned by a search.

        pattern: search terms; unicode is encoded to UTF-8. None leaves the
                 query terms unset.
        sortby: index into ('relevance', 'rating', 'viewCount', 'published').
        nsfw: whether the query's "racy" filter is 'include' or 'exclude'.
        max_results: stop after this many videos. None means no limit other
                     than the start index cap. Zero or less yields nothing.
        """
        YOUTUBE_MAX_RESULTS = 50
        YOUTUBE_MAX_START_INDEX = 1000

        # A non-positive limit would request empty pages and never advance
        # the start index: there is nothing to yield, so stop right away.
        if max_results is not None and max_results <= 0:
            return

        # Everything below is the same for each page: compute it once.
        if isinstance(pattern, unicode):
            pattern = pattern.encode('utf-8')
        orderby = ('relevance', 'rating', 'viewCount', 'published')[sortby]
        racy = 'include' if nsfw else 'exclude'
        if max_results is None or max_results > YOUTUBE_MAX_RESULTS:
            page_size = YOUTUBE_MAX_RESULTS
        else:
            page_size = max_results

        yt_service = gdata.youtube.service.YouTubeService()
        start_index = 1
        nb_yielded = 0
        while start_index <= YOUTUBE_MAX_START_INDEX:
            query = gdata.youtube.service.YouTubeVideoQuery()
            if pattern is not None:
                query.vq = pattern
            query.orderby = orderby
            query.racy = racy
            query.max_results = page_size
            query.start_index = start_index
            start_index += page_size

            feed = yt_service.YouTubeQuery(query)
            # An empty page means the results are exhausted: do not keep
            # querying the following pages up to the start index cap.
            if not feed.entry:
                return
            for entry in feed.entry:
                yield self._entry2video(entry)
                nb_yielded += 1
                if nb_yielded == max_results:
                    return

    def fill_video(self, video, fields):
        """
        Load the requested fields of video and return it.

        'thumbnail' downloads the thumbnail data from its URL; 'url'
        resolves the stream URL.
        """
        if 'thumbnail' in fields:
            response = urllib.urlopen(video.thumbnail.url)
            try:
                video.thumbnail.data = response.read()
            finally:
                # Release the connection even when the read fails.
                response.close()
        if 'url' in fields:
            self._set_video_url(video)
        return video

    # Maps an object class to the method filling its missing fields.
    OBJECTS = {YoutubeVideo: fill_video}

View file

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BaseBrowser
from .pages import BaseYoutubePage, VideoPage, ForbiddenVideoPage, \
VerifyAgePage, VerifyControversyPage, \
LoginPage, LoginRedirectPage
__all__ = ['YoutubeBrowser']
class YoutubeBrowser(BaseBrowser):
    """
    Browser for the youtube.com pages and the Google accounts login form.
    """
    DOMAIN = u'youtube.com'
    ENCODING = None
    # URL pattern -> page class handling it.
    PAGES = {r'https?://.*youtube\.com/': BaseYoutubePage,
             r'https?://.*youtube\.com/watch\?v=(?P<id>.+)': VideoPage,
             r'https?://.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
             r'https?://.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
             # The '=' keeps the separator out of the next_url group, as in
             # the verify_age pattern above.
             r'https?://.*youtube\.com/verify_controversy\?next_url=(?P<next_url>.+)': VerifyControversyPage,
             r'https?://accounts\.youtube\.com/accounts/SetSID.*': LoginRedirectPage,
             # Dots are escaped so that they only match a literal '.'.
             r'https?://www\.google\.com/accounts/ServiceLogin.*': LoginPage,
             }

    def is_logged(self):
        """
        Tell whether the session is considered logged in.

        Any page which is not a BaseYoutubePage counts as logged in;
        otherwise the current page's own is_logged() decides.
        """
        logged = not self.is_on_page(BaseYoutubePage) or self.page.is_logged()
        return logged

    def login(self):
        """
        Open the Google service login URL and submit the configured
        username and password through the page reached.
        """
        self.location('https://www.google.com/accounts/ServiceLogin?uilel=3&service=youtube&passive=true&continue=http%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26nomobiletemp%3D1%26hl%3Den_US%26next%3D%252F&hl=en_US&ltmpl=sso')
        self.page.login(self.username, self.password)

    def get_video_url(self, player_url):
        """
        Go to player_url and return what the video page's get_video_url()
        returns.

        The page reached must be a VideoPage (checked with an assert).
        """
        self.location(player_url)
        assert self.is_on_page(VideoPage)
        return self.page.get_video_url()

BIN
modules/youtube/favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

130
modules/youtube/pages.py Normal file
View file

@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz, Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
try:
import json
except ImportError:
import simplejson as json
import urllib
from weboob.tools.browser import BasePage, BrokenPageError, BrowserIncorrectPassword
__all__ = ['LoginPage', 'LoginRedirectPage', 'ForbiddenVideo', 'ForbiddenVideoPage', \
'VerifyAgePage', 'VerifyControversyPage', 'VideoPage']
class LoginPage(BasePage):
    """
    Login form page; errors displayed on it are turned into exceptions.
    """

    def on_loaded(self):
        """
        Raise BrowserIncorrectPassword, with every 'div.errormsg' text joined
        by commas, when the page contains at least one such element.
        """
        root = self.document.getroot()
        messages = [node.text.encode('utf-8').strip()
                    for node in self.parser.select(root, 'div.errormsg')]
        if messages:
            raise BrowserIncorrectPassword(', '.join(messages))

    def login(self, username, password):
        """
        Fill the form whose id is 'gaia_loginform' with the credentials and
        submit it.
        """
        def is_login_form(form):
            return form.attrs.get('id', '') == 'gaia_loginform'

        self.browser.select_form(predicate=is_login_form)
        self.browser['Email'] = username
        self.browser['Passwd'] = password
        self.browser.submit()
class LoginRedirectPage(BasePage):
    """
    Page with no behaviour of its own; it only gives the URL it is
    associated with a page class.
    """
class ForbiddenVideo(Exception):
    """
    Raised by the pages of this module when a video cannot be accessed.
    """
class BaseYoutubePage(BasePage):
    """
    Common base of the youtube.com pages.
    """

    def is_logged(self):
        """
        Return True when the 'span#masthead-user-expander' element can be
        selected from the document, False when the selection raises
        BrokenPageError.
        """
        root = self.document.getroot()
        try:
            # NOTE(review): the trailing 1 presumably asks for exactly one
            # match -- confirm against the parser's select().
            self.parser.select(root, 'span#masthead-user-expander', 1)
        except BrokenPageError:
            return False
        return True
class ForbiddenVideoPage(BaseYoutubePage):
    """
    Page whose loading always ends with a ForbiddenVideo exception.
    """

    def on_loaded(self):
        """
        Raise ForbiddenVideo with the stripped text of the
        '.yt-alert-content' element.
        """
        alert = self.parser.select(self.document.getroot(), '.yt-alert-content', 1)
        message = alert.text.strip()
        raise ForbiddenVideo(message)
class VerifyAgePage(BaseYoutubePage):
    """
    Age verification page: confirmed automatically when logged in.
    """

    def on_loaded(self):
        """
        Submit the form whose id is 'confirm-age-form'.

        Raises ForbiddenVideo instead when the page is not logged in.
        """
        if not self.is_logged():
            raise ForbiddenVideo('This video or group may contain content that is inappropriate for some users')

        def is_confirm_form(form):
            return form.attrs.get('id', '') == 'confirm-age-form'

        self.browser.select_form(predicate=is_confirm_form)
        self.browser.submit()
class VerifyControversyPage(BaseYoutubePage):
    """
    Controversy verification page: its form is submitted automatically.
    """

    def on_loaded(self):
        """
        Submit the form whose action contains 'verify_controversy'.
        """
        def is_controversy_form(form):
            return 'verify_controversy' in form.attrs.get('action', '')

        self.browser.select_form(predicate=is_controversy_form)
        self.browser.submit()
class VideoPage(BaseYoutubePage):
    """
    Video page, from which the URL of the video stream is extracted.
    """

    # Format codes in the order get_video_url() tries them.
    AVAILABLE_FORMATS = [38, 37, 45, 22, 43, 35, 34, 18, 6, 5, 17, 13]
    # Extension per format code; codes absent from here get 'flv'.
    FORMAT_EXTENSIONS = {
        13: '3gp',
        17: 'mp4',
        18: 'mp4',
        22: 'mp4',
        37: 'mp4',
        38: 'video',  # You actually don't know if this will be MOV, AVI or whatever
        43: 'webm',
        45: 'webm',
    }

    def get_video_url(self, format=38):
        """
        Return a (url, extension) tuple for the video stream.

        format is the first format code tried; when the page does not
        provide it, the codes following it in AVAILABLE_FORMATS are tried
        in order. A code which is not in AVAILABLE_FORMATS raises
        ValueError.

        Raises BrokenPageError when none of the tried formats is found.
        """
        marker = "'PLAYER_CONFIG': "
        formats = {}
        for script in self.parser.select(self.document.getroot(), 'script'):
            text = script.text
            if not text:
                continue

            pos = text.find(marker)
            if pos < 0:
                continue

            # The JSON value runs from the marker to the end of its line,
            # or to the end of the script when no newline follows.
            start = pos + len(marker)
            end = text.find('\n', start)
            if end < 0:
                end = len(text)
            config = json.loads(text[start:end])

            for part in config['args']['url_encoded_fmt_stream_map'].split('&'):
                # partition() never fails: a part without '=' gets an empty
                # value and is skipped by the test below.
                key, _, value = part.partition('=')
                if key != 'itag' or 'url' not in value:
                    continue

                value = urllib.unquote(value)
                fmt, separator, url = value.partition(',url=')
                if not separator:
                    # No URL attached to this format code: ignore it.
                    continue
                formats[int(fmt)] = url

        # choose the better format to use.
        first = self.AVAILABLE_FORMATS.index(format)
        for candidate in self.AVAILABLE_FORMATS[first:]:
            if candidate in formats:
                url = formats.get(candidate)
                ext = self.FORMAT_EXTENSIONS.get(candidate, 'flv')
                return url, ext

        raise BrokenPageError('Unable to find file URL')

33
modules/youtube/test.py Normal file
View file

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.test import BackendTest
class YoutubeTest(BackendTest):
    """
    Test of the youtube backend: search, stream URL resolution and lookup
    of a video from its short URL.
    """
    BACKEND = 'youtube'

    def test_youtube(self):
        """
        Search for 'lol', resolve the URL of the first result, fetch the
        same video again from its short URL, then open the stream URL.
        """
        videos = list(self.backend.iter_search_results('lol'))
        self.assertTrue(len(videos) > 0)
        video = videos[0]
        self.backend.fillobj(video, ('url',))
        self.assertTrue(video.url and video.url.startswith('http://'),
                        'URL for video "%s" not found: %s' % (video.id, video.url))
        # assertTrue rather than a bare assert: same style as the checks
        # above, and not stripped when Python runs with -O.
        self.assertTrue(self.backend.get_video(video.shorturl))
        self.backend.browser.openurl(video.url)

35
modules/youtube/video.py Normal file
View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2011 Christophe Benz
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.capabilities.video import BaseVideo
__all__ = ['YoutubeVideo']
class YoutubeVideo(BaseVideo):
    """
    Video with helpers building its YouTube URLs from its ID.
    """

    @classmethod
    def id2url(cls, _id):
        """
        Return the watch page URL of the video whose ID is _id.
        """
        watch_url = 'http://www.youtube.com/watch?v=%s' % _id
        return watch_url

    def _get_shorturl(self):
        """
        Return the youtu.be URL built from this video's ID.
        """
        short_url = 'http://youtu.be/%s' % self.id
        return short_url

    # Read-only attribute computed by _get_shorturl().
    shorturl = property(fget=_get_shorturl)