use the browser to get the file URL, and raise an error when the video needs an age verification

This commit is contained in:
Romain Bignon 2011-07-24 16:57:01 +02:00
commit d11084f05e
3 changed files with 57 additions and 38 deletions

View file

@ -46,17 +46,6 @@ class YoutubeBackend(BaseBackend, ICapVideo):
BROWSER = YoutubeBrowser BROWSER = YoutubeBrowser
URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/(?:watch\?v=|v/)|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)([^\?&]+)') URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/(?:watch\?v=|v/)|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)([^\?&]+)')
AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13]
FORMAT_EXTENSIONS = {
13: '3gp',
17: 'mp4',
18: 'mp4',
22: 'mp4',
37: 'mp4',
38: 'video', # You actually don't know if this will be MOV, AVI or whatever
43: 'webm',
45: 'webm',
}
def _entry2video(self, entry): def _entry2video(self, entry):
""" """
@ -72,35 +61,22 @@ class YoutubeBackend(BaseBackend, ICapVideo):
video.author = to_unicode(entry.media.name.text.strip()) video.author = to_unicode(entry.media.name.text.strip())
return video return video
def _set_video_url(self, video, format=18): def _set_video_url(self, video):
""" """
In the case of a download, if the user-chosen format is not In the case of a download, if the user-chosen format is not
available, the next available format will be used. available, the next available format will be used.
Much of the code for this method is borrowed from youtubeservice.py of Cutetube Much of the code for this method is borrowed from youtubeservice.py of Cutetube
http://maemo.org/packages/view/cutetube/. http://maemo.org/packages/view/cutetube/.
""" """
player_url = YoutubeVideo.id2url(video.id) if video.url:
html = urllib.urlopen(player_url).read() return
html = ''.join(html.split())
formats = {}
pos = html.find('","fmt_url_map":"')
if (pos != -1):
pos2 = html.find('"', pos + 17)
fmt_map = urllib.unquote(html[pos + 17:pos2]) + ','
parts = fmt_map.split('|')
key = parts[0]
for p in parts[1:]:
idx = p.rfind(',')
value = p[:idx].replace('\\/', '/').replace('\u0026', '&').replace(',', '%2C')
formats[int(key)] = value
key = p[idx + 1:]
for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
if format in formats:
video.url = formats.get(format)
video.ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
return True
return False player_url = YoutubeVideo.id2url(video.id)
with self.browser:
url, ext = self.browser.get_video_url(player_url)
video.url = url
video.ext = ext
def get_video(self, _id): def get_video(self, _id):
m = self.URL_RE.match(_id) m = self.URL_RE.match(_id)

View file

@ -33,3 +33,9 @@ class YoutubeBrowser(BaseBrowser):
r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage, r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage, r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
} }
def get_video_url(self, player_url):
    """
    Open the player page and return the result of its get_video_url().

    The browser is moved to player_url; the page it lands on must be a
    VideoPage, otherwise the assertion fails.
    """
    self.location(player_url)
    landed_on_video_page = self.is_on_page(VideoPage)
    assert landed_on_video_page
    current_page = self.page
    return current_page.get_video_url()

View file

@ -18,8 +18,9 @@
# along with weboob. If not, see <http://www.gnu.org/licenses/>. # along with weboob. If not, see <http://www.gnu.org/licenses/>.
from weboob.tools.browser import BasePage import urllib
from weboob.tools.browser import BasePage
__all__ = ['ForbiddenVideo', 'ForbiddenVideoPage', 'VerifyAgePage', 'VideoPage'] __all__ = ['ForbiddenVideo', 'ForbiddenVideoPage', 'VerifyAgePage', 'VideoPage']
@ -30,15 +31,51 @@ class ForbiddenVideo(Exception):
class ForbiddenVideoPage(BasePage): class ForbiddenVideoPage(BasePage):
def get_video(self, video=None): def on_loaded(self):
element = self.parser.select(self.document.getroot(), '.yt-alert-content', 1) element = self.parser.select(self.document.getroot(), '.yt-alert-content', 1)
raise ForbiddenVideo(element.text.strip()) raise ForbiddenVideo(element.text.strip())
class VerifyAgePage(BasePage): class VerifyAgePage(BasePage):
def get_video(self, video=None): def on_loaded(self):
raise ForbiddenVideo('verify age not implemented') raise ForbiddenVideo('This video or group may contain content that is inappropriate for some users')
class VideoPage(BasePage): class VideoPage(BasePage):
pass AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13]
FORMAT_EXTENSIONS = {
13: '3gp',
17: 'mp4',
18: 'mp4',
22: 'mp4',
37: 'mp4',
38: 'video', # You actually don't know if this will be MOV, AVI or whatever
43: 'webm',
45: 'webm',
}
def get_video_url(self, format=18):
    """
    Find the direct URL of the video in the page's inline scripts.

    The first <script> whose text contains a "fmt_url_map" entry is parsed.
    Its value has the form "<fmt>|<url>,<fmt>|<url>,..." and is turned into
    a {format id: url} mapping. The formats of AVAILABLE_FORMATS are then
    tried in order, starting at the requested one, and the first one
    present in the mapping is used.

    :param format: format id to start from; must be in AVAILABLE_FORMATS
    :returns: (url, extension) tuple, or (None, None) when no suitable
              format was found. The extension defaults to 'flv' for
              formats missing from FORMAT_EXTENSIONS.
    :raises ValueError: if format is not in AVAILABLE_FORMATS, or if a
                        format id found in the map is not an integer
    """
    try:
        from urllib import unquote  # Python 2
    except ImportError:
        from urllib.parse import unquote  # Python 3

    marker = '"fmt_url_map": "'
    formats = {}
    for script in self.parser.select(self.document.getroot(), 'script'):
        text = script.text
        if not text:
            continue
        pos = text.find(marker)
        if pos < 0:
            continue
        # Skip exactly the marker: a hard-coded offset that does not match
        # its length would eat the first character of the map.
        start = pos + len(marker)
        end = text.find('"', start)
        if end < 0:
            # No closing quote: the value is truncated, do not parse it.
            continue

        # The trailing comma makes the last "<url>" look like the others,
        # i.e. "<url>,<next fmt>" with an empty next format id.
        fmt_map = unquote(text[start:end]) + ','
        parts = fmt_map.split('|')
        key = parts[0]
        for part in parts[1:]:
            # URLs may contain commas themselves, so the separator between
            # the URL and the next format id is the *last* comma.
            idx = part.rfind(',')
            value = part[:idx].replace('\\/', '/').replace('\\u0026', '&').replace(',', '%2C')
            formats[int(key)] = value
            key = part[idx + 1:]
        break

    candidates = self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]
    for fmt in candidates:
        if fmt in formats:
            return formats[fmt], self.FORMAT_EXTENSIONS.get(fmt, 'flv')
    return None, None