Use the browser to get the file URL, and raise an error when the video requires age verification
This commit is contained in:
parent
ddda7f88e4
commit
d11084f05e
3 changed files with 57 additions and 38 deletions
|
|
@ -46,17 +46,6 @@ class YoutubeBackend(BaseBackend, ICapVideo):
|
||||||
BROWSER = YoutubeBrowser
|
BROWSER = YoutubeBrowser
|
||||||
|
|
||||||
URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/(?:watch\?v=|v/)|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)([^\?&]+)')
|
URL_RE = re.compile(r'^https?://(?:\w*\.?youtube\.com/(?:watch\?v=|v/)|youtu\.be\/|\w*\.?youtube\.com\/user\/\w+#p\/u\/\d+\/)([^\?&]+)')
|
||||||
AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13]
|
|
||||||
FORMAT_EXTENSIONS = {
|
|
||||||
13: '3gp',
|
|
||||||
17: 'mp4',
|
|
||||||
18: 'mp4',
|
|
||||||
22: 'mp4',
|
|
||||||
37: 'mp4',
|
|
||||||
38: 'video', # You actually don't know if this will be MOV, AVI or whatever
|
|
||||||
43: 'webm',
|
|
||||||
45: 'webm',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _entry2video(self, entry):
|
def _entry2video(self, entry):
|
||||||
"""
|
"""
|
||||||
|
|
@ -72,35 +61,22 @@ class YoutubeBackend(BaseBackend, ICapVideo):
|
||||||
video.author = to_unicode(entry.media.name.text.strip())
|
video.author = to_unicode(entry.media.name.text.strip())
|
||||||
return video
|
return video
|
||||||
|
|
||||||
def _set_video_url(self, video, format=18):
|
def _set_video_url(self, video):
|
||||||
"""
|
"""
|
||||||
In the case of a download, if the user-chosen format is not
|
In the case of a download, if the user-chosen format is not
|
||||||
available, the next available format will be used.
|
available, the next available format will be used.
|
||||||
Much of the code for this method is borrowed from youtubeservice.py of Cutetube
|
Much of the code for this method is borrowed from youtubeservice.py of Cutetube
|
||||||
http://maemo.org/packages/view/cutetube/.
|
http://maemo.org/packages/view/cutetube/.
|
||||||
"""
|
"""
|
||||||
player_url = YoutubeVideo.id2url(video.id)
|
if video.url:
|
||||||
html = urllib.urlopen(player_url).read()
|
return
|
||||||
html = ''.join(html.split())
|
|
||||||
formats = {}
|
|
||||||
pos = html.find('","fmt_url_map":"')
|
|
||||||
if (pos != -1):
|
|
||||||
pos2 = html.find('"', pos + 17)
|
|
||||||
fmt_map = urllib.unquote(html[pos + 17:pos2]) + ','
|
|
||||||
parts = fmt_map.split('|')
|
|
||||||
key = parts[0]
|
|
||||||
for p in parts[1:]:
|
|
||||||
idx = p.rfind(',')
|
|
||||||
value = p[:idx].replace('\\/', '/').replace('\u0026', '&').replace(',', '%2C')
|
|
||||||
formats[int(key)] = value
|
|
||||||
key = p[idx + 1:]
|
|
||||||
for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
|
|
||||||
if format in formats:
|
|
||||||
video.url = formats.get(format)
|
|
||||||
video.ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
player_url = YoutubeVideo.id2url(video.id)
|
||||||
|
with self.browser:
|
||||||
|
url, ext = self.browser.get_video_url(player_url)
|
||||||
|
|
||||||
|
video.url = url
|
||||||
|
video.ext = ext
|
||||||
|
|
||||||
def get_video(self, _id):
|
def get_video(self, _id):
|
||||||
m = self.URL_RE.match(_id)
|
m = self.URL_RE.match(_id)
|
||||||
|
|
|
||||||
|
|
@ -33,3 +33,9 @@ class YoutubeBrowser(BaseBrowser):
|
||||||
r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
|
r'.*youtube\.com/index\?ytsession=.+': ForbiddenVideoPage,
|
||||||
r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
|
r'.*youtube\.com/verify_age\?next_url=(?P<next_url>.+)': VerifyAgePage,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def get_video_url(self, player_url):
    """
    Open the player page and return the media location found on it.

    The browser navigates to ``player_url``, checks that it landed on a
    ``VideoPage``, and returns whatever that page's ``get_video_url()``
    yields.
    """
    self.location(player_url)

    # Sanity check: any other page type means the navigation did not end
    # up on the video player page.
    assert self.is_on_page(VideoPage)

    current_page = self.page
    return current_page.get_video_url()
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,9 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage
|
import urllib
|
||||||
|
|
||||||
|
from weboob.tools.browser import BasePage
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['ForbiddenVideo', 'ForbiddenVideoPage', 'VerifyAgePage', 'VideoPage']
|
__all__ = ['ForbiddenVideo', 'ForbiddenVideoPage', 'VerifyAgePage', 'VideoPage']
|
||||||
|
|
@ -30,15 +31,51 @@ class ForbiddenVideo(Exception):
|
||||||
|
|
||||||
|
|
||||||
class ForbiddenVideoPage(BasePage):
|
class ForbiddenVideoPage(BasePage):
|
||||||
def get_video(self, video=None):
|
def on_loaded(self):
|
||||||
element = self.parser.select(self.document.getroot(), '.yt-alert-content', 1)
|
element = self.parser.select(self.document.getroot(), '.yt-alert-content', 1)
|
||||||
raise ForbiddenVideo(element.text.strip())
|
raise ForbiddenVideo(element.text.strip())
|
||||||
|
|
||||||
|
|
||||||
class VerifyAgePage(BasePage):
|
class VerifyAgePage(BasePage):
|
||||||
def get_video(self, video=None):
|
def on_loaded(self):
|
||||||
raise ForbiddenVideo('verify age not implemented')
|
raise ForbiddenVideo('This video or group may contain content that is inappropriate for some users')
|
||||||
|
|
||||||
|
|
||||||
class VideoPage(BasePage):
|
class VideoPage(BasePage):
|
||||||
pass
|
AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13]
|
||||||
|
FORMAT_EXTENSIONS = {
|
||||||
|
13: '3gp',
|
||||||
|
17: 'mp4',
|
||||||
|
18: 'mp4',
|
||||||
|
22: 'mp4',
|
||||||
|
37: 'mp4',
|
||||||
|
38: 'video', # You actually don't know if this will be MOV, AVI or whatever
|
||||||
|
43: 'webm',
|
||||||
|
45: 'webm',
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_video_url(self, format=18):
    """
    Find the direct media URL of the video embedded in this page.

    The <script> elements of the document are scanned for the
    ``"fmt_url_map": "`` entry. Its value is a URL-quoted list of
    ``<format id>|<url>`` pairs separated by commas. Only the first
    script containing the entry is parsed.

    Starting at ``format`` and walking down ``AVAILABLE_FORMATS``, the
    first format present in the map is selected.

    :param format: preferred format id; must be listed in
                   ``AVAILABLE_FORMATS``
    :returns: ``(url, ext)`` for the selected format, where ``ext`` comes
              from ``FORMAT_EXTENSIONS`` (``'flv'`` when the format has no
              entry there), or ``(None, None)`` when no format matches
    :raises ValueError: if ``format`` is not in ``AVAILABLE_FORMATS``
    """
    # Resolved locally so the method works with both the Python 2 and the
    # Python 3 layout of urllib.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    marker = '"fmt_url_map": "'
    formats = {}
    for script in self.parser.select(self.document.getroot(), 'script'):
        text = script.text
        if not text:
            continue

        pos = text.find(marker)
        if pos < 0:
            continue

        # Skip exactly the marker: a hard-coded offset longer than the
        # marker would eat the first digit of the first format id.
        start = pos + len(marker)
        end = text.find('"', start)
        if end < 0:
            # No closing quote: take the rest of the script rather than
            # silently dropping its last character.
            end = len(text)

        # The trailing comma lets the last pair be split like the others.
        fmt_map = unquote(text[start:end]) + ','
        parts = fmt_map.split('|')
        key = parts[0]
        for part in parts[1:]:
            # Each part is "<url>,<next format id>"; the URL itself may
            # contain commas, so split on the last one only.
            idx = part.rfind(',')
            value = (part[:idx]
                     .replace('\\/', '/')
                     .replace('\\u0026', '&')
                     .replace(',', '%2C'))
            formats[int(key)] = value
            key = part[idx + 1:]
        break

    first = self.AVAILABLE_FORMATS.index(format)
    for candidate in self.AVAILABLE_FORMATS[first:]:
        if candidate in formats:
            url = formats[candidate]
            ext = self.FORMAT_EXTENSIONS.get(candidate, 'flv')
            return url, ext

    return None, None
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue