use browser to get file URL, and leave an error when it needs an age verification
This commit is contained in:
parent
ddda7f88e4
commit
d11084f05e
3 changed files with 57 additions and 38 deletions
|
|
@ -18,8 +18,9 @@
|
|||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
import urllib
|
||||
|
||||
from weboob.tools.browser import BasePage
|
||||
|
||||
|
||||
__all__ = ['ForbiddenVideo', 'ForbiddenVideoPage', 'VerifyAgePage', 'VideoPage']
|
||||
|
|
@ -30,15 +31,51 @@ class ForbiddenVideo(Exception):
|
|||
|
||||
|
||||
class ForbiddenVideoPage(BasePage):
|
||||
def get_video(self, video=None):
|
||||
def on_loaded(self):
|
||||
element = self.parser.select(self.document.getroot(), '.yt-alert-content', 1)
|
||||
raise ForbiddenVideo(element.text.strip())
|
||||
|
||||
|
||||
class VerifyAgePage(BasePage):
|
||||
def get_video(self, video=None):
|
||||
raise ForbiddenVideo('verify age not implemented')
|
||||
def on_loaded(self):
|
||||
raise ForbiddenVideo('This video or group may contain content that is inappropriate for some users')
|
||||
|
||||
|
||||
class VideoPage(BasePage):
|
||||
pass
|
||||
AVAILABLE_FORMATS = [38, 37, 22, 45, 35, 34, 43, 18, 6, 5, 17, 13]
|
||||
FORMAT_EXTENSIONS = {
|
||||
13: '3gp',
|
||||
17: 'mp4',
|
||||
18: 'mp4',
|
||||
22: 'mp4',
|
||||
37: 'mp4',
|
||||
38: 'video', # You actually don't know if this will be MOV, AVI or whatever
|
||||
43: 'webm',
|
||||
45: 'webm',
|
||||
}
|
||||
|
||||
def get_video_url(self, format=18):
|
||||
formats = {}
|
||||
for script in self.parser.select(self.document.getroot(), 'script'):
|
||||
text = script.text
|
||||
if not text:
|
||||
continue
|
||||
pos = text.find('"fmt_url_map": "')
|
||||
if pos >= 0:
|
||||
pos2 = text.find('"', pos + 17)
|
||||
fmt_map = urllib.unquote(text[pos + 17:pos2]) + ','
|
||||
parts = fmt_map.split('|')
|
||||
key = parts[0]
|
||||
for p in parts[1:]:
|
||||
idx = p.rfind(',')
|
||||
value = p[:idx].replace('\\/', '/').replace('\u0026', '&').replace(',', '%2C')
|
||||
formats[int(key)] = value
|
||||
key = p[idx + 1:]
|
||||
break
|
||||
for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
|
||||
if format in formats:
|
||||
url = formats.get(format)
|
||||
ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
|
||||
return url, ext
|
||||
|
||||
return None, None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue