Fix URL parsing broken by upstream website changes
This commit is contained in:
parent
dff165593f
commit
59f8fc9622
1 changed file with 25 additions and 13 deletions
|
|
@@ -18,6 +18,11 @@
|
||||||
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
except ImportError:
|
||||||
|
import simplejson as json
|
||||||
|
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
from weboob.tools.browser import BasePage, BrokenPageError, BrowserIncorrectPassword
|
from weboob.tools.browser import BasePage, BrokenPageError, BrowserIncorrectPassword
|
||||||
|
|
@@ -97,22 +102,29 @@ class VideoPage(BaseYoutubePage):
|
||||||
text = script.text
|
text = script.text
|
||||||
if not text:
|
if not text:
|
||||||
continue
|
continue
|
||||||
pos = text.find('"fmt_url_map": "')
|
|
||||||
if pos >= 0:
|
pattern = "'PLAYER_CONFIG': "
|
||||||
pos2 = text.find('"', pos + 17)
|
pos = text.find(pattern)
|
||||||
fmt_map = urllib.unquote(text[pos + 17:pos2]) + ','
|
if pos < 0:
|
||||||
parts = fmt_map.split('|')
|
continue
|
||||||
key = parts[0]
|
|
||||||
for p in parts[1:]:
|
sub = text[pos+len(pattern):pos+text[pos:].find('\n')]
|
||||||
idx = p.rfind(',')
|
a = json.loads(sub)
|
||||||
value = p[:idx].replace('\\/', '/').replace('\u0026', '&').replace(',', '%2C')
|
|
||||||
formats[int(key)] = value
|
for part in a['args']['url_encoded_fmt_stream_map'].split('&'):
|
||||||
key = p[idx + 1:]
|
key, value = part.split('=', 1)
|
||||||
break
|
if key != 'itag' or not 'url' in value:
|
||||||
|
continue
|
||||||
|
|
||||||
|
value = urllib.unquote(value)
|
||||||
|
fmt, url = value.split(',url=')
|
||||||
|
formats[int(fmt)] = url
|
||||||
|
|
||||||
|
# choose the better format to use.
|
||||||
for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
|
for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
|
||||||
if format in formats:
|
if format in formats:
|
||||||
url = formats.get(format)
|
url = formats.get(format)
|
||||||
ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
|
ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
|
||||||
return url, ext
|
return url, ext
|
||||||
|
|
||||||
return None, None
|
raise BrokenPageError('Unable to find file URL')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue