Compare commits

...

2 Commits

Author SHA1 Message Date
dirkf
34c06b16f5 Support Youtube Shorts URL format 2022-02-01 14:40:20 +00:00
dirkf
1e677567cd
[YouTube] Fix n-sig for player e06dea74 (#30582)
From yt-dl commit 48416bc
2022-02-01 14:39:03 +00:00
2 changed files with 45 additions and 10 deletions

View File

@ -82,6 +82,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
), ),
(
'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
),
(
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
),
] ]
@ -110,10 +118,17 @@ class TestPlayerInfo(unittest.TestCase):
class TestSignature(unittest.TestCase): class TestSignature(unittest.TestCase):
def setUp(self): def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs')
if not os.path.exists(self.TESTDATA_DIR): if not os.path.exists(self.TESTDATA_DIR):
os.mkdir(self.TESTDATA_DIR) os.mkdir(self.TESTDATA_DIR)
def tearDown(self):
    """Delete every entry found in ``self.TESTDATA_DIR``.

    Cleanup is best-effort: the first ``OSError`` (directory missing,
    entry not removable, ...) ends the sweep silently.
    """
    try:
        for f in os.listdir(self.TESTDATA_DIR):
            # os.listdir() yields bare entry names; qualify each one with
            # the directory so the file removed is the one inside
            # TESTDATA_DIR, not a same-named path under the current
            # working directory.
            os.remove(os.path.join(self.TESTDATA_DIR, f))
    except OSError:
        pass
def t_factory(name, sig_func, url_pattern): def t_factory(name, sig_func, url_pattern):
def make_tfunc(url, sig_input, expected_sig): def make_tfunc(url, sig_input, expected_sig):
@ -145,12 +160,7 @@ def signature(jscode, sig_input):
def n_sig(jscode, sig_input): def n_sig(jscode, sig_input):
# Pending implementation of _extract_n_function_name() or similar in funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
# youtube.py, hard-code here
# funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
import re
funcname = re.search(r'[=(,&|](\w+)\(\w+\),\w+\.set\("n",', jscode)
funcname = funcname and funcname.group(1)
return JSInterpreter(jscode).call_function(funcname, sig_input) return JSInterpreter(jscode).call_function(funcname, sig_input)

View File

@ -28,6 +28,7 @@ from ..utils import (
dict_get, dict_get,
float_or_none, float_or_none,
int_or_none, int_or_none,
js_to_json,
mimetype2ext, mimetype2ext,
parse_codecs, parse_codecs,
parse_duration, parse_duration,
@ -416,6 +417,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:.*?\#/)? # handle anchor (#/) redirect urls (?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID: (?: # the various things that can precede the ID:
(?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/ (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
|shorts/
|(?: # or the v= param in all its forms |(?: # or the v= param in all its forms
(?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
(?:\?|\#!?) # the params delimiter ? or # or #! (?:\?|\#!?) # the params delimiter ? or # or #!
@ -1118,6 +1120,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# YT 'Shorts'
'url': 'https://youtube.com/shorts/4L2J27mJ3Dc',
'info_dict': {
'id': '4L2J27mJ3Dc',
'ext': 'mp4',
'upload_date': '20211025',
'uploader': 'Charlie Berens',
'description': 'md5:976512b8a29269b93bbd8a61edc45a6d',
'uploader_id': 'fivedlrmilkshake',
'title': 'Midwest Squid Game #Shorts',
},
'params': {
'skip_download': True,
},
},
] ]
_formats = { _formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@ -1391,9 +1409,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116 # 2. https://code.videolan.org/videolan/vlc/-/blob/4fb284e5af69aa9ac2100ccbdd3b88debec9987f/share/lua/playlist/youtube.lua#L116
# 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377 # 3. https://github.com/ytdl-org/youtube-dl/issues/30097#issuecomment-950157377
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
return self._search_regex( target = r'(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?'
(r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',), nfunc_and_idx = self._search_regex(
jscode, 'Initial JS player n function name', group='nfunc') r'\.get\("n"\)\)&&\(b=(%s)\([a-zA-Z0-9]\)' % (target, ),
jscode, 'Initial JS player n function name')
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx:
return nfunc
return self._parse_json(self._search_regex(
r'var %s\s*=\s*(\[.+?\]);' % (nfunc, ), jscode,
'Initial JS player n function list ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url): def _extract_n_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)