Compare commits

...

3 Commits

Author SHA1 Message Date
dirkf
fa7f0effbe [YouTube] Avoid crash in author extraction 2023-06-22 23:14:21 +01:00
dirkf
ebdc82c586 [workflows/ci.yml] Replace actions/setup-python for legacy Pythons
Thanks MatteoH2O1999: https://github.com/MatteoH2O1999/setup-python
2023-06-22 23:12:22 +01:00
pukkandan
9112e668a5 [YouTube] Improve nsig function name extraction
Fixes player b7910ca8, using `,` vs `;`
See https://github.com/ytdl-org/youtube-dl/issues/32292#issuecomment-1602231170

Co-authored-by: dirkf
2023-06-22 16:46:53 +01:00
3 changed files with 21 additions and 17 deletions

View File

@@ -38,10 +38,12 @@ jobs:
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
- name: Set up supported Python ${{ matrix.python-version }} - name: Set up supported Python ${{ matrix.python-version }}
uses: actions/setup-python@v4 # wrap broken actions/setup-python@v4
if: ${{ matrix.python-impl == 'cpython' && ! contains(fromJSON('["3.3", "3.4"]'), matrix.python-version) }} uses: ytdl-org/setup-python@v1
with: with:
python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
cache-build: true
allow-build: info
- name: Set up Java 8 - name: Set up Java 8
if: ${{ matrix.python-impl == 'jython' }} if: ${{ matrix.python-impl == 'jython' }}
uses: actions/setup-java@v2 uses: actions/setup-java@v2

View File

@@ -63,11 +63,6 @@ _SIG_TESTS = [
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js',
'312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12',
'112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3',
),
(
'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js',
'2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA',
'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0',
) )
] ]
@@ -157,8 +152,8 @@ _NSIG_TESTS = [
'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ', 'qO0NiMtYQ7TeJnfFG2', 'k9cuJDHNS5O7kQ',
), ),
( (
'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'E2AQVN6y_zM7uN9w8z', '9A2dbY5GDZrt9A', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
), ),
] ]
@@ -236,7 +231,7 @@ def n_sig(jscode, sig_input):
make_sig_test = t_factory( make_sig_test = t_factory(
'signature', signature, re.compile(r'(?s).*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) 'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
for test_spec in _SIG_TESTS: for test_spec in _SIG_TESTS:
make_sig_test(*test_spec) make_sig_test(*test_spec)

View File

@@ -448,7 +448,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
extract_attributes(self._search_regex( extract_attributes(self._search_regex(
r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)''' r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')%s\2[^>]*>)'''
% re.escape(var_name), % re.escape(var_name),
get_element_by_attribute('itemprop', 'author', webpage) or '', get_element_by_attribute('itemprop', 'author', webpage or '') or '',
'author link', default='')), 'author link', default='')),
paths[var_name][0]) paths[var_name][0])
@@ -1623,15 +1623,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx') nfunc, idx = re.match(target, nfunc_and_idx).group('nfunc', 'idx')
if not idx: if not idx:
return nfunc return nfunc
VAR_RE_TMPL = r'var\s+%s\s*=\s*(?P<name>\[(?P<alias>%s)\])[;,]'
note = 'Initial JS player n function {0} (%s[%s])' % (nfunc, idx)
def search_function_code(needle, group):
return self._search_regex(
VAR_RE_TMPL % (re.escape(nfunc), needle), jscode,
note.format(group), group=group)
if int_or_none(idx) == 0: if int_or_none(idx) == 0:
real_nfunc = self._search_regex( real_nfunc = search_function_code(r'[a-zA-Z_$][\w$]*', group='alias')
r'var %s\s*=\s*\[([a-zA-Z_$][\w$]*)\];' % (re.escape(nfunc), ), jscode,
'Initial JS player n function alias ({nfunc}[{idx}])'.format(**locals()))
if real_nfunc: if real_nfunc:
return real_nfunc return real_nfunc
return self._parse_json(self._search_regex( return self._parse_json(
r'var %s\s*=\s*(\[.+?\]);' % (re.escape(nfunc), ), jscode, search_function_code('.+?', group='name'),
'Initial JS player n function name ({nfunc}[{idx}])'.format(**locals())), nfunc, transform_source=js_to_json)[int(idx)] nfunc, transform_source=js_to_json)[int(idx)]
def _extract_n_function(self, video_id, player_url): def _extract_n_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)