Compare commits

...

27 Commits

Author SHA1 Message Date
Ben Welsh
1dc9d18084
Merge 481ad85994 into c5098961b0 2024-08-21 22:32:46 -04:00
dirkf
c5098961b0 [Youtube] Rework n function extraction pattern
Now also succeeds with player b12cc44b
2024-08-06 20:59:09 +01:00
dirkf
dbc08fba83 [jsinterp] Improve slice implementation for player b12cc44b
Partly taken from yt-dlp/yt-dlp#10664, thx seproDev
        Fixes #32896
2024-08-06 20:51:38 +01:00
Aiur Adept
71223bff39
[Youtube] Fix nsig extraction for player 20dfca59 (#32891)
* dirkf's patch for nsig extraction
* add generic search per  yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
2024-08-01 19:18:34 +01:00
dirkf
481ad85994
Newline at end 2022-10-30 21:10:32 +00:00
dirkf
54a1f498f1
Ensure primary is a dict
Test commit to provoke CI tests
2022-10-30 21:07:50 +00:00
palewire
d75cf3ade0
Worked youtube downloader back in 2022-08-23 10:45:31 -07:00
palewire
e6eef9e0ef
Merge branch 'master' of https://github.com/palewire/youtube-dl into parler 2022-08-23 10:44:38 -07:00
palewire
18b7043b0c
Backport from yt-dlp 2022-08-15 11:58:04 -07:00
palewire
c8686e7ccb
No @ 2022-08-09 03:44:59 -07:00
palewire
0ef718103f
Shorter title 2022-08-09 03:42:26 -07:00
palewire
871b60b117
_generic_title 2022-08-08 11:40:31 -07:00
Ben Welsh
2e1c744be8
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 11:05:07 -07:00
Ben Welsh
fec554563a
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 11:04:54 -07:00
palewire
99d43f800f
Merge branch 'parler' of github.com:palewire/youtube-dl into parler 2022-08-08 11:04:16 -07:00
palewire
92e053f4d2
Single quotes 2022-08-08 11:04:11 -07:00
Ben Welsh
e46a2a58e2
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 11:03:18 -07:00
Ben Welsh
40f370c358
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 11:03:09 -07:00
Ben Welsh
4b3bc81777
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 11:03:00 -07:00
palewire
027ba18bf4
Merge branch 'parler' of github.com:palewire/youtube-dl into parler 2022-08-08 11:02:09 -07:00
palewire
b14b4b8af5
Single quotes 2022-08-08 11:01:58 -07:00
Ben Welsh
ff952d586b
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 09:47:08 -07:00
Ben Welsh
1a82825b9c
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 09:46:46 -07:00
Ben Welsh
b1bbb03e0b
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 09:46:26 -07:00
Ben Welsh
a6f8feef49
Update youtube_dl/extractor/parler.py
Co-authored-by: dirkf <fieldhouse@gmx.net>
2022-08-08 09:45:59 -07:00
palewire
6d6f38bed8
Cut geography 2022-08-07 11:14:38 -07:00
palewire
e9839174a1
[parler] Add new extractor 2022-08-07 11:14:24 -07:00
6 changed files with 200 additions and 13 deletions

View File

@ -425,6 +425,34 @@ class TestJSInterpreter(unittest.TestCase):
self._test(jsi, [''], args=['', '-']) self._test(jsi, [''], args=['', '-'])
self._test(jsi, [], args=['', '']) self._test(jsi, [], args=['', ''])
def test_slice(self):
    """Check JS ``slice()`` on an array and on a string.

    The same thirteen argument combinations (no argument, one argument,
    two arguments; positive, negative and out-of-range indices) are run
    first against the array ``[0, 1, 2, 3, 4, 5, 6, 7, 8]`` and then
    against the string ``"012345678"``.
    """
    # Each entry: (text placed between the parentheses of `.slice(...)`,
    #              digits that are expected to remain after slicing)
    cases = (
        ('', (0, 1, 2, 3, 4, 5, 6, 7, 8)),
        ('0', (0, 1, 2, 3, 4, 5, 6, 7, 8)),
        ('5', (5, 6, 7, 8)),
        ('99', ()),
        ('-2', (7, 8)),
        ('-99', (0, 1, 2, 3, 4, 5, 6, 7, 8)),
        ('0, 0', ()),
        ('1, 0', ()),
        ('0, 1', (0,)),
        ('3, 6', (3, 4, 5)),
        ('1, -1', (1, 2, 3, 4, 5, 6, 7)),
        ('-1, 1', ()),
        ('-3, -1', (6, 7)),
    )
    template = 'function f(){return %s.slice(%s)}'

    # Array receiver: the expected value is a list of the remaining digits.
    for js_args, digits in cases:
        self._test(
            template % ('[0, 1, 2, 3, 4, 5, 6, 7, 8]', js_args),
            list(digits))

    # String receiver: the expected value is the remaining digits joined up.
    for js_args, digits in cases:
        self._test(
            template % ('"012345678"', js_args),
            ''.join('%d' % d for d in digits))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -174,6 +174,14 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
'7X-he4jjvMx7BCX', 'sViSydX8IHtdWA', '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
), ),
(
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
'-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
),
(
'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js',
'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw',
),
] ]

View File

@ -917,6 +917,7 @@ from .palcomp3 import (
PalcoMP3VideoIE, PalcoMP3VideoIE,
) )
from .pandoratv import PandoraTVIE from .pandoratv import PandoraTVIE
from .parler import ParlerIE
from .parliamentliveuk import ParliamentLiveUKIE from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE from .patreon import PatreonIE
from .pbs import PBSIE from .pbs import PBSIE

View File

@ -0,0 +1,114 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .youtube import YoutubeIE
from ..utils import (
clean_html,
int_or_none,
strip_or_none,
try_get,
unified_timestamp,
urlencode_postdata,
)
class ParlerIE(InfoExtractor):
    """Extractor for single posts on parler.com.

    A post either carries a natively hosted video, or a link list whose
    first entry is handed over to the YouTube extractor.
    """

    IE_DESC = 'Posts on parler.com'
    _VALID_URL = r'https://parler\.com/feed/(?P<id>[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
    _TESTS = [
        {
            'url': 'https://parler.com/feed/df79fdba-07cc-48fe-b085-3293897520d7',
            'md5': '16e0f447bf186bb3cf64de5bbbf4d22d',
            'info_dict': {
                'id': 'df79fdba-07cc-48fe-b085-3293897520d7',
                'ext': 'mp4',
                'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
                'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
                'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
                'timestamp': 1659744000,
                'upload_date': '20220806',
                'uploader': 'Tulsi Gabbard',
                'uploader_id': 'TulsiGabbard',
                'uploader_url': 'https://parler.com/TulsiGabbard',
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        {
            'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287',
            'md5': '11687e2f5bb353682cee338d181422ed',
            'info_dict': {
                'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287',
                'ext': 'mp4',
                'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg',
                'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287',
                'description': 'This man should run for office',
                'timestamp': 1659657600,
                'upload_date': '20220805',
                'uploader': 'Benny Johnson',
                'uploader_id': 'BennyJohnson',
                'uploader_url': 'https://parler.com/BennyJohnson',
                'view_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        {
            'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
            'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
            'info_dict': {
                'id': 'r5vkSaz8PxQ',
                'ext': 'mp4',
                'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp',
                'title': 'Tom MacDonald Names Reaction',
                'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
                'upload_date': '20220716',
                'duration': 1267,
                'uploader': 'Mahesh Chookolingo',
                'uploader_id': 'maheshchookolingo',
                'uploader_url': 'http://www.youtube.com/user/maheshchookolingo',
                'channel': 'Mahesh Chookolingo',
                'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
                'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
                'categories': ['Entertainment'],
                'tags': list,
                'availability': 'public',
                'live_status': 'not_live',
                'view_count': int,
                'comment_count': int,
                'like_count': int,
                'channel_follower_count': int,
                'age_limit': 0,
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
    ]

    def _real_extract(self, url):
        """Return the info dict (or a YouTube URL result) for a Parler post.

        The post id taken from *url* is POSTed as ``uuid`` to the
        ``ParleyDetailEndpoint.php`` API and the first item of the ``data``
        list in the response is used.

        The video URL is mandatory and is read with plain indexing, so a
        response without it still fails loudly.  Every other field is
        optional and is extracted so that a missing key yields ``None``
        rather than aborting the extraction.
        """
        video_id = self._match_id(url)
        data = self._download_json(
            'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id,
            data=urlencode_postdata({'uuid': video_id}))['data'][0]
        primary = try_get(data, lambda x: x['primary'], dict) or {}

        # Only parse the embed field when it is non-empty: an empty string is
        # not valid JSON, so parsing it cannot succeed.
        # NOTE(review): presumably _parse_json(fatal=False) reports a warning
        # on failure, which this also avoids for native posts - confirm.
        embed_json = primary.get('V2LINKLONG')
        embed = (
            self._parse_json(embed_json, video_id, fatal=False)
            if embed_json else None)
        # try_get copes with the parsed value being empty or not indexable
        embed_url = try_get(embed, lambda x: x[0])
        if embed_url:
            return self.url_result(embed_url, YoutubeIE.ie_key())

        username = strip_or_none(primary.get('username'))
        engagement = try_get(data, lambda x: x['engagement'], dict) or {}

        return {
            'id': video_id,
            # mandatory: deliberately left as hard indexing
            'url': primary['video_data']['videoSrc'],
            'thumbnail': try_get(primary, lambda x: x['video_data']['thumbnailUrl']),
            'title': 'Parler video #%s' % video_id,
            'description': strip_or_none(clean_html(primary.get('full_body'))) or None,
            'timestamp': unified_timestamp(primary.get('date_created')),
            'uploader': strip_or_none(primary.get('name')),
            'uploader_id': username,
            # avoid producing 'https://parler.com/None' when there is no username
            'uploader_url': ('https://parler.com/%s' % username) if username else None,
            'view_count': int_or_none(primary.get('view_count')),
            'comment_count': int_or_none(engagement.get('commentCount')),
            'repost_count': int_or_none(engagement.get('echoCount')),
        }

View File

@ -1659,17 +1659,46 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode): def _extract_n_function_name(self, jscode):
func_name, idx = self._search_regex( func_name, idx = self._search_regex(
# new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c) # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
# or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s # or: (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
# old: .get("n"))&&(b=nfunc[idx](b) # or: (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
# older: .get("n"))&&(b=nfunc(b) # or: (b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
# old: (b=a.get("n"))&&(b=nfunc[idx](b)(?P<c>[a-z])\s*=\s*[a-z]\s*
# older: (b=a.get("n"))&&(b=nfunc(b)
r'''(?x) r'''(?x)
(?:\(\s*(?P<b>[a-z])\s*=\s*(?: \((?:[\w$()\s]+,)*?\s* # (
String\s*\.\s*fromCharCode\s*\(\s*110\s*\)| (?P<b>[a-z])\s*=\s* # b=
"n+"\[\s*\+?s*[\w$.]+\s*] (?:
)\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)? (?: # expect ,c=a.get(b) (etc)
\.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s* String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\) "n+"\[\s*\+?s*[\w$.]+\s*]
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) )\s*(?:,[\w$()\s]+(?=,))*|
(?P<old>[\w$]+) # a (old[er])
)\s*
(?(old)
# b.get("n")
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\.\s*n|\[\s*"n"\s*]|\.\s*get\s*\(\s*"n"\s*\))
| # ,c=a.get(b)
,\s*(?P<c>[a-z])\s*=\s*[a-z]\s*
(?:\.\s*[\w$]+\s*|\[\s*[\w$]+\s*]\s*)*?
(?:\[\s*(?P=b)\s*]|\.\s*get\s*\(\s*(?P=b)\s*\))
)
# interstitial junk
\s*(?:\|\|\s*null\s*)?(?:\)\s*)?&&\s*(?:\(\s*)?
(?(c)(?P=c)|(?P=b))\s*=\s* # [c|b]=
# nfunc|nfunc[idx]
(?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
default=(None, None))
# thx bashonly: yt-dlp/yt-dlp/pull/10611
if not func_name:
self.report_warning('Falling back to generic n function search')
return self._search_regex(
r'''(?xs)
(?:(?<=[^\w$])|^) # instead of \b, which ignores $
(?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
\s*\{(?:(?!};).)+?["']enhanced_except_
''', jscode, 'Initial JS player n function name', group='name')
if not idx: if not idx:
return func_name return func_name

View File

@ -925,9 +925,16 @@ class JSInterpreter(object):
obj.reverse() obj.reverse()
return obj return obj
elif member == 'slice': elif member == 'slice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, (list, compat_str)), 'must be applied on a list or string')
assertion(len(argvals) == 1, 'takes exactly one argument') # From [1]:
return obj[argvals[0]:] # .slice() - like [:]
# .slice(n) - like [n:] (not [slice(n)])
# .slice(m, n) - like [m:n] or [slice(m, n)]
# [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/slice
assertion(len(argvals) <= 2, 'takes between 0 and 2 arguments')
if len(argvals) < 2:
argvals += (None,)
return obj[slice(*argvals)]
elif member == 'splice': elif member == 'splice':
assertion(isinstance(obj, list), 'must be applied on a list') assertion(isinstance(obj, list), 'must be applied on a list')
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')