Mirror of https://github.com/ytdl-org/youtube-dl.git, synced 2024-12-20 23:52:07 +00:00

Compare commits

No commits in common: "1fa8b86f0b95f2e1488042ceeda8f356ea2a5448" and "47214e46d852e9d7ddf81d69a8e70806e2396c6c" have entirely different histories, so the comparison below is the full diff 1fa8b86f0b...47214e46d8. In the hunks that follow, `-` lines are from 1fa8b86f0b and `+` lines are from 47214e46d8.
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -4,7 +4,6 @@ from inspect import getsource
 import io
 import os
 from os.path import dirname as dirn
-import re
 import sys
 
 print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
@@ -30,18 +29,11 @@ from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
 with open('devscripts/lazy_load_template.py', 'rt') as f:
     module_template = f.read()
 
-
-def get_source(m):
-    return re.sub(r'(?m)^\s*#.*\n', '', getsource(m))
-
-
 module_contents = [
-    module_template,
-    get_source(InfoExtractor.suitable),
-    get_source(InfoExtractor._match_valid_url) + '\n',
+    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
     # needed for suitable() methods of Youtube extractor (see #28780)
-    'from youtube_dl.utils import parse_qs, variadic\n',
+    'from youtube_dl.utils import parse_qs\n',
 ]
 
 ie_template = '''
@@ -74,7 +66,7 @@ def build_lazy_ie(ie, name):
         valid_url=valid_url,
         module=ie.__module__)
     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += '\n' + get_source(ie.suitable)
+        s += '\n' + getsource(ie.suitable)
     if hasattr(ie, '_make_valid_url'):
         # search extractors
         s += make_valid_template.format(valid_url=ie._make_valid_url())
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -7,32 +7,14 @@ import io
 import os
 import sys
 import unittest
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-import threading
-
-from test.helper import (
-    expect_dict,
-    expect_value,
-    FakeYDL,
-    http_server_port,
-)
-from youtube_dl.compat import (
-    compat_etree_fromstring,
-    compat_http_server,
-)
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
 from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.extractor import (
-    get_info_extractor,
-    YoutubeIE,
-)
-from youtube_dl.utils import (
-    encode_data_uri,
-    ExtractorError,
-    RegexNotFoundError,
-    strip_jsonp,
-)
-
+from youtube_dl.extractor import YoutubeIE, get_info_extractor
+from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
 
 TEAPOT_RESPONSE_STATUS = 418
@@ -118,71 +100,6 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
 
-    def test_search_nextjs_data(self):
-        html = '''
-        <!DOCTYPE html>
-        <html>
-        <head>
-        <meta http-equiv="content-type" content=
-        "text/html; charset=utf-8">
-        <meta name="viewport" content="width=device-width">
-        <title>Test _search_nextjs_data()</title>
-        </head>
-        <body>
-        <div id="__next">
-        <div style="background-color:#17171E" class="FU" dir="ltr">
-        <div class="sc-93de261d-0 dyzzYE">
-        <div>
-        <header class="HD"></header>
-        <main class="MN">
-        <div style="height:0" class="HT0">
-        <div style="width:NaN%" data-testid=
-        "stream-container" class="WDN"></div>
-        </div>
-        </main>
-        </div>
-        <footer class="sc-6e5faf91-0 dEGaHS"></footer>
-        </div>
-        </div>
-        </div>
-        <script id="__NEXT_DATA__" type="application/json">
-        {"props":{"pageProps":{"video":{"id":"testid"}}}}
-        </script>
-        </body>
-        </html>
-        '''
-        search = self.ie._search_nextjs_data(html, 'testID')
-        self.assertEqual(search['props']['pageProps']['video']['id'], 'testid')
-
-    def test_search_nuxt_data(self):
-        html = '''
-        <!DOCTYPE html>
-        <html>
-        <head>
-        <meta http-equiv="content-type" content=
-        "text/html; charset=utf-8">
-        <title>Nuxt.js Test Page</title>
-        <meta name="viewport" content=
-        "width=device-width, initial-scale=1">
-        <meta data-hid="robots" name="robots" content="all">
-        </head>
-        <body class="BD">
-        <div id="__layout">
-        <h1 class="H1">Example heading</h1>
-        <div class="IN">
-        <p>Decoy text</p>
-        </div>
-        </div>
-        <script>
-        window.__NUXT__=(function(a,b,c,d,e,f,g,h){return {decoy:" default",data:[{track:{id:f,title:g}}]}}(null,null,"c",null,null,"testid","Nuxt.js title",null));
-        </script>
-        <script src="/_nuxt/a12345b.js" defer="defer"></script>
-        </body>
-        </html>
-        '''
-        search = self.ie._search_nuxt_data(html, 'testID')
-        self.assertEqual(search['track']['id'], 'testid')
-
     def test_search_json_ld_realworld(self):
         # https://github.com/ytdl-org/youtube-dl/issues/23306
         expect_dict(
@@ -431,24 +348,6 @@ class TestInfoExtractor(unittest.TestCase):
             }],
         })
 
-        # from https://0000.studio/
-        # with type attribute but without extension in URL
-        expect_dict(
-            self,
-            self.ie._parse_html5_media_entries(
-                'https://0000.studio',
-                r'''
-                <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
-                controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
-                </video>
-                ''', None)[0],
-            {
-                'formats': [{
-                    'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
-                    'ext': 'mp4',
-                }],
-            })
-
     def test_extract_jwplayer_data_realworld(self):
         # from http://www.suffolk.edu/sjc/
         expect_dict(
diff --git a/test/test_utils.py b/test/test_utils.py
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -20,7 +20,7 @@ import xml.etree.ElementTree
 from youtube_dl.utils import (
     age_restricted,
     args_to_str,
-    base_url,
+    encode_base_n,
     caesar,
     clean_html,
     clean_podcast_url,
@@ -29,12 +29,10 @@ from youtube_dl.utils import (
     detect_exe_version,
     determine_ext,
     dict_get,
-    encode_base_n,
     encode_compat_str,
     encodeFilename,
     escape_rfc3986,
     escape_url,
-    expand_path,
     extract_attributes,
     ExtractorError,
     find_xpath_attr,
@@ -53,7 +51,6 @@ from youtube_dl.utils import (
     js_to_json,
     LazyList,
     limit_length,
-    lowercase_escape,
     merge_dicts,
     mimetype2ext,
     month_by_name,
@@ -69,16 +66,17 @@ from youtube_dl.utils import (
     parse_resolution,
     parse_bitrate,
     pkcs1pad,
-    prepend_extension,
     read_batch_urls,
-    remove_start,
-    remove_end,
-    remove_quotes,
-    replace_extension,
-    rot47,
     sanitize_filename,
     sanitize_path,
     sanitize_url,
+    expand_path,
+    prepend_extension,
+    replace_extension,
+    remove_start,
+    remove_end,
+    remove_quotes,
+    rot47,
     shell_quote,
     smuggle_url,
     str_or_none,
@@ -95,8 +93,10 @@ from youtube_dl.utils import (
     unified_timestamp,
     unsmuggle_url,
     uppercase_escape,
+    lowercase_escape,
     url_basename,
     url_or_none,
+    base_url,
     urljoin,
     urlencode_postdata,
     urshift,
@@ -905,85 +905,6 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
-    def test_js_to_json_vars_strings(self):
-        self.assertDictEqual(
-            json.loads(js_to_json(
-                '''{
-                    'null': a,
-                    'nullStr': b,
-                    'true': c,
-                    'trueStr': d,
-                    'false': e,
-                    'falseStr': f,
-                    'unresolvedVar': g,
-                }''',
-                {
-                    'a': 'null',
-                    'b': '"null"',
-                    'c': 'true',
-                    'd': '"true"',
-                    'e': 'false',
-                    'f': '"false"',
-                    'g': 'var',
-                }
-            )),
-            {
-                'null': None,
-                'nullStr': 'null',
-                'true': True,
-                'trueStr': 'true',
-                'false': False,
-                'falseStr': 'false',
-                'unresolvedVar': 'var'
-            }
-        )
-
-        self.assertDictEqual(
-            json.loads(js_to_json(
-                '''{
-                    'int': a,
-                    'intStr': b,
-                    'float': c,
-                    'floatStr': d,
-                }''',
-                {
-                    'a': '123',
-                    'b': '"123"',
-                    'c': '1.23',
-                    'd': '"1.23"',
-                }
-            )),
-            {
-                'int': 123,
-                'intStr': '123',
-                'float': 1.23,
-                'floatStr': '1.23',
-            }
-        )
-
-        self.assertDictEqual(
-            json.loads(js_to_json(
-                '''{
-                    'object': a,
-                    'objectStr': b,
-                    'array': c,
-                    'arrayStr': d,
-                }''',
-                {
-                    'a': '{}',
-                    'b': '"{}"',
-                    'c': '[]',
-                    'd': '"[]"',
-                }
-            )),
-            {
-                'object': {},
-                'objectStr': '{}',
-                'array': [],
-                'arrayStr': '[]',
-            }
-        )
-
     def test_js_to_json_realworld(self):
         inp = '''{
             'clip':{'provider':'pseudo'}
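The removed test above exercises the variable-substitution mode of js_to_json() present on the 1fa8b86f0b side: a mapping passed as the second argument supplies JSON fragments for bare JavaScript identifiers. A minimal sketch of that behaviour (standalone, using only the signature the test itself demonstrates):

    import json

    from youtube_dl.utils import js_to_json

    # 'x' and 'y' are unquoted identifiers in the input;
    # the mapping supplies their JSON-encoded replacements.
    on = js_to_json("{'speed': x, 'label': y}", {'x': '1.25', 'y': '"fast"'})
    print(json.loads(on))  # {'speed': 1.25, 'label': 'fast'}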
@@ -1054,10 +975,10 @@ class TestUtil(unittest.TestCase):
             !42: 42
         }''')
         self.assertEqual(json.loads(on), {
-            'a': True,
-            'b': False,
-            'c': False,
-            'd': True,
+            'a': 0,
+            'b': 1,
+            'c': 0,
+            'd': 42.42,
             'e': [],
             'f': "abc",
             'g': "",
@@ -1127,26 +1048,10 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{ "040": "040" }')
         self.assertEqual(json.loads(on), {'040': '040'})
 
-        on = js_to_json('[1,//{},\n2]')
-        self.assertEqual(json.loads(on), [1, 2])
-
-        on = js_to_json(r'"\^\$\#"')
-        self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped')
-
-        on = js_to_json('\'"\\""\'')
-        self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped')
-
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
 
-    def test_js_to_json_template_literal(self):
-        self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
-        self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
-        self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
-        self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
-        self.assertEqual(js_to_json('`${name}`', {}), '"name"')
-
     def test_extract_attributes(self):
         self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
         self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
@@ -1681,11 +1586,6 @@ Line 1
             'dict': {},
         }
 
-        # define a pukka Iterable
-        def iter_range(stop):
-            for from_ in range(stop):
-                yield from_
-
         # Test base functionality
         self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
                          msg='allow tuple path')
@@ -1702,13 +1602,13 @@ Line 1
         # Test Ellipsis behavior
         self.assertCountEqual(traverse_obj(_TEST_DATA, Ellipsis),
                               (item for item in _TEST_DATA.values() if item not in (None, {})),
-                              msg='`...` should give all non-discarded values')
+                              msg='`...` should give all non discarded values')
         self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, Ellipsis)), _TEST_DATA['urls'][0].values(),
                               msg='`...` selection for dicts should select all values')
         self.assertEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'url')),
                          ['https://www.example.com/0', 'https://www.example.com/1'],
                          msg='nested `...` queries should work')
-        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), iter_range(4),
+        self.assertCountEqual(traverse_obj(_TEST_DATA, (Ellipsis, Ellipsis, 'index')), range(4),
                               msg='`...` query result should be flattened')
         self.assertEqual(traverse_obj(iter(range(4)), Ellipsis), list(range(4)),
                          msg='`...` should accept iterables')
@@ -1718,7 +1618,7 @@ Line 1
                          [_TEST_DATA['urls']],
                          msg='function as query key should perform a filter based on (key, value)')
         self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), set(('str',)),
-                              msg='exceptions in the query function should be caught')
+                              msg='exceptions in the query function should be catched')
         self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
                          msg='function key should accept iterables')
         if __debug__:
@@ -1806,7 +1706,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
                          msg='remove empty values when dict key')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=Ellipsis), {0: Ellipsis},
-                         msg='use `default` when dict key and a default')
+                         msg='use `default` when dict key and `default`')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
                          msg='remove empty values when nested dict key fails')
         self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
@@ -1868,7 +1768,7 @@ Line 1
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
                          'str', msg='accept matching `expected_type` type')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
-                         None, msg='reject non-matching `expected_type` type')
+                         None, msg='reject non matching `expected_type` type')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
                          '0', msg='transform type using type function')
         self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
@@ -1880,7 +1780,7 @@ Line 1
         self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
                          {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, set((int_or_none,))), expected_type=int),
-                         1, msg='expected_type should not filter non-final dict values')
+                         1, msg='expected_type should not filter non final dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
                          {0: {0: 100}}, msg='expected_type should transform deep dict values')
         self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(Ellipsis)),
@@ -1938,7 +1838,7 @@ Line 1
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
                                       _traverse_string=True), 'sr',
                          msg='`slice` should result in string if `traverse_string`')
-        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'),
+        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
                                       _traverse_string=True), 'str',
                          msg='function should result in string if `traverse_string`')
         self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
diff --git a/youtube_dl/extractor/clipchamp.py b/youtube_dl/extractor/clipchamp.py
deleted file mode 100644
--- a/youtube_dl/extractor/clipchamp.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-    merge_dicts,
-    T,
-    traverse_obj,
-    unified_timestamp,
-    url_or_none,
-)
-
-
-class ClipchampIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
-    _TESTS = [{
-        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
-        'info_dict': {
-            'id': 'gRXZ4ZhdDaU',
-            'ext': 'mp4',
-            'title': 'Untitled video',
-            'uploader': 'Alexander Schwartz',
-            'timestamp': 1680805580,
-            'upload_date': '20230406',
-            'thumbnail': r're:^https?://.+\.jpg',
-        },
-        'params': {
-            'skip_download': 'm3u8',
-            'format': 'bestvideo',
-        },
-    }]
-
-    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
-    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
-
-        storage_location = data.get('storage_location')
-        if storage_location != 'cf_stream':
-            raise ExtractorError('Unsupported clip storage location "%s"' % (storage_location,))
-
-        path = data['download_url']
-        iframe = self._download_webpage(
-            'https://iframe.cloudflarestream.com/' + path, video_id, 'Downloading player iframe')
-        subdomain = self._search_regex(
-            r'''\bcustomer-domain-prefix\s*=\s*("|')(?P<sd>[\w-]+)\1''', iframe,
-            'subdomain', group='sd', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
-
-        formats = self._extract_mpd_formats(
-            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
-            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
-        formats.extend(self._extract_m3u8_formats(
-            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
-            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
-
-        return merge_dicts({
-            'id': video_id,
-            'formats': formats,
-            'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), T(compat_str)))) or None,
-        }, traverse_obj(data, {
-            'title': ('project', 'project_name', T(compat_str)),
-            'timestamp': ('created_at', T(unified_timestamp)),
-            'thumbnail': ('thumbnail_url', T(url_or_none)),
-        }), rev=True)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import base64
 import datetime
-import functools
 import hashlib
 import json
 import netrc
@@ -24,7 +23,6 @@ from ..compat import (
     compat_getpass,
     compat_integer_types,
     compat_http_client,
-    compat_map as map,
     compat_os_name,
     compat_str,
     compat_urllib_error,
@@ -33,7 +31,6 @@ from ..compat import (
     compat_urllib_request,
     compat_urlparse,
     compat_xml_parse_error,
-    compat_zip as zip,
 )
 from ..downloader.f4m import (
     get_base_url,
@@ -73,7 +70,6 @@ from ..utils import (
     str_or_none,
     str_to_int,
     strip_or_none,
-    traverse_obj,
     try_get,
     unescapeHTML,
     unified_strdate,
@@ -83,7 +79,6 @@ from ..utils import (
     urljoin,
     url_basename,
     url_or_none,
-    variadic,
     xpath_element,
     xpath_text,
     xpath_with_ns,
@@ -372,22 +367,9 @@ class InfoExtractor(object):
     title, description etc.
 
 
-    A subclass of InfoExtractor must be defined to handle each specific site (or
-    several sites). Such a concrete subclass should be added to the list of
-    extractors. It should also:
-    * define its _VALID_URL attribute as a regexp, or a Sequence of alternative
-      regexps (but see below)
-    * re-define the _real_extract() method
-    * optionally re-define the _real_initialize() method.
-
-    An extractor subclass may also override suitable() if necessary, but the
-    function signature must be preserved and the function must import everything
-    it needs (except other extractors), so that lazy_extractors works correctly.
-    If the subclass's suitable() and _real_extract() functions avoid using
-    _VALID_URL, the subclass need not set that class attribute.
-
-    An abstract subclass of InfoExtractor may be used to simplify implementation
-    within an extractor module; it should not be added to the list of extractors.
+    Subclasses of this one should re-define the _real_initialize() and
+    _real_extract() methods and define a _VALID_URL regexp.
+    Probably, they should also be added to the list of extractors.
 
     _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
@@ -422,33 +404,22 @@ class InfoExtractor(object):
         self._x_forwarded_for_ip = None
         self.set_downloader(downloader)
 
-    @classmethod
-    def __match_valid_url(cls, url):
-        # This does not use has/getattr intentionally - we want to know whether
-        # we have cached the regexp for cls, whereas getattr would also
-        # match its superclass
-        if '_VALID_URL_RE' not in cls.__dict__:
-            # _VALID_URL can now be a list/tuple of patterns
-            cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL)))
-        # 20% faster than next(filter(None, (p.match(url) for p in cls._VALID_URL_RE)), None) in 2.7
-        for p in cls._VALID_URL_RE:
-            p = p.match(url)
-            if p:
-                return p
-
-    # The public alias can safely be overridden, as in some back-ports
-    _match_valid_url = __match_valid_url
-
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-        # This function must import everything it needs (except other extractors),
-        # so that lazy_extractors works correctly
-        return cls.__match_valid_url(url) is not None
+
+        # This does not use has/getattr intentionally - we want to know whether
+        # we have cached the regexp for *this* class, whereas getattr would also
+        # match the superclass
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        return cls._VALID_URL_RE.match(url) is not None
 
     @classmethod
     def _match_id(cls, url):
-        m = cls.__match_valid_url(url)
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        m = cls._VALID_URL_RE.match(url)
         assert m
         return compat_str(m.group('id'))
 
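Both sides of this hunk use the caching idiom that the comments describe: the compiled pattern is cached as a class attribute, and the check goes through cls.__dict__ rather than getattr() so that a subclass with its own _VALID_URL gets its own cache instead of inheriting the regexp compiled for its superclass. A standalone sketch of the idiom (class names here are illustrative, not from the codebase):

    import re

    class Base(object):
        _VALID_URL = r'https?://example\.com/(?P<id>\d+)'

        @classmethod
        def suitable(cls, url):
            # getattr(cls, '_VALID_URL_RE', None) would also find a regexp
            # cached on a superclass; testing cls.__dict__ caches per class.
            if '_VALID_URL_RE' not in cls.__dict__:
                cls._VALID_URL_RE = re.compile(cls._VALID_URL)
            return cls._VALID_URL_RE.match(url) is not None

    class Sub(Base):
        _VALID_URL = r'https?://example\.org/(?P<id>\d+)'

    print(Base.suitable('https://example.com/1'))  # True
    print(Sub.suitable('https://example.org/2'))   # True, via Sub's own cached regexp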
@@ -1034,8 +1005,6 @@ class InfoExtractor(object):
             if group is None:
                 # return the first matching group
                 return next(g for g in mobj.groups() if g is not None)
-            elif isinstance(group, (list, tuple)):
-                return tuple(mobj.group(g) for g in group)
             else:
                 return mobj.group(group)
         elif default is not NO_DEFAULT:
@@ -1051,9 +1020,10 @@ class InfoExtractor(object):
         Like _search_regex, but strips HTML tags and unescapes entities.
         """
         res = self._search_regex(pattern, string, name, default, fatal, flags, group)
-        if isinstance(res, tuple):
-            return tuple(map(clean_html, res))
-        return clean_html(res)
+        if res:
+            return clean_html(res).strip()
+        else:
+            return res
 
     def _get_netrc_login_info(self, netrc_machine=None):
         username = None
@@ -1378,44 +1348,6 @@ class InfoExtractor(object):
                 break
         return dict((k, v) for k, v in info.items() if v is not None)
 
-    def _search_nextjs_data(self, webpage, video_id, **kw):
-        nkw = dict((k, v) for k, v in kw.items() if k in ('transform_source', 'fatal'))
-        kw.pop('transform_source', None)
-        next_data = self._search_regex(
-            r'''<script[^>]+\bid\s*=\s*('|")__NEXT_DATA__\1[^>]*>(?P<nd>[^<]+)</script>''',
-            webpage, 'next.js data', group='nd', **kw)
-        if not next_data:
-            return {}
-        return self._parse_json(next_data, video_id, **nkw)
-
-    def _search_nuxt_data(self, webpage, video_id, *args, **kwargs):
-        """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
-
-        # self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)
-        context_name = args[0] if len(args) > 0 else kwargs.get('context_name', '__NUXT__')
-        fatal = kwargs.get('fatal', True)
-        traverse = kwargs.get('traverse', ('data', 0))
-
-        re_ctx = re.escape(context_name)
-
-        FUNCTION_RE = (r'\(\s*function\s*\((?P<arg_keys>[\s\S]*?)\)\s*\{\s*'
-                       r'return\s+(?P<js>\{[\s\S]*?})\s*;?\s*}\s*\((?P<arg_vals>[\s\S]*?)\)')
-
-        js, arg_keys, arg_vals = self._search_regex(
-            (p.format(re_ctx, FUNCTION_RE) for p in
-             (r'<script>\s*window\s*\.\s*{0}\s*=\s*{1}\s*\)\s*;?\s*</script>',
-              r'{0}\s*\([\s\S]*?{1}')),
-            webpage, context_name, group=('js', 'arg_keys', 'arg_vals'),
-            default=NO_DEFAULT if fatal else (None, None, None))
-        if js is None:
-            return {}
-
-        args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json(
-            '[{0}]'.format(arg_vals), video_id, transform_source=js_to_json, fatal=fatal) or ())))
-
-        ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
-        return traverse_obj(ret, traverse) or {}
-
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
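The removed _search_nuxt_data() captures the IIFE that a Nuxt.js page assigns to window.__NUXT__, JSON-encodes the call arguments, and relies on js_to_json() variable substitution to splice them back into the returned object literal. A stripped-down illustration of that flow, assuming the newer js_to_json with a vars mapping (shown in the removed tests) is available:

    import json
    import re

    from youtube_dl.utils import js_to_json

    html = 'window.__NUXT__=(function(a,b){return {track:{id:a,title:b}}}("testid","T"));'

    # capture the parameter names, the returned object literal and the call arguments
    m = re.search(
        r'\(\s*function\s*\((?P<arg_keys>[^)]*)\)\s*\{\s*'
        r'return\s+(?P<js>\{.*?})\s*;?\s*}\s*\((?P<arg_vals>.*?)\)', html)

    # map each parameter to its JSON-encoded argument, then substitute
    args = dict(zip(m.group('arg_keys').split(','),
                    map(json.dumps, json.loads(js_to_json('[%s]' % m.group('arg_vals'))))))
    print(json.loads(js_to_json(m.group('js'), args)))  # {'track': {'id': 'testid', 'title': 'T'}}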
@@ -2563,8 +2495,7 @@ class InfoExtractor(object):
                 return f
             return {}
 
-        def _media_formats(src, cur_media_type, type_info=None):
-            type_info = type_info or {}
+        def _media_formats(src, cur_media_type, type_info={}):
             full_url = absolute_url(src)
             ext = type_info.get('ext') or determine_ext(full_url)
             if ext == 'm3u8':
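On the 1fa8b86f0b side, _media_formats() avoids the classic mutable-default-argument pitfall by taking type_info=None and falling back with `or {}`: a default dict or list is created once, when the function is defined, and is then shared by every call that omits the argument. A minimal illustration of the pitfall (hypothetical names):

    def bad(item, bucket=[]):       # one list object shared across calls
        bucket.append(item)
        return bucket

    def good(item, bucket=None):    # fresh list per call, as in the hunk above
        bucket = bucket or []
        bucket.append(item)
        return bucket

    print(bad(1), bad(2))    # [1, 2] [1, 2] - state leaked via the default
    print(good(1), good(2))  # [1] [2]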
@@ -2582,7 +2513,6 @@ class InfoExtractor(object):
             formats = [{
                 'url': full_url,
                 'vcodec': 'none' if cur_media_type == 'audio' else None,
-                'ext': ext,
             }]
             return is_plain_url, formats
 
@@ -2591,7 +2521,7 @@ class InfoExtractor(object):
         # so we wll include them right here (see
         # https://www.ampproject.org/docs/reference/components/amp-video)
         # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
-        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video(?:-js)?|audio)'
+        _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
         media_tags = [(media_tag, media_tag_name, media_type, '')
                       for media_tag, media_tag_name, media_type
                       in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
@@ -2609,8 +2539,7 @@ class InfoExtractor(object):
             media_attributes = extract_attributes(media_tag)
             src = strip_or_none(media_attributes.get('src'))
             if src:
-                f = parse_content_type(media_attributes.get('type'))
-                _, formats = _media_formats(src, media_type, f)
+                _, formats = _media_formats(src, media_type)
                 media_info['formats'].extend(formats)
             media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
             if media_content:
diff --git a/youtube_dl/extractor/dlf.py b/youtube_dl/extractor/dlf.py
deleted file mode 100644
--- a/youtube_dl/extractor/dlf.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-)
-from ..utils import (
-    determine_ext,
-    extract_attributes,
-    int_or_none,
-    merge_dicts,
-    traverse_obj,
-    url_or_none,
-    variadic,
-)
-
-
-class DLFBaseIE(InfoExtractor):
-    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
-    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'
-
-    def _parse_button_attrs(self, button, audio_id=None):
-        attrs = extract_attributes(button)
-        audio_id = audio_id or attrs['data-audio-diraid']
-
-        url = traverse_obj(
-            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
-            'data-audio-src', expected_type=url_or_none)
-        ext = determine_ext(url)
-        formats = (self._extract_m3u8_formats(url, audio_id, fatal=False)
-                   if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}])
-        self._sort_formats(formats)
-
-        def traverse_attrs(path):
-            path = list(variadic(path))
-            t = path.pop() if callable(path[-1]) else None
-            return traverse_obj(attrs, path, expected_type=t, get_all=False)
-
-        def txt_or_none(v, default=None):
-            return default if v is None else (compat_str(v).strip() or default)
-
-        return merge_dicts(*reversed([{
-            'id': audio_id,
-            # 'extractor_key': DLFIE.ie_key(),
-            # 'extractor': DLFIE.IE_NAME,
-            'formats': formats,
-        }, dict((k, traverse_attrs(v)) for k, v in {
-            'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), txt_or_none),
-            'duration': (('data-audioduration', 'data-audio-duration'), int_or_none),
-            'thumbnail': ('data-audioimage', url_or_none),
-            'uploader': 'data-audio-producer',
-            'series': 'data-audio-series',
-            'channel': 'data-audio-origin-site-name',
-            'webpage_url': ('data-audio-download-tracking-path', url_or_none),
-        }.items())]))
-
-
-class DLFIE(DLFBaseIE):
-    IE_NAME = 'dlf'
-    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
-    _TESTS = [
-        # Audio as an HLS stream
-        {
-            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
-            'info_dict': {
-                'id': '03a3eb19',
-                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
-                'ext': 'm4a',
-                'duration': 3298,
-                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
-                'uploader': 'Deutschlandfunk',
-                'series': 'On Stage',
-                'channel': 'deutschlandfunk'
-            },
-            'params': {
-                'skip_download': 'm3u8'
-            },
-            'skip': 'This webpage no longer exists'
-        }, {
-            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
-            'info_dict': {
-                'id': 'd9cc1856',
-                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
-                'ext': 'mp3',
-                'duration': 291,
-                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
-                'uploader': 'Deutschlandfunk',
-                'series': 'Kommentare und Themen der Woche',
-                'channel': 'deutschlandfunk'
-            }
-        },
-    ]
-
-    def _real_extract(self, url):
-        audio_id = self._match_id(url)
-        webpage = self._download_webpage(url, audio_id)
-
-        return self._parse_button_attrs(
-            self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id)
-
-
-class DLFCorpusIE(DLFBaseIE):
-    IE_NAME = 'dlf:corpus'
-    IE_DESC = 'DLF Multi-feed Archives'
-    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
-    _TESTS = [
-        # Recorded news broadcast with referrals to related broadcasts
-        {
-            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
-            'info_dict': {
-                'id': 'fechten-russland-belarus-ukraine-protest-100',
-                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
-                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad'
-            },
-            'playlist_mincount': 5,
-            'playlist': [{
-                'info_dict': {
-                    'id': '1fc5d64a',
-                    'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
-                    'ext': 'mp3',
-                    'duration': 252,
-                    'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
-                    'uploader': 'Deutschlandfunk',
-                    'series': 'Sport',
-                    'channel': 'deutschlandfunk'
-                }
-            }, {
-                'info_dict': {
-                    'id': '2ada145f',
-                    'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
-                    'ext': 'mp3',
-                    'duration': 336,
-                    'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
-                    'uploader': 'Deutschlandfunk',
-                    'series': 'Deutschlandfunk Nova',
-                    'channel': 'deutschlandfunk-nova'
-                }
-            }, {
-                'info_dict': {
-                    'id': '5e55e8c9',
-                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
-                    'ext': 'mp3',
-                    'duration': 187,
-                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
-                    'uploader': 'Deutschlandfunk',
-                    'series': 'Sport am Samstag',
-                    'channel': 'deutschlandfunk'
-                }
-            }, {
-                'info_dict': {
-                    'id': '47e1a096',
-                    'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
-                    'ext': 'mp3',
-                    'duration': 602,
-                    'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
-                    'uploader': 'Deutschlandfunk',
-                    'series': 'Sport am Samstag',
-                    'channel': 'deutschlandfunk'
-                }
-            }, {
-                'info_dict': {
-                    'id': '5e55e8c9',
-                    'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
-                    'ext': 'mp3',
-                    'duration': 187,
-                    'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
-                    'uploader': 'Deutschlandfunk',
-                    'series': 'Sport am Samstag',
-                    'channel': 'deutschlandfunk'
-                }
-            }]
-        },
-        # Podcast feed with tag buttons, playlist count fluctuates
-        {
-            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
-            'info_dict': {
-                'id': 'kommentare-und-themen-der-woche-100',
-                'title': 'Meinung - Kommentare und Themen der Woche',
-                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
-            },
-            'playlist_mincount': 10,
-        },
-        # Podcast feed with no description
-        {
-            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
-            'info_dict': {
-                'id': 'podcast-tolle-idee-100',
-                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
-            },
-            'playlist_mincount': 11,
-        },
-    ]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-
-        return self.playlist_result(
-            map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)),
-            playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
-            self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage, default=None))
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -226,7 +226,6 @@ from .ciscolive import (
     CiscoLiveSearchIE,
 )
 from .cjsw import CJSWIE
-from .clipchamp import ClipchampIE
 from .cliphunter import CliphunterIE
 from .clippit import ClippitIE
 from .cliprs import ClipRsIE
@@ -296,10 +295,6 @@ from .dbtv import DBTVIE
 from .dctp import DctpTvIE
 from .deezer import DeezerPlaylistIE
 from .democracynow import DemocracynowIE
-from .dlf import (
-    DLFCorpusIE,
-    DLFIE,
-)
 from .dfb import DFBIE
 from .dhm import DHMIE
 from .digg import DiggIE
@@ -449,13 +444,6 @@ from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
 from .giga import GigaIE
 from .glide import GlideIE
-from .globalplayer import (
-    GlobalPlayerLiveIE,
-    GlobalPlayerLivePlaylistIE,
-    GlobalPlayerAudioIE,
-    GlobalPlayerAudioEpisodeIE,
-    GlobalPlayerVideoIE
-)
 from .globo import (
     GloboIE,
     GloboArticleIE,
@@ -987,10 +975,6 @@ from .pornhub import (
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
-from .pr0gramm import (
-    Pr0grammIE,
-    Pr0grammStaticIE,
-)
 from .puhutv import (
     PuhuTVIE,
     PuhuTVSerieIE,
@@ -1581,7 +1565,6 @@ from .weibo import (
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
-from .whyp import WhypIE
 from .wistia import (
     WistiaIE,
     WistiaPlaylistIE,
@@ -1695,3 +1678,7 @@ from .zingmp3 import (
 )
 from .zoom import ZoomIE
 from .zype import ZypeIE
+from .pr0gramm import (
+    Pr0grammIE,
+    Pr0grammStaticIE,
+)
@ -1,273 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
clean_html,
|
|
||||||
join_nonempty,
|
|
||||||
merge_dicts,
|
|
||||||
parse_duration,
|
|
||||||
str_or_none,
|
|
||||||
T,
|
|
||||||
traverse_obj,
|
|
||||||
unified_strdate,
|
|
||||||
unified_timestamp,
|
|
||||||
urlhandle_detect_ext,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalPlayerBaseIE(InfoExtractor):
|
|
||||||
|
|
||||||
def _get_page_props(self, url, video_id):
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
|
||||||
|
|
||||||
def _request_ext(self, url, video_id):
|
|
||||||
return urlhandle_detect_ext(self._request_webpage( # Server rejects HEAD requests
|
|
||||||
url, video_id, note='Determining source extension'))
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _clean_desc(x):
|
|
||||||
x = clean_html(x)
|
|
||||||
if x:
|
|
||||||
x = x.replace('\xa0', ' ')
|
|
||||||
return x
|
|
||||||
|
|
||||||
def _extract_audio(self, episode, series):
|
|
||||||
|
|
||||||
return merge_dicts({
|
|
||||||
'vcodec': 'none',
|
|
||||||
}, traverse_obj(series, {
|
|
||||||
'series': 'title',
|
|
||||||
'series_id': 'id',
|
|
||||||
'thumbnail': 'imageUrl',
|
|
||||||
'uploader': 'itunesAuthor', # podcasts only
|
|
||||||
}), traverse_obj(episode, {
|
|
||||||
'id': 'id',
|
|
||||||
'description': ('description', T(self._clean_desc)),
|
|
||||||
'duration': ('duration', T(parse_duration)),
|
|
||||||
'thumbnail': 'imageUrl',
|
|
||||||
'url': 'streamUrl',
|
|
||||||
'timestamp': (('pubDate', 'startDate'), T(unified_timestamp)),
|
|
||||||
'title': 'title',
|
|
||||||
}, get_all=False), rev=True)
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
|
|
||||||
_VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2mx1E',
|
|
||||||
'ext': 'aac',
|
|
||||||
'display_id': 'smoothchill-uk',
|
|
||||||
'title': 're:^Smooth Chill.+$',
|
|
||||||
'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
|
|
||||||
'description': 'Music To Chill To',
|
|
||||||
# 'live_status': 'is_live',
|
|
||||||
'is_live': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# national station
|
|
||||||
'url': 'https://www.globalplayer.com/live/heart/uk/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2mwx4',
|
|
||||||
'ext': 'aac',
|
|
||||||
'description': 'turn up the feel good!',
|
|
||||||
'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
|
|
||||||
# 'live_status': 'is_live',
|
|
||||||
'is_live': True,
|
|
||||||
'title': 're:^Heart UK.+$',
|
|
||||||
'display_id': 'heart-uk',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# regional variation
|
|
||||||
'url': 'https://www.globalplayer.com/live/heart/london/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'AMqg',
|
|
||||||
'ext': 'aac',
|
|
||||||
'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
|
|
||||||
'title': 're:^Heart London.+$',
|
|
||||||
# 'live_status': 'is_live',
|
|
||||||
'is_live': True,
|
|
||||||
'display_id': 'heart-london',
|
|
||||||
'description': 'turn up the feel good!',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
station = self._get_page_props(url, video_id)['station']
|
|
||||||
stream_url = station['streamUrl']
|
|
||||||
|
|
||||||
return merge_dicts({
|
|
||||||
'id': station['id'],
|
|
||||||
'display_id': (
|
|
||||||
join_nonempty('brandSlug', 'slug', from_dict=station)
|
|
||||||
or station.get('legacyStationPrefix')),
|
|
||||||
'url': stream_url,
|
|
||||||
'ext': self._request_ext(stream_url, video_id),
|
|
||||||
'vcodec': 'none',
|
|
||||||
'is_live': True,
|
|
||||||
}, {
|
|
||||||
'title': self._live_title(traverse_obj(
|
|
||||||
station, (('name', 'brandName'), T(str_or_none)),
|
|
||||||
get_all=False)),
|
|
||||||
}, traverse_obj(station, {
|
|
||||||
'description': 'tagline',
|
|
||||||
'thumbnail': 'brandLogo',
|
|
||||||
}), rev=True)
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
|
|
||||||
_VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
|
|
||||||
_TESTS = [{
|
|
||||||
# "live playlist"
|
|
||||||
'url': 'https://www.globalplayer.com/playlists/8bLk/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '8bLk',
|
|
||||||
'ext': 'aac',
|
|
||||||
# 'live_status': 'is_live',
|
|
||||||
'is_live': True,
|
|
||||||
'description': r're:(?s).+\bclassical\b.+\bClassic FM Hall [oO]f Fame\b',
|
|
||||||
'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
|
|
||||||
'title': 're:Classic FM Hall of Fame.+$'
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
station = self._get_page_props(url, video_id)['playlistData']
|
|
||||||
stream_url = station['streamUrl']
|
|
||||||
|
|
||||||
return merge_dicts({
|
|
||||||
'id': video_id,
|
|
||||||
'url': stream_url,
|
|
||||||
'ext': self._request_ext(stream_url, video_id),
|
|
||||||
'vcodec': 'none',
|
|
||||||
'is_live': True,
|
|
||||||
}, traverse_obj(station, {
|
|
||||||
'title': 'title',
|
|
||||||
'description': ('description', T(self._clean_desc)),
|
|
||||||
'thumbnail': 'image',
|
|
||||||
}), rev=True)
|
|
||||||
|
|
||||||
|
|
||||||
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': r're:(?s).+\bscam\b.+?\bseries available now\b',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        series = props['podcastInfo'] if podcast else props['catchupInfo']

        return merge_dicts({
            '_type': 'playlist',
            'id': video_id,
            'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
                series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
            'categories': traverse_obj(series, ('categories', Ellipsis, 'name')) or None,
        }, traverse_obj(series, {
            'description': ('description', T(self._clean_desc)),
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        }), rev=True)


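The path ('episodes', lambda _, v: v['id'] and v['streamUrl']) above branches over the episode list and keeps only the entries for which the callable is truthy. A minimal sketch with invented sample data, again assuming the backported traverse_obj:

    from youtube_dl.utils import traverse_obj

    series = {'episodes': [
        {'id': '1', 'streamUrl': 'https://example.invalid/1.mp3'},
        {'id': '2', 'streamUrl': None},  # dropped: no stream URL
    ]}
    playable = traverse_obj(series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))
    assert [ep['id'] for ep in playable] == ['1']
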
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'only_matching': True,
        # expired: refresh the details with a current show for a full test
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        episode = props['podcastEpisode'] if podcast else props['catchupEpisode']

        return self._extract_audio(
            episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})


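traverse_obj(episode, 'podcast', 'show', expected_type=dict) tries each alternative path in turn and returns the first value that is a dict, so podcast episodes and radio catchup episodes share one code path. A sketch with invented data:

    from youtube_dl.utils import traverse_obj

    episode = {'podcast': None, 'show': {'title': 'Nick Ferrari'}}
    series = traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}
    assert series == {'title': 'Nick Ferrari'}
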
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        meta = self._get_page_props(url, video_id)['videoData']

        return merge_dicts({
            'id': video_id,
        }, traverse_obj(meta, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', T(unified_strdate)),
            'description': 'description',
        }), rev=True)

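unified_strdate normalises the many date formats sites emit into the YYYYMMDD form that upload_date expects, which is what the ('publish_date', T(unified_strdate)) path above relies on. A quick sketch (the input string is an invented example):

    from youtube_dl.utils import unified_strdate

    assert unified_strdate('2023-04-20') == '20230420'
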
@@ -1,55 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    float_or_none,
    merge_dicts,
    str_or_none,
    T,
    traverse_obj,
    url_or_none,
)


class WhypIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
        'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
        'info_dict': {
            'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
            'id': '18337',
            'title': 'Home Page Example Track',
            'description': r're:(?s).+\bexample track\b',
            'ext': 'mp3',
            'duration': 52.82,
            'uploader': 'Brad',
            'uploader_id': '1',
            'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
        },
    }, {
        'url': 'https://www.whyp.it/tracks/18337',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        unique_id = self._match_id(url)
        webpage = self._download_webpage(url, unique_id)
        data = self._search_nuxt_data(webpage, unique_id)['rawTrack']

        return merge_dicts({
            'url': data['audio_url'],
            'id': unique_id,
        }, traverse_obj(data, {
            'title': 'title',
            'description': 'description',
            'duration': ('duration', T(float_or_none)),
            'uploader': ('user', 'username'),
            'uploader_id': ('user', 'id', T(str_or_none)),
            'thumbnail': ('artwork_url', T(url_or_none)),
        }), {
            'ext': 'mp3',
            'vcodec': 'none',
            'http_headers': {'Referer': 'https://whyp.it/'},
        }, rev=True)

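The T(float_or_none), T(str_or_none) and T(url_or_none) transforms above coerce loosely-typed Nuxt JSON values defensively, yielding None rather than raising on bad input. A small sketch:

    from youtube_dl.utils import float_or_none, str_or_none, url_or_none

    assert float_or_none('52.82') == 52.82
    assert float_or_none(None) is None
    assert str_or_none(1) == '1'
    assert url_or_none('not a url') is None
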
@@ -2996,8 +2996,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
         # Technically the Cookie header should be in unredirected_hdrs;
         # however in practice some may set it in normal headers anyway.
         # We will remove it here to prevent any leaks.
-        # Also remove unwanted and undocumented Host header for old URL
-        remove_headers = ['Cookie', 'Host']
+        remove_headers = ['Cookie']
 
         # A 303 must either use GET or HEAD for subsequent request
         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4

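Both sides of this hunk strip sensitive headers before following a redirect; the '-' side additionally drops Host so that a stale value cannot leak to the new location. A standalone sketch of the filtering idea (invented header dict, not the handler's actual code path):

    remove_headers = ['Cookie', 'Host']
    headers = {'User-Agent': 'youtube-dl', 'Cookie': 'sid=secret', 'Host': 'old.example.invalid'}
    safe = dict((k, v) for k, v in headers.items() if k.title() not in remove_headers)
    assert 'Cookie' not in safe and 'Host' not in safe
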
@@ -4269,9 +4268,13 @@ def variadic(x, allowed_types=NO_DEFAULT):
 
 
 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
-    exp = (lambda x: x or None) if skip_false_values else IDENTITY
-    return traverse_obj(d, *variadic(key_or_keys), expected_type=exp,
-                        default=default, get_all=False)
+    if isinstance(key_or_keys, (list, tuple)):
+        for key in key_or_keys:
+            if key not in d or d[key] is None or skip_false_values and not d[key]:
+                continue
+            return d[key]
+        return default
+    return d.get(key_or_keys, default)
 
 
 def try_call(*funcs, **kwargs):

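Observable behaviour is the same on both sides: keys are tried in order, missing and None values are skipped, falsy values are skipped unless skip_false_values=False, and the default is returned otherwise. A quick sketch:

    from youtube_dl.utils import dict_get

    d = {'a': '', 'b': 0, 'c': 'kept'}
    assert dict_get(d, ('a', 'b', 'c')) == 'kept'                  # '' and 0 skipped
    assert dict_get(d, ('a', 'b'), skip_false_values=False) == ''  # '' now accepted
    assert dict_get(d, 'missing', default='x') == 'x'
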
@@ -4304,38 +4307,16 @@ def try_get(src, getter, expected_type=None):
     return v
 
 
-def merge_dicts(*dicts, **kwargs):
-    """
-    Merge the `dict`s in `dicts` using the first valid value for each key.
-    Normally valid: not None and not an empty string
-
-    Keyword-only args:
-    unblank: allow empty string if False (default True)
-    rev: merge dicts in reverse order (default False)
-
-    merge_dicts(dct1, dct2, ..., unblank=False, rev=True)
-    matches {**dct1, **dct2, ...}
-
-    However, merge_dicts(dct1, dct2, ..., rev=True) may often be better.
-    """
-    unblank = kwargs.get('unblank', True)
-    rev = kwargs.get('rev', False)
-
-    if unblank:
-        def can_merge_str(k, v, to_dict):
-            return (isinstance(v, compat_str) and v
-                    and isinstance(to_dict[k], compat_str)
-                    and not to_dict[k])
-    else:
-        can_merge_str = lambda k, v, to_dict: False
-
+def merge_dicts(*dicts):
     merged = {}
-    for a_dict in reversed(dicts) if rev else dicts:
+    for a_dict in dicts:
         for k, v in a_dict.items():
             if v is None:
                 continue
-            if (k not in merged) or can_merge_str(k, v, merged):
+            if (k not in merged
+                    or (isinstance(v, compat_str) and v
+                        and isinstance(merged[k], compat_str)
+                        and not merged[k])):
                 merged[k] = v
     return merged

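merge_dicts keeps the first non-None value seen for each key, allowing a later non-empty string to replace an earlier empty one; the '-' side's rev=True merges in reverse so that later dicts take precedence, which is why the extractors above can list computed fallbacks first and scraped metadata last. A sketch (rev/unblank exist only on the '-' side):

    from youtube_dl.utils import merge_dicts

    assert merge_dicts({'t': 'A'}, {'t': 'B', 'x': 1}) == {'t': 'A', 'x': 1}
    # on the '-' side, rev=True would instead prefer the later dict:
    #   merge_dicts({'t': 'A'}, {'t': 'B', 'x': 1}, rev=True) == {'t': 'B', 'x': 1}
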
@@ -4389,108 +4370,46 @@ def strip_jsonp(code):
         r'\g<callback_data>', code)
 
 
-def js_to_json(code, *args, **kwargs):
-    # vars is a dict of (var, val) pairs to substitute
-    vars = args[0] if len(args) > 0 else kwargs.get('vars', {})
-    strict = kwargs.get('strict', False)
-
-    STRING_QUOTES = '\'"`'
-    STRING_RE = '|'.join(r'{0}(?:\\.|[^\\{0}])*{0}'.format(q) for q in STRING_QUOTES)
-    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
+def js_to_json(code):
+    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
     INTEGER_TABLE = (
         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
-        (r'(?s)^(\d+){skip}:?$'.format(skip=SKIP_RE), 10),
     )
-    # compat candidate
-    JSONDecodeError = json.JSONDecodeError if 'JSONDecodeError' in dir(json) else ValueError
-
-    def process_escape(match):
-        JSON_PASSTHROUGH_ESCAPES = r'"\bfnrtu'
-        escape = match.group(1) or match.group(2)
-
-        return ('\\' + escape if escape in JSON_PASSTHROUGH_ESCAPES
-                else '\\u00' if escape == 'x'
-                else '' if escape == '\n'
-                else escape)
-
-    def template_substitute(match):
-        evaluated = js_to_json(match.group(1), vars, strict=strict)
-        if evaluated[0] == '"':
-            return json.loads(evaluated)
-        return evaluated
 
     def fix_kv(m):
         v = m.group(0)
         if v in ('true', 'false', 'null'):
             return v
-        elif v in ('undefined', 'void 0'):
-            return 'null'
-        elif v.startswith('/*') or v.startswith('//') or v == ',':
-            return ''
-
-        if v[0] in STRING_QUOTES:
-            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
-            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
-            return '"{0}"'.format(escaped)
-
-        inv = IDENTITY
-        im = re.split(r'^!+', v)
-        if len(im) > 1 and not im[-1].endswith(':'):
-            if (len(v) - len(im[1])) % 2 == 1:
-                inv = lambda x: 'true' if x == 0 else 'false'
-            else:
-                inv = lambda x: 'false' if x == 0 else 'true'
-        if not any(x for x in im):
-            return
-        v = im[-1]
-
-        for regex, base in INTEGER_TABLE:
-            im = re.match(regex, v)
-            if im:
-                i = int(im.group(1), base)
-                return ('"%s":' if v.endswith(':') else '%s') % inv(i)
-
-        if v in vars:
-            try:
-                if not strict:
-                    json.loads(vars[v])
-            except JSONDecodeError:
-                return inv(json.dumps(vars[v]))
-            else:
-                return inv(vars[v])
-
-        if not strict:
-            v = try_call(inv, args=(v,), default=v)
-            if v in ('true', 'false'):
-                return v
-            return '"{0}"'.format(v)
-
-        raise ValueError('Unknown value: ' + v)
-
-    def create_map(mobj):
-        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
-
-    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
-    if not strict:
-        code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
-        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
-        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
-        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
+        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
+            return ""
+
+        if v[0] in ("'", '"'):
+            v = re.sub(r'(?s)\\.|"', lambda m: {
+                '"': '\\"',
+                "\\'": "'",
+                '\\\n': '',
+                '\\x': '\\u00',
+            }.get(m.group(0), m.group(0)), v[1:-1])
+        else:
+            for regex, base in INTEGER_TABLE:
+                im = re.match(regex, v)
+                if im:
+                    i = int(im.group(1), base)
+                    return '"%d":' % i if v.endswith(':') else '%d' % i
+
+        return '"%s"' % v
 
     return re.sub(r'''(?sx)
-        {str_}|
-        {comment}|
-        ,(?={skip}[\]}}])|
-        void\s0|
-        !*(?:(?<!\d)[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
-        (?:\b|!+)0(?:[xX][\da-fA-F]+|[0-7]+)(?:{skip}:)?|
-        !+\d+(?:\.\d*)?(?:{skip}:)?|
-        [0-9]+(?:{skip}:)|
+        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+        {comment}|,(?={skip}[\]}}])|
+        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
+        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+        [0-9]+(?={skip}:)|
         !+
-        '''.format(comment=COMMENT_RE, skip=SKIP_RE, str_=STRING_RE), fix_kv, code)
+        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
 
 
 def qualities(quality_ids):

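Either version converts a JavaScript object literal into parseable JSON: unquoted keys get quoted, single-quoted strings become double-quoted, hex and octal integers are decoded, and trailing commas are dropped (the '-' side additionally handles template literals, undefined/void 0, ! negation and a vars substitution table). A round-trip sketch:

    import json
    from youtube_dl.utils import js_to_json

    assert json.loads(js_to_json("{foo: 'bar', baz: 0x10,}")) == {'foo': 'bar', 'baz': 16}
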
@@ -6110,37 +6029,6 @@ def clean_podcast_url(url):
         )/''', '', url)
 
 
-if __debug__:
-    # Raise TypeError if args can't be bound
-    # needs compat owing to unstable inspect API, thanks PSF :-(
-    try:
-        inspect.signature
-
-        def _try_bind_args(fn, *args, **kwargs):
-            inspect.signature(fn).bind(*args, **kwargs)
-    except AttributeError:
-        # Py < 3.3
-        def _try_bind_args(fn, *args, **kwargs):
-            fn_args = inspect.getargspec(fn)
-            # Py2: ArgInfo(args, varargs, keywords, defaults)
-            # Py3: ArgSpec(args, varargs, keywords, defaults)
-            if not fn_args.keywords:
-                for k in kwargs:
-                    if k not in (fn_args.args or []):
-                        raise TypeError("got an unexpected keyword argument: '{0}'".format(k))
-            if not fn_args.varargs:
-                args_to_bind = len(args)
-                bindable = len(fn_args.args or [])
-                if args_to_bind > bindable:
-                    raise TypeError('too many positional arguments')
-                bindable -= len(fn_args.defaults or [])
-                if args_to_bind < bindable:
-                    if kwargs:
-                        bindable -= len(set(fn_args.args or []) & set(kwargs))
-                    if bindable > args_to_bind:
-                        raise TypeError("missing a required argument: '{0}'".format(fn_args.args[args_to_bind]))
-
-
 def traverse_obj(obj, *paths, **kwargs):
     """
     Safely traverse nested `dict`s and `Iterable`s

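The removed _try_bind_args helper gives debug-mode callers a portable "would these arguments bind?" check across the unstable inspect API. On Python 3.3+ it reduces to inspect.signature; a standalone sketch:

    import inspect

    def fn(a, b):
        return a, b

    inspect.signature(fn).bind(1, 2)   # binds fine
    try:
        inspect.signature(fn).bind(1)  # 'b' is missing
    except TypeError as e:
        print(e)                       # missing a required argument: 'b'
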
@@ -6359,7 +6247,10 @@ def traverse_obj(obj, *paths, **kwargs):
 
         if __debug__ and callable(key):
             # Verify function signature
-            _try_bind_args(key, None, None)
+            args = inspect.getargspec(key)
+            if len(args.args) != 2:
+                # crash differently in 2.6 !
+                inspect.getcallargs(key, None, None)
 
         new_objs = []
         for obj in objs:

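Both sides assert, in debug builds, that a callable key accepts two arguments (key, value); the '+' side leans on inspect.getargspec, which was removed in Python 3.11, hence the '-' side's _try_bind_args indirection. A sketch of the contract being checked:

    from youtube_dl.utils import traverse_obj

    data = {'eps': [{'id': 1}, {'id': 0}]}
    # a callable key receives (key, value) and acts as a filter over the branch:
    assert traverse_obj(data, ('eps', lambda _, v: v['id'])) == [{'id': 1}]
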