mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2025-01-01 21:42:07 +00:00
Compare commits
17 Commits
70a4a8b752
...
6f8c2635a5
Author | SHA1 | Date | |
---|---|---|---|
|
6f8c2635a5 | ||
|
de48105dd8 | ||
|
822f19f05d | ||
|
33db85c571 | ||
|
f33923cba7 | ||
|
e8198c517b | ||
|
bafb6dec72 | ||
|
4e04f10499 | ||
|
90c9f789d9 | ||
|
249f2b6316 | ||
|
d6b14ba316 | ||
|
30e986b834 | ||
|
58988c1421 | ||
|
e19ec52322 | ||
|
f2f90887ca | ||
|
cd987e6fca | ||
|
d947ffe8e3 |
@ -13,6 +13,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
|||||||
lazy_extractors_filename = sys.argv[1]
|
lazy_extractors_filename = sys.argv[1]
|
||||||
if os.path.exists(lazy_extractors_filename):
|
if os.path.exists(lazy_extractors_filename):
|
||||||
os.remove(lazy_extractors_filename)
|
os.remove(lazy_extractors_filename)
|
||||||
|
# Py2: a leftover lazy_extractors.pyc may still be picked up by the import below, so remove it as well
|
||||||
|
try:
|
||||||
|
os.remove(lazy_extractors_filename + 'c')
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
from youtube_dl.extractor import _ALL_CLASSES
|
from youtube_dl.extractor import _ALL_CLASSES
|
||||||
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
@ -22,7 +27,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
|
|||||||
|
|
||||||
module_contents = [
|
module_contents = [
|
||||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
||||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
|
||||||
|
# needed for suitable() methods of Youtube extractor (see #28780)
|
||||||
|
'from youtube_dl.utils import parse_qs\n',
|
||||||
|
]
|
||||||
|
|
||||||
ie_template = '''
|
ie_template = '''
|
||||||
class {name}({bases}):
|
class {name}({bases}):
|
||||||
|
@ -89,6 +89,17 @@ class FakeYDL(YoutubeDL):
|
|||||||
self.report_warning = types.MethodType(report_warning, self)
|
self.report_warning = types.MethodType(report_warning, self)
|
||||||
|
|
||||||
|
|
||||||
|
class FakeLogger(object):
    """Logger stand-in whose debug/warning/error methods discard the message."""

    def _discard(self, msg):
        # Intentionally a no-op: tests pass this object as params['logger']
        # so that nothing is printed.
        return None

    # All three logging entry points share the same no-op implementation.
    debug = _discard
    warning = _discard
    error = _discard
|
||||||
|
|
||||||
|
|
||||||
def gettestcases(include_onlymatching=False):
|
def gettestcases(include_onlymatching=False):
|
||||||
for ie in youtube_dl.extractor.gen_extractors():
|
for ie in youtube_dl.extractor.gen_extractors():
|
||||||
for tc in ie.get_testcases(include_onlymatching):
|
for tc in ie.get_testcases(include_onlymatching):
|
||||||
|
@ -11,6 +11,7 @@ from test.helper import try_rm
|
|||||||
|
|
||||||
|
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.utils import DownloadError
|
||||||
|
|
||||||
|
|
||||||
def _download_restricted(url, filename, age):
|
def _download_restricted(url, filename, age):
|
||||||
@ -26,7 +27,10 @@ def _download_restricted(url, filename, age):
|
|||||||
ydl.add_default_info_extractors()
|
ydl.add_default_info_extractors()
|
||||||
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
json_filename = os.path.splitext(filename)[0] + '.info.json'
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
ydl.download([url])
|
try:
|
||||||
|
ydl.download([url])
|
||||||
|
except DownloadError:
|
||||||
|
try_rm(json_filename)
|
||||||
res = os.path.exists(json_filename)
|
res = os.path.exists(json_filename)
|
||||||
try_rm(json_filename)
|
try_rm(json_filename)
|
||||||
return res
|
return res
|
||||||
@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase):
|
|||||||
self.assertFalse(_download_restricted(url, filename, age))
|
self.assertFalse(_download_restricted(url, filename, age))
|
||||||
|
|
||||||
def test_youtube(self):
|
def test_youtube(self):
|
||||||
self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
|
self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
|
||||||
|
|
||||||
def test_youporn(self):
|
def test_youporn(self):
|
||||||
self._assert_restricted(
|
self._assert_restricted(
|
||||||
'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
|
'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
|
||||||
'505835.mp4', 2, old_age=25)
|
'16715086.mp4', 2, old_age=25)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -48,10 +48,11 @@ class TestCompat(unittest.TestCase):
|
|||||||
|
|
||||||
def test_all_present(self):
|
def test_all_present(self):
|
||||||
import youtube_dl.compat
|
import youtube_dl.compat
|
||||||
all_names = youtube_dl.compat.__all__
|
all_names = sorted(
|
||||||
present_names = set(filter(
|
youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
|
||||||
|
present_names = set(map(compat_str, filter(
|
||||||
lambda c: '_' in c and not c.startswith('_'),
|
lambda c: '_' in c and not c.startswith('_'),
|
||||||
dir(youtube_dl.compat))) - set(['unicode_literals'])
|
dir(youtube_dl.compat)))) - set(['unicode_literals'])
|
||||||
self.assertEqual(all_names, sorted(present_names))
|
self.assertEqual(all_names, sorted(present_names))
|
||||||
|
|
||||||
def test_compat_urllib_parse_unquote(self):
|
def test_compat_urllib_parse_unquote(self):
|
||||||
|
115
test/test_downloader_external.py
Normal file
115
test/test_downloader_external.py
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
# Allow direct execution
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import subprocess
|
||||||
|
import unittest
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import (
|
||||||
|
FakeLogger,
|
||||||
|
http_server_port,
|
||||||
|
try_rm,
|
||||||
|
)
|
||||||
|
from youtube_dl import YoutubeDL
|
||||||
|
from youtube_dl.compat import compat_http_server
|
||||||
|
from youtube_dl.utils import encodeFilename
|
||||||
|
from youtube_dl.downloader.external import Aria2pFD
|
||||||
|
import threading
|
||||||
|
|
||||||
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
TEST_SIZE = 10 * 1024
|
||||||
|
|
||||||
|
|
||||||
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """Request handler serving TEST_SIZE bytes of '#' under a few fixed paths.

    The paths differ only in whether a Content-Range and/or Content-Length
    header is sent with the response.
    """

    def log_message(self, format, *args):
        """Discard log output instead of printing it."""
        return None

    def send_content_range(self, total=None):
        """Send a Content-Range header mirroring the request's Range header.

        Returns the number of bytes in the requested range when the request
        carries a ``bytes=<start>-<end>`` Range header; otherwise sends
        nothing and returns *total*.
        """
        header = self.headers.get('Range')
        match = re.match(r'bytes=(\d+)-(\d+)', header) if header else None
        if not match:
            return total

        first, last = int(match.group(1)), int(match.group(2))
        value = 'bytes %d-%d' % (first, last)
        if total:
            value = '%s/%d' % (value, total)
        self.send_header('Content-Range', value)
        return last - first + 1

    def serve(self, range=True, content_length=True):
        """Send a 200 response whose body is a run of '#' bytes.

        range: when true, honour the request's Range header via
            send_content_range(), which also decides the body size.
        content_length: when true, send a Content-Length header.
        """
        self.send_response(200)
        self.send_header('Content-Type', 'video/mp4')
        body_size = self.send_content_range(TEST_SIZE) if range else TEST_SIZE
        if content_length:
            self.send_header('Content-Length', body_size)
        self.end_headers()
        self.wfile.write(b'#' * body_size)

    def do_GET(self):
        """Dispatch on the request path to the matching serve() variant."""
        # path -> (range, content_length)
        options = {
            '/regular': (True, True),
            '/no-content-length': (True, False),
            '/no-range': (False, True),
            '/no-range-no-content-length': (False, False),
        }
        if self.path in options:
            with_range, with_length = options[self.path]
            self.serve(range=with_range, content_length=with_length)
        else:
            assert False, 'unrecognised server path'
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipUnless(Aria2pFD.available(), 'aria2p module not found')
class TestAria2pFD(unittest.TestCase):
    """Download from a local HTTP server through the 'aria2p' external downloader."""

    def setUp(self):
        # Bind to port 0 and read the port back through http_server_port()
        # (presumably the port actually bound -- helper is defined elsewhere).
        self.httpd = compat_http_server.HTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def download(self, params, ep):
        """Download endpoint *ep* of the local server and check the file size.

        An ``aria2c --enable-rpc`` process is started for the duration of the
        download. If that process has already exited by the time it is
        checked, the download is not attempted.

        params: YoutubeDL options; 'logger' and 'outtmpl' are overwritten.
        ep: path component (without leading slash) served by the test handler.
        """
        with subprocess.Popen(
            ['aria2c', '--enable-rpc'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL
        ) as process:
            try:
                # poll() returns None only while the child is still running;
                # an exit status of 0 is falsy too, so compare with None.
                if process.poll() is None:
                    filename = 'testfile.mp4'
                    params['logger'] = FakeLogger()
                    params['outtmpl'] = filename
                    ydl = YoutubeDL(params)
                    try_rm(encodeFilename(filename))
                    self.assertEqual(ydl.download(['http://127.0.0.1:%d/%s' % (self.port, ep)]), 0)
                    self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE)
                    try_rm(encodeFilename(filename))
            finally:
                # Leaving the Popen context waits for the child, so the RPC
                # daemon must be killed even when an assertion above raised.
                process.kill()

    def download_all(self, params):
        """Run download() against every endpoint the test handler serves."""
        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
            self.download(params, ep)

    def test_regular(self):
        self.download_all({'external_downloader': 'aria2p'})

    def test_chunked(self):
        self.download_all({
            'external_downloader': 'aria2p',
            'http_chunk_size': 1000,
        })
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -9,7 +9,11 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import http_server_port, try_rm
|
from test.helper import (
|
||||||
|
FakeLogger,
|
||||||
|
http_server_port,
|
||||||
|
try_rm,
|
||||||
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server
|
from youtube_dl.compat import compat_http_server
|
||||||
from youtube_dl.downloader.http import HttpFD
|
from youtube_dl.downloader.http import HttpFD
|
||||||
@ -66,17 +70,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
|||||||
assert False
|
assert False
|
||||||
|
|
||||||
|
|
||||||
class FakeLogger(object):
|
|
||||||
def debug(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def warning(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def error(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestHttpFD(unittest.TestCase):
|
class TestHttpFD(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
self.httpd = compat_http_server.HTTPServer(
|
||||||
|
@ -40,14 +40,16 @@ class TestExecution(unittest.TestCase):
|
|||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
def test_lazy_extractors(self):
|
def test_lazy_extractors(self):
|
||||||
|
lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
||||||
finally:
|
finally:
|
||||||
try:
|
for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
|
||||||
os.remove('youtube_dl/extractor/lazy_extractors.py')
|
try:
|
||||||
except (IOError, OSError):
|
os.remove(lazy_extractors + x)
|
||||||
pass
|
except (IOError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -8,7 +8,10 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import http_server_port
|
from test.helper import (
|
||||||
|
FakeLogger,
|
||||||
|
http_server_port,
|
||||||
|
)
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||||
import ssl
|
import ssl
|
||||||
@ -52,17 +55,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
|||||||
assert False
|
assert False
|
||||||
|
|
||||||
|
|
||||||
class FakeLogger(object):
|
|
||||||
def debug(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def warning(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def error(self, msg):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class TestHTTP(unittest.TestCase):
|
class TestHTTP(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.httpd = compat_http_server.HTTPServer(
|
self.httpd = compat_http_server.HTTPServer(
|
||||||
|
@ -21,6 +21,10 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
# naming convention
|
||||||
|
# 'compat_' + Python3_name.replace('.', '_')
|
||||||
|
# other aliases exist for convenience and/or legacy
|
||||||
|
|
||||||
# deal with critical unicode/str things first
|
# deal with critical unicode/str things first
|
||||||
try:
|
try:
|
||||||
# Python 2
|
# Python 2
|
||||||
@ -28,6 +32,7 @@ try:
|
|||||||
unicode, basestring, unichr
|
unicode, basestring, unichr
|
||||||
)
|
)
|
||||||
from .casefold import casefold as compat_casefold
|
from .casefold import casefold as compat_casefold
|
||||||
|
|
||||||
except NameError:
|
except NameError:
|
||||||
compat_str, compat_basestring, compat_chr = (
|
compat_str, compat_basestring, compat_chr = (
|
||||||
str, str, chr
|
str, str, chr
|
||||||
@ -53,16 +58,15 @@ try:
|
|||||||
import urllib.parse as compat_urllib_parse
|
import urllib.parse as compat_urllib_parse
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import urllib as compat_urllib_parse
|
import urllib as compat_urllib_parse
|
||||||
|
import urlparse as _urlparse
|
||||||
|
for a in dir(_urlparse):
|
||||||
|
if not hasattr(compat_urllib_parse, a):
|
||||||
|
setattr(compat_urllib_parse, a, getattr(_urlparse, a))
|
||||||
|
del _urlparse
|
||||||
|
|
||||||
try:
|
# unfavoured aliases
|
||||||
from urllib.parse import urlparse as compat_urllib_parse_urlparse
|
compat_urlparse = compat_urllib_parse
|
||||||
except ImportError: # Python 2
|
compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
|
||||||
from urlparse import urlparse as compat_urllib_parse_urlparse
|
|
||||||
|
|
||||||
try:
|
|
||||||
import urllib.parse as compat_urlparse
|
|
||||||
except ImportError: # Python 2
|
|
||||||
import urlparse as compat_urlparse
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import urllib.response as compat_urllib_response
|
import urllib.response as compat_urllib_response
|
||||||
@ -73,6 +77,7 @@ try:
|
|||||||
import http.cookiejar as compat_cookiejar
|
import http.cookiejar as compat_cookiejar
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import cookielib as compat_cookiejar
|
import cookielib as compat_cookiejar
|
||||||
|
compat_http_cookiejar = compat_cookiejar
|
||||||
|
|
||||||
if sys.version_info[0] == 2:
|
if sys.version_info[0] == 2:
|
||||||
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
|
||||||
@ -84,11 +89,13 @@ if sys.version_info[0] == 2:
|
|||||||
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
|
||||||
else:
|
else:
|
||||||
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
compat_cookiejar_Cookie = compat_cookiejar.Cookie
|
||||||
|
compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import http.cookies as compat_cookies
|
import http.cookies as compat_cookies
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import Cookie as compat_cookies
|
import Cookie as compat_cookies
|
||||||
|
compat_http_cookies = compat_cookies
|
||||||
|
|
||||||
if sys.version_info[0] == 2:
|
if sys.version_info[0] == 2:
|
||||||
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||||
@ -98,6 +105,7 @@ if sys.version_info[0] == 2:
|
|||||||
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||||
else:
|
else:
|
||||||
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||||
|
compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import html.entities as compat_html_entities
|
import html.entities as compat_html_entities
|
||||||
@ -2351,16 +2359,19 @@ try:
|
|||||||
from urllib.error import HTTPError as compat_HTTPError
|
from urllib.error import HTTPError as compat_HTTPError
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib2 import HTTPError as compat_HTTPError
|
from urllib2 import HTTPError as compat_HTTPError
|
||||||
|
compat_urllib_HTTPError = compat_HTTPError
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.request import urlretrieve as compat_urlretrieve
|
from urllib.request import urlretrieve as compat_urlretrieve
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from urllib import urlretrieve as compat_urlretrieve
|
from urllib import urlretrieve as compat_urlretrieve
|
||||||
|
compat_urllib_request_urlretrieve = compat_urlretrieve
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from html.parser import HTMLParser as compat_HTMLParser
|
from html.parser import HTMLParser as compat_HTMLParser
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
from HTMLParser import HTMLParser as compat_HTMLParser
|
from HTMLParser import HTMLParser as compat_HTMLParser
|
||||||
|
compat_html_parser_HTMLParser = compat_HTMLParser
|
||||||
|
|
||||||
try: # Python 2
|
try: # Python 2
|
||||||
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
from HTMLParser import HTMLParseError as compat_HTMLParseError
|
||||||
@ -2374,6 +2385,7 @@ except ImportError: # Python <3.4
|
|||||||
# and uniform cross-version exception handling
|
# and uniform cross-version exception handling
|
||||||
class compat_HTMLParseError(Exception):
|
class compat_HTMLParseError(Exception):
|
||||||
pass
|
pass
|
||||||
|
compat_html_parser_HTMLParseError = compat_HTMLParseError
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from subprocess import DEVNULL
|
from subprocess import DEVNULL
|
||||||
@ -2390,6 +2402,8 @@ try:
|
|||||||
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
|
||||||
from urllib.parse import unquote as compat_urllib_parse_unquote
|
from urllib.parse import unquote as compat_urllib_parse_unquote
|
||||||
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
|
||||||
|
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
||||||
|
from urllib.parse import parse_qs as compat_parse_qs
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
|
||||||
else re.compile(r'([\x00-\x7f]+)'))
|
else re.compile(r'([\x00-\x7f]+)'))
|
||||||
@ -2456,9 +2470,6 @@ except ImportError: # Python 2
|
|||||||
string = string.replace('+', ' ')
|
string = string.replace('+', ' ')
|
||||||
return compat_urllib_parse_unquote(string, encoding, errors)
|
return compat_urllib_parse_unquote(string, encoding, errors)
|
||||||
|
|
||||||
try:
|
|
||||||
from urllib.parse import urlencode as compat_urllib_parse_urlencode
|
|
||||||
except ImportError: # Python 2
|
|
||||||
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
|
# Python 2 will choke in urlencode on mixture of byte and unicode strings.
|
||||||
# Possible solutions are to either port it from python 3 with all
|
# Possible solutions are to either port it from python 3 with all
|
||||||
# the friends or manually ensure input query contains only byte strings.
|
# the friends or manually ensure input query contains only byte strings.
|
||||||
@ -2480,7 +2491,62 @@ except ImportError: # Python 2
|
|||||||
def encode_list(l):
|
def encode_list(l):
|
||||||
return [encode_elem(e) for e in l]
|
return [encode_elem(e) for e in l]
|
||||||
|
|
||||||
return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
|
return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
|
||||||
|
|
||||||
|
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
||||||
|
# Python 2's version is apparently totally broken
|
||||||
|
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
||||||
|
encoding='utf-8', errors='replace'):
|
||||||
|
qs, _coerce_result = qs, compat_str
|
||||||
|
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
||||||
|
r = []
|
||||||
|
for name_value in pairs:
|
||||||
|
if not name_value and not strict_parsing:
|
||||||
|
continue
|
||||||
|
nv = name_value.split('=', 1)
|
||||||
|
if len(nv) != 2:
|
||||||
|
if strict_parsing:
|
||||||
|
raise ValueError('bad query field: %r' % (name_value,))
|
||||||
|
# Handle case of a control-name with no equal sign
|
||||||
|
if keep_blank_values:
|
||||||
|
nv.append('')
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
if len(nv[1]) or keep_blank_values:
|
||||||
|
name = nv[0].replace('+', ' ')
|
||||||
|
name = compat_urllib_parse_unquote(
|
||||||
|
name, encoding=encoding, errors=errors)
|
||||||
|
name = _coerce_result(name)
|
||||||
|
value = nv[1].replace('+', ' ')
|
||||||
|
value = compat_urllib_parse_unquote(
|
||||||
|
value, encoding=encoding, errors=errors)
|
||||||
|
value = _coerce_result(value)
|
||||||
|
r.append((name, value))
|
||||||
|
return r
|
||||||
|
|
||||||
|
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                    encoding='utf-8', errors='replace'):
    """Parse query string *qs* into a dict mapping each name to a list of values.

    Values of a repeated name are collected in order of appearance. All
    arguments are passed through to _parse_qsl().
    """
    grouped = {}
    for key, val in _parse_qsl(qs, keep_blank_values, strict_parsing,
                               encoding=encoding, errors=errors):
        grouped.setdefault(key, []).append(val)
    return grouped
|
||||||
|
|
||||||
|
# Keep the current urlencode reachable as compat_urllib_parse._urlencode
# before the public 'urlencode' attribute is rebound in the loop below.
setattr(compat_urllib_parse, '_urlencode',
|
||||||
|
getattr(compat_urllib_parse, 'urlencode'))
|
||||||
|
# Install the compat implementations as attributes of compat_urllib_parse.
for name, fix in (
|
||||||
|
('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
|
||||||
|
# NOTE(review): unlike the other entries, 'parse_unquote' keeps the 'parse_'
# part of the compat name -- possibly meant 'unquote'; confirm before changing.
('parse_unquote', compat_urllib_parse_unquote),
|
||||||
|
('unquote_plus', compat_urllib_parse_unquote_plus),
|
||||||
|
('urlencode', compat_urllib_parse_urlencode),
|
||||||
|
('parse_qs', compat_parse_qs)):
|
||||||
|
setattr(compat_urllib_parse, name, fix)
|
||||||
|
|
||||||
|
compat_urllib_parse_parse_qs = compat_parse_qs
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
from urllib.request import DataHandler as compat_urllib_request_DataHandler
|
||||||
@ -2520,6 +2586,7 @@ try:
|
|||||||
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
|
||||||
except ImportError: # Python 2.6
|
except ImportError: # Python 2.6
|
||||||
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
from xml.parsers.expat import ExpatError as compat_xml_parse_error
|
||||||
|
compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
|
||||||
|
|
||||||
etree = xml.etree.ElementTree
|
etree = xml.etree.ElementTree
|
||||||
|
|
||||||
@ -2533,10 +2600,11 @@ try:
|
|||||||
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
# xml.etree.ElementTree.Element is a method in Python <=2.6 and
|
||||||
# the following will crash with:
|
# the following will crash with:
|
||||||
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
# TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
|
||||||
isinstance(None, xml.etree.ElementTree.Element)
|
isinstance(None, etree.Element)
|
||||||
from xml.etree.ElementTree import Element as compat_etree_Element
|
from xml.etree.ElementTree import Element as compat_etree_Element
|
||||||
except TypeError: # Python <=2.6
|
except TypeError: # Python <=2.6
|
||||||
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
|
||||||
|
compat_xml_etree_ElementTree_Element = compat_etree_Element
|
||||||
|
|
||||||
if sys.version_info[0] >= 3:
|
if sys.version_info[0] >= 3:
|
||||||
def compat_etree_fromstring(text):
|
def compat_etree_fromstring(text):
|
||||||
@ -2592,6 +2660,7 @@ else:
|
|||||||
if k == uri or v == prefix:
|
if k == uri or v == prefix:
|
||||||
del etree._namespace_map[k]
|
del etree._namespace_map[k]
|
||||||
etree._namespace_map[uri] = prefix
|
etree._namespace_map[uri] = prefix
|
||||||
|
compat_xml_etree_register_namespace = compat_etree_register_namespace
|
||||||
|
|
||||||
if sys.version_info < (2, 7):
|
if sys.version_info < (2, 7):
|
||||||
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
# Here comes the crazy part: In 2.6, if the xpath is a unicode,
|
||||||
@ -2603,53 +2672,6 @@ if sys.version_info < (2, 7):
|
|||||||
else:
|
else:
|
||||||
compat_xpath = lambda xpath: xpath
|
compat_xpath = lambda xpath: xpath
|
||||||
|
|
||||||
try:
|
|
||||||
from urllib.parse import parse_qs as compat_parse_qs
|
|
||||||
except ImportError: # Python 2
|
|
||||||
# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
|
|
||||||
# Python 2's version is apparently totally broken
|
|
||||||
|
|
||||||
def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
|
|
||||||
encoding='utf-8', errors='replace'):
|
|
||||||
qs, _coerce_result = qs, compat_str
|
|
||||||
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
|
|
||||||
r = []
|
|
||||||
for name_value in pairs:
|
|
||||||
if not name_value and not strict_parsing:
|
|
||||||
continue
|
|
||||||
nv = name_value.split('=', 1)
|
|
||||||
if len(nv) != 2:
|
|
||||||
if strict_parsing:
|
|
||||||
raise ValueError('bad query field: %r' % (name_value,))
|
|
||||||
# Handle case of a control-name with no equal sign
|
|
||||||
if keep_blank_values:
|
|
||||||
nv.append('')
|
|
||||||
else:
|
|
||||||
continue
|
|
||||||
if len(nv[1]) or keep_blank_values:
|
|
||||||
name = nv[0].replace('+', ' ')
|
|
||||||
name = compat_urllib_parse_unquote(
|
|
||||||
name, encoding=encoding, errors=errors)
|
|
||||||
name = _coerce_result(name)
|
|
||||||
value = nv[1].replace('+', ' ')
|
|
||||||
value = compat_urllib_parse_unquote(
|
|
||||||
value, encoding=encoding, errors=errors)
|
|
||||||
value = _coerce_result(value)
|
|
||||||
r.append((name, value))
|
|
||||||
return r
|
|
||||||
|
|
||||||
def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
|
|
||||||
encoding='utf-8', errors='replace'):
|
|
||||||
parsed_result = {}
|
|
||||||
pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
|
|
||||||
encoding=encoding, errors=errors)
|
|
||||||
for name, value in pairs:
|
|
||||||
if name in parsed_result:
|
|
||||||
parsed_result[name].append(value)
|
|
||||||
else:
|
|
||||||
parsed_result[name] = [value]
|
|
||||||
return parsed_result
|
|
||||||
|
|
||||||
|
|
||||||
compat_os_name = os._name if os.name == 'java' else os.name
|
compat_os_name = os._name if os.name == 'java' else os.name
|
||||||
|
|
||||||
@ -2774,6 +2796,8 @@ else:
|
|||||||
else:
|
else:
|
||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
|
||||||
|
compat_os_path_expanduser = compat_expanduser
|
||||||
|
|
||||||
|
|
||||||
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
# os.path.realpath on Windows does not follow symbolic links
|
# os.path.realpath on Windows does not follow symbolic links
|
||||||
@ -2785,6 +2809,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
|||||||
else:
|
else:
|
||||||
compat_realpath = os.path.realpath
|
compat_realpath = os.path.realpath
|
||||||
|
|
||||||
|
compat_os_path_realpath = compat_realpath
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
def compat_print(s):
|
def compat_print(s):
|
||||||
@ -2805,11 +2831,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
|
|||||||
else:
|
else:
|
||||||
compat_getpass = getpass.getpass
|
compat_getpass = getpass.getpass
|
||||||
|
|
||||||
|
compat_getpass_getpass = compat_getpass
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
compat_input = raw_input
|
compat_input = raw_input
|
||||||
except NameError: # Python 3
|
except NameError: # Python 3
|
||||||
compat_input = input
|
compat_input = input
|
||||||
|
|
||||||
|
|
||||||
# Python < 2.6.5 requires kwargs to be bytes
|
# Python < 2.6.5 requires kwargs to be bytes
|
||||||
try:
|
try:
|
||||||
def _testfunc(x):
|
def _testfunc(x):
|
||||||
@ -2915,15 +2945,16 @@ else:
|
|||||||
lines = _lines
|
lines = _lines
|
||||||
return _terminal_size(columns, lines)
|
return _terminal_size(columns, lines)
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
itertools.count(start=0, step=1)
|
itertools.count(start=0, step=1)
|
||||||
compat_itertools_count = itertools.count
|
compat_itertools_count = itertools.count
|
||||||
except TypeError: # Python 2.6
|
except TypeError: # Python 2.6
|
||||||
def compat_itertools_count(start=0, step=1):
|
def compat_itertools_count(start=0, step=1):
|
||||||
n = start
|
|
||||||
while True:
|
while True:
|
||||||
yield n
|
yield start
|
||||||
n += step
|
start += step
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info >= (3, 0):
|
if sys.version_info >= (3, 0):
|
||||||
from tokenize import tokenize as compat_tokenize_tokenize
|
from tokenize import tokenize as compat_tokenize_tokenize
|
||||||
@ -3075,6 +3106,8 @@ if sys.version_info < (3, 3):
|
|||||||
else:
|
else:
|
||||||
compat_b64decode = base64.b64decode
|
compat_b64decode = base64.b64decode
|
||||||
|
|
||||||
|
compat_base64_b64decode = compat_b64decode
|
||||||
|
|
||||||
|
|
||||||
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
|
||||||
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
|
||||||
@ -3094,30 +3127,53 @@ else:
|
|||||||
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
return ctypes.WINFUNCTYPE(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
legacy = [
|
||||||
'compat_HTMLParseError',
|
'compat_HTMLParseError',
|
||||||
'compat_HTMLParser',
|
'compat_HTMLParser',
|
||||||
'compat_HTTPError',
|
'compat_HTTPError',
|
||||||
'compat_Struct',
|
|
||||||
'compat_b64decode',
|
'compat_b64decode',
|
||||||
|
'compat_cookiejar',
|
||||||
|
'compat_cookiejar_Cookie',
|
||||||
|
'compat_cookies',
|
||||||
|
'compat_cookies_SimpleCookie',
|
||||||
|
'compat_etree_Element',
|
||||||
|
'compat_etree_register_namespace',
|
||||||
|
'compat_expanduser',
|
||||||
|
'compat_getpass',
|
||||||
|
'compat_parse_qs',
|
||||||
|
'compat_realpath',
|
||||||
|
'compat_urllib_parse_parse_qs',
|
||||||
|
'compat_urllib_parse_unquote',
|
||||||
|
'compat_urllib_parse_unquote_plus',
|
||||||
|
'compat_urllib_parse_unquote_to_bytes',
|
||||||
|
'compat_urllib_parse_urlencode',
|
||||||
|
'compat_urllib_parse_urlparse',
|
||||||
|
'compat_urlparse',
|
||||||
|
'compat_urlretrieve',
|
||||||
|
'compat_xml_parse_error',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
'compat_html_parser_HTMLParseError',
|
||||||
|
'compat_html_parser_HTMLParser',
|
||||||
|
'compat_Struct',
|
||||||
|
'compat_base64_b64decode',
|
||||||
'compat_basestring',
|
'compat_basestring',
|
||||||
'compat_casefold',
|
'compat_casefold',
|
||||||
'compat_chr',
|
'compat_chr',
|
||||||
'compat_collections_abc',
|
'compat_collections_abc',
|
||||||
'compat_collections_chain_map',
|
'compat_collections_chain_map',
|
||||||
'compat_cookiejar',
|
'compat_http_cookiejar',
|
||||||
'compat_cookiejar_Cookie',
|
'compat_http_cookiejar_Cookie',
|
||||||
'compat_cookies',
|
'compat_http_cookies',
|
||||||
'compat_cookies_SimpleCookie',
|
'compat_http_cookies_SimpleCookie',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
'compat_etree_Element',
|
|
||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
'compat_etree_register_namespace',
|
|
||||||
'compat_expanduser',
|
|
||||||
'compat_filter',
|
'compat_filter',
|
||||||
'compat_get_terminal_size',
|
'compat_get_terminal_size',
|
||||||
'compat_getenv',
|
'compat_getenv',
|
||||||
'compat_getpass',
|
'compat_getpass_getpass',
|
||||||
'compat_html_entities',
|
'compat_html_entities',
|
||||||
'compat_html_entities_html5',
|
'compat_html_entities_html5',
|
||||||
'compat_http_client',
|
'compat_http_client',
|
||||||
@ -3131,11 +3187,11 @@ __all__ = [
|
|||||||
'compat_numeric_types',
|
'compat_numeric_types',
|
||||||
'compat_ord',
|
'compat_ord',
|
||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_parse_qs',
|
'compat_os_path_expanduser',
|
||||||
|
'compat_os_path_realpath',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
'compat_re_Match',
|
'compat_re_Match',
|
||||||
'compat_re_Pattern',
|
'compat_re_Pattern',
|
||||||
'compat_realpath',
|
|
||||||
'compat_setenv',
|
'compat_setenv',
|
||||||
'compat_shlex_quote',
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shlex_split',
|
||||||
@ -3147,17 +3203,14 @@ __all__ = [
|
|||||||
'compat_tokenize_tokenize',
|
'compat_tokenize_tokenize',
|
||||||
'compat_urllib_error',
|
'compat_urllib_error',
|
||||||
'compat_urllib_parse',
|
'compat_urllib_parse',
|
||||||
'compat_urllib_parse_unquote',
|
|
||||||
'compat_urllib_parse_unquote_plus',
|
|
||||||
'compat_urllib_parse_unquote_to_bytes',
|
|
||||||
'compat_urllib_parse_urlencode',
|
|
||||||
'compat_urllib_parse_urlparse',
|
|
||||||
'compat_urllib_request',
|
'compat_urllib_request',
|
||||||
'compat_urllib_request_DataHandler',
|
'compat_urllib_request_DataHandler',
|
||||||
'compat_urllib_response',
|
'compat_urllib_response',
|
||||||
'compat_urlparse',
|
'compat_urllib_request_urlretrieve',
|
||||||
'compat_urlretrieve',
|
'compat_urllib_HTTPError',
|
||||||
'compat_xml_parse_error',
|
'compat_xml_etree_ElementTree_Element',
|
||||||
|
'compat_xml_etree_ElementTree_ParseError',
|
||||||
|
'compat_xml_etree_register_namespace',
|
||||||
'compat_xpath',
|
'compat_xpath',
|
||||||
'compat_zip',
|
'compat_zip',
|
||||||
'workaround_optparse_bug9161',
|
'workaround_optparse_bug9161',
|
||||||
|
@ -200,6 +200,64 @@ class Aria2cFD(ExternalFD):
|
|||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
|
|
||||||
|
class Aria2pFD(ExternalFD):
|
||||||
|
''' Aria2pFD class
|
||||||
|
This class support to use aria2p as downloader.
|
||||||
|
(Aria2p, a command-line tool and Python library to interact with an aria2c daemon process
|
||||||
|
through JSON-RPC.)
|
||||||
|
It can help you to get download progress more easily.
|
||||||
|
To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip.
|
||||||
|
Then run aria2c in the background and enable with the --enable-rpc option.
|
||||||
|
'''
|
||||||
|
try:
|
||||||
|
import aria2p
|
||||||
|
__avail = True
|
||||||
|
except ImportError:
|
||||||
|
__avail = False
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def available(cls):
|
||||||
|
return cls.__avail
|
||||||
|
|
||||||
|
def _call_downloader(self, tmpfilename, info_dict):
|
||||||
|
aria2 = self.aria2p.API(
|
||||||
|
self.aria2p.Client(
|
||||||
|
host='http://localhost',
|
||||||
|
port=6800,
|
||||||
|
secret=''
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
options = {
|
||||||
|
'min-split-size': '1M',
|
||||||
|
'max-connection-per-server': 4,
|
||||||
|
'auto-file-renaming': 'false',
|
||||||
|
}
|
||||||
|
options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
|
||||||
|
options['out'] = os.path.basename(tmpfilename)
|
||||||
|
options['header'] = []
|
||||||
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
options['header'].append('{0}: {1}'.format(key, val))
|
||||||
|
download = aria2.add_uris([info_dict['url']], options)
|
||||||
|
status = {
|
||||||
|
'status': 'downloading',
|
||||||
|
'tmpfilename': tmpfilename,
|
||||||
|
}
|
||||||
|
started = time.time()
|
||||||
|
while download.status in ['active', 'waiting']:
|
||||||
|
download = aria2.get_download(download.gid)
|
||||||
|
status.update({
|
||||||
|
'downloaded_bytes': download.completed_length,
|
||||||
|
'total_bytes': download.total_length,
|
||||||
|
'elapsed': time.time() - started,
|
||||||
|
'eta': download.eta.total_seconds(),
|
||||||
|
'speed': download.download_speed,
|
||||||
|
})
|
||||||
|
self._hook_progress(status)
|
||||||
|
time.sleep(.5)
|
||||||
|
return download.status != 'complete'
|
||||||
|
|
||||||
|
|
||||||
class HttpieFD(ExternalFD):
|
class HttpieFD(ExternalFD):
|
||||||
@classmethod
|
@classmethod
|
||||||
def available(cls):
|
def available(cls):
|
||||||
|
@ -376,6 +376,7 @@ from .fc2 import (
|
|||||||
FC2EmbedIE,
|
FC2EmbedIE,
|
||||||
)
|
)
|
||||||
from .fczenit import FczenitIE
|
from .fczenit import FczenitIE
|
||||||
|
from .filemoon import FileMoonIE
|
||||||
from .fifa import FifaIE
|
from .fifa import FifaIE
|
||||||
from .filmon import (
|
from .filmon import (
|
||||||
FilmOnIE,
|
FilmOnIE,
|
||||||
@ -556,6 +557,7 @@ from .khanacademy import (
|
|||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
|
from .kommunetv import KommunetvIE
|
||||||
from .konserthusetplay import KonserthusetPlayIE
|
from .konserthusetplay import KonserthusetPlayIE
|
||||||
from .krasview import KrasViewIE
|
from .krasview import KrasViewIE
|
||||||
from .kth import KTHIE
|
from .kth import KTHIE
|
||||||
@ -1010,6 +1012,10 @@ from .raywenderlich import (
|
|||||||
RayWenderlichIE,
|
RayWenderlichIE,
|
||||||
RayWenderlichCourseIE,
|
RayWenderlichCourseIE,
|
||||||
)
|
)
|
||||||
|
from .rbgtum import (
|
||||||
|
RbgTumIE,
|
||||||
|
RbgTumCourseIE,
|
||||||
|
)
|
||||||
from .rbmaradio import RBMARadioIE
|
from .rbmaradio import RBMARadioIE
|
||||||
from .rds import RDSIE
|
from .rds import RDSIE
|
||||||
from .redbulltv import (
|
from .redbulltv import (
|
||||||
@ -1200,6 +1206,7 @@ from .storyfire import (
|
|||||||
from .streamable import StreamableIE
|
from .streamable import StreamableIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
|
from .streamsb import StreamsbIE
|
||||||
from .streetvoice import StreetVoiceIE
|
from .streetvoice import StreetVoiceIE
|
||||||
from .stretchinternet import StretchInternetIE
|
from .stretchinternet import StretchInternetIE
|
||||||
from .stv import STVPlayerIE
|
from .stv import STVPlayerIE
|
||||||
|
43
youtube_dl/extractor/filemoon.py
Normal file
43
youtube_dl/extractor/filemoon.py
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
decode_packed_codes,
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FileMoonIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?filemoon\.sx/./(?P<id>\w+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://filemoon.sx/e/dw40rxrzruqz',
|
||||||
|
'md5': '5a713742f57ac4aef29b74733e8dda01',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'dw40rxrzruqz',
|
||||||
|
'title': 'dw40rxrzruqz',
|
||||||
|
'ext': 'mp4'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
matches = re.findall(r'(?s)(eval.*?)</script>', webpage)
|
||||||
|
packed = matches[-1]
|
||||||
|
unpacked = decode_packed_codes(packed)
|
||||||
|
jwplayer_sources = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)player\s*\.\s*setup\s*\(\s*\{\s*sources\s*:\s*(.*?])', unpacked, 'jwplayer sources'),
|
||||||
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
formats = self._parse_jwplayer_formats(jwplayer_sources, video_id)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._generic_title(url) or video_id,
|
||||||
|
'formats': formats
|
||||||
|
}
|
@ -1,19 +1,29 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_filter as filter,
|
||||||
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
error_to_compat_str,
|
||||||
|
extract_attributes,
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
try_get,
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -22,14 +32,102 @@ class IGNBaseIE(InfoExtractor):
|
|||||||
return self._download_json(
|
return self._download_json(
|
||||||
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug), slug)
|
||||||
|
|
||||||
|
def _checked_call_api(self, slug):
|
||||||
|
try:
|
||||||
|
return self._call_api(slug)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||||
|
e.cause.args = e.cause.args or [
|
||||||
|
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
|
||||||
|
raise ExtractorError(
|
||||||
|
'Content not found: expired?', cause=e.cause,
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _extract_video_info(self, video, fatal=True):
|
||||||
|
video_id = video['videoId']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
refs = traverse_obj(video, 'refs', expected_type=dict) or {}
|
||||||
|
|
||||||
|
m3u8_url = url_or_none(refs.get('m3uUrl'))
|
||||||
|
if m3u8_url:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
f4m_url = url_or_none(refs.get('f4mUrl'))
|
||||||
|
if f4m_url:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
f4m_url, video_id, f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
|
for asset in (video.get('assets') or []):
|
||||||
|
asset_url = url_or_none(asset.get('url'))
|
||||||
|
if not asset_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': asset_url,
|
||||||
|
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
||||||
|
'fps': int_or_none(asset.get('frame_rate')),
|
||||||
|
'height': int_or_none(asset.get('height')),
|
||||||
|
'width': int_or_none(asset.get('width')),
|
||||||
|
})
|
||||||
|
|
||||||
|
mezzanine_url = traverse_obj(
|
||||||
|
video, ('system', 'mezzanineUrl'), expected_type=url_or_none)
|
||||||
|
if mezzanine_url:
|
||||||
|
formats.append({
|
||||||
|
'ext': determine_ext(mezzanine_url, 'mp4'),
|
||||||
|
'format_id': 'mezzanine',
|
||||||
|
'preference': 1,
|
||||||
|
'url': mezzanine_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
if formats or fatal:
|
||||||
|
self._sort_formats(formats)
|
||||||
|
else:
|
||||||
|
return
|
||||||
|
|
||||||
|
thumbnails = traverse_obj(
|
||||||
|
video, ('thumbnails', Ellipsis, {'url': 'url'}), expected_type=url_or_none)
|
||||||
|
tags = traverse_obj(
|
||||||
|
video, ('tags', Ellipsis, 'displayName'),
|
||||||
|
expected_type=lambda x: x.strip() or None)
|
||||||
|
|
||||||
|
metadata = traverse_obj(video, 'metadata', expected_type=dict) or {}
|
||||||
|
title = traverse_obj(
|
||||||
|
metadata, 'longTitle', 'title', 'name',
|
||||||
|
expected_type=lambda x: x.strip() or None)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': strip_or_none(metadata.get('description')),
|
||||||
|
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
||||||
|
'duration': int_or_none(metadata.get('duration')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'formats': formats,
|
||||||
|
'tags': tags,
|
||||||
|
}
|
||||||
|
|
||||||
|
# yt-dlp shim
|
||||||
|
@classmethod
|
||||||
|
def _extract_from_webpage(cls, url, webpage):
|
||||||
|
for embed_url in orderedSet(
|
||||||
|
cls._extract_embed_urls(url, webpage) or [], lazy=True):
|
||||||
|
yield cls.url_result(embed_url, None if cls._VALID_URL is False else cls)
|
||||||
|
|
||||||
|
|
||||||
class IGNIE(IGNBaseIE):
|
class IGNIE(IGNBaseIE):
|
||||||
"""
|
"""
|
||||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||||
Some videos of it.ign.com are also supported
|
Some videos of it.ign.com are also supported
|
||||||
"""
|
"""
|
||||||
|
_VIDEO_PATH_RE = r'/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>.+?)'
|
||||||
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
_PLAYLIST_PATH_RE = r'(?:/?\?(?P<filt>[^&#]+))?'
|
||||||
|
_VALID_URL = (
|
||||||
|
r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos(?:%s)'
|
||||||
|
% '|'.join((_VIDEO_PATH_RE + r'(?:[/?&#]|$)', _PLAYLIST_PATH_RE)))
|
||||||
IE_NAME = 'ign.com'
|
IE_NAME = 'ign.com'
|
||||||
_PAGE_TYPE = 'video'
|
_PAGE_TYPE = 'video'
|
||||||
|
|
||||||
@ -44,7 +142,10 @@ class IGNIE(IGNBaseIE):
|
|||||||
'timestamp': 1370440800,
|
'timestamp': 1370440800,
|
||||||
'upload_date': '20130605',
|
'upload_date': '20130605',
|
||||||
'tags': 'count:9',
|
'tags': 'count:9',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'nocheckcertificate': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||||
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||||
@ -56,86 +157,51 @@ class IGNIE(IGNBaseIE):
|
|||||||
'timestamp': 1420571160,
|
'timestamp': 1420571160,
|
||||||
'upload_date': '20150106',
|
'upload_date': '20150106',
|
||||||
'tags': 'count:4',
|
'tags': 'count:4',
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_embed_urls(cls, url, webpage):
|
||||||
|
grids = re.findall(
|
||||||
|
r'''(?s)<section\b[^>]+\bclass\s*=\s*['"](?:[\w-]+\s+)*?content-feed-grid(?!\B|-)[^>]+>(.+?)</section[^>]*>''',
|
||||||
|
webpage)
|
||||||
|
return filter(None,
|
||||||
|
(urljoin(url, m.group('path')) for m in re.finditer(
|
||||||
|
r'''<a\b[^>]+\bhref\s*=\s*('|")(?P<path>/videos%s)\1'''
|
||||||
|
% cls._VIDEO_PATH_RE, grids[0] if grids else '')))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
m = re.match(self._VALID_URL, url)
|
||||||
|
display_id = m.group('id')
|
||||||
|
if display_id:
|
||||||
|
return self._extract_video(url, display_id)
|
||||||
|
display_id = m.group('filt') or 'all'
|
||||||
|
return self._extract_playlist(url, display_id)
|
||||||
|
|
||||||
|
def _extract_playlist(self, url, display_id):
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
(self.url_result(u, ie=self.ie_key())
|
||||||
|
for u in self._extract_embed_urls(url, webpage)),
|
||||||
|
playlist_id=display_id)
|
||||||
|
|
||||||
|
def _extract_video(self, url, display_id):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
video = self._call_api(display_id)
|
video = self._checked_call_api(display_id)
|
||||||
video_id = video['videoId']
|
|
||||||
metadata = video['metadata']
|
|
||||||
title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
|
|
||||||
|
|
||||||
formats = []
|
info = self._extract_video_info(video)
|
||||||
refs = video.get('refs') or {}
|
|
||||||
|
|
||||||
m3u8_url = refs.get('m3uUrl')
|
return merge_dicts({
|
||||||
if m3u8_url:
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
|
|
||||||
f4m_url = refs.get('f4mUrl')
|
|
||||||
if f4m_url:
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
f4m_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
|
|
||||||
for asset in (video.get('assets') or []):
|
|
||||||
asset_url = asset.get('url')
|
|
||||||
if not asset_url:
|
|
||||||
continue
|
|
||||||
formats.append({
|
|
||||||
'url': asset_url,
|
|
||||||
'tbr': int_or_none(asset.get('bitrate'), 1000),
|
|
||||||
'fps': int_or_none(asset.get('frame_rate')),
|
|
||||||
'height': int_or_none(asset.get('height')),
|
|
||||||
'width': int_or_none(asset.get('width')),
|
|
||||||
})
|
|
||||||
|
|
||||||
mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
|
|
||||||
if mezzanine_url:
|
|
||||||
formats.append({
|
|
||||||
'ext': determine_ext(mezzanine_url, 'mp4'),
|
|
||||||
'format_id': 'mezzanine',
|
|
||||||
'preference': 1,
|
|
||||||
'url': mezzanine_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
for thumbnail in (video.get('thumbnails') or []):
|
|
||||||
thumbnail_url = thumbnail.get('url')
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnails.append({
|
|
||||||
'url': thumbnail_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
tags = []
|
|
||||||
for tag in (video.get('tags') or []):
|
|
||||||
display_name = tag.get('displayName')
|
|
||||||
if not display_name:
|
|
||||||
continue
|
|
||||||
tags.append(display_name)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': strip_or_none(metadata.get('description')),
|
|
||||||
'timestamp': parse_iso8601(metadata.get('publishDate')),
|
|
||||||
'duration': int_or_none(metadata.get('duration')),
|
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'thumbnails': thumbnails,
|
}, info)
|
||||||
'formats': formats,
|
|
||||||
'tags': tags,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class IGNVideoIE(InfoExtractor):
|
class IGNVideoIE(IGNBaseIE):
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
||||||
@ -147,7 +213,8 @@ class IGNVideoIE(InfoExtractor):
|
|||||||
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
'description': 'Taking out assassination targets in Hitman has never been more stylish.',
|
||||||
'timestamp': 1444665600,
|
'timestamp': 1444665600,
|
||||||
'upload_date': '20151012',
|
'upload_date': '20151012',
|
||||||
}
|
},
|
||||||
|
'expected_warnings': ['HTTP Error 400: Bad Request'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -167,22 +234,38 @@ class IGNVideoIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
req = HEADRequest(url.rsplit('/', 1)[0] + '/embed')
|
parsed_url = compat_urlparse.urlparse(url)
|
||||||
url = self._request_webpage(req, video_id).geturl()
|
embed_url = compat_urlparse.urlunparse(
|
||||||
|
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
|
||||||
|
|
||||||
|
webpage, urlh = self._download_webpage_handle(embed_url, video_id)
|
||||||
|
new_url = urlh.geturl()
|
||||||
ign_url = compat_parse_qs(
|
ign_url = compat_parse_qs(
|
||||||
compat_urllib_parse_urlparse(url).query).get('url', [None])[0]
|
compat_urlparse.urlparse(new_url).query).get('url', [None])[-1]
|
||||||
if ign_url:
|
if ign_url:
|
||||||
return self.url_result(ign_url, IGNIE.ie_key())
|
return self.url_result(ign_url, IGNIE.ie_key())
|
||||||
return self.url_result(url)
|
video = self._search_regex(r'(<div\b[^>]+\bdata-video-id\s*=\s*[^>]+>)', webpage, 'video element', fatal=False)
|
||||||
|
if not video:
|
||||||
|
if new_url == url:
|
||||||
|
raise ExtractorError('Redirect loop: ' + url)
|
||||||
|
return self.url_result(new_url)
|
||||||
|
video = extract_attributes(video)
|
||||||
|
video_data = video.get('data-settings') or '{}'
|
||||||
|
video_data = self._parse_json(video_data, video_id)['video']
|
||||||
|
info = self._extract_video_info(video_data)
|
||||||
|
|
||||||
|
return merge_dicts({
|
||||||
|
'display_id': video_id,
|
||||||
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class IGNArticleIE(IGNBaseIE):
|
class IGNArticleIE(IGNBaseIE):
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?(?:[\w-]+/)*?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||||
_PAGE_TYPE = 'article'
|
_PAGE_TYPE = 'article'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '524497489e4e8ff5848ece34',
|
'id': '72113',
|
||||||
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||||
},
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
@ -190,7 +273,7 @@ class IGNArticleIE(IGNBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5ebbd138523268b93c9141af17bec937',
|
'id': '5ebbd138523268b93c9141af17bec937',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'GTA 5 Video Review',
|
'title': 'Grand Theft Auto V Video Review',
|
||||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||||
'timestamp': 1379339880,
|
'timestamp': 1379339880,
|
||||||
'upload_date': '20130916',
|
'upload_date': '20130916',
|
||||||
@ -200,7 +283,7 @@ class IGNArticleIE(IGNBaseIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '638672ee848ae4ff108df2a296418ee2',
|
'id': '638672ee848ae4ff108df2a296418ee2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '26 Twisted Moments from GTA 5 in Slow Motion',
|
'title': 'GTA 5 In Slow Motion',
|
||||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
'timestamp': 1386878820,
|
'timestamp': 1386878820,
|
||||||
'upload_date': '20131212',
|
'upload_date': '20131212',
|
||||||
@ -208,16 +291,17 @@ class IGNArticleIE(IGNBaseIE):
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
'params': {
|
'params': {
|
||||||
'playlist_items': '2-3',
|
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Backend fetch failed'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '53ee806780a81ec46e0790f8',
|
'id': '53ee806780a81ec46e0790f8',
|
||||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 1,
|
||||||
|
'expected_warnings': ['Backend fetch failed'],
|
||||||
}, {
|
}, {
|
||||||
# videoId pattern
|
# videoId pattern
|
||||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||||
@ -240,18 +324,91 @@ class IGNArticleIE(IGNBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _checked_call_api(self, slug):
|
||||||
|
try:
|
||||||
|
return self._call_api(slug)
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
|
e.cause.args = e.cause.args or [
|
||||||
|
e.cause.geturl(), e.cause.getcode(), e.cause.reason]
|
||||||
|
if e.cause.code == 404:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Content not found: expired?', cause=e.cause,
|
||||||
|
expected=True)
|
||||||
|
elif e.cause.code == 503:
|
||||||
|
self.report_warning(error_to_compat_str(e.cause))
|
||||||
|
return
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _search_nextjs_data(self, webpage, video_id, **kw):
|
||||||
|
return self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
|
||||||
|
webpage, 'next.js data', **kw),
|
||||||
|
video_id, **kw)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
article = self._call_api(display_id)
|
article = self._checked_call_api(display_id)
|
||||||
|
|
||||||
def entries():
|
if article:
|
||||||
media_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
|
# obsolete ?
|
||||||
if media_url:
|
def entries():
|
||||||
yield self.url_result(media_url, IGNIE.ie_key())
|
media_url = traverse_obj(
|
||||||
for content in (article.get('content') or []):
|
article, ('mediaRelations', 0, 'media', 'metadata', 'url'),
|
||||||
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
expected_type=url_or_none)
|
||||||
yield self.url_result(video_url)
|
if media_url:
|
||||||
|
yield self.url_result(media_url, IGNIE.ie_key())
|
||||||
|
for content in (article.get('content') or []):
|
||||||
|
for video_url in re.findall(r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"', content):
|
||||||
|
if url_or_none(video_url):
|
||||||
|
yield self.url_result(video_url)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries(), article.get('articleId'),
|
||||||
|
traverse_obj(
|
||||||
|
article, ('metadata', 'headline'),
|
||||||
|
expected_type=lambda x: x.strip() or None))
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
playlist_id = self._html_search_meta('dable:item_id', webpage, default=None)
|
||||||
|
if playlist_id:
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for m in re.finditer(
|
||||||
|
r'''(?s)<object\b[^>]+\bclass\s*=\s*("|')ign-videoplayer\1[^>]*>(?P<params>.+?)</object''',
|
||||||
|
webpage):
|
||||||
|
flashvars = self._search_regex(
|
||||||
|
r'''(<param\b[^>]+\bname\s*=\s*("|')flashvars\2[^>]*>)''',
|
||||||
|
m.group('params'), 'flashvars', default='')
|
||||||
|
flashvars = compat_parse_qs(extract_attributes(flashvars).get('value') or '')
|
||||||
|
v_url = url_or_none((flashvars.get('url') or [None])[-1])
|
||||||
|
if v_url:
|
||||||
|
yield self.url_result(v_url)
|
||||||
|
else:
|
||||||
|
playlist_id = self._search_regex(
|
||||||
|
r'''\bdata-post-id\s*=\s*("|')(?P<id>[\da-f]+)\1''',
|
||||||
|
webpage, 'id', group='id', default=None)
|
||||||
|
|
||||||
|
nextjs_data = self._search_nextjs_data(webpage, display_id)
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for player in traverse_obj(
|
||||||
|
nextjs_data,
|
||||||
|
('props', 'apolloState', 'ROOT_QUERY', lambda k, _: k.startswith('videoPlayerProps('), '__ref')):
|
||||||
|
# skip promo links (which may not always be served, eg GH CI servers)
|
||||||
|
if traverse_obj(nextjs_data,
|
||||||
|
('props', 'apolloState', player.replace('PlayerProps', 'ModernContent')),
|
||||||
|
expected_type=dict):
|
||||||
|
continue
|
||||||
|
video = traverse_obj(nextjs_data, ('props', 'apolloState', player), expected_type=dict) or {}
|
||||||
|
info = self._extract_video_info(video, fatal=False)
|
||||||
|
if info:
|
||||||
|
yield merge_dicts({
|
||||||
|
'display_id': display_id,
|
||||||
|
}, info)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries(), article.get('articleId'),
|
entries(), playlist_id or display_id,
|
||||||
strip_or_none(try_get(article, lambda x: x['metadata']['headline'])))
|
re.sub(r'\s+-\s+IGN\s*$', '', self._og_search_title(webpage, default='')) or None)
|
||||||
|
35
youtube_dl/extractor/kommunetv.py
Normal file
35
youtube_dl/extractor/kommunetv.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import update_url
|
||||||
|
|
||||||
|
|
||||||
|
class KommunetvIE(InfoExtractor):
    """Extractor for archived recordings served under *.kommunetv.no/archive/<id>."""

    # The dots are escaped so that only the literal "kommunetv.no" domain
    # matches (an unescaped "." would accept any character in its place).
    _VALID_URL = r'https://(\w+)\.kommunetv\.no/archive/(?P<id>\w+)'
    _TEST = {
        'url': 'https://oslo.kommunetv.no/archive/921',
        'md5': '5f102be308ee759be1e12b63d5da4bbc',
        'info_dict': {
            'id': '921',
            'title': 'Bystyremøte',
            'ext': 'mp4'
        }
    }

    def _real_extract(self, url):
        """Fetch the stream metadata as JSON and return the info dict.

        Returns a dict with 'id', 'formats' (HLS formats, sorted) and 'title'.
        Raises KeyError/IndexError if the JSON lacks the expected
        'stream' / 'playlist' entries.
        """
        video_id = self._match_id(url)
        headers = {
            'Accept': 'application/json'
        }
        # NOTE(review): the API host is fixed to oslo.kommunetv.no although
        # _VALID_URL accepts any subdomain -- confirm this is intended.
        data = self._download_json(
            'https://oslo.kommunetv.no/api/streams?streamType=1&id=%s' % video_id,
            video_id, headers=headers)
        title = data['stream']['title']
        stream_file = data['playlist'][0]['playlist'][0]['file']
        # Drop the query string and fragment before requesting the manifest.
        manifest_url = update_url(stream_file, query=None, fragment=None)
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls', fatal=False)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': title
        }
|
97
youtube_dl/extractor/rbgtum.py
Normal file
97
youtube_dl/extractor/rbgtum.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class RbgTumIE(InfoExtractor):
    """Extractor for single lecture recordings at live.rbg.tum.de/w/<id>."""

    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        }
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        }
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
    }, ]

    def _real_extract(self, url):
        """Scrape the lecture page for its HLS manifest, title and series.

        Returns a dict with 'id', 'title', 'series' and 'formats'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The first .m3u8 URL found in the page is taken as the manifest.
        m3u8 = self._html_search_regex(r'(https://.+?\.m3u8)', webpage, 'm3u8')
        # Non-greedy capture: with DOTALL a greedy '(.*)' would run from the
        # first <h1> to the *last* </h1> in the page.
        lecture_title = self._html_search_regex(
            r'(?si)<h1\b[^>]*>(.*?)</h1>', webpage, 'title')
        # The series is the <title> text before the first colon, with an
        # optional "TUM-Live | " prefix removed.
        lecture_series_title = self._html_search_regex(
            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')

        formats = self._extract_m3u8_formats(
            m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': lecture_title,
            'series': lecture_series_title,
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class RbgTumCourseIE(InfoExtractor):
    """Playlist extractor for course pages at live.rbg.tum.de/course/<id>."""

    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, ]

    def _real_extract(self, url):
        """Collect the lecture links of a course page into a playlist.

        Returns a playlist result whose entries are url_results delegated to
        RbgTumIE, with the course id and the page's <h1> text as title.
        """
        course_id = self._match_id(url)
        webpage = self._download_webpage(url, course_id)

        # Non-greedy capture: with DOTALL a greedy '(.*)' would run from the
        # first <h1> to the *last* </h1> in the page.
        lecture_series_title = self._html_search_regex(
            r'(?si)<h1\b[^>]*>(.*?)</h1>', webpage, 'title')

        # '[^"]+' keeps the match inside one href value; a greedy '.+' could
        # extend past the closing quote to a later '"' on the same line, which
        # both corrupts the path and bypasses the lookbehinds that exclude
        # the /cam, /pres and /chat variants of a lecture link.
        lecture_urls = [
            self.url_result('https://live.rbg.tum.de/w/' + lecture_path, ie=RbgTumIE.ie_key())
            for lecture_path in re.findall(
                r'(?i)href="/w/([^"]+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage)]

        return self.playlist_result(lecture_urls, course_id, lecture_series_title)
|
61
youtube_dl/extractor/streamsb.py
Normal file
61
youtube_dl/extractor/streamsb.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import binascii
|
||||||
|
import random
|
||||||
|
import re
|
||||||
|
import string
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import urljoin, url_basename
|
||||||
|
|
||||||
|
|
||||||
|
def to_ascii_hex(str1):
    """Return the hex digits of the UTF-8 encoding of *str1* as a text string."""
    utf8_bytes = str1.encode('utf-8')
    hex_bytes = binascii.hexlify(utf8_bytes)
    return hex_bytes.decode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def generate_random_string(length):
    """Return *length* characters drawn with random.choice from ASCII letters and digits."""
    alphabet = string.ascii_letters + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)
|
||||||
|
|
||||||
|
|
||||||
|
class StreamsbIE(InfoExtractor):
    """Extractor for videos hosted on StreamSB mirror domains (see _DOMAINS)."""

    _DOMAINS = ('viewsb.com', )
    # Domains are regex-escaped so that the '.' in each one only matches a
    # literal dot.
    _VALID_URL = r'https://(?P<domain>%s)/(?P<id>.+)' % '|'.join(map(re.escape, _DOMAINS))
    _TEST = {
        'url': 'https://viewsb.com/dxfvlu4qanjx',
        'md5': '488d111a63415369bf90ea83adc8a325',
        'info_dict': {
            'id': 'dxfvlu4qanjx',
            'ext': 'mp4',
            'title': 'Sintel'
        }
    }

    def _real_extract(self, url):
        """Resolve the player iframe, query the sources endpoint and build formats.

        Returns a dict with 'id', 'formats' (HLS formats, sorted) and 'title'.
        Raises KeyError if the sources JSON lacks 'stream_data' or its
        'title' / 'file' entries.
        """
        domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
        webpage = self._download_webpage(url, video_id)

        iframe_rel_url = self._search_regex(
            r'''(?i)<iframe\b[^>]+\bsrc\s*=\s*('|")(?P<path>/.*\.html)\1''',
            webpage, 'iframe', group='path')
        iframe_url = urljoin('https://' + domain, iframe_rel_url)

        iframe_data = self._download_webpage(iframe_url, video_id)
        # The version number in the app script name is appended to the
        # "sources" endpoint below; '50' is used when it cannot be found.
        # The quote class is ["'] -- a '|' inside [...] would be a literal.
        app_version = self._search_regex(
            r'''<script\b[^>]+\bsrc\s*=\s*["'].*/app\.min\.(\d+)\.js''',
            iframe_data, 'app version', fatal=False) or '50'

        # Strip only the final extension (maxsplit=1); without it rsplit()
        # behaves like split() and [0] would cut at the first dot.
        video_code = url_basename(iframe_url).rsplit('.', 1)[0]

        length = 12
        req = '||'.join((
            generate_random_string(length), video_code,
            generate_random_string(length), 'streamsb'))
        ereq = 'https://{0}/sources{1}/{2}'.format(domain, app_version, to_ascii_hex(req))

        video_data = self._download_webpage(ereq, video_id, headers={
            'Referer': iframe_url,
            'watchsb': 'sbstream',
        })
        player_data = self._parse_json(video_data, video_id)
        title = player_data['stream_data']['title']
        formats = self._extract_m3u8_formats(
            player_data['stream_data']['file'], video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
        # Sort the formats as the other HLS-based extractors do.
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'title': title,
        }
|
@ -261,27 +261,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
|
|
||||||
# _VALID_URL matches Vimeo URLs
|
# _VALID_URL matches Vimeo URLs
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
www|
|
www|
|
||||||
player
|
player
|
||||||
)
|
)
|
||||||
\.
|
\.
|
||||||
)?
|
)?
|
||||||
vimeo(?:pro)?\.com/
|
vimeo(?:pro)?\.com/
|
||||||
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
(?:
|
||||||
(?:.*?/)??
|
(?P<u>user)|
|
||||||
(?:
|
(?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
|
||||||
(?:
|
(?:.*?/)??
|
||||||
play_redirect_hls|
|
(?P<q>
|
||||||
moogaloop\.swf)\?clip_id=
|
(?:
|
||||||
)?
|
play_redirect_hls|
|
||||||
(?:videos?/)?
|
moogaloop\.swf)\?clip_id=
|
||||||
(?P<id>[0-9]+)
|
)?
|
||||||
(?:/(?P<unlisted_hash>[\da-f]{10}))?
|
(?:videos?/)?
|
||||||
/?(?:[?&].*)?(?:[#].*)?$
|
)
|
||||||
'''
|
(?P<id>[0-9]+)
|
||||||
|
(?(u)
|
||||||
|
/(?!videos|likes)[^/?#]+/?|
|
||||||
|
(?(q)|/(?P<unlisted_hash>[\da-f]{10}))?
|
||||||
|
)
|
||||||
|
(?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$
|
||||||
|
'''
|
||||||
IE_NAME = 'vimeo'
|
IE_NAME = 'vimeo'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -539,7 +545,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
# user playlist alias -> https://vimeo.com/258705797
|
||||||
|
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
# https://gettingthingsdone.com/workflowmap/
|
# https://gettingthingsdone.com/workflowmap/
|
||||||
# vimeo embed with check-password page protected by Referer header
|
# vimeo embed with check-password page protected by Referer header
|
||||||
]
|
]
|
||||||
@ -663,7 +674,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
|
|
||||||
if '//player.vimeo.com/video/' in url:
|
if '//player.vimeo.com/video/' in url:
|
||||||
config = self._parse_json(self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
|
r'(?s)\b(?:playerC|c)onfig\s*=\s*({.+?})\s*[;\n]', webpage, 'info section'), video_id)
|
||||||
if config.get('view') == 4:
|
if config.get('view') == 4:
|
||||||
config = self._verify_player_video_password(
|
config = self._verify_player_video_password(
|
||||||
redirect_url, video_id, headers)
|
redirect_url, video_id, headers)
|
||||||
|
@ -14,12 +14,11 @@ from ..compat import (
|
|||||||
compat_chr,
|
compat_chr,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_map as map,
|
compat_map as map,
|
||||||
compat_parse_qs,
|
|
||||||
compat_str,
|
compat_str,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
|
||||||
)
|
)
|
||||||
from ..jsinterp import JSInterpreter
|
from ..jsinterp import JSInterpreter
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -28,20 +27,25 @@ from ..utils import (
|
|||||||
dict_get,
|
dict_get,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -49,10 +53,6 @@ from ..utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def parse_qs(url):
|
|
||||||
return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
|
||||||
|
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
"""Provide base functions for Youtube extractors"""
|
"""Provide base functions for Youtube extractors"""
|
||||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||||
@ -286,15 +286,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
|
||||||
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True):
|
def _call_api(self, ep, query, video_id, fatal=True, headers=None):
|
||||||
data = self._DEFAULT_API_DATA.copy()
|
data = self._DEFAULT_API_DATA.copy()
|
||||||
data.update(query)
|
data.update(query)
|
||||||
|
real_headers = {'content-type': 'application/json'}
|
||||||
|
if headers:
|
||||||
|
real_headers.update(headers)
|
||||||
|
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
|
'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page',
|
note='Downloading API JSON', errnote='Unable to download API page',
|
||||||
data=json.dumps(data).encode('utf8'), fatal=fatal,
|
data=json.dumps(data).encode('utf8'), fatal=fatal,
|
||||||
headers={'content-type': 'application/json'},
|
headers=real_headers,
|
||||||
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
|
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
|
||||||
|
|
||||||
def _extract_yt_initial_data(self, video_id, webpage):
|
def _extract_yt_initial_data(self, video_id, webpage):
|
||||||
@ -515,6 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader': 'Philipp Hagemeister',
|
'uploader': 'Philipp Hagemeister',
|
||||||
'uploader_id': 'phihag',
|
'uploader_id': 'phihag',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
|
||||||
|
'channel': 'Philipp Hagemeister',
|
||||||
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
|
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
|
||||||
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
|
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
|
||||||
'upload_date': '20121002',
|
'upload_date': '20121002',
|
||||||
@ -524,10 +528,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'duration': 10,
|
'duration': 10,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
|
||||||
'start_time': 1,
|
'start_time': 1,
|
||||||
'end_time': 9,
|
'end_time': 9,
|
||||||
}
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
|
'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
|
||||||
@ -562,7 +566,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'duration': 10,
|
'duration': 10,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -621,8 +624,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
|
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Normal age-gate video (No vevo, embed allowed), available via embed page
|
# Age-gated videos
|
||||||
{
|
{
|
||||||
|
'note': 'Age-gated video (No vevo, embed allowed)',
|
||||||
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
|
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'HtVdAasjOgU',
|
'id': 'HtVdAasjOgU',
|
||||||
@ -634,14 +638,98 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'uploader_id': 'WitcherGame',
|
'uploader_id': 'WitcherGame',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
|
||||||
'upload_date': '20140605',
|
'upload_date': '20140605',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
|
'categories': ['Gaming'],
|
||||||
|
'tags': 'count:17',
|
||||||
|
'channel': 'The Witcher',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
|
||||||
|
'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Age-gated video only available with authentication (unavailable
|
'note': 'Age-gated video with embed allowed in public site',
|
||||||
# via embed page workaround)
|
'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'HsUATh_Nc2U',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Godzilla 2 (Official Video)',
|
||||||
|
'description': 'md5:bf77e03fcae5529475e500129b05668a',
|
||||||
|
'duration': 177,
|
||||||
|
'uploader': 'FlyingKitty',
|
||||||
|
'uploader_id': 'FlyingKitty900',
|
||||||
|
'upload_date': '20200408',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
|
||||||
|
'age_limit': 18,
|
||||||
|
'categories': ['Entertainment'],
|
||||||
|
'tags': ['Flyingkitty', 'godzilla 2'],
|
||||||
|
'channel': 'FlyingKitty',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
|
||||||
|
'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'note': 'Age-gated video embeddable only with clientScreen=EMBED',
|
||||||
|
'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'Tq92D6wQ1mg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
|
||||||
|
'description': 'md5:17eccca93a786d51bc67646756894066',
|
||||||
|
'duration': 106,
|
||||||
|
'uploader': 'Projekt Melody',
|
||||||
|
'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
|
||||||
|
'upload_date': '20191227',
|
||||||
|
'age_limit': 18,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
|
||||||
|
'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
|
||||||
|
'categories': ['Entertainment'],
|
||||||
|
'channel': 'Projekt Melody',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
|
||||||
|
'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'note': 'Non-Age-gated non-embeddable video',
|
||||||
|
'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'MeJVWBSsPAY',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
|
||||||
|
'description': 'Fan Video. Music & Lyrics by OOMPH!.',
|
||||||
|
'duration': 210,
|
||||||
|
'uploader': 'Herr Lurik',
|
||||||
|
'uploader_id': 'st3in234',
|
||||||
|
'upload_date': '20130730',
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/st3in234',
|
||||||
|
'age_limit': 0,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
|
||||||
|
'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
|
||||||
|
'categories': ['Music'],
|
||||||
|
'channel': 'Herr Lurik',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
|
||||||
|
'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
|
||||||
|
'artist': 'OOMPH!',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'note': 'Non-bypassable age-gated video',
|
||||||
|
'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'note': 'Age-gated video only available with authentication (not via embed workaround)',
|
||||||
'url': 'XgnwCQzjau8',
|
'url': 'XgnwCQzjau8',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
|
||||||
},
|
},
|
||||||
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
|
# video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
|
||||||
# YouTube Red ad is not captured for creator
|
# YouTube Red ad is not captured for creator
|
||||||
@ -670,17 +758,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'lqQg6PlCWgI',
|
'id': 'lqQg6PlCWgI',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
||||||
|
'description': r're:(?s)(?:.+\s)?HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
|
||||||
'duration': 6085,
|
'duration': 6085,
|
||||||
'upload_date': '20150827',
|
'upload_date': '20150827',
|
||||||
'uploader_id': 'olympic',
|
'uploader_id': 'olympic',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
|
||||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
'uploader': r're:Olympics?',
|
||||||
'uploader': 'Olympic',
|
'age_limit': 0,
|
||||||
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
|
||||||
|
'categories': ['Sports'],
|
||||||
|
'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
|
||||||
|
'channel': 'Olympics',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
|
||||||
|
'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': 'requires avconv',
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
# Non-square pixels
|
# Non-square pixels
|
||||||
{
|
{
|
||||||
@ -840,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'lsguqyKfVQg',
|
'id': 'lsguqyKfVQg',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
|
'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
|
||||||
'alt_title': 'Dark Walk - Position Music',
|
'alt_title': 'Dark Walk',
|
||||||
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
|
'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
|
||||||
'duration': 133,
|
'duration': 133,
|
||||||
'upload_date': '20151119',
|
'upload_date': '20151119',
|
||||||
'uploader_id': 'IronSoulElf',
|
'uploader_id': 'IronSoulElf',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
|
||||||
'uploader': 'IronSoulElf',
|
'uploader': 'IronSoulElf',
|
||||||
'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
|
||||||
'track': 'Dark Walk - Position Music',
|
'track': 'Dark Walk',
|
||||||
'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
|
'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
|
||||||
'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
|
'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -1301,11 +1395,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
# Hack for lazy extractors until more generic solution is implemented
|
if parse_qs(url).get('list', [None])[0]:
|
||||||
# (see #28780)
|
|
||||||
from .youtube import parse_qs
|
|
||||||
qs = parse_qs(url)
|
|
||||||
if qs.get('list', [None])[0]:
|
|
||||||
return False
|
return False
|
||||||
return super(YoutubeIE, cls).suitable(url)
|
return super(YoutubeIE, cls).suitable(url)
|
||||||
|
|
||||||
@ -1455,7 +1545,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if player_url.startswith('//'):
|
if player_url.startswith('//'):
|
||||||
player_url = 'https:' + player_url
|
player_url = 'https:' + player_url
|
||||||
elif not re.match(r'https?://', player_url):
|
elif not re.match(r'https?://', player_url):
|
||||||
player_url = compat_urlparse.urljoin(
|
player_url = compat_urllib_parse.urljoin(
|
||||||
'https://www.youtube.com', player_url)
|
'https://www.youtube.com', player_url)
|
||||||
return player_url
|
return player_url
|
||||||
|
|
||||||
@ -1537,9 +1627,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _unthrottle_format_urls(self, video_id, player_url, formats):
|
def _unthrottle_format_urls(self, video_id, player_url, formats):
|
||||||
for fmt in formats:
|
for fmt in formats:
|
||||||
parsed_fmt_url = compat_urlparse.urlparse(fmt['url'])
|
parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
|
||||||
qs = compat_urlparse.parse_qs(parsed_fmt_url.query)
|
n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
|
||||||
n_param = qs.get('n')
|
|
||||||
if not n_param:
|
if not n_param:
|
||||||
continue
|
continue
|
||||||
n_param = n_param[-1]
|
n_param = n_param[-1]
|
||||||
@ -1547,9 +1636,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if n_response is None:
|
if n_response is None:
|
||||||
# give up if descrambling failed
|
# give up if descrambling failed
|
||||||
break
|
break
|
||||||
qs['n'] = [n_response]
|
fmt['url'] = update_url(
|
||||||
fmt['url'] = compat_urlparse.urlunparse(
|
parsed_fmt_url, query_update={'n': [n_response]})
|
||||||
parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
|
||||||
|
# from yt-dlp, with tweaks
|
||||||
|
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The 'STS' entry of ytcfg is used when ytcfg is a dict and the value is a
    non-zero integer; otherwise the player code is searched for a
    five-digit timestamp.  Without a player_url this either raises
    ExtractorError (fatal) or reports a warning and returns None.
    """
    sts = None
    if isinstance(ytcfg, dict):
        sts = int_or_none(ytcfg.get('STS'))
    if sts:
        return sts

    # Nothing usable in ytcfg: fall back to the player JS
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self._downloader.report_warning(error_msg)
        return

    player_code = self._get_player_code(video_id, player_url)
    found = self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', player_code or '',
        'JS player signature timestamp', group='sts', fatal=fatal)
    return int_or_none(found)
|
||||||
|
|
||||||
def _mark_watched(self, video_id, player_response):
|
def _mark_watched(self, video_id, player_response):
|
||||||
playback_url = url_or_none(try_get(
|
playback_url = url_or_none(try_get(
|
||||||
@ -1557,20 +1666,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
|
lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
|
||||||
if not playback_url:
|
if not playback_url:
|
||||||
return
|
return
|
||||||
parsed_playback_url = compat_urlparse.urlparse(playback_url)
|
|
||||||
qs = compat_urlparse.parse_qs(parsed_playback_url.query)
|
|
||||||
|
|
||||||
# cpn generation algorithm is reverse engineered from base.js.
|
# cpn generation algorithm is reverse engineered from base.js.
|
||||||
# In fact it works even with dummy cpn.
|
# In fact it works even with dummy cpn.
|
||||||
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
||||||
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
|
cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
|
||||||
|
|
||||||
qs.update({
|
playback_url = update_url(
|
||||||
'ver': ['2'],
|
playback_url, query_update={
|
||||||
'cpn': [cpn],
|
'ver': ['2'],
|
||||||
})
|
'cpn': [cpn],
|
||||||
playback_url = compat_urlparse.urlunparse(
|
})
|
||||||
parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
|
||||||
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
playback_url, video_id, 'Marking watched',
|
playback_url, video_id, 'Marking watched',
|
||||||
@ -1675,6 +1781,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
|
webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
|
||||||
|
|
||||||
player_response = None
|
player_response = None
|
||||||
|
player_url = None
|
||||||
if webpage:
|
if webpage:
|
||||||
player_response = self._extract_yt_initial_variable(
|
player_response = self._extract_yt_initial_variable(
|
||||||
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
|
||||||
@ -1683,27 +1790,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
player_response = self._call_api(
|
player_response = self._call_api(
|
||||||
'player', {'videoId': video_id}, video_id)
|
'player', {'videoId': video_id}, video_id)
|
||||||
|
|
||||||
playability_status = player_response.get('playabilityStatus') or {}
|
def is_agegated(playability):
|
||||||
if playability_status.get('reason') == 'Sign in to confirm your age':
|
if not isinstance(playability, dict):
|
||||||
video_info = self._download_webpage(
|
return
|
||||||
base_url + 'get_video_info', video_id,
|
|
||||||
'Refetching age-gated info webpage',
|
if playability.get('desktopLegacyAgeGateReason'):
|
||||||
'unable to download video info webpage', query={
|
return True
|
||||||
'video_id': video_id,
|
|
||||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
reasons = filter(None, (playability.get(r) for r in ('status', 'reason')))
|
||||||
'html5': 1,
|
AGE_GATE_REASONS = (
|
||||||
# See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
|
'confirm your age', 'age-restricted', 'inappropriate', # reason
|
||||||
'c': 'TVHTML5',
|
'age_verification_required', 'age_check_required', # status
|
||||||
'cver': '6.20180913',
|
)
|
||||||
}, fatal=False)
|
return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
|
||||||
if video_info:
|
|
||||||
pr = self._parse_json(
|
def get_playability_status(response):
|
||||||
try_get(
|
return try_get(response, lambda x: x['playabilityStatus'], dict) or {}
|
||||||
compat_parse_qs(video_info),
|
|
||||||
lambda x: x['player_response'][0], compat_str) or '{}',
|
playability_status = get_playability_status(player_response)
|
||||||
video_id, fatal=False)
|
if (is_agegated(playability_status)
|
||||||
if pr and isinstance(pr, dict):
|
and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
|
||||||
player_response = pr
|
|
||||||
|
self.report_age_confirmation()
|
||||||
|
|
||||||
|
# Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
|
||||||
|
pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
|
||||||
|
|
||||||
|
# Use signatureTimestamp if available
|
||||||
|
# Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026
|
||||||
|
player_url = self._extract_player_url(webpage)
|
||||||
|
ytcfg = self._extract_ytcfg(video_id, webpage)
|
||||||
|
sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
|
||||||
|
if sts:
|
||||||
|
pb_context['signatureTimestamp'] = sts
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'playbackContext': {'contentPlaybackContext': pb_context},
|
||||||
|
'contentCheckOk': True,
|
||||||
|
'racyCheckOk': True,
|
||||||
|
'context': {
|
||||||
|
'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
|
||||||
|
'thirdParty': {'embedUrl': 'https://google.com'},
|
||||||
|
},
|
||||||
|
'videoId': video_id,
|
||||||
|
}
|
||||||
|
headers = {
|
||||||
|
'X-YouTube-Client-Name': '85',
|
||||||
|
'X-YouTube-Client-Version': '2.0',
|
||||||
|
'Origin': 'https://www.youtube.com'
|
||||||
|
}
|
||||||
|
|
||||||
|
video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
|
||||||
|
age_gate_status = get_playability_status(video_info)
|
||||||
|
if age_gate_status.get('status') == 'OK':
|
||||||
|
player_response = video_info
|
||||||
|
playability_status = age_gate_status
|
||||||
|
|
||||||
trailer_video_id = try_get(
|
trailer_video_id = try_get(
|
||||||
playability_status,
|
playability_status,
|
||||||
@ -1785,7 +1926,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
itags = []
|
itags = []
|
||||||
itag_qualities = {}
|
itag_qualities = {}
|
||||||
player_url = None
|
|
||||||
q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
|
q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
|
||||||
streaming_data = player_response.get('streamingData') or {}
|
streaming_data = player_response.get('streamingData') or {}
|
||||||
streaming_formats = streaming_data.get('formats') or []
|
streaming_formats = streaming_data.get('formats') or []
|
||||||
@ -1929,15 +2069,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for container in (video_details, microformat):
|
for container in (video_details, microformat):
|
||||||
for thumbnail in (try_get(
|
for thumbnail in try_get(
|
||||||
container,
|
container,
|
||||||
lambda x: x['thumbnail']['thumbnails'], list) or []):
|
lambda x: x['thumbnail']['thumbnails'], list) or []:
|
||||||
thumbnail_url = thumbnail.get('url')
|
thumbnail_url = url_or_none(thumbnail.get('url'))
|
||||||
if not thumbnail_url:
|
if not thumbnail_url:
|
||||||
continue
|
continue
|
||||||
thumbnails.append({
|
thumbnails.append({
|
||||||
'height': int_or_none(thumbnail.get('height')),
|
'height': int_or_none(thumbnail.get('height')),
|
||||||
'url': thumbnail_url,
|
'url': update_url(thumbnail_url, query=None, fragment=None),
|
||||||
'width': int_or_none(thumbnail.get('width')),
|
'width': int_or_none(thumbnail.get('width')),
|
||||||
})
|
})
|
||||||
if thumbnails:
|
if thumbnails:
|
||||||
@ -1956,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
or microformat.get('lengthSeconds')) \
|
or microformat.get('lengthSeconds')) \
|
||||||
or parse_duration(search_meta('duration'))
|
or parse_duration(search_meta('duration'))
|
||||||
is_live = video_details.get('isLive')
|
is_live = video_details.get('isLive')
|
||||||
owner_profile_url = microformat.get('ownerProfileUrl')
|
|
||||||
|
def gen_owner_profile_url():
|
||||||
|
yield microformat.get('ownerProfileUrl')
|
||||||
|
yield extract_attributes(self._search_regex(
|
||||||
|
r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
|
||||||
|
get_element_by_attribute('itemprop', 'author', webpage),
|
||||||
|
'owner_profile_url', default='')).get('href')
|
||||||
|
|
||||||
|
owner_profile_url = next(
|
||||||
|
(x for x in map(url_or_none, gen_owner_profile_url()) if x),
|
||||||
|
None)
|
||||||
|
|
||||||
if not player_url:
|
if not player_url:
|
||||||
player_url = self._extract_player_url(webpage)
|
player_url = self._extract_player_url(webpage)
|
||||||
@ -2041,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
info[d_k] = parse_duration(query[k][0])
|
info[d_k] = parse_duration(query[k][0])
|
||||||
|
|
||||||
if video_description:
|
if video_description:
|
||||||
|
# Youtube Music Auto-generated description
|
||||||
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
|
mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
|
||||||
if mobj:
|
if mobj:
|
||||||
release_year = mobj.group('release_year')
|
release_year = mobj.group('release_year')
|
||||||
@ -2115,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
|
lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
|
||||||
info['location'] = stl
|
info['location'] = stl
|
||||||
else:
|
else:
|
||||||
mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
|
# •? doesn't match, but [•]? does; \xa0 = non-breaking space
|
||||||
|
mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl)
|
||||||
if mobj:
|
if mobj:
|
||||||
info.update({
|
info.update({
|
||||||
'series': mobj.group(1),
|
'series': mobj.group(1),
|
||||||
@ -2126,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
vpir,
|
vpir,
|
||||||
lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
|
lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
|
||||||
list) or []):
|
list) or []):
|
||||||
tbr = tlb.get('toggleButtonRenderer') or {}
|
tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
|
||||||
for getter, regex in [(
|
for getter, regex in [(
|
||||||
lambda x: x['defaultText']['accessibility']['accessibilityData'],
|
lambda x: x['defaultText']['accessibility']['accessibilityData'],
|
||||||
r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
|
r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
|
||||||
@ -2142,6 +2294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
sbr_tooltip = try_get(
|
sbr_tooltip = try_get(
|
||||||
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
|
vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
|
||||||
if sbr_tooltip:
|
if sbr_tooltip:
|
||||||
|
# however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
|
||||||
like_count, dislike_count = sbr_tooltip.split(' / ')
|
like_count, dislike_count = sbr_tooltip.split(' / ')
|
||||||
info.update({
|
info.update({
|
||||||
'like_count': str_to_int(like_count),
|
'like_count': str_to_int(like_count),
|
||||||
@ -2179,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
elif mrr_title == 'Song':
|
elif mrr_title == 'Song':
|
||||||
info['track'] = mrr_contents_text
|
info['track'] = mrr_contents_text
|
||||||
|
|
||||||
|
# this is not extraction but spelunking!
|
||||||
|
carousel_lockups = traverse_obj(
|
||||||
|
initial_data,
|
||||||
|
('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
|
||||||
|
'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
|
||||||
|
'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
|
||||||
|
expected_type=dict) or []
|
||||||
|
# try to reproduce logic from metadataRowContainerRenderer above (if it still is)
|
||||||
|
fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
|
||||||
|
# multiple_songs ?
|
||||||
|
if len(carousel_lockups) > 1:
|
||||||
|
fields = fields[-1:]
|
||||||
|
for info_row in traverse_obj(
|
||||||
|
carousel_lockups,
|
||||||
|
(0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
|
||||||
|
expected_type=dict):
|
||||||
|
row_title = traverse_obj(info_row, ('title', 'simpleText'))
|
||||||
|
row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
|
||||||
|
if not row_text:
|
||||||
|
continue
|
||||||
|
for name, field in fields:
|
||||||
|
if name == row_title and not info.get(field):
|
||||||
|
info[field] = row_text
|
||||||
|
|
||||||
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
|
for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
|
||||||
v = info.get(s_k)
|
v = info.get(s_k)
|
||||||
if v:
|
if v:
|
||||||
@ -2411,7 +2588,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'tags': list,
|
'tags': list,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -2438,7 +2614,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'categories': ['News & Politics'],
|
'categories': ['News & Politics'],
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -2458,7 +2633,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
'categories': ['News & Politics'],
|
'categories': ['News & Politics'],
|
||||||
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
|
'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -3043,8 +3217,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
item_id = self._match_id(url)
|
||||||
url = compat_urlparse.urlunparse(
|
url = update_url(url, netloc='www.youtube.com')
|
||||||
compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
|
|
||||||
# Handle both video/playlist URLs
|
# Handle both video/playlist URLs
|
||||||
qs = parse_qs(url)
|
qs = parse_qs(url)
|
||||||
video_id = qs.get('v', [None])[0]
|
video_id = qs.get('v', [None])[0]
|
||||||
@ -3144,11 +3317,7 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
if YoutubeTabIE.suitable(url):
|
if YoutubeTabIE.suitable(url):
|
||||||
return False
|
return False
|
||||||
# Hack for lazy extractors until more generic solution is implemented
|
if parse_qs(url).get('v', [None])[0]:
|
||||||
# (see #28780)
|
|
||||||
from .youtube import parse_qs
|
|
||||||
qs = parse_qs(url)
|
|
||||||
if qs.get('v', [None])[0]:
|
|
||||||
return False
|
return False
|
||||||
return super(YoutubePlaylistIE, cls).suitable(url)
|
return super(YoutubePlaylistIE, cls).suitable(url)
|
||||||
|
|
||||||
@ -3178,7 +3347,6 @@ class YoutubeYtBeIE(InfoExtractor):
|
|||||||
'categories': ['Nonprofits & Activism'],
|
'categories': ['Nonprofits & Activism'],
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'noplaylist': True,
|
'noplaylist': True,
|
||||||
@ -3288,9 +3456,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
qs = parse_qs(url)
|
||||||
query = (qs.get('search_query') or qs.get('q'))[0]
|
query = (qs.get('search_query') or qs.get('q'))[-1]
|
||||||
params = qs.get('sp', ('',))[0]
|
params = qs.get('sp', ('',))[-1]
|
||||||
return self.playlist_result(self._search_results(query, params), query, query)
|
return self.playlist_result(self._search_results(query, params), query, query)
|
||||||
|
|
||||||
|
|
||||||
|
@ -201,7 +201,7 @@ class JSInterpreter(object):
|
|||||||
def __init__(self, msg, *args, **kwargs):
|
def __init__(self, msg, *args, **kwargs):
|
||||||
expr = kwargs.pop('expr', None)
|
expr = kwargs.pop('expr', None)
|
||||||
if expr is not None:
|
if expr is not None:
|
||||||
msg = '{0} in: {1!r}'.format(msg.rstrip(), expr[:100])
|
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
|
||||||
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
|
||||||
|
|
||||||
class JS_RegExp(object):
|
class JS_RegExp(object):
|
||||||
@ -699,7 +699,7 @@ class JSInterpreter(object):
|
|||||||
""" assert, but without risk of getting optimized out """
|
""" assert, but without risk of getting optimized out """
|
||||||
if not cndn:
|
if not cndn:
|
||||||
memb = member
|
memb = member
|
||||||
raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)
|
raise self.Exception('{memb} {msg}'.format(**locals()), expr=expr)
|
||||||
|
|
||||||
def eval_method():
|
def eval_method():
|
||||||
if (variable, member) == ('console', 'debug'):
|
if (variable, member) == ('console', 'debug'):
|
||||||
|
@ -42,6 +42,7 @@ from .compat import (
|
|||||||
compat_HTMLParser,
|
compat_HTMLParser,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_basestring,
|
compat_basestring,
|
||||||
|
compat_casefold,
|
||||||
compat_chr,
|
compat_chr,
|
||||||
compat_collections_abc,
|
compat_collections_abc,
|
||||||
compat_cookiejar,
|
compat_cookiejar,
|
||||||
@ -54,18 +55,18 @@ from .compat import (
|
|||||||
compat_integer_types,
|
compat_integer_types,
|
||||||
compat_kwargs,
|
compat_kwargs,
|
||||||
compat_os_name,
|
compat_os_name,
|
||||||
compat_parse_qs,
|
compat_re_Match,
|
||||||
compat_shlex_quote,
|
compat_shlex_quote,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_struct_pack,
|
compat_struct_pack,
|
||||||
compat_struct_unpack,
|
compat_struct_unpack,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_parse_qs as compat_parse_qs,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
compat_urllib_request,
|
compat_urllib_request,
|
||||||
compat_urlparse,
|
|
||||||
compat_xpath,
|
compat_xpath,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -80,12 +81,12 @@ def register_socks_protocols():
|
|||||||
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
|
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
|
||||||
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
|
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
|
||||||
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
|
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
|
||||||
if scheme not in compat_urlparse.uses_netloc:
|
if scheme not in compat_urllib_parse.uses_netloc:
|
||||||
compat_urlparse.uses_netloc.append(scheme)
|
compat_urllib_parse.uses_netloc.append(scheme)
|
||||||
|
|
||||||
|
|
||||||
# This is not clearly defined otherwise
|
# Unfavoured alias
|
||||||
compiled_regex_type = type(re.compile(''))
|
compiled_regex_type = compat_re_Match
|
||||||
|
|
||||||
|
|
||||||
def random_user_agent():
|
def random_user_agent():
|
||||||
@ -2725,7 +2726,7 @@ def make_socks_conn_class(base_class, socks_proxy):
|
|||||||
assert issubclass(base_class, (
|
assert issubclass(base_class, (
|
||||||
compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
|
compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
|
||||||
|
|
||||||
url_components = compat_urlparse.urlparse(socks_proxy)
|
url_components = compat_urllib_parse.urlparse(socks_proxy)
|
||||||
if url_components.scheme.lower() == 'socks5':
|
if url_components.scheme.lower() == 'socks5':
|
||||||
socks_type = ProxyType.SOCKS5
|
socks_type = ProxyType.SOCKS5
|
||||||
elif url_components.scheme.lower() in ('socks', 'socks4'):
|
elif url_components.scheme.lower() in ('socks', 'socks4'):
|
||||||
@ -3673,7 +3674,7 @@ def remove_quotes(s):
|
|||||||
|
|
||||||
|
|
||||||
def url_basename(url):
|
def url_basename(url):
|
||||||
path = compat_urlparse.urlparse(url).path
|
path = compat_urllib_parse.urlparse(url).path
|
||||||
return path.strip('/').split('/')[-1]
|
return path.strip('/').split('/')[-1]
|
||||||
|
|
||||||
|
|
||||||
@ -3693,7 +3694,7 @@ def urljoin(base, path):
|
|||||||
if not isinstance(base, compat_str) or not re.match(
|
if not isinstance(base, compat_str) or not re.match(
|
||||||
r'^(?:https?:)?//', base):
|
r'^(?:https?:)?//', base):
|
||||||
return None
|
return None
|
||||||
return compat_urlparse.urljoin(base, path)
|
return compat_urllib_parse.urljoin(base, path)
|
||||||
|
|
||||||
|
|
||||||
class HEADRequest(compat_urllib_request.Request):
|
class HEADRequest(compat_urllib_request.Request):
|
||||||
@ -4091,6 +4092,10 @@ def escape_url(url):
|
|||||||
).geturl()
|
).geturl()
|
||||||
|
|
||||||
|
|
||||||
|
def parse_qs(url):
|
||||||
|
return compat_parse_qs(compat_urllib_parse.urlparse(url).query)
|
||||||
|
|
||||||
|
|
||||||
def read_batch_urls(batch_fd):
|
def read_batch_urls(batch_fd):
|
||||||
def fixup(url):
|
def fixup(url):
|
||||||
if not isinstance(url, compat_str):
|
if not isinstance(url, compat_str):
|
||||||
@ -4111,14 +4116,28 @@ def urlencode_postdata(*args, **kargs):
|
|||||||
return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
|
return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def update_url(url, **kwargs):
|
||||||
|
"""Replace URL components specified by kwargs
|
||||||
|
url: compat_str or parsed URL tuple
|
||||||
|
if query_update is in kwargs, update query with
|
||||||
|
its value instead of replacing (overrides any `query`)
|
||||||
|
returns: compat_str
|
||||||
|
"""
|
||||||
|
if not kwargs:
|
||||||
|
return compat_urllib_parse.urlunparse(url) if isinstance(url, tuple) else url
|
||||||
|
if not isinstance(url, tuple):
|
||||||
|
url = compat_urllib_parse.urlparse(url)
|
||||||
|
query = kwargs.pop('query_update', None)
|
||||||
|
if query:
|
||||||
|
qs = compat_parse_qs(url.query)
|
||||||
|
qs.update(query)
|
||||||
|
kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
|
||||||
|
kwargs = compat_kwargs(kwargs)
|
||||||
|
return compat_urllib_parse.urlunparse(url._replace(**kwargs))
|
||||||
|
|
||||||
|
|
||||||
def update_url_query(url, query):
|
def update_url_query(url, query):
|
||||||
if not query:
|
return update_url(url, query_update=query)
|
||||||
return url
|
|
||||||
parsed_url = compat_urlparse.urlparse(url)
|
|
||||||
qs = compat_parse_qs(parsed_url.query)
|
|
||||||
qs.update(query)
|
|
||||||
return compat_urlparse.urlunparse(parsed_url._replace(
|
|
||||||
query=compat_urllib_parse_urlencode(qs, True)))
|
|
||||||
|
|
||||||
|
|
||||||
def update_Request(req, url=None, data=None, headers={}, query={}):
|
def update_Request(req, url=None, data=None, headers={}, query={}):
|
||||||
@ -5586,7 +5605,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
|
|||||||
|
|
||||||
if proxy == '__noproxy__':
|
if proxy == '__noproxy__':
|
||||||
return None # No Proxy
|
return None # No Proxy
|
||||||
if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
|
if compat_urllib_parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
|
||||||
req.add_header('Ytdl-socks-proxy', proxy)
|
req.add_header('Ytdl-socks-proxy', proxy)
|
||||||
# youtube-dl's http/https handlers do wrapping the socket with socks
|
# youtube-dl's http/https handlers do wrapping the socket with socks
|
||||||
return None
|
return None
|
||||||
@ -6024,14 +6043,6 @@ def traverse_obj(obj, *paths, **kwargs):
|
|||||||
str = compat_str
|
str = compat_str
|
||||||
|
|
||||||
is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
|
is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
|
||||||
# stand-in until compat_re_Match is added
|
|
||||||
compat_re_Match = type(re.match('a', 'a'))
|
|
||||||
# stand-in until casefold.py is added
|
|
||||||
try:
|
|
||||||
''.casefold()
|
|
||||||
compat_casefold = lambda s: s.casefold()
|
|
||||||
except AttributeError:
|
|
||||||
compat_casefold = lambda s: s.lower()
|
|
||||||
casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
|
casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
|
||||||
|
|
||||||
if isinstance(expected_type, type):
|
if isinstance(expected_type, type):
|
||||||
|
Loading…
Reference in New Issue
Block a user