[rbgtum] Add new extractor (#31305)

* [rbgtum] Add new extractor
* Small update, force CI

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
[YouTube] Fix tests
2024-12-20 23:52:07 +00:00 · 2023-02-09 11:25:28 +00:00 · 2023-02-09 11:01:57 +00:00 · 2023-02-09 11:01:57 +00:00 · 2023-02-09 11:01:57 +00:00 · 2023-02-09 11:01:57 +00:00
9 changed files with 558 additions and 210 deletions
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@ -13,6 +13,11 @@ sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 lazy_extractors_filename = sys.argv[1]
 if os.path.exists(lazy_extractors_filename):
    os.remove(lazy_extractors_filename)
 # Py2: may be confused by leftover lazy_extractors.pyc
 try:
    os.remove(lazy_extractors_filename + 'c')
 except OSError:
    pass
 from youtube_dl.extractor import _ALL_CLASSES
 from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
@ -22,7 +27,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
 module_contents = [
    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
-    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
+    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
    # needed for suitable() methods of Youtube extractor (see #28780)
    'from youtube_dl.utils import parse_qs\n',
 ]
 ie_template = '''
 class {name}({bases}):
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@ -11,6 +11,7 @@ from test.helper import try_rm
 from youtube_dl import YoutubeDL
 from youtube_dl.utils import DownloadError
 def _download_restricted(url, filename, age):
@ -26,7 +27,10 @@ def _download_restricted(url, filename, age):
    ydl.add_default_info_extractors()
    json_filename = os.path.splitext(filename)[0] + '.info.json'
    try_rm(json_filename)
    try:
        ydl.download([url])
    except DownloadError:
        try_rm(json_filename)
    res = os.path.exists(json_filename)
    try_rm(json_filename)
    return res
@ -38,12 +42,12 @@ class TestAgeRestriction(unittest.TestCase):
        self.assertFalse(_download_restricted(url, filename, age))
    def test_youtube(self):
-        self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10)
+        self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10)
    def test_youporn(self):
        self._assert_restricted(
-            'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+            'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/',
-            '505835.mp4', 2, old_age=25)
+            '16715086.mp4', 2, old_age=25)
 if __name__ == '__main__':
--- a/test/test_compat.py
+++ b/test/test_compat.py
@ -48,10 +48,11 @@ class TestCompat(unittest.TestCase):
    def test_all_present(self):
        import youtube_dl.compat
-        all_names = youtube_dl.compat.__all__
+        all_names = sorted(
-        present_names = set(filter(
+            youtube_dl.compat.__all__ + youtube_dl.compat.legacy)
        present_names = set(map(compat_str, filter(
            lambda c: '_' in c and not c.startswith('_'),
-            dir(youtube_dl.compat))) - set(['unicode_literals'])
+            dir(youtube_dl.compat)))) - set(['unicode_literals'])
        self.assertEqual(all_names, sorted(present_names))
    def test_compat_urllib_parse_unquote(self):
--- a/test/test_execution.py
+++ b/test/test_execution.py
@ -40,12 +40,14 @@ class TestExecution(unittest.TestCase):
        self.assertFalse(stderr)
    def test_lazy_extractors(self):
        lazy_extractors = 'youtube_dl/extractor/lazy_extractors.py'
        try:
-            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', lazy_extractors], cwd=rootDir, stdout=_DEV_NULL)
            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
        finally:
            for x in ['', 'c'] if sys.version_info[0] < 3 else ['']:
                try:
-                os.remove('youtube_dl/extractor/lazy_extractors.py')
+                    os.remove(lazy_extractors + x)
                except (IOError, OSError):
                    pass
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@ -21,6 +21,10 @@ import subprocess
 import sys
 import xml.etree.ElementTree
 # naming convention
 # 'compat_' + Python3_name.replace('.', '_')
 # other aliases exist for convenience and/or legacy
 # deal with critical unicode/str things first
 try:
    # Python 2
@ -28,6 +32,7 @@ try:
        unicode, basestring, unichr
    )
    from .casefold import casefold as compat_casefold
 except NameError:
    compat_str, compat_basestring, compat_chr = (
        str, str, chr
@ -53,16 +58,15 @@ try:
    import urllib.parse as compat_urllib_parse
 except ImportError:  # Python 2
    import urllib as compat_urllib_parse
    import urlparse as _urlparse
    for a in dir(_urlparse):
        if not hasattr(compat_urllib_parse, a):
            setattr(compat_urllib_parse, a, getattr(_urlparse, a))
    del _urlparse
-try:
+# unfavoured aliases
-    from urllib.parse import urlparse as compat_urllib_parse_urlparse
+compat_urlparse = compat_urllib_parse
-except ImportError:  # Python 2
+compat_urllib_parse_urlparse = compat_urllib_parse.urlparse
    from urlparse import urlparse as compat_urllib_parse_urlparse
 try:
    import urllib.parse as compat_urlparse
 except ImportError:  # Python 2
    import urlparse as compat_urlparse
 try:
    import urllib.response as compat_urllib_response
@ -73,6 +77,7 @@ try:
    import http.cookiejar as compat_cookiejar
 except ImportError:  # Python 2
    import cookielib as compat_cookiejar
 compat_http_cookiejar = compat_cookiejar
 if sys.version_info[0] == 2:
    class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
@ -84,11 +89,13 @@ if sys.version_info[0] == 2:
            compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
 else:
    compat_cookiejar_Cookie = compat_cookiejar.Cookie
 compat_http_cookiejar_Cookie = compat_cookiejar_Cookie
 try:
    import http.cookies as compat_cookies
 except ImportError:  # Python 2
    import Cookie as compat_cookies
 compat_http_cookies = compat_cookies
 if sys.version_info[0] == 2:
    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
@ -98,6 +105,7 @@ if sys.version_info[0] == 2:
            return super(compat_cookies_SimpleCookie, self).load(rawdata)
 else:
    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
 compat_http_cookies_SimpleCookie = compat_cookies_SimpleCookie
 try:
    import html.entities as compat_html_entities
@ -2351,16 +2359,19 @@ try:
    from urllib.error import HTTPError as compat_HTTPError
 except ImportError:  # Python 2
    from urllib2 import HTTPError as compat_HTTPError
 compat_urllib_HTTPError = compat_HTTPError
 try:
    from urllib.request import urlretrieve as compat_urlretrieve
 except ImportError:  # Python 2
    from urllib import urlretrieve as compat_urlretrieve
 compat_urllib_request_urlretrieve = compat_urlretrieve
 try:
    from html.parser import HTMLParser as compat_HTMLParser
 except ImportError:  # Python 2
    from HTMLParser import HTMLParser as compat_HTMLParser
 compat_html_parser_HTMLParser = compat_HTMLParser
 try:  # Python 2
    from HTMLParser import HTMLParseError as compat_HTMLParseError
@ -2374,6 +2385,7 @@ except ImportError:  # Python <3.4
        # and uniform cross-version exception handling
        class compat_HTMLParseError(Exception):
            pass
 compat_html_parser_HTMLParseError = compat_HTMLParseError
 try:
    from subprocess import DEVNULL
@ -2390,6 +2402,8 @@ try:
    from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
    from urllib.parse import unquote as compat_urllib_parse_unquote
    from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
    _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
                else re.compile(r'([\x00-\x7f]+)'))
@ -2456,9 +2470,6 @@ except ImportError:  # Python 2
        string = string.replace('+', ' ')
        return compat_urllib_parse_unquote(string, encoding, errors)
 try:
    from urllib.parse import urlencode as compat_urllib_parse_urlencode
 except ImportError:  # Python 2
    # Python 2 will choke in urlencode on mixture of byte and unicode strings.
    # Possible solutions are to either port it from python 3 with all
    # the friends or manually ensure input query contains only byte strings.
@ -2480,7 +2491,62 @@ except ImportError:  # Python 2
        def encode_list(l):
            return [encode_elem(e) for e in l]
-        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+        return compat_urllib_parse._urlencode(encode_elem(query), doseq=doseq)
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Split a query string into a list of (name, value) text pairs.

        Fields are separated by '&' or ';'.  '+' is turned into a space and
        percent-escapes are decoded with `encoding`/`errors` before each part
        is coerced to compat_str.

        keep_blank_values -- also return fields whose value is empty
        strict_parsing -- raise ValueError for a field without '=' instead
                          of silently skipping it
        """
        def _decode(part):
            # '+' means space in a query string; undo that before unquoting
            return compat_str(compat_urllib_parse_unquote(
                part.replace('+', ' '), encoding=encoding, errors=errors))

        result = []
        for chunk in qs.split('&'):
            for field in chunk.split(';'):
                # Empty fields are only significant when parsing strictly
                if not (field or strict_parsing):
                    continue
                key, sep, val = field.partition('=')
                if not sep:
                    if strict_parsing:
                        raise ValueError('bad query field: %r' % (field,))
                    # Handle case of a control-name with no equal sign
                    if not keep_blank_values:
                        continue
                if val or keep_blank_values:
                    result.append((_decode(key), _decode(val)))
        return result
    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict mapping each name to a list of values.

        Values for a repeated name are collected in the order in which
        _parse_qsl() returns them; all arguments are passed through to it.
        """
        grouped = {}
        for key, val in _parse_qsl(qs, keep_blank_values, strict_parsing,
                                   encoding=encoding, errors=errors):
            grouped.setdefault(key, []).append(val)
        return grouped
    # Keep the stock urlencode reachable as `_urlencode` before the public
    # `urlencode` attribute is replaced below: code earlier in this branch
    # calls compat_urllib_parse._urlencode().
    setattr(compat_urllib_parse, '_urlencode',
            getattr(compat_urllib_parse, 'urlencode'))
    # Install the compat implementations as attributes of compat_urllib_parse.
    # NOTE(review): 'parse_unquote' does not look like a urllib.parse function
    # name; 'unquote' may have been intended -- confirm before relying on it.
    for name, fix in (
            ('unquote_to_bytes', compat_urllib_parse_unquote_to_bytes),
            ('parse_unquote', compat_urllib_parse_unquote),
            ('unquote_plus', compat_urllib_parse_unquote_plus),
            ('urlencode', compat_urllib_parse_urlencode),
            ('parse_qs', compat_parse_qs)):
        setattr(compat_urllib_parse, name, fix)
 compat_urllib_parse_parse_qs = compat_parse_qs
 try:
    from urllib.request import DataHandler as compat_urllib_request_DataHandler
@ -2520,6 +2586,7 @@ try:
    from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 except ImportError:  # Python 2.6
    from xml.parsers.expat import ExpatError as compat_xml_parse_error
 compat_xml_etree_ElementTree_ParseError = compat_xml_parse_error
 etree = xml.etree.ElementTree
@ -2533,10 +2600,11 @@ try:
    # xml.etree.ElementTree.Element is a method in Python <=2.6 and
    # the following will crash with:
    #  TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
-    isinstance(None, xml.etree.ElementTree.Element)
+    isinstance(None, etree.Element)
    from xml.etree.ElementTree import Element as compat_etree_Element
 except TypeError:  # Python <=2.6
    from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
 compat_xml_etree_ElementTree_Element = compat_etree_Element
 if sys.version_info[0] >= 3:
    def compat_etree_fromstring(text):
@ -2592,6 +2660,7 @@ else:
            if k == uri or v == prefix:
                del etree._namespace_map[k]
        etree._namespace_map[uri] = prefix
 compat_xml_etree_register_namespace = compat_etree_register_namespace
 if sys.version_info < (2, 7):
    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
@ -2603,53 +2672,6 @@ if sys.version_info < (2, 7):
 else:
    compat_xpath = lambda xpath: xpath
 try:
    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        qs, _coerce_result = qs, compat_str
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError('bad query field: %r' % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                name = nv[0].replace('+', ' ')
                name = compat_urllib_parse_unquote(
                    name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = compat_urllib_parse_unquote(
                    value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r
    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
 compat_os_name = os._name if os.name == 'java' else os.name
@ -2774,6 +2796,8 @@ else:
    else:
        compat_expanduser = os.path.expanduser
 compat_os_path_expanduser = compat_expanduser
 if compat_os_name == 'nt' and sys.version_info < (3, 8):
    # os.path.realpath on Windows does not follow symbolic links
@ -2785,6 +2809,8 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
 else:
    compat_realpath = os.path.realpath
 compat_os_path_realpath = compat_realpath
 if sys.version_info < (3, 0):
    def compat_print(s):
@ -2805,11 +2831,15 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
 else:
    compat_getpass = getpass.getpass
 compat_getpass_getpass = compat_getpass
 try:
    compat_input = raw_input
 except NameError:  # Python 3
    compat_input = input
 # Python < 2.6.5 require kwargs to be bytes
 try:
    def _testfunc(x):
@ -2915,15 +2945,16 @@ else:
                lines = _lines
        return _terminal_size(columns, lines)
 try:
    itertools.count(start=0, step=1)
    compat_itertools_count = itertools.count
 except TypeError:  # Python 2.6
    def compat_itertools_count(start=0, step=1):
        n = start
        while True:
-            yield n
+            yield start
-            n += step
+            start += step
 if sys.version_info >= (3, 0):
    from tokenize import tokenize as compat_tokenize_tokenize
@ -3075,6 +3106,8 @@ if sys.version_info < (3, 3):
 else:
    compat_b64decode = base64.b64decode
 compat_base64_b64decode = compat_b64decode
 if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
    # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
@ -3094,30 +3127,53 @@ else:
        return ctypes.WINFUNCTYPE(*args, **kwargs)
-__all__ = [
+legacy = [
    'compat_HTMLParseError',
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_Struct',
    'compat_b64decode',
    'compat_cookiejar',
    'compat_cookiejar_Cookie',
    'compat_cookies',
    'compat_cookies_SimpleCookie',
    'compat_etree_Element',
    'compat_etree_register_namespace',
    'compat_expanduser',
    'compat_getpass',
    'compat_parse_qs',
    'compat_realpath',
    'compat_urllib_parse_parse_qs',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlencode',
    'compat_urllib_parse_urlparse',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
 ]
 __all__ = [
    'compat_html_parser_HTMLParseError',
    'compat_html_parser_HTMLParser',
    'compat_Struct',
    'compat_base64_b64decode',
    'compat_basestring',
    'compat_casefold',
    'compat_chr',
    'compat_collections_abc',
    'compat_collections_chain_map',
-    'compat_cookiejar',
+    'compat_http_cookiejar',
-    'compat_cookiejar_Cookie',
+    'compat_http_cookiejar_Cookie',
-    'compat_cookies',
+    'compat_http_cookies',
-    'compat_cookies_SimpleCookie',
+    'compat_http_cookies_SimpleCookie',
    'compat_ctypes_WINFUNCTYPE',
    'compat_etree_Element',
    'compat_etree_fromstring',
    'compat_etree_register_namespace',
    'compat_expanduser',
    'compat_filter',
    'compat_get_terminal_size',
    'compat_getenv',
-    'compat_getpass',
+    'compat_getpass_getpass',
    'compat_html_entities',
    'compat_html_entities_html5',
    'compat_http_client',
@ -3131,11 +3187,11 @@ __all__ = [
    'compat_numeric_types',
    'compat_ord',
    'compat_os_name',
-    'compat_parse_qs',
+    'compat_os_path_expanduser',
    'compat_os_path_realpath',
    'compat_print',
    'compat_re_Match',
    'compat_re_Pattern',
    'compat_realpath',
    'compat_setenv',
    'compat_shlex_quote',
    'compat_shlex_split',
@ -3147,17 +3203,14 @@ __all__ = [
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlencode',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
-    'compat_urlparse',
+    'compat_urllib_request_urlretrieve',
-    'compat_urlretrieve',
+    'compat_urllib_HTTPError',
-    'compat_xml_parse_error',
+    'compat_xml_etree_ElementTree_Element',
    'compat_xml_etree_ElementTree_ParseError',
    'compat_xml_etree_register_namespace',
    'compat_xpath',
    'compat_zip',
    'workaround_optparse_bug9161',
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -1010,6 +1010,10 @@ from .raywenderlich import (
    RayWenderlichIE,
    RayWenderlichCourseIE,
 )
 from .rbgtum import (
    RbgTumIE,
    RbgTumCourseIE,
 )
 from .rbmaradio import RBMARadioIE
 from .rds import RDSIE
 from .redbulltv import (
--- a/youtube_dl/extractor/rbgtum.py
+++ b/youtube_dl/extractor/rbgtum.py
@ -0,0 +1,97 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 class RbgTumIE(InfoExtractor):
    """Extractor for a single lecture page under https://live.rbg.tum.de/w/."""
    _VALID_URL = r'https://live\.rbg\.tum\.de/w/(?P<id>.+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        }
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        }
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        }
    }, ]
    def _real_extract(self, url):
        """Return the info dict (id, title, series, formats) for one lecture.

        The page is downloaded once; the HLS manifest URL, the lecture title
        (first <h1>) and the series name (from <title>) are scraped from it.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Only URL characters may precede '.m3u8', so the match cannot start
        # at an unrelated https:// link earlier on the same line
        m3u8 = self._html_search_regex(
            r'(https://[^\s"\'<>]+?\.m3u8)', webpage, 'm3u8')
        # Non-greedy capture: with (?s) a greedy group would run on to the
        # last </h1> in the page rather than stopping at the first one
        lecture_title = self._html_search_regex(
            r'(?si)<h1.*?>(.*?)</h1>', webpage, 'title')
        # Series name is the <title> text up to the first ':', with an
        # optional 'TUM-Live | ' prefix skipped
        lecture_series_title = self._html_search_regex(
            r'(?s)<title\b[^>]*>\s*(?:TUM-Live\s\|\s?)?([^:]+):?.*?</title>', webpage, 'series')
        formats = self._extract_m3u8_formats(
            m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': lecture_title,
            'series': lecture_series_title,
            'formats': formats,
        }
 class RbgTumCourseIE(InfoExtractor):
    """Playlist extractor for a course page under https://live.rbg.tum.de/course/."""
    _VALID_URL = r'https://live\.rbg\.tum\.de/course/(?P<id>.+)'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, ]
    def _real_extract(self, url):
        """Return a playlist with one RbgTumIE entry per lecture link on the page.

        Links ending in /cam, /pres or /chat (case-insensitive) are skipped.
        """
        course_id = self._match_id(url)
        webpage = self._download_webpage(url, course_id)
        # Non-greedy capture: with (?s) a greedy group would run on to the
        # last </h1> in the page rather than stopping at the first one
        lecture_series_title = self._html_search_regex(
            r'(?si)<h1.*?>(.*?)</h1>', webpage, 'title')
        # [^"]+ keeps the capture inside the href value; a greedy '.+' would
        # swallow any following attributes on the same line and thereby also
        # defeat the /cam, /pres, /chat lookbehinds
        lecture_paths = re.findall(
            r'(?i)href="/w/([^"]+)(?<!/cam)(?<!/pres)(?<!/chat)"', webpage)
        lecture_urls = [
            self.url_result(
                'https://live.rbg.tum.de/w/' + lecture_path, ie=RbgTumIE.ie_key())
            for lecture_path in lecture_paths]
        return self.playlist_result(lecture_urls, course_id, lecture_series_title)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -14,12 +14,11 @@ from ..compat import (
    compat_chr,
    compat_HTTPError,
    compat_map as map,
    compat_parse_qs,
    compat_str,
    compat_urllib_parse,
    compat_urllib_parse_parse_qs as compat_parse_qs,
    compat_urllib_parse_unquote_plus,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urlparse,
 )
 from ..jsinterp import JSInterpreter
 from ..utils import (
@ -28,20 +27,25 @@ from ..utils import (
    dict_get,
    error_to_compat_str,
    float_or_none,
    extract_attributes,
    get_element_by_attribute,
    int_or_none,
    js_to_json,
    mimetype2ext,
    parse_codecs,
    parse_duration,
    parse_qs,
    qualities,
    remove_start,
    smuggle_url,
    str_or_none,
    str_to_int,
    traverse_obj,
    try_get,
    unescapeHTML,
    unified_strdate,
    unsmuggle_url,
    update_url,
    update_url_query,
    url_or_none,
    urlencode_postdata,
@ -49,10 +53,6 @@ from ..utils import (
 )
 def parse_qs(url):
    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
@ -286,15 +286,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
-    def _call_api(self, ep, query, video_id, fatal=True):
+    def _call_api(self, ep, query, video_id, fatal=True, headers=None):
        data = self._DEFAULT_API_DATA.copy()
        data.update(query)
        real_headers = {'content-type': 'application/json'}
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
            note='Downloading API JSON', errnote='Unable to download API page',
            data=json.dumps(data).encode('utf8'), fatal=fatal,
-            headers={'content-type': 'application/json'},
+            headers=real_headers,
            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
    def _extract_yt_initial_data(self, video_id, webpage):
@ -515,6 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'channel': 'Philipp Hagemeister',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
@ -524,10 +528,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'duration': 10,
                'view_count': int,
                'like_count': int,
-                'dislike_count': int,
+                'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
                'start_time': 1,
                'end_time': 9,
-            }
+            },
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
@ -562,7 +566,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
@ -621,8 +624,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
            }
        },
-        # Normal age-gate video (No vevo, embed allowed), available via embed page
+        # Age-gated videos
        {
            'note': 'Age-gated video (No vevo, embed allowed)',
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
@ -634,14 +638,98 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'uploader_id': 'WitcherGame',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                'upload_date': '20140605',
                'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',
                'age_limit': 18,
                'categories': ['Gaming'],
                'tags': 'count:17',
                'channel': 'The Witcher',
                'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
                'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
                'view_count': int,
                'like_count': int,
            },
        },
        {
-            # Age-gated video only available with authentication (unavailable
+            'note': 'Age-gated video with embed allowed in public site',
-            # via embed page workaround)
+            'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
            'info_dict': {
                'id': 'HsUATh_Nc2U',
                'ext': 'mp4',
                'title': 'Godzilla 2 (Official Video)',
                'description': 'md5:bf77e03fcae5529475e500129b05668a',
                'duration': 177,
                'uploader': 'FlyingKitty',
                'uploader_id': 'FlyingKitty900',
                'upload_date': '20200408',
                'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
                'age_limit': 18,
                'categories': ['Entertainment'],
                'tags': ['Flyingkitty', 'godzilla 2'],
                'channel': 'FlyingKitty',
                'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
                'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
                'view_count': int,
                'like_count': int,
            },
        },
        {
            'note': 'Age-gated video embeddable only with clientScreen=EMBED',
            'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
            'info_dict': {
                'id': 'Tq92D6wQ1mg',
                'ext': 'mp4',
                'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
                'description': 'md5:17eccca93a786d51bc67646756894066',
                'duration': 106,
                'uploader': 'Projekt Melody',
                'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                'upload_date': '20191227',
                'age_limit': 18,
                'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
                'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
                'categories': ['Entertainment'],
                'channel': 'Projekt Melody',
                'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
                'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
                'view_count': int,
                'like_count': int,
            },
        },
        {
            'note': 'Non-Age-gated non-embeddable video',
            'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
            'info_dict': {
                'id': 'MeJVWBSsPAY',
                'ext': 'mp4',
                'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
                'description': 'Fan Video. Music & Lyrics by OOMPH!.',
                'duration': 210,
                'uploader': 'Herr Lurik',
                'uploader_id': 'st3in234',
                'upload_date': '20130730',
                'uploader_url': 'http://www.youtube.com/user/st3in234',
                'age_limit': 0,
                'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/hqdefault.jpg',
                'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
                'categories': ['Music'],
                'channel': 'Herr Lurik',
                'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
                'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
                'artist': 'OOMPH!',
                'view_count': int,
                'like_count': int,
            },
        },
        {
            'note': 'Non-bypassable age-gated video',
            'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
            'only_matching': True,
        },
        {
            'note': 'Age-gated video only available with authentication (not via embed workaround)',
            'url': 'XgnwCQzjau8',
            'only_matching': True,
            'skip': '''This video has been removed for violating YouTube's Community Guidelines''',
        },
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
@ -670,17 +758,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
                'description': r're:(?s)(?:.+\s)?HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games\s*',
                'duration': 6085,
                'upload_date': '20150827',
                'uploader_id': 'olympic',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
-                'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+                'uploader': r're:Olympics?',
-                'uploader': 'Olympic',
+                'age_limit': 0,
-                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
+                'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
                'categories': ['Sports'],
                'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
                'channel': 'Olympics',
                'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
                'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
                'view_count': int,
                'like_count': int,
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
        # Non-square pixels
        {
@ -840,16 +934,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
-                'alt_title': 'Dark Walk - Position Music',
+                'alt_title': 'Dark Walk',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'duration': 133,
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                'uploader': 'IronSoulElf',
-                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'creator': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
-                'track': 'Dark Walk - Position Music',
+                'track': 'Dark Walk',
-                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
+                'artist': r're:Todd Haberman[;,]\s+Daniel Law Heath and Aaron Kaplan',
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
@ -1301,11 +1395,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    @classmethod
    def suitable(cls, url):
-        # Hack for lazy extractors until more generic solution is implemented
+        if parse_qs(url).get('list', [None])[0]:
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('list', [None])[0]:
            return False
        return super(YoutubeIE, cls).suitable(url)
@ -1455,7 +1545,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        if player_url.startswith('//'):
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
-            player_url = compat_urlparse.urljoin(
+            player_url = compat_urllib_parse.urljoin(
                'https://www.youtube.com', player_url)
        return player_url
@ -1537,9 +1627,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
    def _unthrottle_format_urls(self, video_id, player_url, formats):
        for fmt in formats:
-            parsed_fmt_url = compat_urlparse.urlparse(fmt['url'])
+            parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url'])
-            qs = compat_urlparse.parse_qs(parsed_fmt_url.query)
+            n_param = compat_parse_qs(parsed_fmt_url.query).get('n')
            n_param = qs.get('n')
            if not n_param:
                continue
            n_param = n_param[-1]
@ -1547,9 +1636,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            if n_response is None:
                # give up if descrambling failed
                break
-            qs['n'] = [n_response]
+            fmt['url'] = update_url(
-            fmt['url'] = compat_urlparse.urlunparse(
+                parsed_fmt_url, query_update={'n': [n_response]})
-                parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
    # from yt-dlp, with tweaks
    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
        """
        Extract signatureTimestamp (sts)
        Required to tell API what sig/player version is in use.
        """
        sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
        if not sts:
            # Attempt to extract from player
            if player_url is None:
                error_msg = 'Cannot extract signature timestamp without player_url.'
                if fatal:
                    raise ExtractorError(error_msg)
                self._downloader.report_warning(error_msg)
                return
            code = self._get_player_code(video_id, player_url)
            sts = int_or_none(self._search_regex(
                r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code or '',
                'JS player signature timestamp', group='sts', fatal=fatal))
        return sts
    def _mark_watched(self, video_id, player_response):
        playback_url = url_or_none(try_get(
@ -1557,20 +1666,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
        if not playback_url:
            return
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)
        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
-        qs.update({
+        playback_url = update_url(
            playback_url, query_update={
                'ver': ['2'],
                'cpn': [cpn],
            })
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
        self._download_webpage(
            playback_url, video_id, 'Marking watched',
@ -1675,6 +1781,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
        player_response = None
        player_url = None
        if webpage:
            player_response = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
@ -1683,27 +1790,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            player_response = self._call_api(
                'player', {'videoId': video_id}, video_id)
-        playability_status = player_response.get('playabilityStatus') or {}
+        def is_agegated(playability):
-        if playability_status.get('reason') == 'Sign in to confirm your age':
+            if not isinstance(playability, dict):
-            video_info = self._download_webpage(
+                return
-                base_url + 'get_video_info', video_id,
+
-                'Refetching age-gated info webpage',
+            if playability.get('desktopLegacyAgeGateReason'):
-                'unable to download video info webpage', query={
+                return True
-                    'video_id': video_id,
+
-                    'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+            reasons = filter(None, (playability.get(r) for r in ('status', 'reason')))
-                    'html5': 1,
+            AGE_GATE_REASONS = (
-                    # See https://github.com/ytdl-org/youtube-dl/issues/29333#issuecomment-864049544
+                'confirm your age', 'age-restricted', 'inappropriate',  # reason
-                    'c': 'TVHTML5',
+                'age_verification_required', 'age_check_required',  # status
-                    'cver': '6.20180913',
+            )
-                }, fatal=False)
+            return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
-            if video_info:
+
-                pr = self._parse_json(
+        def get_playability_status(response):
-                    try_get(
+            return try_get(response, lambda x: x['playabilityStatus'], dict) or {}
-                        compat_parse_qs(video_info),
+
-                        lambda x: x['player_response'][0], compat_str) or '{}',
+        playability_status = get_playability_status(player_response)
-                    video_id, fatal=False)
+        if (is_agegated(playability_status)
-                if pr and isinstance(pr, dict):
+                and int_or_none(self._downloader.params.get('age_limit'), default=18) >= 18):
-                    player_response = pr
+
            self.report_age_confirmation()
            # Thanks: https://github.com/yt-dlp/yt-dlp/pull/3233
            pb_context = {'html5Preference': 'HTML5_PREF_WANTS'}
            # Use signatureTimestamp if available
            # Thanks https://github.com/ytdl-org/youtube-dl/issues/31034#issuecomment-1160718026
            player_url = self._extract_player_url(webpage)
            ytcfg = self._extract_ytcfg(video_id, webpage)
            sts = self._extract_signature_timestamp(video_id, player_url, ytcfg)
            if sts:
                pb_context['signatureTimestamp'] = sts
            query = {
                'playbackContext': {'contentPlaybackContext': pb_context},
                'contentCheckOk': True,
                'racyCheckOk': True,
                'context': {
                    'client': {'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'clientVersion': '2.0', 'hl': 'en', 'clientScreen': 'EMBED'},
                    'thirdParty': {'embedUrl': 'https://google.com'},
                },
                'videoId': video_id,
            }
            headers = {
                'X-YouTube-Client-Name': '85',
                'X-YouTube-Client-Version': '2.0',
                'Origin': 'https://www.youtube.com'
            }
            video_info = self._call_api('player', query, video_id, fatal=False, headers=headers)
            age_gate_status = get_playability_status(video_info)
            if age_gate_status.get('status') == 'OK':
                player_response = video_info
                playability_status = age_gate_status
        trailer_video_id = try_get(
            playability_status,
@ -1785,7 +1926,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        formats = []
        itags = []
        itag_qualities = {}
        player_url = None
        q = qualities(['tiny', 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'])
        streaming_data = player_response.get('streamingData') or {}
        streaming_formats = streaming_data.get('formats') or []
@ -1929,15 +2069,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        thumbnails = []
        for container in (video_details, microformat):
-            for thumbnail in (try_get(
+            for thumbnail in try_get(
                    container,
-                    lambda x: x['thumbnail']['thumbnails'], list) or []):
+                    lambda x: x['thumbnail']['thumbnails'], list) or []:
-                thumbnail_url = thumbnail.get('url')
+                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                thumbnails.append({
                    'height': int_or_none(thumbnail.get('height')),
-                    'url': thumbnail_url,
+                    'url': update_url(thumbnail_url, query=None, fragment=None),
                    'width': int_or_none(thumbnail.get('width')),
                })
            if thumbnails:
@ -1956,7 +2096,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            or microformat.get('lengthSeconds')) \
            or parse_duration(search_meta('duration'))
        is_live = video_details.get('isLive')
-        owner_profile_url = microformat.get('ownerProfileUrl')
+
        def gen_owner_profile_url():
            """Yield candidate owner profile URLs in order of preference

               Candidates may be None; the caller is expected to filter them.
            """
            # 1. value given in the microformat data
            yield microformat.get('ownerProfileUrl')
            # 2. href of the <link itemprop="url"> tag found inside the
            # itemprop="author" element of the watch page.
            # `webpage` may be falsy if the page could not be fetched, and
            # get_element_by_attribute() presumably returns None when the
            # element is absent (TODO confirm): substitute '' in both cases
            # so that the search gets a string and falls back to default=''
            author_html = get_element_by_attribute(
                'itemprop', 'author', webpage or '') or ''
            yield extract_attributes(self._search_regex(
                r'''(?s)(<link\b[^>]+\bitemprop\s*=\s*("|')url\2[^>]*>)''',
                author_html,
                'owner_profile_url', default='')).get('href')
        owner_profile_url = next(
            (x for x in map(url_or_none, gen_owner_profile_url()) if x),
            None)
        if not player_url:
            player_url = self._extract_player_url(webpage)
@ -2041,6 +2191,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                        info[d_k] = parse_duration(query[k][0])
        if video_description:
            # Youtube Music Auto-generated description
            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
            if mobj:
                release_year = mobj.group('release_year')
@ -2115,7 +2266,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                            info['location'] = stl
                        else:
-                            mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
+                            # •? doesn't match, but [•]? does; \xa0 = non-breaking space
                            mobj = re.search(r'([^\xa0\s].*?)[\xa0\s]*S(\d+)[\xa0\s]*[•]?[\xa0\s]*E(\d+)', stl)
                            if mobj:
                                info.update({
                                    'series': mobj.group(1),
@ -2126,7 +2278,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            vpir,
                            lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                            list) or []):
-                        tbr = tlb.get('toggleButtonRenderer') or {}
+                        tbr = traverse_obj(tlb, ('segmentedLikeDislikeButtonRenderer', 'likeButton', 'toggleButtonRenderer'), 'toggleButtonRenderer') or {}
                        for getter, regex in [(
                                lambda x: x['defaultText']['accessibility']['accessibilityData'],
                                r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
@ -2142,6 +2294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    sbr_tooltip = try_get(
                        vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                    if sbr_tooltip:
                        # however dislike_count was hidden by YT, as if there could ever be dislikable content on YT
                        like_count, dislike_count = sbr_tooltip.split(' / ')
                        info.update({
                            'like_count': str_to_int(like_count),
@ -2179,6 +2332,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            elif mrr_title == 'Song':
                                info['track'] = mrr_contents_text
            # this is not extraction but spelunking!
            carousel_lockups = traverse_obj(
                initial_data,
                ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
                 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
                 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
                expected_type=dict) or []
            # try to reproduce logic from metadataRowContainerRenderer above (if it still is)
            fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
            # multiple_songs ?
            if len(carousel_lockups) > 1:
                fields = fields[-1:]
            for info_row in traverse_obj(
                    carousel_lockups,
                    (0, 'carouselLockupRenderer', 'infoRows', Ellipsis, 'infoRowRenderer'),
                    expected_type=dict):
                row_title = traverse_obj(info_row, ('title', 'simpleText'))
                row_text = traverse_obj(info_row, 'defaultMetadata', 'expandedMetadata', expected_type=get_text)
                if not row_text:
                    continue
                for name, field in fields:
                    if name == row_title and not info.get(field):
                        info[field] = row_text
        for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
            v = info.get(s_k)
            if v:
@ -2411,7 +2588,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
@ -2438,7 +2614,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
@ -2458,7 +2633,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
@ -3043,8 +3217,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
    def _real_extract(self, url):
        item_id = self._match_id(url)
-        url = compat_urlparse.urlunparse(
+        url = update_url(url, netloc='www.youtube.com')
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
@ -3144,11 +3317,7 @@ class YoutubePlaylistIE(InfoExtractor):
    def suitable(cls, url):
        if YoutubeTabIE.suitable(url):
            return False
-        # Hack for lazy extractors until more generic solution is implemented
+        if parse_qs(url).get('v', [None])[0]:
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)
@ -3178,7 +3347,6 @@ class YoutubeYtBeIE(InfoExtractor):
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
@ -3288,9 +3456,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
    }]
    def _real_extract(self, url):
-        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        qs = parse_qs(url)
-        query = (qs.get('search_query') or qs.get('q'))[0]
+        query = (qs.get('search_query') or qs.get('q'))[-1]
-        params = qs.get('sp', ('',))[0]
+        params = qs.get('sp', ('',))[-1]
        return self.playlist_result(self._search_results(query, params), query, query)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -42,6 +42,7 @@ from .compat import (
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_casefold,
    compat_chr,
    compat_collections_abc,
    compat_cookiejar,
@ -54,18 +55,18 @@ from .compat import (
    compat_integer_types,
    compat_kwargs,
    compat_os_name,
-    compat_parse_qs,
+    compat_re_Match,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_parse_qs as compat_parse_qs,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
 )
@ -80,12 +81,12 @@ def register_socks_protocols():
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
-        if scheme not in compat_urlparse.uses_netloc:
+        if scheme not in compat_urllib_parse.uses_netloc:
-            compat_urlparse.uses_netloc.append(scheme)
+            compat_urllib_parse.uses_netloc.append(scheme)
-# This is not clearly defined otherwise
+# Unfavoured alias
-compiled_regex_type = type(re.compile(''))
+compiled_regex_type = compat_re_Match
 def random_user_agent():
@ -2725,7 +2726,7 @@ def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
-    url_components = compat_urlparse.urlparse(socks_proxy)
+    url_components = compat_urllib_parse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
@ -3673,7 +3674,7 @@ def remove_quotes(s):
 def url_basename(url):
-    path = compat_urlparse.urlparse(url).path
+    path = compat_urllib_parse.urlparse(url).path
    return path.strip('/').split('/')[-1]
@ -3693,7 +3694,7 @@ def urljoin(base, path):
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
-    return compat_urlparse.urljoin(base, path)
+    return compat_urllib_parse.urljoin(base, path)
 class HEADRequest(compat_urllib_request.Request):
@ -4091,6 +4092,10 @@ def escape_url(url):
    ).geturl()
 def parse_qs(url):
    """Return the query string of `url` parsed by compat_parse_qs()"""
    query_string = compat_urllib_parse.urlparse(url).query
    return compat_parse_qs(query_string)
 def read_batch_urls(batch_fd):
    def fixup(url):
        if not isinstance(url, compat_str):
@ -4111,14 +4116,28 @@ def urlencode_postdata(*args, **kargs):
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
-def update_url_query(url, query):
+def update_url(url, **kwargs):
-    if not query:
+    """Replace URL components specified by kwargs
-        return url
+       url: compat_str or parsed URL tuple
-    parsed_url = compat_urlparse.urlparse(url)
+       if query_update is in kwargs, update query with
-    qs = compat_parse_qs(parsed_url.query)
+       its value instead of replacing (overrides any `query`)
       returns: compat_str
    """
    if not kwargs:
        return compat_urllib_parse.urlunparse(url) if isinstance(url, tuple) else url
    if not isinstance(url, tuple):
        url = compat_urllib_parse.urlparse(url)
    query = kwargs.pop('query_update', None)
    if query:
        qs = compat_parse_qs(url.query)
        qs.update(query)
-    return compat_urlparse.urlunparse(parsed_url._replace(
+        kwargs['query'] = compat_urllib_parse_urlencode(qs, True)
-        query=compat_urllib_parse_urlencode(qs, True)))
+        kwargs = compat_kwargs(kwargs)
    return compat_urllib_parse.urlunparse(url._replace(**kwargs))
 def update_url_query(url, query):
    """Return `url` with its query updated from `query`

       Delegates to update_url(), passing `query` as `query_update`
    """
    return update_url(url, query_update=query)
 def update_Request(req, url=None, data=None, headers={}, query={}):
@ -5586,7 +5605,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
        if proxy == '__noproxy__':
            return None  # No Proxy
-        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+        if compat_urllib_parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers take care of wrapping the socket with socks
            return None
@ -6024,14 +6043,6 @@ def traverse_obj(obj, *paths, **kwargs):
    str = compat_str
    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
    # stand-in until compat_re_Match is added
    compat_re_Match = type(re.match('a', 'a'))
    # stand-in until casefold.py is added
    try:
        ''.casefold()
        compat_casefold = lambda s: s.casefold()
    except AttributeError:
        compat_casefold = lambda s: s.lower()
    casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k
    if isinstance(expected_type, type):
Author	SHA1	Message	Date
Valentin Metz	80c0228777	[rbgtum] Add new extractor (#31305 ) * [rbgtum] Add new extractor * Small update, force CI --------- Co-authored-by: dirkf <fieldhouse@gmx.net>	2023-02-09 11:25:28 +00:00
dirkf	bcf597ea17	[YouTube] Fix tests	2023-02-09 11:01:57 +00:00
dirkf	f640916de1	[YouTube] Refresh compat/utils usage * import parse_qs() * import parse_qs in lazy_extractors (clears old TODO) * clean up old compiled lazy_extractors for Py2 * use update_url()	2023-02-09 11:01:57 +00:00
dirkf	b337af9c62	[compat] Update test_compat [skip ci]	2023-02-09 11:01:57 +00:00
dirkf	a6f7d10d44	[utils] Add parse_qs, update_url [skip ci]	2023-02-09 11:01:57 +00:00
dirkf	9ca224b697	[compat] Systematise compat_ naming [skip ci]	2023-02-09 11:01:57 +00:00
dirkf	ce81ae3846	[test] Fix TestAgeRestriction * age restriction may cause DownloadError * update obsolete test URLs [skip ci]	2023-02-09 11:01:57 +00:00
dirkf	2be0cd2616	[YouTube] Add `signatureTimestamp` for age-gate bypass	2023-02-09 11:01:57 +00:00
dirkf	b14b33a2e9	[YouTube] Bypass age-gating for certain restricted videos * Use TVHTML5_SIMPLY_EMBEDDED_PLAYER client * Also add and fix tests * Introduce and use new utility function `update_url()`	2023-02-09 11:01:57 +00:00