Compare commits

..

5 Commits

Author SHA1 Message Date
dirkf
5add3f4373 Merge branch 'pukkandan-yt-searchurl' into yt-dl-master
Closes #27749
2022-02-04 03:50:32 +00:00
pukkandan
a3373da70c
Merge branch 'UP/youtube-dl' into dl/YoutubeSearchURLIE 2022-01-30 01:07:28 +05:30
pukkandan
2c4cb134a9
Fix max_results 2022-01-30 00:54:22 +05:30
pukkandan
bfe72723d8
Use itertools.islice 2022-01-30 00:49:55 +05:30
pukkandan
ed99d68bdd
Add back YoutubeSearchURLIE 2022-01-30 00:41:47 +05:30
3 changed files with 74 additions and 79 deletions

View File

@ -66,9 +66,9 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab'])
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab'])
# def test_youtube_search_matching(self): def test_youtube_search_matching(self):
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_facebook_matching(self): def test_facebook_matching(self):
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))

View File

@ -1606,7 +1606,7 @@ from .youtube import (
YoutubeRecommendedIE, YoutubeRecommendedIE,
YoutubeSearchDateIE, YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
#YoutubeSearchURLIE, YoutubeSearchURLIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,

View File

@ -342,6 +342,60 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
} }
def _search_results(self, query, params):
data = {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20201021.03.00',
}
},
'query': query,
}
if params:
data['params'] = params
for page_num in itertools.count(1):
search = self._download_json(
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
video_id='query "%s"' % query,
note='Downloading page %s' % page_num,
errnote='Unable to download API page', fatal=False,
data=json.dumps(data).encode('utf8'),
headers={'content-type': 'application/json'})
if not search:
break
slr_contents = try_get(
search,
(lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
list)
if not slr_contents:
break
for slr_content in slr_contents:
isr_contents = try_get(
slr_content,
lambda x: x['itemSectionRenderer']['contents'],
list)
if not isr_contents:
continue
for content in isr_contents:
if not isinstance(content, dict):
continue
video = content.get('videoRenderer')
if not isinstance(video, dict):
continue
video_id = video.get('videoId')
if not video_id:
continue
yield self._extract_video(video)
token = try_get(
slr_contents,
lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
compat_str)
if not token:
break
data['continuation'] = token
class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com' IE_DESC = 'YouTube.com'
@ -2135,7 +2189,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
(?: (?:
(?:channel|c|user|feed|hashtag)/| (?:channel|c|user|feed|hashtag)/|
(?:playlist|watch)\?.*?\blist=| (?:playlist|watch)\?.*?\blist=|
(?!(?:watch|embed|v|e)\b) (?!(?:watch|embed|v|e|results)\b)
) )
(?P<id>[^/?\#&]+) (?P<id>[^/?\#&]+)
''' '''
@ -3148,93 +3202,35 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
_MAX_RESULTS = float('inf')
IE_NAME = 'youtube:search' IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch' _SEARCH_KEY = 'ytsearch'
_SEARCH_PARAMS = None _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only
_MAX_RESULTS = float('inf')
_TESTS = [] _TESTS = []
def _entries(self, query, n):
data = {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20201021.03.00',
}
},
'query': query,
}
if self._SEARCH_PARAMS:
data['params'] = self._SEARCH_PARAMS
total = 0
for page_num in itertools.count(1):
search = self._download_json(
'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
video_id='query "%s"' % query,
note='Downloading page %s' % page_num,
errnote='Unable to download API page', fatal=False,
data=json.dumps(data).encode('utf8'),
headers={'content-type': 'application/json'})
if not search:
break
slr_contents = try_get(
search,
(lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
list)
if not slr_contents:
break
for slr_content in slr_contents:
isr_contents = try_get(
slr_content,
lambda x: x['itemSectionRenderer']['contents'],
list)
if not isr_contents:
continue
for content in isr_contents:
if not isinstance(content, dict):
continue
video = content.get('videoRenderer')
if not isinstance(video, dict):
continue
video_id = video.get('videoId')
if not video_id:
continue
yield self._extract_video(video)
total += 1
if total == n:
return
token = try_get(
slr_contents,
lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
compat_str)
if not token:
break
data['continuation'] = token
def _get_n_results(self, query, n): def _get_n_results(self, query, n):
"""Get a specified number of results for a query""" """Get a specified number of results for a query"""
return self.playlist_result(self._entries(query, n), query) entries = itertools.islice(self._search_results(query, self._SEARCH_PARAMS), 0, None if n == float('inf') else n)
return self.playlist_result(entries, query, query)
class YoutubeSearchDateIE(YoutubeSearchIE): class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date' IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate' _SEARCH_KEY = 'ytsearchdate'
IE_DESC = 'YouTube.com searches, newest videos first' IE_DESC = 'YouTube.com searches, newest videos first'
_SEARCH_PARAMS = 'CAI%3D' _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date
_TESTS = []
r""" class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):
class YoutubeSearchURLIE(YoutubeSearchIE): IE_DESC = 'YouTube search URLs with sorting and filter support'
IE_DESC = 'YouTube.com search URLs' IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
IE_NAME = 'youtube:search_url' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5, 'playlist_mincount': 5,
'info_dict': { 'info_dict': {
'id': 'youtube-dl test video',
'title': 'youtube-dl test video', 'title': 'youtube-dl test video',
} }
}, { }, {
@ -3243,11 +3239,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
query = compat_urllib_parse_unquote_plus(mobj.group('query')) query = (qs.get('search_query') or qs.get('q'))[0]
webpage = self._download_webpage(url, query) params = qs.get('sp', ('',))[0]
return self.playlist_result(self._process_page(webpage), playlist_title=query) return self.playlist_result(self._search_results(query, params), query, query)
"""
class YoutubeFeedsInfoExtractor(YoutubeTabIE): class YoutubeFeedsInfoExtractor(YoutubeTabIE):