mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-18 22:52:05 +00:00
Compare commits
No commits in common. "7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438" and "9f6c03a00602eb1119e43a522cf50682f6d6a6dd" have entirely different histories.
7e8b3f9439
...
9f6c03a006
@ -70,6 +70,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||||||
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
# self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
|
||||||
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
# self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
|
||||||
|
|
||||||
|
def test_youtube_extract(self):
|
||||||
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
||||||
|
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||||
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
def test_facebook_matching(self):
|
def test_facebook_matching(self):
|
||||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268'))
|
||||||
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793'))
|
||||||
|
@ -39,16 +39,6 @@ class TestExecution(unittest.TestCase):
|
|||||||
_, stderr = p.communicate()
|
_, stderr = p.communicate()
|
||||||
self.assertFalse(stderr)
|
self.assertFalse(stderr)
|
||||||
|
|
||||||
def test_lazy_extractors(self):
|
|
||||||
try:
|
|
||||||
subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
|
|
||||||
subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
|
|
||||||
finally:
|
|
||||||
try:
|
|
||||||
os.remove('youtube_dl/extractor/lazy_extractors.py')
|
|
||||||
except (IOError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -1,26 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
# Allow direct execution
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import unittest
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
||||||
|
|
||||||
|
|
||||||
from youtube_dl.extractor import YoutubeIE
|
|
||||||
|
|
||||||
|
|
||||||
class TestYoutubeMisc(unittest.TestCase):
|
|
||||||
def test_youtube_extract(self):
|
|
||||||
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
|
|
||||||
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
|
||||||
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
@ -11,7 +11,6 @@ from ..compat import (
|
|||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@ -26,10 +25,8 @@ from ..utils import (
|
|||||||
js_to_json,
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
strip_or_none,
|
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
@ -764,17 +761,8 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# custom redirection to www.bbc.com
|
# custom redirection to www.bbc.com
|
||||||
# also, video with window.__INITIAL_DATA__
|
|
||||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': 'p02xzws1',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': "Pluto may have 'nitrogen glaciers'",
|
|
||||||
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
|
|
||||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
|
||||||
'timestamp': 1437785037,
|
|
||||||
'upload_date': '20150725',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
@ -1176,29 +1164,12 @@ class BBCIE(BBCCoUkIE):
|
|||||||
continue
|
continue
|
||||||
formats, subtitles = self._download_media_selector(item_id)
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
item_desc = None
|
|
||||||
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
|
||||||
if blocks:
|
|
||||||
summary = []
|
|
||||||
for block in blocks:
|
|
||||||
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
|
||||||
if text:
|
|
||||||
summary.append(text)
|
|
||||||
if summary:
|
|
||||||
item_desc = '\n\n'.join(summary)
|
|
||||||
item_time = None
|
|
||||||
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
|
||||||
if try_get(meta, lambda x: x['label']) == 'Published':
|
|
||||||
item_time = unified_timestamp(meta.get('timestamp'))
|
|
||||||
break
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': item_id,
|
'id': item_id,
|
||||||
'title': item_title,
|
'title': item_title,
|
||||||
'thumbnail': item.get('holdingImageUrl'),
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'timestamp': item_time,
|
|
||||||
'description': strip_or_none(item_desc),
|
|
||||||
})
|
})
|
||||||
for resp in (initial_data.get('data') or {}).values():
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
name = resp.get('name')
|
name = resp.get('name')
|
||||||
|
@ -4,12 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
try_get,
|
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
@ -118,18 +116,6 @@ class GoIE(AdobePassIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'VDKA22600213',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Pilot',
|
|
||||||
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -163,27 +149,11 @@ class GoIE(AdobePassIE):
|
|||||||
brand = site_info.get('brand')
|
brand = site_info.get('brand')
|
||||||
if not video_id or not site_info:
|
if not video_id or not site_info:
|
||||||
webpage = self._download_webpage(url, display_id or video_id)
|
webpage = self._download_webpage(url, display_id or video_id)
|
||||||
data = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
|
||||||
'data', default='{}'),
|
|
||||||
display_id or video_id, fatal=False)
|
|
||||||
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
|
||||||
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
|
||||||
video_id = None
|
|
||||||
if layout:
|
|
||||||
video_id = try_get(
|
|
||||||
layout,
|
|
||||||
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
|
||||||
compat_str)
|
|
||||||
if not video_id:
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(
|
(
|
||||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||||
r'data-video-id=["\']*(VDKA\w+)',
|
r'data-video-id=["\']*(VDKA\w+)',
|
||||||
# page.analytics.videoIdCode
|
|
||||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
|
||||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
||||||
), webpage, 'video id', default=video_id)
|
), webpage, 'video id', default=video_id)
|
||||||
|
@ -65,6 +65,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
|
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
|
||||||
|
|
||||||
|
def _ids_to_results(self, ids):
|
||||||
|
return [
|
||||||
|
self.url_result(vid_id, 'Youtube', video_id=vid_id)
|
||||||
|
for vid_id in ids]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
"""
|
"""
|
||||||
Attempt to log in to YouTube.
|
Attempt to log in to YouTube.
|
||||||
@ -1214,9 +1219,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
# Hack for lazy extractors until more generic solution is implemented
|
|
||||||
# (see #28780)
|
|
||||||
from .youtube import parse_qs
|
|
||||||
qs = parse_qs(url)
|
qs = parse_qs(url)
|
||||||
if qs.get('list', [None])[0]:
|
if qs.get('list', [None])[0]:
|
||||||
return False
|
return False
|
||||||
@ -2908,9 +2910,6 @@ class YoutubePlaylistIE(InfoExtractor):
|
|||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
if YoutubeTabIE.suitable(url):
|
if YoutubeTabIE.suitable(url):
|
||||||
return False
|
return False
|
||||||
# Hack for lazy extractors until more generic solution is implemented
|
|
||||||
# (see #28780)
|
|
||||||
from .youtube import parse_qs
|
|
||||||
qs = parse_qs(url)
|
qs = parse_qs(url)
|
||||||
if qs.get('v', [None])[0]:
|
if qs.get('v', [None])[0]:
|
||||||
return False
|
return False
|
||||||
|
Loading…
Reference in New Issue
Block a user