mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-26 10:32:02 +00:00
Compare commits
No commits in common. "734dfbb4e3ad4ee4d98609dc902ac864b94033a4" and "8248133e5ee5579316120cbcbff3ba8b713f1017" have entirely different histories.
734dfbb4e3
...
8248133e5e
@ -18,6 +18,7 @@
|
|||||||
"noprogress": false,
|
"noprogress": false,
|
||||||
"outtmpl": "%(id)s.%(ext)s",
|
"outtmpl": "%(id)s.%(ext)s",
|
||||||
"password": null,
|
"password": null,
|
||||||
|
"playlistend": -1,
|
||||||
"playliststart": 1,
|
"playliststart": 1,
|
||||||
"prefer_free_formats": false,
|
"prefer_free_formats": false,
|
||||||
"quiet": false,
|
"quiet": false,
|
||||||
|
@ -121,7 +121,6 @@ def generator(test_case, tname):
|
|||||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||||
if is_playlist and 'playlist' not in test_case:
|
if is_playlist and 'playlist' not in test_case:
|
||||||
params.setdefault('extract_flat', 'in_playlist')
|
params.setdefault('extract_flat', 'in_playlist')
|
||||||
params.setdefault('playlistend', test_case.get('playlist_mincount'))
|
|
||||||
params.setdefault('skip_download', True)
|
params.setdefault('skip_download', True)
|
||||||
|
|
||||||
ydl = YoutubeDL(params, auto_init=False)
|
ydl = YoutubeDL(params, auto_init=False)
|
||||||
|
@ -1,31 +1,22 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..utils import (
|
|
||||||
determine_protocol,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_suitable_downloader(info_dict, params={}):
|
|
||||||
info_dict['protocol'] = determine_protocol(info_dict)
|
|
||||||
info_copy = info_dict.copy()
|
|
||||||
return _get_suitable_downloader(info_copy, params)
|
|
||||||
|
|
||||||
|
|
||||||
# Some of these require get_suitable_downloader
|
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .dash import DashSegmentsFD
|
|
||||||
from .f4m import F4mFD
|
from .f4m import F4mFD
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
from .http import HttpFD
|
from .http import HttpFD
|
||||||
from .rtmp import RtmpFD
|
from .rtmp import RtmpFD
|
||||||
|
from .dash import DashSegmentsFD
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .ism import IsmFD
|
from .ism import IsmFD
|
||||||
from .niconico import NiconicoDmcFD
|
|
||||||
from .external import (
|
from .external import (
|
||||||
get_external_downloader,
|
get_external_downloader,
|
||||||
FFmpegFD,
|
FFmpegFD,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
determine_protocol,
|
||||||
|
)
|
||||||
|
|
||||||
PROTOCOL_MAP = {
|
PROTOCOL_MAP = {
|
||||||
'rtmp': RtmpFD,
|
'rtmp': RtmpFD,
|
||||||
'm3u8_native': HlsFD,
|
'm3u8_native': HlsFD,
|
||||||
@ -35,12 +26,13 @@ PROTOCOL_MAP = {
|
|||||||
'f4m': F4mFD,
|
'f4m': F4mFD,
|
||||||
'http_dash_segments': DashSegmentsFD,
|
'http_dash_segments': DashSegmentsFD,
|
||||||
'ism': IsmFD,
|
'ism': IsmFD,
|
||||||
'niconico_dmc': NiconicoDmcFD,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _get_suitable_downloader(info_dict, params={}):
|
def get_suitable_downloader(info_dict, params={}):
|
||||||
"""Get the downloader class that can handle the info dict."""
|
"""Get the downloader class that can handle the info dict."""
|
||||||
|
protocol = determine_protocol(info_dict)
|
||||||
|
info_dict['protocol'] = protocol
|
||||||
|
|
||||||
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
|
||||||
# return FFmpegFD
|
# return FFmpegFD
|
||||||
@ -51,7 +43,6 @@ def _get_suitable_downloader(info_dict, params={}):
|
|||||||
if ed.can_download(info_dict):
|
if ed.can_download(info_dict):
|
||||||
return ed
|
return ed
|
||||||
|
|
||||||
protocol = info_dict['protocol']
|
|
||||||
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
if protocol.startswith('m3u8') and info_dict.get('is_live'):
|
||||||
return FFmpegFD
|
return FFmpegFD
|
||||||
|
|
||||||
|
@ -1,66 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
try:
|
|
||||||
import threading
|
|
||||||
except ImportError:
|
|
||||||
threading = None
|
|
||||||
|
|
||||||
from .common import FileDownloader
|
|
||||||
from ..downloader import get_suitable_downloader
|
|
||||||
from ..extractor.niconico import NiconicoIE
|
|
||||||
from ..utils import sanitized_Request
|
|
||||||
|
|
||||||
|
|
||||||
class NiconicoDmcFD(FileDownloader):
|
|
||||||
""" Downloading niconico douga from DMC with heartbeat """
|
|
||||||
|
|
||||||
FD_NAME = 'niconico_dmc'
|
|
||||||
|
|
||||||
def real_download(self, filename, info_dict):
|
|
||||||
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
|
||||||
|
|
||||||
ie = NiconicoIE(self.ydl)
|
|
||||||
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
|
||||||
|
|
||||||
fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
|
|
||||||
for ph in self._progress_hooks:
|
|
||||||
fd.add_progress_hook(ph)
|
|
||||||
|
|
||||||
if not threading:
|
|
||||||
self.to_screen('[%s] Threading for Heartbeat not available' % self.FD_NAME)
|
|
||||||
return fd.real_download(filename, info_dict)
|
|
||||||
|
|
||||||
success = download_complete = False
|
|
||||||
timer = [None]
|
|
||||||
heartbeat_lock = threading.Lock()
|
|
||||||
heartbeat_url = heartbeat_info_dict['url']
|
|
||||||
heartbeat_data = heartbeat_info_dict['data'].encode()
|
|
||||||
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
|
||||||
|
|
||||||
request = sanitized_Request(heartbeat_url, heartbeat_data)
|
|
||||||
|
|
||||||
def heartbeat():
|
|
||||||
try:
|
|
||||||
self.ydl.urlopen(request).read()
|
|
||||||
except Exception:
|
|
||||||
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
|
||||||
|
|
||||||
with heartbeat_lock:
|
|
||||||
if not download_complete:
|
|
||||||
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
|
||||||
timer[0].start()
|
|
||||||
|
|
||||||
heartbeat_info_dict['ping']()
|
|
||||||
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
|
||||||
try:
|
|
||||||
heartbeat()
|
|
||||||
if type(fd).__name__ == 'HlsFD':
|
|
||||||
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
|
||||||
success = fd.real_download(filename, info_dict)
|
|
||||||
finally:
|
|
||||||
if heartbeat_lock:
|
|
||||||
with heartbeat_lock:
|
|
||||||
timer[0].cancel()
|
|
||||||
download_complete = True
|
|
||||||
return success
|
|
@ -3,11 +3,8 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
|
||||||
clean_podcast_url,
|
clean_podcast_url,
|
||||||
get_element_by_class,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_codecs,
|
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
)
|
)
|
||||||
@ -17,17 +14,16 @@ class ApplePodcastsIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
'md5': '41dc31cd650143e530d9423b6b5a344f',
|
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000482637777',
|
'id': '1000482637777',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '207 - Whitney Webb Returns',
|
'title': '207 - Whitney Webb Returns',
|
||||||
'description': 'md5:75ef4316031df7b41ced4e7b987f79c6',
|
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
||||||
'upload_date': '20200705',
|
'upload_date': '20200705',
|
||||||
'timestamp': 1593932400,
|
'timestamp': 1593921600,
|
||||||
'duration': 6454,
|
'duration': 6425,
|
||||||
'series': 'The Tim Dillon Show',
|
'series': 'The Tim Dillon Show',
|
||||||
'thumbnail': 're:.+[.](png|jpe?g|webp)',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
@ -43,40 +39,19 @@ class ApplePodcastsIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode_id = self._match_id(url)
|
episode_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, episode_id)
|
webpage = self._download_webpage(url, episode_id)
|
||||||
episode_data = {}
|
|
||||||
ember_data = {}
|
|
||||||
# new page type 2021-11
|
|
||||||
amp_data = self._parse_json(self._search_regex(
|
|
||||||
r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<',
|
|
||||||
webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {}
|
|
||||||
amp_data = try_get(amp_data,
|
|
||||||
lambda a: self._parse_json(
|
|
||||||
next(a[x] for x in iter(a) if episode_id in x),
|
|
||||||
episode_id),
|
|
||||||
dict) or {}
|
|
||||||
amp_data = amp_data.get('d') or []
|
|
||||||
episode_data = try_get(
|
|
||||||
amp_data,
|
|
||||||
lambda a: next(x for x in a
|
|
||||||
if x['type'] == 'podcast-episodes' and x['id'] == episode_id),
|
|
||||||
dict)
|
|
||||||
if not episode_data:
|
|
||||||
# try pre 2021-11 page type: TODO: consider deleting if no longer used
|
|
||||||
ember_data = self._parse_json(self._search_regex(
|
ember_data = self._parse_json(self._search_regex(
|
||||||
r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||||
webpage, 'ember data'), episode_id) or {}
|
webpage, 'ember data'), episode_id)
|
||||||
ember_data = ember_data.get(episode_id) or ember_data
|
ember_data = ember_data.get(episode_id) or ember_data
|
||||||
episode_data = try_get(ember_data, lambda x: x['data'], dict)
|
episode = ember_data['data']['attributes']
|
||||||
episode = episode_data['attributes']
|
|
||||||
description = episode.get('description') or {}
|
description = episode.get('description') or {}
|
||||||
|
|
||||||
series = None
|
series = None
|
||||||
for inc in (amp_data or ember_data.get('included') or []):
|
for inc in (ember_data.get('included') or []):
|
||||||
if inc.get('type') == 'media/podcast':
|
if inc.get('type') == 'media/podcast':
|
||||||
series = try_get(inc, lambda x: x['attributes']['name'])
|
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||||
series = series or clean_html(get_element_by_class('podcast-header__identity', webpage))
|
|
||||||
|
|
||||||
info = [{
|
return {
|
||||||
'id': episode_id,
|
'id': episode_id,
|
||||||
'title': episode['name'],
|
'title': episode['name'],
|
||||||
'url': clean_podcast_url(episode['assetUrl']),
|
'url': clean_podcast_url(episode['assetUrl']),
|
||||||
@ -84,10 +59,4 @@ class ApplePodcastsIE(InfoExtractor):
|
|||||||
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||||
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||||
'series': series,
|
'series': series,
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
}
|
||||||
}]
|
|
||||||
self._sort_formats(info)
|
|
||||||
info = info[0]
|
|
||||||
codecs = parse_codecs(info.get('ext', 'mp3'))
|
|
||||||
info.update(codecs)
|
|
||||||
return info
|
|
||||||
|
@ -12,7 +12,6 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
strip_or_none,
|
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -253,49 +252,3 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
|||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
class ArteTVCategoryIE(ArteTVBaseIE):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.arte.tv/en/videos/politics-and-society/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'politics-and-society',
|
|
||||||
'title': 'Politics and society',
|
|
||||||
'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 13,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return (
|
|
||||||
not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
|
|
||||||
and super(ArteTVCategoryIE, cls).suitable(url))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
items = []
|
|
||||||
for video in re.finditer(
|
|
||||||
r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
|
|
||||||
webpage):
|
|
||||||
video = video.group('url')
|
|
||||||
if video == url:
|
|
||||||
continue
|
|
||||||
if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
|
|
||||||
items.append(video)
|
|
||||||
|
|
||||||
if items:
|
|
||||||
title = (self._og_search_title(webpage, default=None)
|
|
||||||
or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
|
|
||||||
title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)
|
|
||||||
|
|
||||||
result = self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title)
|
|
||||||
if result:
|
|
||||||
description = self._og_search_description(webpage, default=None)
|
|
||||||
if description:
|
|
||||||
result['description'] = description
|
|
||||||
return result
|
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class AudiomackIE(InfoExtractor):
|
class AudiomackIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)'
|
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)'
|
||||||
IE_NAME = 'audiomack'
|
IE_NAME = 'audiomack'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# hosted on audiomack
|
# hosted on audiomack
|
||||||
@ -29,27 +29,25 @@ class AudiomackIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
# audiomack wrapper around soundcloud song
|
# audiomack wrapper around soundcloud song
|
||||||
# Needs new test URL.
|
|
||||||
{
|
{
|
||||||
'add_ie': ['Soundcloud'],
|
'add_ie': ['Soundcloud'],
|
||||||
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
# 'info_dict': {
|
'id': '258901379',
|
||||||
# 'id': '258901379',
|
'ext': 'mp3',
|
||||||
# 'ext': 'mp3',
|
'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
||||||
# 'description': 'mamba day freestyle for the legend Kobe Bryant ',
|
'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
||||||
# 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]',
|
'uploader': 'ILOVEMAKONNEN',
|
||||||
# 'uploader': 'ILOVEMAKONNEN',
|
'upload_date': '20160414',
|
||||||
# 'upload_date': '20160414',
|
}
|
||||||
# }
|
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# URLs end with [uploader name]/song/[uploader title]
|
# URLs end with [uploader name]/[uploader title]
|
||||||
# this title is whatever the user types in, and is rarely
|
# this title is whatever the user types in, and is rarely
|
||||||
# the proper song title. Real metadata is in the api response
|
# the proper song title. Real metadata is in the api response
|
||||||
album_url_tag = self._match_id(url).replace('/song/', '/')
|
album_url_tag = self._match_id(url)
|
||||||
|
|
||||||
# Request the extended version of the api for extra fields like artist and title
|
# Request the extended version of the api for extra fields like artist and title
|
||||||
api_response = self._download_json(
|
api_response = self._download_json(
|
||||||
@ -75,13 +73,13 @@ class AudiomackIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class AudiomackAlbumIE(InfoExtractor):
|
class AudiomackAlbumIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)'
|
_VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)'
|
||||||
IE_NAME = 'audiomack:album'
|
IE_NAME = 'audiomack:album'
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Standard album playlist
|
# Standard album playlist
|
||||||
{
|
{
|
||||||
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape',
|
||||||
'playlist_count': 11,
|
'playlist_count': 15,
|
||||||
'info_dict':
|
'info_dict':
|
||||||
{
|
{
|
||||||
'id': '812251',
|
'id': '812251',
|
||||||
@ -97,24 +95,24 @@ class AudiomackAlbumIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)',
|
'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)',
|
||||||
'id': '837580',
|
'id': '837577',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'uploader': 'Lil Herb a.k.a. G Herbo',
|
'uploader': 'Lil Herb a.k.a. G Herbo',
|
||||||
}
|
}
|
||||||
}],
|
}],
|
||||||
'params': {
|
'params': {
|
||||||
'playliststart': 2,
|
'playliststart': 9,
|
||||||
'playlistend': 2,
|
'playlistend': 9,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
# URLs end with [uploader name]/album/[uploader title]
|
# URLs end with [uploader name]/[uploader title]
|
||||||
# this title is whatever the user types in, and is rarely
|
# this title is whatever the user types in, and is rarely
|
||||||
# the proper song title. Real metadata is in the api response
|
# the proper song title. Real metadata is in the api response
|
||||||
album_url_tag = self._match_id(url).replace('/album/', '/')
|
album_url_tag = self._match_id(url)
|
||||||
result = {'_type': 'playlist', 'entries': []}
|
result = {'_type': 'playlist', 'entries': []}
|
||||||
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
# There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata
|
||||||
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
# Therefore we don't know how many songs the album has and must infi-loop until failure
|
||||||
@ -136,7 +134,7 @@ class AudiomackAlbumIE(InfoExtractor):
|
|||||||
# Pull out the album metadata and add to result (if it exists)
|
# Pull out the album metadata and add to result (if it exists)
|
||||||
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]:
|
||||||
if apikey in api_response and resultkey not in result:
|
if apikey in api_response and resultkey not in result:
|
||||||
result[resultkey] = compat_str(api_response[apikey])
|
result[resultkey] = api_response[apikey]
|
||||||
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
song_id = url_basename(api_response['url']).rpartition('.')[0]
|
||||||
result['entries'].append({
|
result['entries'].append({
|
||||||
'id': compat_str(api_response.get('id', song_id)),
|
'id': compat_str(api_response.get('id', song_id)),
|
||||||
|
@ -12,7 +12,6 @@ from ..compat import (
|
|||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_urllib_error,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
@ -396,17 +395,9 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
href, programme_id, mpd_id=format_id, fatal=False))
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
elif transfer_format == 'hls':
|
elif transfer_format == 'hls':
|
||||||
# TODO: let expected_status be passed into _extract_xxx_formats() instead
|
formats.extend(self._extract_m3u8_formats(
|
||||||
try:
|
|
||||||
fmts = self._extract_m3u8_formats(
|
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id=format_id, fatal=False)
|
m3u8_id=format_id, fatal=False))
|
||||||
except ExtractorError as e:
|
|
||||||
if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
|
|
||||||
and e.exc_info[1].code in (403, 404)):
|
|
||||||
raise
|
|
||||||
fmts = []
|
|
||||||
formats.extend(fmts)
|
|
||||||
elif transfer_format == 'hds':
|
elif transfer_format == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, programme_id, f4m_id=format_id, fatal=False))
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
@ -784,33 +775,21 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'timestamp': 1437785037,
|
'timestamp': 1437785037,
|
||||||
'upload_date': '20150725',
|
'upload_date': '20150725',
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
# video with window.__INITIAL_DATA__ and value as JSON string
|
|
||||||
'url': 'https://www.bbc.com/news/av/world-europe-59468682',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'p0b71qth',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Why France is making this woman a national hero',
|
|
||||||
'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
|
|
||||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
|
||||||
'timestamp': 1638230731,
|
|
||||||
'upload_date': '20211130',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# bbcthreeConfig
|
|
||||||
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p06556y7',
|
'id': 'p06556y7',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Things Not To Say to people that live on council estates',
|
'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
|
||||||
'description': "From being labelled a 'chav', to the presumption that they're 'scroungers', people who live on council estates encounter all kinds of prejudices and false assumptions about themselves, their families, and their lifestyles. Here, eight people discuss the common statements, misconceptions, and clichés that they're tired of hearing.",
|
'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
|
||||||
'duration': 360,
|
|
||||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
# window.__PRELOADED_STATE__
|
# window.__PRELOADED_STATE__
|
||||||
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
|
||||||
@ -1183,16 +1162,9 @@ class BBCIE(BBCCoUkIE):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
initial_data = self._search_regex(
|
initial_data = self._parse_json(self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||||
'quoted preload state', default=None)
|
'preload state', default='{}'), playlist_id, fatal=False)
|
||||||
if initial_data is None:
|
|
||||||
initial_data = self._search_regex(
|
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?})\s*;', webpage,
|
|
||||||
'preload state', default={})
|
|
||||||
else:
|
|
||||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
|
||||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
|
||||||
if initial_data:
|
if initial_data:
|
||||||
def parse_media(media):
|
def parse_media(media):
|
||||||
if not media:
|
if not media:
|
||||||
@ -1233,10 +1205,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
if name == 'media-experience':
|
if name == 'media-experience':
|
||||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
elif name == 'article':
|
elif name == 'article':
|
||||||
for block in (try_get(resp,
|
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||||
(lambda x: x['data']['blocks'],
|
|
||||||
lambda x: x['data']['content']['model']['blocks'],),
|
|
||||||
list) or []):
|
|
||||||
if block.get('type') != 'media':
|
if block.get('type') != 'media':
|
||||||
continue
|
continue
|
||||||
parse_media(block.get('model'))
|
parse_media(block.get('model'))
|
||||||
|
@ -71,7 +71,6 @@ from .arte import (
|
|||||||
ArteTVIE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
ArteTVCategoryIE,
|
|
||||||
)
|
)
|
||||||
from .arnes import ArnesIE
|
from .arnes import ArnesIE
|
||||||
from .asiancrush import (
|
from .asiancrush import (
|
||||||
@ -790,14 +789,7 @@ from .nick import (
|
|||||||
NickNightIE,
|
NickNightIE,
|
||||||
NickRuIE,
|
NickRuIE,
|
||||||
)
|
)
|
||||||
from .niconico import (
|
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||||
NiconicoIE,
|
|
||||||
NiconicoPlaylistIE,
|
|
||||||
NiconicoUserIE,
|
|
||||||
NicovideoSearchIE,
|
|
||||||
NicovideoSearchDateIE,
|
|
||||||
NicovideoSearchURLIE,
|
|
||||||
)
|
|
||||||
from .ninecninemedia import NineCNineMediaIE
|
from .ninecninemedia import NineCNineMediaIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .ninenow import NineNowIE
|
from .ninenow import NineNowIE
|
||||||
|
@ -4,10 +4,8 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlparse
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@ -22,13 +20,13 @@ class NDRBaseIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = next(group for group in mobj.groups() if group)
|
display_id = next(group for group in mobj.groups() if group)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
return self._extract_embed(webpage, display_id, url)
|
return self._extract_embed(webpage, display_id)
|
||||||
|
|
||||||
|
|
||||||
class NDRIE(NDRBaseIE):
|
class NDRIE(NDRBaseIE):
|
||||||
IE_NAME = 'ndr'
|
IE_NAME = 'ndr'
|
||||||
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
|
||||||
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# httpVideo, same content id
|
# httpVideo, same content id
|
||||||
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
|
||||||
@ -40,14 +38,13 @@ class NDRIE(NDRBaseIE):
|
|||||||
'title': 'Party, Pötte und Parade',
|
'title': 'Party, Pötte und Parade',
|
||||||
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
|
||||||
'uploader': 'ndrtv',
|
'uploader': 'ndrtv',
|
||||||
'timestamp': 1431255671,
|
'timestamp': 1431108900,
|
||||||
'upload_date': '20150510',
|
'upload_date': '20150510',
|
||||||
'duration': 3498,
|
'duration': 3498,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}, {
|
}, {
|
||||||
# httpVideo, different content id
|
# httpVideo, different content id
|
||||||
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
|
||||||
@ -66,7 +63,6 @@ class NDRIE(NDRBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
|
||||||
}, {
|
}, {
|
||||||
# httpAudio, same content id
|
# httpAudio, same content id
|
||||||
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
|
||||||
@ -78,8 +74,8 @@ class NDRIE(NDRBaseIE):
|
|||||||
'title': 'La Valette entgeht der Hinrichtung',
|
'title': 'La Valette entgeht der Hinrichtung',
|
||||||
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
|
||||||
'uploader': 'ndrinfo',
|
'uploader': 'ndrinfo',
|
||||||
'timestamp': 1631711863,
|
'timestamp': 1290626100,
|
||||||
'upload_date': '20210915',
|
'upload_date': '20140729',
|
||||||
'duration': 884,
|
'duration': 884,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -93,10 +89,9 @@ class NDRIE(NDRBaseIE):
|
|||||||
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||||
'description': 'md5:700f6de264010585012a72f97b0ac0c9',
|
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||||
'uploader': 'ndrtv',
|
'uploader': 'ndrtv',
|
||||||
'upload_date': '20201207',
|
'upload_date': '20201113',
|
||||||
'timestamp': 1614349457,
|
|
||||||
'duration': 1749,
|
'duration': 1749,
|
||||||
'subtitles': {
|
'subtitles': {
|
||||||
'de': [{
|
'de': [{
|
||||||
@ -114,38 +109,19 @@ class NDRIE(NDRBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_embed(self, webpage, display_id, url):
|
def _extract_embed(self, webpage, display_id):
|
||||||
embed_url = (
|
embed_url = self._html_search_meta(
|
||||||
self._html_search_meta(
|
|
||||||
'embedURL', webpage, 'embed URL',
|
'embedURL', webpage, 'embed URL',
|
||||||
default=None)
|
default=None) or self._search_regex(
|
||||||
or self._search_regex(
|
|
||||||
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
'embed URL', group='url', default=None)
|
'embed URL', group='url')
|
||||||
or self._search_regex(
|
|
||||||
r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
|
||||||
'embed URL', group='url', default=''))
|
|
||||||
# some more work needed if we only found sophoraID
|
|
||||||
if re.match(r'^[a-z]+\d+$', embed_url):
|
|
||||||
# get the initial part of the url path,. eg /panorama/archiv/2022/
|
|
||||||
parsed_url = compat_urllib_parse_urlparse(url)
|
|
||||||
path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
|
|
||||||
# find tell-tale image with the actual ID
|
|
||||||
ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
|
|
||||||
# or try to use special knowledge!
|
|
||||||
NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
|
|
||||||
embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
|
|
||||||
if not embed_url:
|
|
||||||
raise ExtractorError('Unable to extract embedUrl')
|
|
||||||
|
|
||||||
description = self._search_regex(
|
description = self._search_regex(
|
||||||
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
r'<p[^>]+itemprop="description">([^<]+)</p>',
|
||||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
webpage, 'description', default=None) or self._og_search_description(webpage)
|
||||||
timestamp = parse_iso8601(
|
timestamp = parse_iso8601(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
(r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
|
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
|
||||||
r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
|
webpage, 'upload date', default=None))
|
||||||
webpage, 'upload date', group='cont', default=None))
|
|
||||||
info = self._search_json_ld(webpage, display_id, default={})
|
info = self._search_json_ld(webpage, display_id, default={})
|
||||||
return merge_dicts({
|
return merge_dicts({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
@ -177,19 +153,19 @@ class NJoyIE(NDRBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
|
||||||
}, {
|
}, {
|
||||||
# httpVideo, different content id
|
# httpVideo, different content id
|
||||||
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
|
||||||
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
'md5': '417660fffa90e6df2fda19f1b40a64d8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'livestream283',
|
'id': 'dockville882',
|
||||||
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
|
||||||
'ext': 'mp3',
|
'ext': 'mp4',
|
||||||
'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
|
'title': '"Ich hab noch nie" mit Felix Jaehn',
|
||||||
'description': 'md5:681698f527b8601e511e7b79edde7d2c',
|
'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
|
||||||
'uploader': 'njoy',
|
'uploader': 'njoy',
|
||||||
'upload_date': '20210830',
|
'upload_date': '20150822',
|
||||||
|
'duration': 211,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -199,25 +175,18 @@ class NJoyIE(NDRBaseIE):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_embed(self, webpage, display_id, url=None):
|
def _extract_embed(self, webpage, display_id):
|
||||||
# find tell-tale URL with the actual ID, or ...
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
|
r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
|
||||||
r'<iframe[^>]+id="pp_([\da-z]+)"', ),
|
description = self._search_regex(
|
||||||
webpage, 'NDR id', default=None)
|
|
||||||
|
|
||||||
description = (
|
|
||||||
self._html_search_meta('description', webpage)
|
|
||||||
or self._search_regex(
|
|
||||||
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
|
||||||
webpage, 'description', fatal=False))
|
webpage, 'description', fatal=False)
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'NDREmbedBase',
|
'ie_key': 'NDREmbedBase',
|
||||||
'url': 'ndr:%s' % video_id,
|
'url': 'ndr:%s' % video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'description': description,
|
'description': description,
|
||||||
'title': display_id.replace('-', ' ').strip(),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -322,7 +291,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
class NDREmbedIE(NDREmbedBaseIE):
|
class NDREmbedIE(NDREmbedBaseIE):
|
||||||
IE_NAME = 'ndr:embed'
|
IE_NAME = 'ndr:embed'
|
||||||
_VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
|
_VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
|
||||||
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
|
||||||
@ -335,7 +304,6 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'upload_date': '20150907',
|
'upload_date': '20150907',
|
||||||
'duration': 132,
|
'duration': 132,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
|
||||||
'md5': '002085c44bae38802d94ae5802a36e78',
|
'md5': '002085c44bae38802d94ae5802a36e78',
|
||||||
@ -351,7 +319,6 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
'url': 'http://www.ndr.de/info/audio51535-player.html',
|
||||||
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
|
||||||
@ -361,7 +328,7 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'title': 'La Valette entgeht der Hinrichtung',
|
'title': 'La Valette entgeht der Hinrichtung',
|
||||||
'is_live': False,
|
'is_live': False,
|
||||||
'uploader': 'ndrinfo',
|
'uploader': 'ndrinfo',
|
||||||
'upload_date': '20210915',
|
'upload_date': '20140729',
|
||||||
'duration': 884,
|
'duration': 884,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -382,17 +349,15 @@ class NDREmbedIE(NDREmbedBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
|
||||||
}, {
|
}, {
|
||||||
# httpVideoLive
|
# httpVideoLive
|
||||||
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'livestream217',
|
'id': 'livestream217',
|
||||||
'ext': 'mp4',
|
'ext': 'flv',
|
||||||
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'upload_date': '20210409',
|
'upload_date': '20150910',
|
||||||
'uploader': 'ndrtv',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -430,10 +395,9 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
|
||||||
'is_live': False,
|
'is_live': False,
|
||||||
'upload_date': '20200826',
|
'upload_date': '20150807',
|
||||||
'duration': 1011,
|
'duration': 1011,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}, {
|
}, {
|
||||||
# httpAudio
|
# httpAudio
|
||||||
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
|
||||||
@ -450,7 +414,6 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available',
|
|
||||||
}, {
|
}, {
|
||||||
# httpAudioLive, no explicit ext
|
# httpAudioLive, no explicit ext
|
||||||
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
|
||||||
@ -460,7 +423,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
|
|||||||
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'uploader': 'njoy',
|
'uploader': 'njoy',
|
||||||
'upload_date': '20210830',
|
'upload_date': '20150810',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -2,28 +2,25 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import itertools
|
import functools
|
||||||
import json
|
import json
|
||||||
import re
|
import math
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
OnDemandPagedList,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
PostProcessingError,
|
|
||||||
remove_start,
|
remove_start,
|
||||||
str_or_none,
|
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
@ -37,7 +34,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'a5bad06f1347452102953f323c69da34s',
|
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sm22312215',
|
'id': 'sm22312215',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -160,34 +157,11 @@ class NiconicoIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
# DMC video with heartbeat
|
|
||||||
'url': 'https://www.nicovideo.jp/watch/sm34815188',
|
|
||||||
'md5': '9360c6e1f1519d7759e2fe8e1326ae83',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sm34815188',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'md5:aee93e9f3366db72f902f6cd5d389cb7',
|
|
||||||
'description': 'md5:7b9149fc7a00ab053cafaf5c19662704',
|
|
||||||
'thumbnail': r're:https?://.*',
|
|
||||||
'uploader': 'md5:2762e18fa74dbb40aa1ad27c6291ee32',
|
|
||||||
'uploader_id': '67449889',
|
|
||||||
'upload_date': '20190322',
|
|
||||||
'timestamp': int, # timestamp is unstable
|
|
||||||
'duration': 1082.0,
|
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
_API_HEADERS = {
|
|
||||||
'X-Frontend-ID': '6',
|
|
||||||
'X-Frontend-Version': '0'
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -217,89 +191,37 @@ class NiconicoIE(InfoExtractor):
|
|||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self._downloader.report_warning('unable to log in: bad username or password')
|
||||||
return login_ok
|
return login_ok
|
||||||
|
|
||||||
def _get_heartbeat_info(self, info_dict):
|
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||||
|
def yesno(boolean):
|
||||||
|
return 'yes' if boolean else 'no'
|
||||||
|
|
||||||
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
session_api_data = api_data['video']['dmcInfo']['session_api']
|
||||||
|
session_api_endpoint = session_api_data['urls'][0]
|
||||||
|
|
||||||
api_data = (
|
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||||
info_dict.get('_api_data')
|
|
||||||
or self._parse_json(
|
|
||||||
self._html_search_regex(
|
|
||||||
'data-api-data="([^"]+)"',
|
|
||||||
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
|
|
||||||
'API data', default='{}'),
|
|
||||||
video_id))
|
|
||||||
|
|
||||||
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
|
||||||
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
|
||||||
|
|
||||||
def ping():
|
|
||||||
status = try_get(
|
|
||||||
self._download_json(
|
|
||||||
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
|
|
||||||
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
|
|
||||||
note='Acquiring permission for downloading video',
|
|
||||||
headers=self._API_HEADERS),
|
|
||||||
lambda x: x['meta']['status'])
|
|
||||||
if status != 200:
|
|
||||||
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
|
|
||||||
|
|
||||||
yesno = lambda x: 'yes' if x else 'no'
|
|
||||||
|
|
||||||
# m3u8 (encryption)
|
|
||||||
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
|
|
||||||
protocol = 'm3u8'
|
|
||||||
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
|
|
||||||
session_api_http_parameters = {
|
|
||||||
'parameters': {
|
|
||||||
'hls_parameters': {
|
|
||||||
'encryption': {
|
|
||||||
encryption: {
|
|
||||||
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
|
|
||||||
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
|
|
||||||
}
|
|
||||||
},
|
|
||||||
'transfer_preset': '',
|
|
||||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
|
||||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
|
||||||
'segment_duration': 6000,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# http
|
|
||||||
else:
|
|
||||||
protocol = 'http'
|
|
||||||
session_api_http_parameters = {
|
|
||||||
'parameters': {
|
|
||||||
'http_output_download_parameters': {
|
|
||||||
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
|
||||||
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
session_response = self._download_json(
|
session_response = self._download_json(
|
||||||
session_api_endpoint['url'], video_id,
|
session_api_endpoint['url'], video_id,
|
||||||
query={'_format': 'json'},
|
query={'_format': 'json'},
|
||||||
headers={'Content-Type': 'application/json'},
|
headers={'Content-Type': 'application/json'},
|
||||||
note='Downloading JSON metadata for %s' % info_dict['format_id'],
|
note='Downloading JSON metadata for %s' % format_id,
|
||||||
data=json.dumps({
|
data=json.dumps({
|
||||||
'session': {
|
'session': {
|
||||||
'client_info': {
|
'client_info': {
|
||||||
'player_id': session_api_data.get('playerId'),
|
'player_id': session_api_data['player_id'],
|
||||||
},
|
},
|
||||||
'content_auth': {
|
'content_auth': {
|
||||||
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
|
||||||
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
'content_key_timeout': session_api_data['content_key_timeout'],
|
||||||
'service_id': 'nicovideo',
|
'service_id': 'nicovideo',
|
||||||
'service_user_id': session_api_data.get('serviceUserId')
|
'service_user_id': session_api_data['service_user_id']
|
||||||
},
|
},
|
||||||
'content_id': session_api_data.get('contentId'),
|
'content_id': session_api_data['content_id'],
|
||||||
'content_src_id_sets': [{
|
'content_src_id_sets': [{
|
||||||
'content_src_ids': [{
|
'content_src_ids': [{
|
||||||
'src_id_to_mux': {
|
'src_id_to_mux': {
|
||||||
'audio_src_ids': [audio_src_id],
|
'audio_src_ids': [audio_quality['id']],
|
||||||
'video_src_ids': [video_src_id],
|
'video_src_ids': [video_quality['id']],
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
}],
|
}],
|
||||||
@ -307,81 +229,52 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'content_uri': '',
|
'content_uri': '',
|
||||||
'keep_method': {
|
'keep_method': {
|
||||||
'heartbeat': {
|
'heartbeat': {
|
||||||
'lifetime': session_api_data.get('heartbeatLifetime')
|
'lifetime': session_api_data['heartbeat_lifetime']
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'priority': session_api_data.get('priority'),
|
'priority': session_api_data['priority'],
|
||||||
'protocol': {
|
'protocol': {
|
||||||
'name': 'http',
|
'name': 'http',
|
||||||
'parameters': {
|
'parameters': {
|
||||||
'http_parameters': session_api_http_parameters
|
'http_parameters': {
|
||||||
|
'parameters': {
|
||||||
|
'http_output_download_parameters': {
|
||||||
|
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'recipe_id': session_api_data.get('recipeId'),
|
'recipe_id': session_api_data['recipe_id'],
|
||||||
'session_operation_auth': {
|
'session_operation_auth': {
|
||||||
'session_operation_auth_by_signature': {
|
'session_operation_auth_by_signature': {
|
||||||
'signature': session_api_data.get('signature'),
|
'signature': session_api_data['signature'],
|
||||||
'token': session_api_data.get('token'),
|
'token': session_api_data['token'],
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'timing_constraint': 'unlimited'
|
'timing_constraint': 'unlimited'
|
||||||
}
|
}
|
||||||
}).encode())
|
}).encode())
|
||||||
|
|
||||||
info_dict['url'] = session_response['data']['session']['content_uri']
|
resolution = video_quality.get('resolution', {})
|
||||||
info_dict['protocol'] = protocol
|
|
||||||
|
|
||||||
# get heartbeat info
|
|
||||||
heartbeat_info_dict = {
|
|
||||||
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
|
||||||
'data': json.dumps(session_response['data']),
|
|
||||||
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
|
||||||
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
|
||||||
'ping': ping
|
|
||||||
}
|
|
||||||
|
|
||||||
return info_dict, heartbeat_info_dict
|
|
||||||
|
|
||||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
|
||||||
def parse_format_id(id_code):
|
|
||||||
mobj = re.match(r'''(?x)
|
|
||||||
(?:archive_)?
|
|
||||||
(?:(?P<codec>[^_]+)_)?
|
|
||||||
(?:(?P<br>[\d]+)kbps_)?
|
|
||||||
(?:(?P<res>[\d+]+)p_)?
|
|
||||||
''', '%s_' % id_code)
|
|
||||||
return mobj.groupdict() if mobj else {}
|
|
||||||
|
|
||||||
protocol = 'niconico_dmc'
|
|
||||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
|
||||||
vdict = parse_format_id(video_quality['id'])
|
|
||||||
adict = parse_format_id(audio_quality['id'])
|
|
||||||
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
|
||||||
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
'url': session_response['data']['session']['content_uri'],
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
|
||||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||||
'vcodec': vdict.get('codec'),
|
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
|
||||||
'acodec': adict.get('codec'),
|
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
|
||||||
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
'height': resolution.get('height'),
|
||||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
'width': resolution.get('width'),
|
||||||
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
|
||||||
'width': int_or_none(resolution.get('width')),
|
|
||||||
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
|
|
||||||
'protocol': protocol,
|
|
||||||
'http_headers': {
|
|
||||||
'Origin': 'https://www.nicovideo.jp',
|
|
||||||
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# Get video webpage for API data.
|
# Get video webpage. We are not actually interested in it for normal
|
||||||
|
# cases, but need the cookies in order to be able to download the
|
||||||
|
# info webpage
|
||||||
webpage, handle = self._download_webpage_handle(
|
webpage, handle = self._download_webpage_handle(
|
||||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
if video_id.startswith('so'):
|
if video_id.startswith('so'):
|
||||||
@ -391,15 +284,36 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'data-api-data="([^"]+)"', webpage,
|
'data-api-data="([^"]+)"', webpage,
|
||||||
'API data', default='{}'), video_id)
|
'API data', default='{}'), video_id)
|
||||||
|
|
||||||
def get_video_info_web(items):
|
def _format_id_from_url(video_url):
|
||||||
return dict_get(api_data['video'], items)
|
return 'economy' if video_real_url.endswith('low') else 'normal'
|
||||||
|
|
||||||
|
try:
|
||||||
|
video_real_url = api_data['video']['smileInfo']['url']
|
||||||
|
except KeyError: # Flash videos
|
||||||
|
# Get flv info
|
||||||
|
flv_info_webpage = self._download_webpage(
|
||||||
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
|
video_id, 'Downloading flv info')
|
||||||
|
|
||||||
|
flv_info = compat_parse_qs(flv_info_webpage)
|
||||||
|
if 'url' not in flv_info:
|
||||||
|
if 'deleted' in flv_info:
|
||||||
|
raise ExtractorError('The video has been deleted.',
|
||||||
|
expected=True)
|
||||||
|
elif 'closed' in flv_info:
|
||||||
|
raise ExtractorError('Niconico videos now require logging in',
|
||||||
|
expected=True)
|
||||||
|
elif 'error' in flv_info:
|
||||||
|
raise ExtractorError('%s reports error: %s' % (
|
||||||
|
self.IE_NAME, flv_info['error'][0]), expected=True)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unable to find video URL')
|
||||||
|
|
||||||
# Get video info
|
|
||||||
video_info_xml = self._download_xml(
|
video_info_xml = self._download_xml(
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||||
video_id, note='Downloading video info page')
|
video_id, note='Downloading video info page')
|
||||||
|
|
||||||
def get_video_info_xml(items):
|
def get_video_info(items):
|
||||||
if not isinstance(items, list):
|
if not isinstance(items, list):
|
||||||
items = [items]
|
items = [items]
|
||||||
for item in items:
|
for item in items:
|
||||||
@ -407,120 +321,49 @@ class NiconicoIE(InfoExtractor):
|
|||||||
if ret:
|
if ret:
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
if get_video_info_xml('error'):
|
video_real_url = flv_info['url'][0]
|
||||||
error_code = get_video_info_xml('code')
|
|
||||||
|
|
||||||
if error_code == 'DELETED':
|
extension = get_video_info('movie_type')
|
||||||
raise ExtractorError('The video has been deleted.',
|
if not extension:
|
||||||
expected=True)
|
extension = determine_ext(video_real_url)
|
||||||
elif error_code == 'NOT_FOUND':
|
|
||||||
raise ExtractorError('The video is not found.',
|
formats = [{
|
||||||
expected=True)
|
'url': video_real_url,
|
||||||
elif error_code == 'COMMUNITY':
|
'ext': extension,
|
||||||
self.to_screen('%s: The video is community members only.' % video_id)
|
'format_id': _format_id_from_url(video_real_url),
|
||||||
|
}]
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
|
|
||||||
|
|
||||||
# Start extracting video formats
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
# Get HTML5 videos info
|
dmc_info = api_data['video'].get('dmcInfo')
|
||||||
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
if dmc_info: # "New" HTML5 videos
|
||||||
if not quality_info:
|
quality_info = dmc_info['quality']
|
||||||
raise ExtractorError('The video can\'t be downloaded', expected=True)
|
for audio_quality in quality_info['audios']:
|
||||||
|
for video_quality in quality_info['videos']:
|
||||||
for audio_quality in quality_info.get('audios') or {}:
|
if not audio_quality['available'] or not video_quality['available']:
|
||||||
for video_quality in quality_info.get('videos') or {}:
|
|
||||||
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
|
||||||
continue
|
continue
|
||||||
formats.append(self._extract_format_for_quality(
|
formats.append(self._extract_format_for_quality(
|
||||||
api_data, video_id, audio_quality, video_quality))
|
api_data, video_id, audio_quality, video_quality))
|
||||||
|
|
||||||
# Get flv/swf info
|
|
||||||
timestamp = None
|
|
||||||
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
|
||||||
if video_real_url:
|
|
||||||
is_economy = video_real_url.endswith('low')
|
|
||||||
|
|
||||||
if is_economy:
|
|
||||||
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
|
||||||
|
|
||||||
# Invoking ffprobe to determine resolution
|
|
||||||
pp = FFmpegPostProcessor(self._downloader)
|
|
||||||
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
|
||||||
|
|
||||||
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
|
||||||
|
|
||||||
try:
|
|
||||||
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
|
||||||
except PostProcessingError as err:
|
|
||||||
raise ExtractorError(err.msg, expected=True)
|
|
||||||
|
|
||||||
v_stream = a_stream = {}
|
|
||||||
|
|
||||||
# Some complex swf files doesn't have video stream (e.g. nm4809023)
|
|
||||||
for stream in metadata['streams']:
|
|
||||||
if stream['codec_type'] == 'video':
|
|
||||||
v_stream = stream
|
|
||||||
elif stream['codec_type'] == 'audio':
|
|
||||||
a_stream = stream
|
|
||||||
|
|
||||||
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
|
||||||
filesize = int(
|
|
||||||
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
|
||||||
or metadata['format']['size']
|
|
||||||
)
|
|
||||||
extension = (
|
|
||||||
get_video_info_xml('movie_type')
|
|
||||||
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
|
||||||
)
|
|
||||||
|
|
||||||
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
|
||||||
timestamp = (
|
|
||||||
parse_iso8601(get_video_info_web('first_retrieve'))
|
|
||||||
or unified_timestamp(get_video_info_web('postedDateTime'))
|
|
||||||
)
|
|
||||||
metadata_timestamp = (
|
|
||||||
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
|
||||||
or timestamp if extension != 'mp4' else 0
|
|
||||||
)
|
|
||||||
|
|
||||||
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
|
||||||
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
|
||||||
|
|
||||||
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
|
||||||
|
|
||||||
# If movie file size is unstable, old server movie is not source movie.
|
|
||||||
if filesize > 1:
|
|
||||||
formats.append({
|
|
||||||
'url': video_real_url,
|
|
||||||
'format_id': 'smile' if not is_economy else 'smile_low',
|
|
||||||
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
|
||||||
'ext': extension,
|
|
||||||
'container': extension,
|
|
||||||
'vcodec': v_stream.get('codec_name'),
|
|
||||||
'acodec': a_stream.get('codec_name'),
|
|
||||||
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
|
|
||||||
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
|
||||||
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
|
||||||
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
|
||||||
'height': int_or_none(v_stream.get('height')),
|
|
||||||
'width': int_or_none(v_stream.get('width')),
|
|
||||||
'source_preference': 5 if not is_economy else -2,
|
|
||||||
'quality': 5 if is_source and not is_economy else None,
|
|
||||||
'filesize': filesize
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
else: # "Old" HTML5 videos
|
||||||
|
formats = [{
|
||||||
|
'url': video_real_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': _format_id_from_url(video_real_url),
|
||||||
|
}]
|
||||||
|
|
||||||
|
def get_video_info(items):
|
||||||
|
return dict_get(api_data['video'], items)
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
title = (
|
title = get_video_info('title')
|
||||||
get_video_info_xml('title') # prefer to get the untranslated original title
|
if not title:
|
||||||
or get_video_info_web(['originalTitle', 'title'])
|
title = self._og_search_title(webpage, default=None)
|
||||||
or self._og_search_title(webpage, default=None)
|
if not title:
|
||||||
or self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||||
webpage, 'video title'))
|
webpage, 'video title')
|
||||||
|
|
||||||
watch_api_data_string = self._html_search_regex(
|
watch_api_data_string = self._html_search_regex(
|
||||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||||
@ -529,15 +372,14 @@ class NiconicoIE(InfoExtractor):
|
|||||||
video_detail = watch_api_data.get('videoDetail', {})
|
video_detail = watch_api_data.get('videoDetail', {})
|
||||||
|
|
||||||
thumbnail = (
|
thumbnail = (
|
||||||
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
|
get_video_info(['thumbnail_url', 'thumbnailURL'])
|
||||||
or dict_get( # choose highest from 720p to 240p
|
|
||||||
get_video_info_web('thumbnail'),
|
|
||||||
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
|
|
||||||
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
||||||
or video_detail.get('thumbnail'))
|
or video_detail.get('thumbnail'))
|
||||||
|
|
||||||
description = get_video_info_web('description')
|
description = get_video_info('description')
|
||||||
|
|
||||||
|
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
|
||||||
|
or unified_timestamp(get_video_info('postedDateTime')))
|
||||||
if not timestamp:
|
if not timestamp:
|
||||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||||
if match:
|
if match:
|
||||||
@ -546,25 +388,19 @@ class NiconicoIE(InfoExtractor):
|
|||||||
timestamp = parse_iso8601(
|
timestamp = parse_iso8601(
|
||||||
video_detail['postedAt'].replace('/', '-'),
|
video_detail['postedAt'].replace('/', '-'),
|
||||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||||
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
|
||||||
|
|
||||||
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
|
||||||
if not view_count:
|
if not view_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||||
webpage, 'view count', default=None)
|
webpage, 'view count', default=None)
|
||||||
if match:
|
if match:
|
||||||
view_count = int_or_none(match.replace(',', ''))
|
view_count = int_or_none(match.replace(',', ''))
|
||||||
view_count = (
|
view_count = view_count or video_detail.get('viewCount')
|
||||||
view_count
|
|
||||||
or video_detail.get('viewCount')
|
|
||||||
or try_get(api_data, lambda x: x['video']['count']['view']))
|
|
||||||
|
|
||||||
comment_count = (
|
comment_count = (int_or_none(get_video_info('comment_num'))
|
||||||
int_or_none(get_video_info_web('comment_num'))
|
|
||||||
or video_detail.get('commentCount')
|
or video_detail.get('commentCount')
|
||||||
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
||||||
|
|
||||||
if not comment_count:
|
if not comment_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||||
@ -573,41 +409,22 @@ class NiconicoIE(InfoExtractor):
|
|||||||
comment_count = int_or_none(match.replace(',', ''))
|
comment_count = int_or_none(match.replace(',', ''))
|
||||||
|
|
||||||
duration = (parse_duration(
|
duration = (parse_duration(
|
||||||
get_video_info_web('length')
|
get_video_info('length')
|
||||||
or self._html_search_meta(
|
or self._html_search_meta(
|
||||||
'video:duration', webpage, 'video duration', default=None))
|
'video:duration', webpage, 'video duration', default=None))
|
||||||
or video_detail.get('length')
|
or video_detail.get('length')
|
||||||
or get_video_info_web('duration'))
|
or get_video_info('duration'))
|
||||||
|
|
||||||
webpage_url = get_video_info_web('watch_url') or url
|
webpage_url = get_video_info('watch_url') or url
|
||||||
|
|
||||||
# for channel movie and community movie
|
|
||||||
channel_id = try_get(
|
|
||||||
api_data,
|
|
||||||
(lambda x: x['channel']['globalId'],
|
|
||||||
lambda x: x['community']['globalId']))
|
|
||||||
channel = try_get(
|
|
||||||
api_data,
|
|
||||||
(lambda x: x['channel']['name'],
|
|
||||||
lambda x: x['community']['name']))
|
|
||||||
|
|
||||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||||
# in the JSON, which will cause None to be returned instead of {}.
|
# in the JSON, which will cause None to be returned instead of {}.
|
||||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||||
uploader_id = str_or_none(
|
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
|
||||||
get_video_info_web(['ch_id', 'user_id'])
|
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
|
||||||
or owner.get('id')
|
|
||||||
or channel_id
|
|
||||||
)
|
|
||||||
uploader = (
|
|
||||||
get_video_info_web(['ch_name', 'user_nickname'])
|
|
||||||
or owner.get('nickname')
|
|
||||||
or channel
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'_api_data': api_data,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
@ -615,8 +432,6 @@ class NiconicoIE(InfoExtractor):
|
|||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'channel': channel,
|
|
||||||
'channel_id': channel_id,
|
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
@ -625,7 +440,7 @@ class NiconicoIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistIE(InfoExtractor):
|
class NiconicoPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||||
@ -641,185 +456,60 @@ class NiconicoPlaylistIE(InfoExtractor):
|
|||||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_API_HEADERS = {
|
|
||||||
'X-Frontend-ID': '6',
|
|
||||||
'X-Frontend-Version': '0'
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
list_id = self._match_id(url)
|
|
||||||
|
|
||||||
def get_page_data(pagenum, pagesize):
|
|
||||||
return self._download_json(
|
|
||||||
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
|
||||||
query={'page': 1 + pagenum, 'pageSize': pagesize},
|
|
||||||
headers=self._API_HEADERS).get('data').get('mylist')
|
|
||||||
|
|
||||||
data = get_page_data(0, 1)
|
|
||||||
title = data.get('name')
|
|
||||||
description = data.get('description')
|
|
||||||
uploader = data.get('owner').get('name')
|
|
||||||
uploader_id = data.get('owner').get('id')
|
|
||||||
|
|
||||||
def pagefunc(pagenum):
|
|
||||||
data = get_page_data(pagenum, 25)
|
|
||||||
return ({
|
|
||||||
'_type': 'url',
|
|
||||||
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
|
|
||||||
} for item in data.get('items'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': list_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'entries': OnDemandPagedList(pagefunc, 25),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class NicovideoSearchBaseIE(InfoExtractor):
|
|
||||||
_MAX_RESULTS = float('inf')
|
|
||||||
|
|
||||||
def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
|
|
||||||
query = query or {}
|
|
||||||
pages = [query['page']] if 'page' in query else itertools.count(1)
|
|
||||||
for page_num in pages:
|
|
||||||
query['page'] = str(page_num)
|
|
||||||
webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
|
|
||||||
results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.+?)(?=["\'])', webpage)
|
|
||||||
for item in results:
|
|
||||||
yield self.url_result('http://www.nicovideo.jp/watch/%s' % item, 'Niconico', item)
|
|
||||||
if not results:
|
|
||||||
break
|
|
||||||
|
|
||||||
def _get_n_results(self, query, n):
|
|
||||||
entries = self._entries(self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
|
|
||||||
if n < self._MAX_RESULTS:
|
|
||||||
entries = itertools.islice(entries, 0, n)
|
|
||||||
return self.playlist_result(entries, query, query)
|
|
||||||
|
|
||||||
|
|
||||||
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
|
|
||||||
IE_DESC = 'Nico video search'
|
|
||||||
IE_NAME = 'nicovideo:search'
|
|
||||||
_SEARCH_KEY = 'nicosearch'
|
|
||||||
|
|
||||||
def _search_results(self, query):
|
|
||||||
return self._entries(
|
|
||||||
self._proto_relative_url('//www.nicovideo.jp/search/%s' % query), query)
|
|
||||||
|
|
||||||
|
|
||||||
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
|
|
||||||
IE_NAME = '%s_url' % NicovideoSearchIE.IE_NAME
|
|
||||||
IE_DESC = 'Nico video search URLs'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.nicovideo.jp/search/sm9',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sm9',
|
|
||||||
'title': 'sm9'
|
|
||||||
},
|
|
||||||
'playlist_mincount': 40,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'sm9',
|
|
||||||
'title': 'sm9'
|
|
||||||
},
|
|
||||||
'playlist_count': 31,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
query = self._match_id(url)
|
|
||||||
return self.playlist_result(self._entries(url, query), query, query)
|
|
||||||
|
|
||||||
|
|
||||||
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
|
|
||||||
IE_DESC = 'Nico video search, newest first'
|
|
||||||
IE_NAME = '%s:date' % NicovideoSearchIE.IE_NAME
|
|
||||||
_SEARCH_KEY = 'nicosearchdate'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'nicosearchdateall:a',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'a',
|
|
||||||
'title': 'a'
|
|
||||||
},
|
|
||||||
'playlist_mincount': 1610,
|
|
||||||
}]
|
|
||||||
|
|
||||||
_START_DATE = datetime.date(2007, 1, 1)
|
|
||||||
_RESULTS_PER_PAGE = 32
|
|
||||||
_MAX_PAGES = 50
|
|
||||||
|
|
||||||
def _entries(self, url, item_id, start_date=None, end_date=None):
|
|
||||||
start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
|
|
||||||
|
|
||||||
# If the last page has a full page of videos, we need to break down the query interval further
|
|
||||||
last_page_len = len(list(self._get_entries_for_date(
|
|
||||||
url, item_id, start_date, end_date, self._MAX_PAGES,
|
|
||||||
note='Checking number of videos from {0} to {1}'.format(start_date, end_date))))
|
|
||||||
if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
|
|
||||||
midpoint = start_date + ((end_date - start_date) // 2)
|
|
||||||
for entry in itertools.chain(
|
|
||||||
iter(self._entries(url, item_id, midpoint, end_date)),
|
|
||||||
iter(self._entries(url, item_id, start_date, midpoint))):
|
|
||||||
yield entry
|
|
||||||
else:
|
|
||||||
self.to_screen('{0}: Downloading results from {1} to {2}'.format(item_id, start_date, end_date))
|
|
||||||
for entry in iter(self._get_entries_for_date(
|
|
||||||
url, item_id, start_date, end_date, note=' Downloading page %(page)s')):
|
|
||||||
yield entry
|
|
||||||
|
|
||||||
def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
|
|
||||||
query = {
|
|
||||||
'start': compat_str(start_date),
|
|
||||||
'end': compat_str(end_date or start_date),
|
|
||||||
'sort': 'f',
|
|
||||||
'order': 'd',
|
|
||||||
}
|
|
||||||
if page_num:
|
|
||||||
query['page'] = compat_str(page_num)
|
|
||||||
|
|
||||||
for entry in iter(super(NicovideoSearchDateIE, self)._entries(url, item_id, query=query, note=note)):
|
|
||||||
yield entry
|
|
||||||
|
|
||||||
|
|
||||||
class NiconicoUserIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://www.nicovideo.jp/user/419948',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '419948',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 101,
|
|
||||||
}
|
|
||||||
_API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
|
|
||||||
_PAGE_SIZE = 100
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
_API_HEADERS = {
|
def _call_api(self, list_id, resource, query):
|
||||||
'X-Frontend-ID': '6',
|
return self._download_json(
|
||||||
'X-Frontend-Version': '0'
|
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||||
}
|
'Downloading %s JSON metatdata' % resource, query=query,
|
||||||
|
headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||||
|
|
||||||
def _entries(self, list_id):
|
def _parse_owner(self, item):
|
||||||
total_count = 1
|
owner = item.get('owner') or {}
|
||||||
count = page_num = 0
|
if owner:
|
||||||
while count < total_count:
|
return {
|
||||||
json_parsed = self._download_json(
|
'uploader': owner.get('name'),
|
||||||
self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
|
'uploader_id': owner.get('id'),
|
||||||
headers=self._API_HEADERS,
|
}
|
||||||
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
|
return {}
|
||||||
if not page_num:
|
|
||||||
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
def _fetch_page(self, list_id, page):
|
||||||
for entry in json_parsed["data"]["items"]:
|
page += 1
|
||||||
count += 1
|
items = self._call_api(list_id, 'page %d' % page, {
|
||||||
yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
|
'page': page,
|
||||||
page_num += 1
|
'pageSize': self._PAGE_SIZE,
|
||||||
|
})['items']
|
||||||
|
for item in items:
|
||||||
|
video = item.get('video') or {}
|
||||||
|
video_id = video.get('id')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
count = video.get('count') or {}
|
||||||
|
get_count = lambda x: int_or_none(count.get(x))
|
||||||
|
info = {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||||
|
'description': video.get('shortDescription'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'view_count': get_count('view'),
|
||||||
|
'comment_count': get_count('comment'),
|
||||||
|
'ie_key': NiconicoIE.ie_key(),
|
||||||
|
}
|
||||||
|
info.update(self._parse_owner(video))
|
||||||
|
yield info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
return self.playlist_result(self._entries(list_id), list_id)
|
mylist = self._call_api(list_id, 'list', {
|
||||||
|
'pageSize': 1,
|
||||||
|
})
|
||||||
|
entries = InAdvancePagedList(
|
||||||
|
functools.partial(self._fetch_page, list_id),
|
||||||
|
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||||
|
self._PAGE_SIZE)
|
||||||
|
result = self.playlist_result(
|
||||||
|
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||||
|
result.update(self._parse_owner(mylist))
|
||||||
|
return result
|
||||||
|
@ -1,16 +1,19 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .jwplatform import JWPlatformIE
|
||||||
|
from .nexx import NexxIE
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
NO_DEFAULT,
|
||||||
extract_attributes,
|
smuggle_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
from .dplay import DPlayIE
|
|
||||||
|
|
||||||
|
class Tele5IE(InfoExtractor):
|
||||||
class Tele5IE(DPlayIE):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -25,7 +28,6 @@ class Tele5IE(DPlayIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available: "404 Seite nicht gefunden"',
|
|
||||||
}, {
|
}, {
|
||||||
# jwplatform, nexx unavailable
|
# jwplatform, nexx unavailable
|
||||||
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
|
||||||
@ -40,20 +42,7 @@ class Tele5IE(DPlayIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'No longer available, redirects to Filme page',
|
'add_ie': [JWPlatformIE.ie_key()],
|
||||||
}, {
|
|
||||||
'url': 'https://tele5.de/mediathek/angel-of-mine/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '1252360',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20220109',
|
|
||||||
'timestamp': 1641762000,
|
|
||||||
'title': 'Angel of Mine',
|
|
||||||
'description': 'md5:a72546a175e1286eb3251843a52d1ad7',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'format': 'bestvideo',
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -75,18 +64,45 @@ class Tele5IE(DPlayIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
|
||||||
player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player')
|
|
||||||
player_info = extract_attributes(player_element)
|
NEXX_ID_RE = r'\d{6,}'
|
||||||
asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', ))
|
JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
|
||||||
endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname
|
|
||||||
source_type = player_info.get('sourcetype')
|
def nexx_result(nexx_id):
|
||||||
if source_type:
|
return self.url_result(
|
||||||
endpoint = '%s-%s' % (source_type, endpoint)
|
'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
|
||||||
try:
|
ie=NexxIE.ie_key(), video_id=nexx_id)
|
||||||
return self._get_disco_api_info(url, asset_id, endpoint, realm, country)
|
|
||||||
except ExtractorError as e:
|
nexx_id = jwplatform_id = None
|
||||||
if getattr(e, 'message', '') == 'Missing deviceId in context':
|
|
||||||
raise ExtractorError('DRM protected', cause=e, expected=True)
|
if video_id:
|
||||||
raise
|
if re.match(NEXX_ID_RE, video_id):
|
||||||
|
return nexx_result(video_id)
|
||||||
|
elif re.match(JWPLATFORM_ID_RE, video_id):
|
||||||
|
jwplatform_id = video_id
|
||||||
|
|
||||||
|
if not nexx_id:
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
def extract_id(pattern, name, default=NO_DEFAULT):
|
||||||
|
return self._html_search_regex(
|
||||||
|
(r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
|
||||||
|
r'\s+id\s*=\s*["\']player_(%s)' % pattern,
|
||||||
|
r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
|
||||||
|
default=default)
|
||||||
|
|
||||||
|
nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
|
||||||
|
if nexx_id:
|
||||||
|
return nexx_result(nexx_id)
|
||||||
|
|
||||||
|
if not jwplatform_id:
|
||||||
|
jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
|
||||||
|
|
||||||
|
return self.url_result(
|
||||||
|
smuggle_url(
|
||||||
|
'jwplatform:%s' % jwplatform_id,
|
||||||
|
{'geo_countries': self._GEO_COUNTRIES}),
|
||||||
|
ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
|
||||||
|
@ -95,6 +95,7 @@ class UOLIE(InfoExtractor):
|
|||||||
if v:
|
if v:
|
||||||
query[k] = v
|
query[k] = v
|
||||||
f_url = update_url_query(f_url, query)
|
f_url = update_url_query(f_url, query)
|
||||||
|
format_id = format_id
|
||||||
if format_id == 'HLS':
|
if format_id == 'HLS':
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
f_url, media_id, 'mp4', 'm3u8_native',
|
f_url, media_id, 'mp4', 'm3u8_native',
|
||||||
|
@ -4,11 +4,7 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
ISO639Utils,
|
|
||||||
parse_age_limit,
|
|
||||||
try_get,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -27,10 +23,9 @@ class URPlayIE(InfoExtractor):
|
|||||||
'upload_date': '20171214',
|
'upload_date': '20171214',
|
||||||
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
'series': 'UR Samtiden - Livet, universum och rymdens märkliga musik',
|
||||||
'duration': 2269,
|
'duration': 2269,
|
||||||
'categories': ['Vetenskap & teknik'],
|
'categories': ['Kultur & historia'],
|
||||||
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
'tags': ['Kritiskt tänkande', 'Vetenskap', 'Vetenskaplig verksamhet'],
|
||||||
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
'episode': 'Om vetenskap, kritiskt tänkande och motstånd',
|
||||||
'age_limit': 15,
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
|
||||||
@ -55,19 +50,11 @@ class URPlayIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
url = url.replace('skola.se/Produkter', 'play.se/program')
|
url = url.replace('skola.se/Produkter', 'play.se/program')
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
urplayer_data = self._search_regex(
|
vid = int(video_id)
|
||||||
r'(?s)\bid\s*=\s*"__NEXT_DATA__"[^>]*>\s*({.+?})\s*</script',
|
|
||||||
webpage, 'urplayer next data', fatal=False) or {}
|
|
||||||
if urplayer_data:
|
|
||||||
urplayer_data = self._parse_json(urplayer_data, video_id, fatal=False)
|
|
||||||
urplayer_data = try_get(urplayer_data, lambda x: x['props']['pageProps']['program'], dict)
|
|
||||||
if not urplayer_data:
|
|
||||||
raise ExtractorError('Unable to parse __NEXT_DATA__')
|
|
||||||
else:
|
|
||||||
accessible_episodes = self._parse_json(self._html_search_regex(
|
accessible_episodes = self._parse_json(self._html_search_regex(
|
||||||
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
r'data-react-class="routes/Product/components/ProgramContainer/ProgramContainer"[^>]+data-react-props="({.+?})"',
|
||||||
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
webpage, 'urplayer data'), video_id)['accessibleEpisodes']
|
||||||
urplayer_data = next(e for e in accessible_episodes if e.get('id') == int_or_none(video_id))
|
urplayer_data = next(e for e in accessible_episodes if e.get('id') == vid)
|
||||||
episode = urplayer_data['title']
|
episode = urplayer_data['title']
|
||||||
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
raw_streaming_info = urplayer_data['streamingInfo']['raw']
|
||||||
host = self._download_json(
|
host = self._download_json(
|
||||||
@ -85,30 +72,6 @@ class URPlayIE(InfoExtractor):
|
|||||||
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
video_id, skip_protocols=['f4m', 'rtmp', 'rtsp']))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
|
|
||||||
def parse_lang_code(code):
|
|
||||||
"3-character language code or None (utils candidate)"
|
|
||||||
if code is None:
|
|
||||||
return
|
|
||||||
lang = code.lower()
|
|
||||||
if not ISO639Utils.long2short(lang):
|
|
||||||
lang = ISO639Utils.short2long(lang)
|
|
||||||
return lang or None
|
|
||||||
|
|
||||||
for k, v in (urplayer_data['streamingInfo'].get('sweComplete') or {}).items():
|
|
||||||
if (k in ('sd', 'hd') or not isinstance(v, dict)):
|
|
||||||
continue
|
|
||||||
lang, sttl_url = (v.get(kk) for kk in ('language', 'location', ))
|
|
||||||
if not sttl_url:
|
|
||||||
continue
|
|
||||||
lang = parse_lang_code(lang)
|
|
||||||
if not lang:
|
|
||||||
continue
|
|
||||||
sttl = subtitles.get(lang) or []
|
|
||||||
sttl.append({'ext': k, 'url': sttl_url, })
|
|
||||||
subtitles[lang] = sttl
|
|
||||||
|
|
||||||
image = urplayer_data.get('image') or {}
|
image = urplayer_data.get('image') or {}
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for k, v in image.items():
|
for k, v in image.items():
|
||||||
@ -141,7 +104,4 @@ class URPlayIE(InfoExtractor):
|
|||||||
'season': series.get('label'),
|
'season': series.get('label'),
|
||||||
'episode': episode,
|
'episode': episode,
|
||||||
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
'episode_number': int_or_none(urplayer_data.get('episodeNumber')),
|
||||||
'age_limit': parse_age_limit(min(try_get(a, lambda x: x['from'], int) or 0
|
|
||||||
for a in urplayer_data.get('ageRanges', []))),
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,6 @@ from ..compat import (
|
|||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
@ -23,10 +22,9 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class WDRIE(InfoExtractor):
|
class WDRIE(InfoExtractor):
|
||||||
__API_URL_TPL = '//deviceids-medp.wdr.de/ondemand/%s/%s'
|
_VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js'
|
||||||
_VALID_URL = (r'(?:https?:' + __API_URL_TPL) % (r'\d+', r'(?=\d+\.js)|wdr:)(?P<id>\d{6,})')
|
|
||||||
_GEO_COUNTRIES = ['DE']
|
_GEO_COUNTRIES = ['DE']
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-1557833',
|
'id': 'mdb-1557833',
|
||||||
@ -34,20 +32,11 @@ class WDRIE(InfoExtractor):
|
|||||||
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
'title': 'Biathlon-Staffel verpasst Podest bei Olympia-Generalprobe',
|
||||||
'upload_date': '20180112',
|
'upload_date': '20180112',
|
||||||
},
|
},
|
||||||
},
|
}
|
||||||
]
|
|
||||||
|
|
||||||
def _asset_url(self, wdr_id):
|
|
||||||
id_len = max(len(wdr_id), 5)
|
|
||||||
return ''.join(('https:', self.__API_URL_TPL % (wdr_id[:id_len - 4], wdr_id, ), '.js'))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
if url.startswith('wdr:'):
|
|
||||||
video_id = url[4:]
|
|
||||||
url = self._asset_url(video_id)
|
|
||||||
|
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
url, video_id, transform_source=strip_jsonp)
|
url, video_id, transform_source=strip_jsonp)
|
||||||
|
|
||||||
@ -126,10 +115,10 @@ class WDRIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WDRPageIE(WDRIE):
|
class WDRPageIE(InfoExtractor):
|
||||||
_MAUS_REGEX = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/)*?(?P<maus_id>[^/?#.]+)(?:/?|/index\.php5|\.php5)$'
|
_CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5'
|
||||||
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
_PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html'
|
||||||
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _MAUS_REGEX
|
_VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -170,11 +159,11 @@ class WDRPageIE(WDRIE):
|
|||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
'url': 'http://www1.wdr.de/mediathek/video/live/index.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-2296252',
|
'id': 'mdb-1406149',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^WDR Fernsehen im Livestream (?:\(nur in Deutschland erreichbar\) )?[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': r're:^WDR Fernsehen im Livestream \(nur in Deutschland erreichbar\) [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'alt_title': 'WDR Fernsehen Live',
|
'alt_title': 'WDR Fernsehen Live',
|
||||||
'upload_date': '20201112',
|
'upload_date': '20150101',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
@ -183,7 +172,7 @@ class WDRPageIE(WDRIE):
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
'url': 'http://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html',
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 7,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'aktuelle-stunde-120',
|
'id': 'aktuelle-stunde-120',
|
||||||
},
|
},
|
||||||
@ -191,10 +180,10 @@ class WDRPageIE(WDRIE):
|
|||||||
{
|
{
|
||||||
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'mdb-2627637',
|
'id': 'mdb-1552552',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': 're:^[0-9]{8}$',
|
'upload_date': 're:^[0-9]{8}$',
|
||||||
'title': 're:^Die Sendung (?:mit der Maus )?vom [0-9.]{10}$',
|
'title': 're:^Die Sendung mit der Maus vom [0-9.]{10}$',
|
||||||
},
|
},
|
||||||
'skip': 'The id changes from week to week because of the new episode'
|
'skip': 'The id changes from week to week because of the new episode'
|
||||||
},
|
},
|
||||||
@ -207,7 +196,6 @@ class WDRPageIE(WDRIE):
|
|||||||
'upload_date': '20130919',
|
'upload_date': '20130919',
|
||||||
'title': 'Sachgeschichte - Achterbahn ',
|
'title': 'Sachgeschichte - Achterbahn ',
|
||||||
},
|
},
|
||||||
'skip': 'HTTP Error 404: Not Found',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
'url': 'http://www1.wdr.de/radio/player/radioplayer116~_layout-popupVersion.html',
|
||||||
@ -233,7 +221,6 @@ class WDRPageIE(WDRIE):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'HTTP Error 404: Not Found',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html',
|
||||||
@ -247,7 +234,7 @@ class WDRPageIE(WDRIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = dict_get(mobj.groupdict(), ('display_id', 'maus_id'), 'wdrmaus')
|
display_id = mobj.group('display_id')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
@ -273,14 +260,6 @@ class WDRPageIE(WDRIE):
|
|||||||
jsonp_url = try_get(
|
jsonp_url = try_get(
|
||||||
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
media_link_obj, lambda x: x['mediaObj']['url'], compat_str)
|
||||||
if jsonp_url:
|
if jsonp_url:
|
||||||
# metadata, or player JS with ['ref'] giving WDR id, or just media, perhaps
|
|
||||||
clip_id = media_link_obj['mediaObj'].get('ref')
|
|
||||||
if jsonp_url.endswith('.assetjsonp'):
|
|
||||||
asset = self._download_json(
|
|
||||||
jsonp_url, display_id, fatal=False, transform_source=strip_jsonp)
|
|
||||||
clip_id = try_get(asset, lambda x: x['trackerData']['trackerClipId'], compat_str)
|
|
||||||
if clip_id:
|
|
||||||
jsonp_url = self._asset_url(clip_id[4:])
|
|
||||||
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
entries.append(self.url_result(jsonp_url, ie=WDRIE.ie_key()))
|
||||||
|
|
||||||
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
# Playlist (e.g. https://www1.wdr.de/mediathek/video/sendungen/aktuelle-stunde/aktuelle-stunde-120.html)
|
||||||
@ -300,14 +279,16 @@ class WDRPageIE(WDRIE):
|
|||||||
class WDRElefantIE(InfoExtractor):
|
class WDRElefantIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
_VALID_URL = r'https?://(?:www\.)wdrmaus\.de/elefantenseite/#(?P<id>.+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.wdrmaus.de/elefantenseite/#elefantenkino_wippe',
|
'url': 'http://www.wdrmaus.de/elefantenseite/#folge_ostern_2015',
|
||||||
# adaptive stream: unstable file MD5
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Wippe',
|
'title': 'Folge Oster-Spezial 2015',
|
||||||
'id': 'mdb-1198320',
|
'id': 'mdb-1088195',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': None,
|
'age_limit': None,
|
||||||
'upload_date': '20071003'
|
'upload_date': '20150406'
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -342,7 +323,6 @@ class WDRMobileIE(InfoExtractor):
|
|||||||
/[0-9]+/[0-9]+/
|
/[0-9]+/[0-9]+/
|
||||||
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
(?P<id>[0-9]+)_(?P<title>[0-9]+)'''
|
||||||
IE_NAME = 'wdr:mobile'
|
IE_NAME = 'wdr:mobile'
|
||||||
_WORKING = False # no such domain
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -82,7 +82,7 @@ class XVideosIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'https://www.xvideos.com/video%s/0' % video_id, video_id)
|
'https://www.xvideos.com/video%s/' % video_id, video_id)
|
||||||
|
|
||||||
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
|
@ -7,7 +7,6 @@ from .common import InfoExtractor
|
|||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
@ -146,7 +145,6 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
'timestamp': 1613948400,
|
'timestamp': 1613948400,
|
||||||
'upload_date': '20210221',
|
'upload_date': '20210221',
|
||||||
},
|
},
|
||||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
|
||||||
}, {
|
}, {
|
||||||
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
# Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
|
||||||
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
|
||||||
@ -160,7 +158,6 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
'timestamp': 1608604200,
|
'timestamp': 1608604200,
|
||||||
'upload_date': '20201222',
|
'upload_date': '20201222',
|
||||||
},
|
},
|
||||||
'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
|
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -193,17 +190,6 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'video_artede_083871-001-A',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Tödliche Flucht (1/6)',
|
|
||||||
'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
|
|
||||||
'duration': 3193.0,
|
|
||||||
'timestamp': 1641355200,
|
|
||||||
'upload_date': '20220105',
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_entry(self, url, player, content, video_id):
|
def _extract_entry(self, url, player, content, video_id):
|
||||||
@ -211,18 +197,12 @@ class ZDFIE(ZDFBaseIE):
|
|||||||
|
|
||||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||||
|
|
||||||
def get_ptmd_path(d):
|
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
|
||||||
return (
|
|
||||||
d.get('http://zdf.de/rels/streams/ptmd')
|
|
||||||
or d.get('http://zdf.de/rels/streams/ptmd-template',
|
|
||||||
'').replace('{playerId}', 'ngplayer_2_4'))
|
|
||||||
|
|
||||||
ptmd_path = get_ptmd_path(try_get(t, lambda x: x['streams']['default'], dict) or {})
|
|
||||||
if not ptmd_path:
|
|
||||||
ptmd_path = get_ptmd_path(t)
|
|
||||||
|
|
||||||
if not ptmd_path:
|
if not ptmd_path:
|
||||||
raise ExtractorError('Could not extract ptmd_path')
|
ptmd_path = t[
|
||||||
|
'http://zdf.de/rels/streams/ptmd-template'].replace(
|
||||||
|
'{playerId}', 'ngplayer_2_4')
|
||||||
|
|
||||||
info = self._extract_ptmd(
|
info = self._extract_ptmd(
|
||||||
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
||||||
|
Loading…
Reference in New Issue
Block a user