Compare commits

..

No commits in common. "a7260099873acc6dc7d76cafad2f6b139087afd0" and "a0df8a06178e530a1097f177a1faf1d2c609ac99" have entirely different histories.

8 changed files with 122 additions and 92 deletions

View File

@ -773,20 +773,11 @@ class YoutubeDL(object):
def extract_info(self, url, download=True, ie_key=None, extra_info={}, def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True, force_generic_extractor=False): process=True, force_generic_extractor=False):
""" '''
Return a list with a dictionary for each video extracted. Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
Arguments: extra_info is a dict containing the extra values to add to each result
url -- URL to extract '''
Keyword arguments:
download -- whether to download videos during extraction
ie_key -- extractor key hint
extra_info -- dictionary containing the extra values to add to each result
process -- whether to resolve all unresolved references (URLs, playlist items),
must be True for download to work.
force_generic_extractor -- force using the generic extractor
"""
if not ie_key and force_generic_extractor: if not ie_key and force_generic_extractor:
ie_key = 'Generic' ie_key = 'Generic'

View File

@ -0,0 +1,86 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
remove_start,
int_or_none,
)
class BlinkxIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx'
_TEST = {
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': {
'id': 'Da0Gw3xc',
'ext': 'mp4',
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
'uploader': 'IGN News',
'upload_date': '20150217',
'timestamp': 1424215740,
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
'duration': 47.743333,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
+ 'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0]
duration = None
thumbnails = []
formats = []
for m in data['media']:
if m['type'] == 'jpg':
thumbnails.append({
'url': m['link'],
'width': int(m['w']),
'height': int(m['h']),
})
elif m['type'] == 'original':
duration = float(m['d'])
elif m['type'] == 'youtube':
yt_id = m['link']
self.to_screen('Youtube video detected: %s' % yt_id)
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
'abr': abr,
'vbr': vbr,
'tbr': tbr,
'width': int_or_none(m.get('w')),
'height': int_or_none(m.get('h')),
})
self._sort_formats(formats)
return {
'id': display_id,
'fullid': video_id,
'title': data['title'],
'formats': formats,
'uploader': data['channel_name'],
'timestamp': data['pubdate_epoch'],
'description': data.get('description'),
'thumbnails': thumbnails,
'duration': duration,
}

View File

@ -32,18 +32,6 @@ class DigitallySpeakingIE(InfoExtractor):
# From http://www.gdcvault.com/play/1013700/Advanced-Material # From http://www.gdcvault.com/play/1013700/Advanced-Material
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml', 'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
'only_matching': True, 'only_matching': True,
}, {
# From https://gdcvault.com/play/1016624, empty speakerVideo
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
'info_dict': {
'id': '201210-822101_1349794556671DDDD',
'ext': 'flv',
'title': 'Pre-launch - Preparing to Take the Plunge',
},
}, {
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
'only_matching': True,
}] }]
def _parse_mp4(self, metadata): def _parse_mp4(self, metadata):
@ -96,20 +84,26 @@ class DigitallySpeakingIE(InfoExtractor):
'vcodec': 'none', 'vcodec': 'none',
'format_id': audio.get('code'), 'format_id': audio.get('code'),
}) })
for video_key, format_id, preference in ( slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
('slide', 'slides', -2), ('speaker', 'speaker', -1)): formats.append({
video_path = xpath_text(metadata, './%sVideo' % video_key) 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
if not video_path: 'play_path': remove_end(slide_video_path, '.flv'),
continue 'ext': 'flv',
formats.append({ 'format_note': 'slide deck video',
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, 'quality': -2,
'play_path': remove_end(video_path, '.flv'), 'preference': -2,
'ext': 'flv', 'format_id': 'slides',
'format_note': '%s video' % video_key, })
'quality': preference, speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
'preference': preference, formats.append({
'format_id': format_id, 'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
}) 'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,
'format_id': 'speaker',
})
return formats return formats
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -132,6 +132,7 @@ from .bleacherreport import (
BleacherReportIE, BleacherReportIE,
BleacherReportCMSIE, BleacherReportCMSIE,
) )
from .blinkx import BlinkxIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE

View File

@ -16,7 +16,7 @@ from ..utils import (
class FunimationIE(InfoExtractor): class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation' _NETRC_MACHINE = 'funimation'
_TOKEN = None _TOKEN = None
@ -51,10 +51,6 @@ class FunimationIE(InfoExtractor):
}, { }, {
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
'only_matching': True, 'only_matching': True,
}, {
# with lang code
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
'only_matching': True,
}] }]
def _login(self): def _login(self):

View File

@ -6,7 +6,6 @@ from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import ( from ..utils import (
HEADRequest, HEADRequest,
remove_start,
sanitized_Request, sanitized_Request,
smuggle_url, smuggle_url,
urlencode_postdata, urlencode_postdata,
@ -103,26 +102,6 @@ class GDCVaultIE(InfoExtractor):
'format': 'mp4-408', 'format': 'mp4-408',
}, },
}, },
{
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
'url': 'https://www.gdcvault.com/play/1025699',
'info_dict': {
'id': '0_zagynv0a',
'ext': 'mp4',
'title': 'Tech Toolbox',
'upload_date': '20190408',
'uploader_id': 'joe@blazestreaming.com',
'timestamp': 1554764629,
},
'params': {
'skip_download': True,
},
},
{
# HTML5 video
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
'only_matching': True,
},
] ]
def _login(self, webpage_url, display_id): def _login(self, webpage_url, display_id):
@ -196,18 +175,7 @@ class GDCVaultIE(InfoExtractor):
xml_name = self._html_search_regex( xml_name = self._html_search_regex(
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>', r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename', default=None) start_page, 'xml filename')
if not xml_name:
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
info.update({
'title': remove_start(self._search_regex(
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
'title', default=None) or self._og_search_title(
start_page, default=None), 'GDC Vault - '),
'id': video_id,
'display_id': display_id,
})
return info
embed_url = '%s/xml/%s' % (xml_root, xml_name) embed_url = '%s/xml/%s' % (xml_root, xml_name)
ie_key = 'DigitallySpeaking' ie_key = 'DigitallySpeaking'

View File

@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
def _extract_urls(webpage): def _extract_urls(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
finditer = ( finditer = (
list(re.finditer( re.finditer(
r"""(?xs) r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\( kWidget\.(?:thumb)?[Ee]mbed\(
\{.*? \{.*?
@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*? (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s* (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\}) (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage)) """, webpage)
or list(re.finditer( or re.finditer(
r'''(?xs) r'''(?xs)
(?P<q1>["']) (?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)* (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s* \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
) )
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3) (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage)) ''', webpage)
or list(re.finditer( or re.finditer(
r'''(?xs) r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s* <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)* (?:(?!(?P=q1)).)*
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+) [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?:(?!(?P=q1)).)* (?:(?!(?P=q1)).)*
(?P=q1) (?P=q1)
''', webpage)) ''', webpage)
) )
urls = [] urls = []
for mobj in finditer: for mobj in finditer:

View File

@ -15,7 +15,7 @@ from ..utils import (
class MedalTVIE(InfoExtractor): class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://medal.tv/clips/2mA60jWAGQCBH', 'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa', 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
@ -42,12 +42,6 @@ class MedalTVIE(InfoExtractor):
'upload_date': '20201117', 'upload_date': '20201117',
'uploader_id': '5156321', 'uploader_id': '5156321',
} }
}, {
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
'only_matching': True,
}, {
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):