mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-19 15:12:01 +00:00
Compare commits
2 Commits
ca304beb15
...
c5aa8f36bf
Author | SHA1 | Date | |
---|---|---|---|
|
c5aa8f36bf | ||
|
3748863070 |
101
youtube_dl/extractor/arnes.py
Normal file
101
youtube_dl/extractor/arnes.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ArnesIE(InfoExtractor):
    """Information extractor for videos hosted on video.arnes.si.

    Metadata is read from the site's public JSON endpoint
    ``/api/public/video/<id>``; the watch, embed and API URL forms listed
    in ``_VALID_URL`` all resolve to the same 12-character video id.
    """

    IE_NAME = 'video.arnes.si'
    IE_DESC = 'Arnes Video'
    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
    _TESTS = [{
        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
        'info_dict': {
            'id': 'a1qrWTOQfVoU',
            'ext': 'mp4',
            'title': 'Linearna neodvisnost, definicija',
            'description': 'Linearna neodvisnost, definicija',
            'license': 'PRIVATE',
            'creator': 'Polona Oblak',
            'timestamp': 1585063725,
            'upload_date': '20200324',
            'channel': 'Polona Oblak',
            'channel_id': 'q6pc04hw24cj',
            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
            'duration': 596.75,
            'view_count': int,
            'tags': ['linearna_algebra'],
            'start_time': 10,
        }
    }, {
        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
        'only_matching': True,
    }]
    # Media, thumbnail and channel locations are concatenated onto this
    # prefix below, i.e. the API values are treated as site-relative paths.
    _BASE_URL = 'https://video.arnes.si'

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Downloads the video's JSON description, turns every ``media`` item
        that carries a ``url`` into a format, and maps the remaining
        metadata onto info dict fields. ``title`` is the only field read
        with a hard lookup; all others are optional and become ``None``
        when the response lacks them.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
        title = video['title']

        formats = []
        for media in (video.get('media') or []):
            media_url = media.get('url')
            if not media_url:
                # Without a location the item cannot be offered as a format.
                continue
            formats.append({
                'url': self._BASE_URL + media_url,
                'format_id': remove_start(media.get('format'), 'FORMAT_'),
                'format_note': media.get('formatTranslation'),
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            })
        self._sort_formats(formats)

        channel = video.get('channel') or {}
        channel_id = channel.get('url')
        thumbnail = video.get('thumbnailUrl')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # The thumbnail is optional: concatenating None onto the base
            # URL would raise TypeError and abort the whole extraction.
            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
            'description': video.get('description'),
            'license': video.get('license'),
            'creator': video.get('author'),
            'timestamp': parse_iso8601(video.get('creationTime')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
            # NOTE(review): 1000 is passed as the scale argument, so the API
            # presumably reports the duration in milliseconds - confirm.
            'duration': float_or_none(video.get('duration'), 1000),
            'view_count': int_or_none(video.get('views')),
            'tags': video.get('hashtags'),
            # Optional start offset taken from the "t" query parameter of
            # the URL the user supplied.
            'start_time': int_or_none(compat_parse_qs(
                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
        }
|
@ -72,6 +72,7 @@ from .arte import (
|
|||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
from .arnes import ArnesIE
|
||||||
from .asiancrush import (
|
from .asiancrush import (
|
||||||
AsianCrushIE,
|
AsianCrushIE,
|
||||||
AsianCrushPlaylistIE,
|
AsianCrushPlaylistIE,
|
||||||
|
@ -1959,7 +1959,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
invidio\.us
|
invidio\.us
|
||||||
)/
|
)/
|
||||||
(?:
|
(?:
|
||||||
(?:channel|c|user|feed)/|
|
(?:channel|c|user|feed|hashtag)/|
|
||||||
(?:playlist|watch)\?.*?\blist=|
|
(?:playlist|watch)\?.*?\blist=|
|
||||||
(?!(?:watch|embed|v|e)\b)
|
(?!(?:watch|embed|v|e)\b)
|
||||||
)
|
)
|
||||||
@ -2245,6 +2245,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
'url': 'https://www.youtube.com/TheYoungTurks/live',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.youtube.com/hashtag/cctv9',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cctv9',
|
||||||
|
'title': '#cctv9',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 350,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -2392,6 +2399,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
for entry in self._post_thread_entries(renderer):
|
for entry in self._post_thread_entries(renderer):
|
||||||
yield entry
|
yield entry
|
||||||
|
|
||||||
|
def _rich_grid_entries(self, contents):
    """Yield video entries for the rich-grid items in ``contents``.

    For each item, the value at
    ``richItemRenderer -> content -> videoRenderer`` is looked up through
    ``try_get`` (requested as a dict). Items where that lookup yields
    nothing truthy, and renderers for which ``_video_entry`` returns a
    falsy result, are skipped.
    """
    for item in contents:
        renderer = try_get(
            item,
            lambda x: x['richItemRenderer']['content']['videoRenderer'],
            dict)
        if not renderer:
            continue
        video = self._video_entry(renderer)
        if not video:
            continue
        yield video
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _build_continuation_query(continuation, ctp=None):
|
def _build_continuation_query(continuation, ctp=None):
|
||||||
query = {
|
query = {
|
||||||
@ -2442,55 +2457,60 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
if not tab_content:
|
if not tab_content:
|
||||||
return
|
return
|
||||||
slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
|
slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
|
||||||
if not slr_renderer:
|
if slr_renderer:
|
||||||
return
|
is_channels_tab = tab.get('title') == 'Channels'
|
||||||
is_channels_tab = tab.get('title') == 'Channels'
|
continuation = None
|
||||||
continuation = None
|
slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
|
||||||
slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or []
|
for slr_content in slr_contents:
|
||||||
for slr_content in slr_contents:
|
if not isinstance(slr_content, dict):
|
||||||
if not isinstance(slr_content, dict):
|
|
||||||
continue
|
|
||||||
is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
|
|
||||||
if not is_renderer:
|
|
||||||
continue
|
|
||||||
isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
|
|
||||||
for isr_content in isr_contents:
|
|
||||||
if not isinstance(isr_content, dict):
|
|
||||||
continue
|
continue
|
||||||
renderer = isr_content.get('playlistVideoListRenderer')
|
is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
|
||||||
if renderer:
|
if not is_renderer:
|
||||||
for entry in self._playlist_entries(renderer):
|
|
||||||
yield entry
|
|
||||||
continuation = self._extract_continuation(renderer)
|
|
||||||
continue
|
continue
|
||||||
renderer = isr_content.get('gridRenderer')
|
isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
|
||||||
if renderer:
|
for isr_content in isr_contents:
|
||||||
for entry in self._grid_entries(renderer):
|
if not isinstance(isr_content, dict):
|
||||||
yield entry
|
continue
|
||||||
continuation = self._extract_continuation(renderer)
|
renderer = isr_content.get('playlistVideoListRenderer')
|
||||||
continue
|
if renderer:
|
||||||
renderer = isr_content.get('shelfRenderer')
|
for entry in self._playlist_entries(renderer):
|
||||||
if renderer:
|
yield entry
|
||||||
for entry in self._shelf_entries(renderer, not is_channels_tab):
|
continuation = self._extract_continuation(renderer)
|
||||||
yield entry
|
continue
|
||||||
continue
|
renderer = isr_content.get('gridRenderer')
|
||||||
renderer = isr_content.get('backstagePostThreadRenderer')
|
if renderer:
|
||||||
if renderer:
|
for entry in self._grid_entries(renderer):
|
||||||
for entry in self._post_thread_entries(renderer):
|
yield entry
|
||||||
yield entry
|
continuation = self._extract_continuation(renderer)
|
||||||
continuation = self._extract_continuation(renderer)
|
continue
|
||||||
continue
|
renderer = isr_content.get('shelfRenderer')
|
||||||
renderer = isr_content.get('videoRenderer')
|
if renderer:
|
||||||
if renderer:
|
for entry in self._shelf_entries(renderer, not is_channels_tab):
|
||||||
entry = self._video_entry(renderer)
|
yield entry
|
||||||
if entry:
|
continue
|
||||||
yield entry
|
renderer = isr_content.get('backstagePostThreadRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._post_thread_entries(renderer):
|
||||||
|
yield entry
|
||||||
|
continuation = self._extract_continuation(renderer)
|
||||||
|
continue
|
||||||
|
renderer = isr_content.get('videoRenderer')
|
||||||
|
if renderer:
|
||||||
|
entry = self._video_entry(renderer)
|
||||||
|
if entry:
|
||||||
|
yield entry
|
||||||
|
|
||||||
|
if not continuation:
|
||||||
|
continuation = self._extract_continuation(is_renderer)
|
||||||
if not continuation:
|
if not continuation:
|
||||||
continuation = self._extract_continuation(is_renderer)
|
continuation = self._extract_continuation(slr_renderer)
|
||||||
|
else:
|
||||||
if not continuation:
|
rich_grid_renderer = tab_content.get('richGridRenderer')
|
||||||
continuation = self._extract_continuation(slr_renderer)
|
if not rich_grid_renderer:
|
||||||
|
return
|
||||||
|
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
||||||
|
yield entry
|
||||||
|
continuation = self._extract_continuation(rich_grid_renderer)
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'x-youtube-client-name': '1',
|
'x-youtube-client-name': '1',
|
||||||
@ -2586,6 +2606,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
yield entry
|
yield entry
|
||||||
continuation = self._extract_continuation(continuation_renderer)
|
continuation = self._extract_continuation(continuation_renderer)
|
||||||
continue
|
continue
|
||||||
|
renderer = continuation_item.get('richItemRenderer')
|
||||||
|
if renderer:
|
||||||
|
for entry in self._rich_grid_entries(continuation_items):
|
||||||
|
yield entry
|
||||||
|
continuation = self._extract_continuation({'contents': continuation_items})
|
||||||
|
continue
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -2642,7 +2668,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
selected_tab = self._extract_selected_tab(tabs)
|
selected_tab = self._extract_selected_tab(tabs)
|
||||||
renderer = try_get(
|
renderer = try_get(
|
||||||
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
||||||
playlist_id = title = description = None
|
playlist_id = item_id
|
||||||
|
title = description = None
|
||||||
if renderer:
|
if renderer:
|
||||||
channel_title = renderer.get('title') or item_id
|
channel_title = renderer.get('title') or item_id
|
||||||
tab_title = selected_tab.get('title')
|
tab_title = selected_tab.get('title')
|
||||||
@ -2651,12 +2678,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|||||||
title += ' - %s' % tab_title
|
title += ' - %s' % tab_title
|
||||||
description = renderer.get('description')
|
description = renderer.get('description')
|
||||||
playlist_id = renderer.get('externalId')
|
playlist_id = renderer.get('externalId')
|
||||||
renderer = try_get(
|
else:
|
||||||
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
|
renderer = try_get(
|
||||||
if renderer:
|
data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
|
||||||
title = renderer.get('title')
|
if renderer:
|
||||||
description = None
|
title = renderer.get('title')
|
||||||
playlist_id = item_id
|
else:
|
||||||
|
renderer = try_get(
|
||||||
|
data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
|
||||||
|
if renderer:
|
||||||
|
title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
|
||||||
playlist = self.playlist_result(
|
playlist = self.playlist_result(
|
||||||
self._entries(selected_tab, identity_token),
|
self._entries(selected_tab, identity_token),
|
||||||
playlist_id=playlist_id, playlist_title=title,
|
playlist_id=playlist_id, playlist_title=title,
|
||||||
|
Loading…
Reference in New Issue
Block a user