Compare commits

...

5 Commits

Author SHA1 Message Date
Sergey M․
bae7dbf78b
[bandcamp] Extract release_timestamp 2021-03-10 03:41:21 +07:00
Sergey M․
15c24b0346
[lbry] Extract release_timestamp (closes #28386) 2021-03-10 03:40:56 +07:00
Sergey M․
477bff6906
Introduce release_timestamp meta field (refs #28386) 2021-03-10 03:36:31 +07:00
Sergey M․
1a1ccd9a6e
[pornhub] Detect flagged videos 2021-03-10 02:56:01 +07:00
Sergey M․
7dc513487f
[pornhub] Extract formats from get_media end point (#28395) 2021-03-10 02:54:10 +07:00
5 changed files with 54 additions and 23 deletions

View File

@ -1511,12 +1511,16 @@ class YoutubeDL(object):
if 'display_id' not in info_dict and 'id' in info_dict: if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id'] info_dict['display_id'] = info_dict['id']
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None: for ts_key, date_key in (
('timestamp', 'upload_date'),
('release_timestamp', 'release_date'),
):
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
# Working around out-of-range timestamp values (e.g. negative ones on Windows, # Working around out-of-range timestamp values (e.g. negative ones on Windows,
# see http://bugs.python.org/issue1646728) # see http://bugs.python.org/issue1646728)
try: try:
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp']) upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d') info_dict[date_key] = upload_date.strftime('%Y%m%d')
except (ValueError, OverflowError, OSError): except (ValueError, OverflowError, OSError):
pass pass

View File

@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
'uploader': 'Ben Prunty', 'uploader': 'Ben Prunty',
'timestamp': 1396508491, 'timestamp': 1396508491,
'upload_date': '20140403', 'upload_date': '20140403',
'release_timestamp': 1396483200,
'release_date': '20140403', 'release_date': '20140403',
'duration': 260.877, 'duration': 260.877,
'track': 'Lanius (Battle)', 'track': 'Lanius (Battle)',
@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
'uploader': 'Mastodon', 'uploader': 'Mastodon',
'timestamp': 1322005399, 'timestamp': 1322005399,
'upload_date': '20111122', 'upload_date': '20111122',
'release_timestamp': 1076112000,
'release_date': '20040207', 'release_date': '20040207',
'duration': 120.79, 'duration': 120.79,
'track': 'Hail to Fire', 'track': 'Hail to Fire',
@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'uploader': artist, 'uploader': artist,
'timestamp': timestamp, 'timestamp': timestamp,
'release_date': unified_strdate(tralbum.get('album_release_date')), 'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
'duration': duration, 'duration': duration,
'track': track, 'track': track,
'track_number': track_number, 'track_number': track_number,

View File

@ -230,8 +230,10 @@ class InfoExtractor(object):
uploader: Full name of the video uploader. uploader: Full name of the video uploader.
license: License name the video is licensed under. license: License name the video is licensed under.
creator: The creator of the video. creator: The creator of the video.
release_timestamp: UNIX timestamp of the moment the video was released.
release_date: The date (YYYYMMDD) when the video was released. release_date: The date (YYYYMMDD) when the video was released.
timestamp: UNIX timestamp of the moment the video became available. timestamp: UNIX timestamp of the moment the video became available
(uploaded).
upload_date: Video upload date (YYYYMMDD). upload_date: Video upload date (YYYYMMDD).
If not explicitly set, calculated from timestamp. If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader. uploader_id: Nickname or id of the video uploader.

View File

@ -60,6 +60,7 @@ class LBRYBaseIE(InfoExtractor):
'description': stream_value.get('description'), 'description': stream_value.get('description'),
'license': stream_value.get('license'), 'license': stream_value.get('license'),
'timestamp': int_or_none(stream.get('timestamp')), 'timestamp': int_or_none(stream.get('timestamp')),
'release_timestamp': int_or_none(stream_value.get('release_time')),
'tags': stream_value.get('tags'), 'tags': stream_value.get('tags'),
'duration': int_or_none(media.get('duration')), 'duration': int_or_none(media.get('duration')),
'channel': try_get(signing_channel, lambda x: x['value']['title']), 'channel': try_get(signing_channel, lambda x: x['value']['title']),
@ -92,6 +93,8 @@ class LBRYIE(LBRYBaseIE):
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51', 'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
'timestamp': 1595694354, 'timestamp': 1595694354,
'upload_date': '20200725', 'upload_date': '20200725',
'release_timestamp': 1595340697,
'release_date': '20200721',
'width': 1280, 'width': 1280,
'height': 720, 'height': 720,
} }
@ -106,6 +109,8 @@ class LBRYIE(LBRYBaseIE):
'description': 'md5:661ac4f1db09f31728931d7b88807a61', 'description': 'md5:661ac4f1db09f31728931d7b88807a61',
'timestamp': 1591312601, 'timestamp': 1591312601,
'upload_date': '20200604', 'upload_date': '20200604',
'release_timestamp': 1591312421,
'release_date': '20200604',
'tags': list, 'tags': list,
'duration': 2570, 'duration': 2570,
'channel': 'The LBRY Foundation', 'channel': 'The LBRY Foundation',

View File

@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
}, { }, {
# subtitles # subtitles
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7', 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@ -265,7 +266,8 @@ class PornHubIE(PornHubBaseIE):
webpage = dl_webpage('pc') webpage = dl_webpage('pc')
error_msg = self._html_search_regex( error_msg = self._html_search_regex(
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>', (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
webpage, 'error message', default=None, group='error') webpage, 'error message', default=None, group='error')
if error_msg: if error_msg:
error_msg = re.sub(r'\s+', ' ', error_msg) error_msg = re.sub(r'\s+', ' ', error_msg)
@ -394,6 +396,21 @@ class PornHubIE(PornHubBaseIE):
upload_date = None upload_date = None
formats = [] formats = []
def add_format(format_url, height=None):
    """Append one format entry for ``format_url`` to ``formats``.

    The URL is searched for a ``<height>[p]_<tbr>k`` fragment. When the
    fragment is present, the bitrate is read from it, and the height is
    read from it as well unless the caller passed a truthy ``height``.
    Without a match the bitrate stays ``None`` and ``height`` is used as
    given.
    """
    match = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
    # An explicitly supplied height takes precedence over the one in the URL.
    if match and not height:
        height = int(match.group('height'))
    bitrate = int(match.group('tbr')) if match else None
    format_id = '%dp' % height if height else None
    formats.append({
        'url': format_url,
        'format_id': format_id,
        'height': height,
        'tbr': bitrate,
    })
for video_url, height in video_urls: for video_url, height in video_urls:
if not upload_date: if not upload_date:
upload_date = self._search_regex( upload_date = self._search_regex(
@ -410,18 +427,19 @@ class PornHubIE(PornHubBaseIE):
video_url, video_id, 'mp4', entry_protocol='m3u8_native', video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
continue continue
tbr = None if '/video/get_media' in video_url:
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) medias = self._download_json(video_url, video_id, fatal=False)
if mobj: if isinstance(medias, list):
if not height: for media in medias:
height = int(mobj.group('height')) if not isinstance(media, dict):
tbr = int(mobj.group('tbr')) continue
formats.append({ video_url = url_or_none(media.get('videoUrl'))
'url': video_url, if not video_url:
'format_id': '%dp' % height if height else None, continue
'height': height, height = int_or_none(media.get('quality'))
'tbr': tbr, add_format(video_url, height)
}) continue
add_format(video_url)
self._sort_formats(formats) self._sort_formats(formats)
video_uploader = self._html_search_regex( video_uploader = self._html_search_regex(