[bandcamp] Extract release_timestamp

[lbry] Extract release_timestamp (closes #28386)
Introduce release_timestamp meta field (refs #28386)
2024-12-18 14:41:59 +00:00 · 2021-03-10 03:41:21 +07:00 · 2021-03-10 03:40:56 +07:00 · 2021-03-10 03:36:31 +07:00 · 2021-03-10 02:56:01 +07:00 · 2021-03-10 02:54:10 +07:00
5 changed files with 54 additions and 23 deletions
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1511,12 +1511,16 @@ class YoutubeDL(object):
        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']
-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
            'uploader': 'Ben Prunty',
            'timestamp': 1396508491,
            'upload_date': '20140403',
            'release_timestamp': 1396483200,
            'release_date': '20140403',
            'duration': 260.877,
            'track': 'Lanius (Battle)',
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
            'uploader': 'Mastodon',
            'timestamp': 1322005399,
            'upload_date': '20111122',
            'release_timestamp': 1076112000,
            'release_date': '20040207',
            'duration': 120.79,
            'track': 'Hail to Fire',
@@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
            'thumbnail': thumbnail,
            'uploader': artist,
            'timestamp': timestamp,
-            'release_date': unified_strdate(tralbum.get('album_release_date')),
+            'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
            'duration': duration,
            'track': track,
            'track_number': track_number,
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -230,8 +230,10 @@ class InfoExtractor(object):
    uploader:       Full name of the video uploader.
    license:        License name the video is licensed under.
    creator:        The creator of the video.
    release_timestamp: UNIX timestamp of the moment the video was released.
    release_date:   The date (YYYYMMDD) when the video was released.
-    timestamp:      UNIX timestamp of the moment the video became available.
+    timestamp:      UNIX timestamp of the moment the video became available
                    (uploaded).
    upload_date:    Video upload date (YYYYMMDD).
                    If not explicitly set, calculated from timestamp.
    uploader_id:    Nickname or id of the video uploader.
--- a/youtube_dl/extractor/lbry.py
+++ b/youtube_dl/extractor/lbry.py
@@ -60,6 +60,7 @@ class LBRYBaseIE(InfoExtractor):
            'description': stream_value.get('description'),
            'license': stream_value.get('license'),
            'timestamp': int_or_none(stream.get('timestamp')),
            'release_timestamp': int_or_none(stream_value.get('release_time')),
            'tags': stream_value.get('tags'),
            'duration': int_or_none(media.get('duration')),
            'channel': try_get(signing_channel, lambda x: x['value']['title']),
@@ -92,6 +93,8 @@ class LBRYIE(LBRYBaseIE):
            'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
            'timestamp': 1595694354,
            'upload_date': '20200725',
            'release_timestamp': 1595340697,
            'release_date': '20200721',
            'width': 1280,
            'height': 720,
        }
@@ -106,6 +109,8 @@ class LBRYIE(LBRYBaseIE):
            'description': 'md5:661ac4f1db09f31728931d7b88807a61',
            'timestamp': 1591312601,
            'upload_date': '20200604',
            'release_timestamp': 1591312421,
            'release_date': '20200604',
            'tags': list,
            'duration': 2570,
            'channel': 'The LBRY Foundation',
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
    }, {
        # subtitles
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@@ -265,7 +266,8 @@ class PornHubIE(PornHubBaseIE):
        webpage = dl_webpage('pc')
        error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
            webpage, 'error message', default=None, group='error')
        if error_msg:
            error_msg = re.sub(r'\s+', ' ', error_msg)
@@ -394,6 +396,21 @@ class PornHubIE(PornHubBaseIE):
        upload_date = None
        formats = []
        def add_format(format_url, height=None):
            tbr = None
            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
            if mobj:
                if not height:
                    height = int(mobj.group('height'))
                tbr = int(mobj.group('tbr'))
            formats.append({
                'url': format_url,
                'format_id': '%dp' % height if height else None,
                'height': height,
                'tbr': tbr,
            })
        for video_url, height in video_urls:
            if not upload_date:
                upload_date = self._search_regex(
@@ -410,18 +427,19 @@ class PornHubIE(PornHubBaseIE):
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
-            tbr = None
+            if '/video/get_media' in video_url:
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
+                medias = self._download_json(video_url, video_id, fatal=False)
-            if mobj:
+                if isinstance(medias, list):
-                if not height:
+                    for media in medias:
-                    height = int(mobj.group('height'))
+                        if not isinstance(media, dict):
-                tbr = int(mobj.group('tbr'))
+                            continue
-            formats.append({
+                        video_url = url_or_none(media.get('videoUrl'))
-                'url': video_url,
+                        if not video_url:
-                'format_id': '%dp' % height if height else None,
+                            continue
-                'height': height,
+                        height = int_or_none(media.get('quality'))
-                'tbr': tbr,
+                        add_format(video_url, height)
-            })
+                continue
            add_format(video_url)
        self._sort_formats(formats)
        video_uploader = self._html_search_regex(
Author	SHA1	Message	Date
Sergey M․	bae7dbf78b	[bandcamp] Extract release_timestamp	2021-03-10 03:41:21 +07:00
Sergey M․	15c24b0346	[lbry] Extract release_timestamp (closes #28386)	2021-03-10 03:40:56 +07:00
Sergey M․	477bff6906	Introduce release_timestamp meta field (refs #28386)	2021-03-10 03:36:31 +07:00
Sergey M․	1a1ccd9a6e	[pornhub] Detect flagged videos	2021-03-10 02:56:01 +07:00
Sergey M․	7dc513487f	[pornhub] Extract formats from get_media end point (#28395)	2021-03-10 02:54:10 +07:00