[AENetworksBaseIE] Report missing show data instead of crash

[devscripts] Add a hack to convert command-line options to API options
[core] Handle /../ sequences in HTTP URLs
2024-12-20 15:42:11 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00 · 2023-03-14 16:23:20 +00:00
8 changed files with 165 additions and 43 deletions
--- a/devscripts/cli_to_api.py
+++ b/devscripts/cli_to_api.py
@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+"""
+This script displays the API parameters corresponding to a yt-dl command line
+
+Example:
+$ ./cli_to_api.py -f best
+{u'format': 'best'}
+$
+"""
+
+# Allow direct execution
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import youtube_dl
+from types import MethodType
+
+
+def cli_to_api(*opts):
+    YDL = youtube_dl.YoutubeDL
+
+    # to extract the parsed options, break out of YoutubeDL instantiation
+
+    # return options via this Exception
+    class ParseYTDLResult(Exception):
+        def __init__(self, result):
+            super(ParseYTDLResult, self).__init__('result')
+            self.opts = result
+
+    # replacement constructor that raises ParseYTDLResult
+    def ytdl_init(ydl, ydl_opts):
+        super(YDL, ydl).__init__(ydl_opts)
+        raise ParseYTDLResult(ydl_opts)
+
+    # patch in the constructor
+    YDL.__init__ = MethodType(ytdl_init, YDL)
+
+    # core parser
+    def parsed_options(argv):
+        try:
+            youtube_dl._real_main(list(argv))
+        except ParseYTDLResult as result:
+            return result.opts
+
+    # from https://github.com/yt-dlp/yt-dlp/issues/5859#issuecomment-1363938900
+    default = parsed_options([])
+    diff = dict((k, v) for k, v in parsed_options(opts).items() if default[k] != v)
+    if 'postprocessors' in diff:
+        diff['postprocessors'] = [pp for pp in diff['postprocessors'] if pp not in default['postprocessors']]
+    return diff
+
+
+def main():
+    from pprint import pprint
+    pprint(cli_to_api(*sys.argv))
+
+
+if __name__ == '__main__':
+    main()
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@ -39,6 +39,7 @@ from .compat import (
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
+    compat_urllib_parse,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
 )
@ -60,6 +61,7 @@ from .utils import (
    format_bytes,
    formatSeconds,
    GeoRestrictedError,
+    HEADRequest,
    int_or_none,
    ISO3166Utils,
    locked_file,
@ -74,6 +76,7 @@ from .utils import (
    preferredencoding,
    prepend_extension,
    process_communicate_or_kill,
+    PUTRequest,
    register_socks_protocols,
    render_table,
    replace_extension,
@ -2297,6 +2300,27 @@ class YoutubeDL(object):
        """ Start an HTTP download """
        if isinstance(req, compat_basestring):
            req = sanitized_Request(req)
+        # an embedded /../ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        url = req.get_full_url()
+        parts = url.partition('/../')
+        if parts[1]:
+            url = compat_urllib_parse.urljoin(parts[0] + parts[1][:1], parts[1][1:] + parts[2])
+        if url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            parts = compat_urllib_parse.urlsplit(url)
+            path = parts.path
+            while path.startswith('/../'):
+                path = path[3:]
+            url = parts._replace(path=path).geturl()
+            # get a new Request with the munged URL
+            if url != req.get_full_url():
+                req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+                    req.get_method(), compat_urllib_request.Request)
+                req = req_type(
+                    url, data=req.data, headers=dict(req.header_items()),
+                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
        return self._opener.open(req, timeout=self._socket_timeout)

    def print_debug_header(self):
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@ -88,17 +88,21 @@ class FileDownloader(object):
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)

-    @staticmethod
-    def calc_eta(start, now, total, current):
+    @classmethod
+    def calc_eta(cls, start_or_rate, now_or_remaining, *args):
+        if len(args) < 2:
+            rate, remaining = (start_or_rate, now_or_remaining)
+            if None in (rate, remaining):
+                return None
+            return int(float(remaining) / rate)
+        start, now = (start_or_rate, now_or_remaining)
+        total, current = args
        if total is None:
            return None
        if now is None:
            now = time.time()
-        dif = now - start
-        if current == 0 or dif < 0.001:  # One millisecond
-            return None
-        rate = float(current) / dif
-        return int((float(total) - float(current)) / rate)
+        rate = cls.calc_speed(start, now, current)
+        return rate and int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
@ -123,6 +127,12 @@ class FileDownloader(object):
    def format_retries(retries):
        return 'inf' if retries == float('inf') else '%.0f' % retries

+    @staticmethod
+    def filesize_or_none(unencoded_filename):
+        fn = encodeFilename(unencoded_filename)
+        if os.path.isfile(fn):
+            return os.path.getsize(fn)
+
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@ -38,8 +38,7 @@ class DashSegmentsFD(FragmentFD):
            # In DASH, the first segment contains necessary headers to
            # generate a valid MP4 file, so always abort for the first segment
            fatal = i == 0 or not skip_unavailable_fragments
-            count = 0
-            while count <= fragment_retries:
+            for count in range(fragment_retries + 1):
                try:
                    fragment_url = fragment.get('url')
                    if not fragment_url:
@ -57,9 +56,8 @@ class DashSegmentsFD(FragmentFD):
                    # is usually enough) thus allowing to download the whole file successfully.
                    # To be future-proof we will retry all fragments that fail with any
                    # HTTP error.
-                    count += 1
-                    if count <= fragment_retries:
-                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                    if count < fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count + 1, fragment_retries)
                except DownloadError:
                    # Don't retry fragment if error occurred during HTTP downloading
                    # itself since it has own retry settings
@ -68,7 +66,7 @@ class DashSegmentsFD(FragmentFD):
                        break
                    raise

-            if count > fragment_retries:
+            if count >= fragment_retries:
                if not fatal:
                    self.report_skip_fragment(frag_index)
                    continue
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@ -273,7 +273,7 @@ class HttpieFD(ExternalFD):
 class FFmpegFD(ExternalFD):
    @classmethod
    def supports(cls, info_dict):
-        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms', 'http_dash_segments')

    @classmethod
    def available(cls):
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@ -71,7 +71,7 @@ class FragmentFD(FileDownloader):

    @staticmethod
    def __do_ytdl_file(ctx):
-        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+        return ctx['live'] is not True and ctx['tmpfilename'] != '-'

    def _read_ytdl_file(self, ctx):
        assert 'ytdl_corrupt' not in ctx
@ -101,6 +101,13 @@ class FragmentFD(FileDownloader):
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
        }
+        frag_resume_len = 0
+        if ctx['dl'].params.get('continuedl', True):
+            frag_resume_len = self.filesize_or_none(
+                self.temp_name(fragment_filename))
+        fragment_info_dict['frag_resume_len'] = frag_resume_len
+        ctx['frag_resume_len'] = frag_resume_len or 0
+
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False, None
@ -124,9 +131,7 @@ class FragmentFD(FileDownloader):
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
-        if 'live' not in ctx:
-            ctx['live'] = False
-        if not ctx['live']:
+        if not ctx.setdefault('live', False):
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
@ -136,10 +141,11 @@ class FragmentFD(FileDownloader):
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
+        continuedl = self.params.get('continuedl', True)
        dl = HttpQuietDownloader(
            self.ydl,
            {
-                'continuedl': True,
+                'continuedl': continuedl,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
@ -150,12 +156,11 @@ class FragmentFD(FileDownloader):
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
-        resume_len = 0

        # Establish possible resume length
-        if os.path.isfile(encodeFilename(tmpfilename)):
+        resume_len = self.filesize_or_none(tmpfilename) or 0
+        if resume_len > 0:
            open_mode = 'ab'
-            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
@ -164,7 +169,8 @@ class FragmentFD(FileDownloader):
        })

        if self.__do_ytdl_file(ctx):
-            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+            ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename'])))
+            if continuedl and ytdl_file_exists:
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
@ -178,7 +184,12 @@ class FragmentFD(FileDownloader):
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
+
            else:
+                if not continuedl:
+                    if ytdl_file_exists:
+                        self._read_ytdl_file(ctx)
+                    ctx['fragment_index'] = resume_len = 0
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

@ -209,6 +220,7 @@ class FragmentFD(FileDownloader):
        start = time.time()
        ctx.update({
            'started': start,
+            'fragment_started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
@ -218,6 +230,9 @@ class FragmentFD(FileDownloader):
            if s['status'] not in ('downloading', 'finished'):
                return

+            if not total_frags and ctx.get('fragment_count'):
+                state['fragment_count'] = ctx['fragment_count']
+
            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
@ -232,16 +247,17 @@ class FragmentFD(FileDownloader):
                ctx['fragment_index'] = state['fragment_index']
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_total_bytes)
+                ctx['fragment_started'] = time.time()
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+                ctx['speed'] = state['speed'] = self.calc_speed(
+                    ctx['fragment_started'], time_now, frag_downloaded_bytes - ctx['frag_resume_len'])
                if not ctx['live']:
-                    state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size - resume_len,
-                        state['downloaded_bytes'] - resume_len)
-                state['speed'] = s.get('speed') or ctx.get('speed')
-                ctx['speed'] = state['speed']
+                    state['eta'] = self.calc_eta(state['speed'], estimated_size - state['downloaded_bytes'])
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)

@ -268,7 +284,7 @@ class FragmentFD(FileDownloader):
                        os.utime(ctx['filename'], (time.time(), filetime))
                    except Exception:
                        pass
-            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+            downloaded_bytes = self.filesize_or_none(ctx['filename']) or 0

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@ -58,9 +58,9 @@ class HttpFD(FileDownloader):

        if self.params.get('continuedl', True):
            # Establish possible resume length
-            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
-                ctx.resume_len = os.path.getsize(
-                    encodeFilename(ctx.tmpfilename))
+            ctx.resume_len = info_dict.get('frag_resume_len')
+            if ctx.resume_len is None:
+                ctx.resume_len = self.filesize_or_none(ctx.tmpfilename) or 0

        ctx.is_resume = ctx.resume_len > 0

@ -115,9 +115,9 @@ class HttpFD(FileDownloader):
                        raise RetryDownload(err)
                    raise err
                # When trying to resume, Content-Range HTTP header of response has to be checked
-                # to match the value of requested Range HTTP header. This is due to a webservers
+                # to match the value of requested Range HTTP header. This is due to webservers
                # that don't support resuming and serve a whole file with no Content-Range
-                # set in response despite of requested Range (see
+                # set in response despite requested Range (see
                # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                if has_range:
                    content_range = ctx.data.headers.get('Content-Range')
@ -293,10 +293,7 @@ class HttpFD(FileDownloader):

                # Progress message
                speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
-                if ctx.data_len is None:
-                    eta = None
-                else:
-                    eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+                eta = self.calc_eta(speed, ctx.data_len and (ctx.data_len - ctx.resume_len))

                self._hook_progress({
                    'status': 'downloading',
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@ -8,6 +8,8 @@ from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    int_or_none,
+    remove_start,
+    traverse_obj,
    update_url_query,
    urlencode_postdata,
 )
@ -33,14 +35,17 @@ class AENetworksBaseIE(ThePlatformIE):
    }

    def _extract_aen_smil(self, smil_url, video_id, auth=None):
-        query = {'mbr': 'true'}
+        query = {
+            'mbr': 'true',
+            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
+        }
        if auth:
            query['auth'] = auth
        TP_SMIL_QUERY = [{
            'assetTypes': 'high_video_ak',
-            'switch': 'hls_high_ak'
+            'switch': 'hls_high_ak',
        }, {
-            'assetTypes': 'high_video_s3'
+            'assetTypes': 'high_video_s3',
        }, {
            'assetTypes': 'high_video_s3',
            'switch': 'hls_high_fastly',
@ -75,7 +80,14 @@ class AENetworksBaseIE(ThePlatformIE):
        requestor_id, brand = self._DOMAIN_MAP[domain]
        result = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
-            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
+            filter_value, query={'filter[%s]' % filter_key: filter_value})
+        result = traverse_obj(
+            result, ('results',
+                     lambda k, v: k == 0 and v[filter_key] == filter_value),
+            get_all=False)
+        if not result:
+            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
+                                 video_id=remove_start(filter_value, '/'))
        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
@ -126,7 +138,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
-        'skip': 'This video is only available for users of participating TV providers.',
+        'skip': 'Geo-restricted - This content is not available in your location.'
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
@ -143,6 +155,7 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
+        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True
Author	SHA1	Message	Date
dirkf	6fece0a96b	[AENetworksBaseIE] Report missing show data instead of crash	2023-03-14 16:23:20 +00:00
dirkf	70ff013910	[devscripts] Add a hack to convert command-line options to API options	2023-03-14 16:23:20 +00:00
dirkf	e8de54bce5	[core] Handle `/../` sequences in HTTP URLs * use Python's RFC implementation for embedded sequences * hack: strip unbalanced leading `../` from path, like eg Firefox See https://github.com/yt-dlp/yt-dlp/issues/3355	2023-03-14 16:23:20 +00:00
dirkf	baa6c5e95c	[FragmentFD] Respect `--no-continue` * discard partial fragment on `--no-continue` * continue with correct progress display otherwise Resolves #21467	2023-03-14 16:23:20 +00:00
dirkf	5c985d4f81	[downloader] Let _ffmpeg_ handle DASH segments Fixes https://github.com/ytdl-org/youtube-dl/issues/31792 after `3da1783`.	2023-03-14 16:23:20 +00:00