Merge 0afddd0b9a into e1b3fa242c

fix: refactor two extractors into one
fix: add new extractor for QingTing
2024-12-22 16:42:13 +00:00 · 2024-07-25 09:07:37 +08:00 · 2022-06-18 08:56:44 +08:00 · 2022-06-16 20:44:05 +08:00 · 2022-06-16 19:14:08 +08:00 · 2022-06-16 19:12:26 +08:00
2 changed files with 54 additions and 0 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -993,6 +993,7 @@ from .presstv import PressTVIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
 from .qingting import QingTingIE
 from .qqmusic import (
    QQMusicIE,
    QQMusicSingerIE,
--- a/youtube_dl/extractor/qingting.py
+++ b/youtube_dl/extractor/qingting.py
@@ -0,0 +1,53 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from youtube_dl import utils
 class QingTingIE(InfoExtractor):
    """Information extractor for QingTing FM programme pages.

    Handles URLs on ``qingting.fm`` and ``qtfm.cn`` in two shapes:

    * desktop: ``<host>/channels/<n>/programs/<id>``
    * mobile:  ``m.<host>/vchannels/<n>/programs/<id>``

    Mobile pages are resolved to an audio URL directly; desktop pages are
    resolved to the URL stored under the page's ``alternate`` key, which is
    then handed back for further extraction.
    """
    IE_NAME = 'QingTing'
    # Group ``m`` participates only when the host carries the ``m.`` prefix;
    # the conditional ``(?(m)v|)`` then requires ``vchannels`` in the path for
    # mobile URLs and plain ``channels`` otherwise.
    _VALID_URL = r'''(?x)
                     (?:https?://)?(?:www\.)?
                         (?P<m>m\.)?(?:qingting\.fm|qtfm\.cn)/(?(m)v|)
                         channels/\d+/programs/(?P<id>\d+)'''
    _TEST = {
        'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
        'md5': '47e6a94f4e621ed832c316fd1888fb3c',
        'info_dict': {
            'id': '22257411',
            'ext': 'mp3',
            'title': '用了十年才修改，谁在乎教科书？-睡前消息-蜻蜓FM听头条',
        }
    }

    def _extract_page_url(self, webpage, key, name):
        """Return the validated URL stored under a quoted ``key`` in the page.

        Looks for ``"key": "value"`` (single or double quotes, independently
        for key and value) in ``webpage``; ``name`` is the label passed to
        ``_search_regex`` for its error reporting.

        Raises ``utils.ExtractorError`` if the captured value is rejected by
        ``utils.url_or_none``. A missing key is reported by ``_search_regex``
        itself (presumably fatal by default - confirm against common.py).
        """
        found = self._search_regex(
            r'''("|')%s\1\s*:\s*("|')(?P<url>(?:(?!\2).)*)\2''' % (key,),
            webpage, name, group='url')
        checked = utils.url_or_none(found)
        if not checked:
            raise utils.ExtractorError('Invalid audio URL %s' % (found,))
        return checked

    def _real_extract(self, url):
        """Extract a programme from a desktop or mobile QingTing page URL.

        Returns an info dict with a direct ``mp3`` URL for mobile pages, or
        a ``url_result`` pointing at the page's ``alternate`` URL for
        desktop pages.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Non-greedy capture: with (?s) a greedy ``.*`` would run on to the
        # LAST ``</title>`` in the document (e.g. one inside inline SVG) and
        # swallow everything in between.
        title = self._html_search_regex(
            r'(?s)<title\b[^>]*>(.*?)</title>', webpage, 'title',
            default=None) or self._og_search_title(webpage)
        # The ``m`` group is None for desktop URLs and 'm.' for mobile ones.
        is_mobile = self._search_regex(
            self._VALID_URL, url, 'URL type', group='m') == 'm.'
        if is_mobile:
            return {
                'id': video_id,
                'title': title,
                'ext': 'mp3',
                'url': self._extract_page_url(
                    webpage, 'audioUrl', 'audio URL'),
            }
        # Desktop page: delegate to whatever the ``alternate`` URL resolves
        # to (presumably the mobile page handled above - verify on site).
        return self.url_result(
            url=self._extract_page_url(webpage, 'alternate', 'alternate URL'),
            video_id=video_id, video_title=title)
Author	SHA1	Message	Date
changren-wcr	ca95a8f2ed	Merge `0afddd0b9a` into `e1b3fa242c`	2024-07-25 09:07:37 +08:00
wangchangren	0afddd0b9a	fix: refactor two extractors into one	2022-06-18 08:56:44 +08:00
wangchangren	9ff181a1ab	fix: add new extractor for QingTing	2022-06-16 20:44:05 +08:00
changren-wcr	16e7b15f76	use youtube-dl match function instead of native python re Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:14:08 +08:00
changren-wcr	9a421b4e7e	fix regular search pattern for title _html_search_regex() has default fatal=True: add a default to fall back to _og_search_title() allow line break in .* Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:12:26 +08:00
changren-wcr	87706c5ec8	Use _search_regex() to get proper error reports in youtube-dl Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:08:09 +08:00
changren-wcr	60783025df	remove capture of patterns that aren't used Co-authored-by: dirkf <fieldhouse@gmx.net>	2022-06-16 19:02:30 +08:00
wangchangren	174624aef8	[QingTing] Add new extractor	2022-06-12 10:58:44 +08:00