mirror of
https://github.com/ytdl-org/youtube-dl.git
synced 2024-12-30 12:32:05 +00:00
Compare commits
No commits in common. "b2b622a9b52cae9ae856755068ad99308e624e11" and "ffccff1a1f707b6c95e21ee4c3a0bab07487a467" have entirely different histories.
b2b622a9b5
...
ffccff1a1f
@ -123,10 +123,8 @@ def generator(test_case, tname):
|
||||
params['outtmpl'] = tname + '_' + params['outtmpl']
|
||||
if is_playlist and 'playlist' not in test_case:
|
||||
params.setdefault('extract_flat', 'in_playlist')
|
||||
# only process enough items for specified tests
|
||||
pl_counts = traverse_obj(test_case, (None, ('playlist_count', 'playlist_mincount', 'playlist_maxcount')))
|
||||
if pl_counts:
|
||||
params.setdefault('playlistend', max(pl_counts) + 1)
|
||||
if traverse_obj(test_case, 'playlist_count', 'playlist_maxcount', default=-1) < 0:
|
||||
params.setdefault('playlistend', test_case.get('playlist_mincount'))
|
||||
params.setdefault('skip_download', True)
|
||||
|
||||
ydl = YoutubeDL(params, auto_init=False)
|
||||
|
@ -1603,7 +1603,6 @@ from .xhamster import (
|
||||
XHamsterIE,
|
||||
XHamsterEmbedIE,
|
||||
XHamsterCategoryIE,
|
||||
XHamsterChannelIE,
|
||||
XHamsterCreatorIE,
|
||||
XHamsterSearchIE,
|
||||
XHamsterSearchKeyIE,
|
||||
|
@ -12,7 +12,6 @@ from ..compat import (
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
classpropinit,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
@ -23,7 +22,6 @@ from ..utils import (
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
remove_start,
|
||||
T,
|
||||
traverse_obj,
|
||||
txt_or_none,
|
||||
@ -34,18 +32,6 @@ from ..utils import (
|
||||
|
||||
|
||||
class XHamsterBaseIE(InfoExtractor):
|
||||
# base domains that don't redirect to xhamster.com (not xhday\d\.com, eg)
|
||||
_DOMAINS = '(?:%s)' % '|'.join((
|
||||
r'xhamster\d*\.(?:com|desi)',
|
||||
r'xhamster\.one',
|
||||
r'xhms\.pro',
|
||||
r'xh(?:open|access|victory|big|channel)\.com',
|
||||
r'(?:full|mega)xh\.com',
|
||||
r'xh(?:vid|official|planet)\d*\.com',
|
||||
# requires Tor
|
||||
r'xhamster[a-z2-7]+\.onion',
|
||||
))
|
||||
|
||||
def _download_webpage_handle(self, url, video_id, *args, **kwargs):
|
||||
# note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None)
|
||||
# default UA to 'Mozilla' (only) to avoid interstitial page
|
||||
@ -67,16 +53,25 @@ class XHamsterBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class XHamsterIE(XHamsterBaseIE):
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
r'''(?x)
|
||||
https?://
|
||||
(?:.+?\.)?%s/
|
||||
(?:
|
||||
movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
|
||||
videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
|
||||
)
|
||||
''' % cls._DOMAINS)
|
||||
# base domains that don't redirect to xhamster.com (not xhday\d\.com, eg)
|
||||
_DOMAINS = '(?:%s)' % '|'.join((
|
||||
r'xhamster\d*\.(?:com|desi)',
|
||||
r'xhamster\.one',
|
||||
r'xhms\.pro',
|
||||
r'xh(?:open|access|victory|big|channel)\.com',
|
||||
r'(?:full|mega)xh\.com',
|
||||
r'xh(?:vid|official|planet)\d*\.com',
|
||||
# requires Tor
|
||||
r'xhamster[a-z2-7]+\.onion',
|
||||
))
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:.+?\.)?%s/
|
||||
(?:
|
||||
movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
|
||||
videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
|
||||
)
|
||||
''' % _DOMAINS
|
||||
_TESTS = [{
|
||||
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
|
||||
'md5': '34e1ab926db5dc2750fed9e1f34304bb',
|
||||
@ -384,9 +379,7 @@ class XHamsterIE(XHamsterBaseIE):
|
||||
|
||||
|
||||
class XHamsterEmbedIE(XHamsterBaseIE):
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % cls._DOMAINS)
|
||||
_VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
|
||||
_TEST = {
|
||||
'url': 'http://xhamster.com/xembed.php?video=3328539',
|
||||
'info_dict': {
|
||||
@ -428,19 +421,9 @@ class XHamsterEmbedIE(XHamsterBaseIE):
|
||||
|
||||
class XHamsterPlaylistIE(XHamsterBaseIE):
|
||||
_NEXT_PAGE_RE = r'(<a\b[^>]+\bdata-page\s*=\s*["\']next[^>]+>)'
|
||||
_VALID_URL_TPL = r'''(?x)
|
||||
https?://(?:.+?\.)?%s
|
||||
/%s/(?P<id>[^/?#]+)
|
||||
(?:(?P<sub>(?:/%s)+))?
|
||||
(?:/(?P<pnum>\d+))?(?:[/?#]|$)
|
||||
'''
|
||||
|
||||
def _page_url(self, user_id, subs, page_num, url):
|
||||
n_url = self._PAGE_URL_TPL % (
|
||||
join_nonempty(user_id, *subs, delim='/'), page_num)
|
||||
n_url = compat_urlparse.urlsplit(n_url)
|
||||
url = compat_urlparse.urlsplit(url)
|
||||
return compat_urlparse.urlunsplit(n_url[:3] + url[3:])
|
||||
def _page_url(self, user_id, page_num, url=None):
|
||||
return self._PAGE_URL_TPL % (user_id, page_num)
|
||||
|
||||
def _extract_entries(self, page, user_id):
|
||||
for video_tag_match in re.finditer(
|
||||
@ -459,9 +442,9 @@ class XHamsterPlaylistIE(XHamsterBaseIE):
|
||||
self._search_regex(self._NEXT_PAGE_RE, page, 'next page', default=None),
|
||||
(T(extract_attributes), 'href', T(url_or_none)))
|
||||
|
||||
def _entries(self, user_id, subs, page_num=None, page=None, url=None):
|
||||
def _entries(self, user_id, page_num=None, page=None, url=None):
|
||||
page_1 = 1 if page_num is None else page_num
|
||||
next_page_url = self._page_url(user_id, subs, page_1, url)
|
||||
next_page_url = self._page_url(user_id, page_1, url)
|
||||
for pagenum in itertools.count(page_1):
|
||||
if not page:
|
||||
page = self._download_webpage(
|
||||
@ -480,28 +463,34 @@ class XHamsterPlaylistIE(XHamsterBaseIE):
|
||||
break
|
||||
page = None
|
||||
|
||||
def _fancy_page_url(self, user_id, page_num, url):
    """Build the canonical URL of page `page_num` of a filtered listing.

    The path is produced from `_PAGE_URL_TPL` using `user_id` plus any
    filter sub-path matched as the `sub` group of `_VALID_URL`; the query
    and fragment of the original `url` are carried over unchanged.

    user_id  -- listing ID (the `id` group of the matched URL)
    page_num -- page number to substitute into the template
    url      -- the URL originally passed to the extractor
    """
    # `sub` is matched with its leading '/' (eg, '/hd/4k'): strip it, or
    # joining with '/' would give 'user_id//hd/4k'
    sub = (self._match_valid_url(url).group('sub') or '').lstrip('/')
    n_url = self._PAGE_URL_TPL % (
        join_nonempty(user_id, sub, delim='/'), page_num)
    # urljoin(n_url, url) would just return `url`, as it is absolute;
    # instead take scheme, host and path from the page URL and the
    # query and fragment from the original URL
    n_url = compat_urlparse.urlsplit(n_url)
    url = compat_urlparse.urlsplit(url)
    return compat_urlparse.urlunsplit(n_url[:3] + url[3:])
|
||||
|
||||
def _fancy_get_title(self, user_id, page_num, url):
    """Return a playlist title of the form 'user_id (filter,...,page)'.

    The bracketed list holds the '/'-separated parts of the `sub` group
    of the matched URL, then the '&'-separated parts of the URL query,
    then 'all' if `page_num` is None or else 'p<page_num>'; empty items
    are dropped by join_nonempty().
    """
    path_filters = (self._match_valid_url(url).group('sub') or '').split('/')
    query_filters = (compat_urlparse.urlsplit(url).query or '').split('&')
    if page_num is None:
        page_label = 'all'
    else:
        page_label = 'p%d' % page_num
    labels = path_filters + query_filters + [page_label]
    return '%s (%s)' % (user_id, join_nonempty(*labels, delim=','))
|
||||
|
||||
@staticmethod
|
||||
def _get_title(user_id, subs, page_num, url):
|
||||
subs = subs[:]
|
||||
if url:
|
||||
subs.extend((compat_urlparse.urlsplit(url).query or '').split('&'))
|
||||
subs.append('all' if page_num is None else ('p%d' % page_num))
|
||||
return '%s (%s)' % (user_id, join_nonempty(*subs, delim=','))
|
||||
def _get_title(user_id, page_num, url=None):
|
||||
return '%s (%s)' % (user_id, 'all' if page_num is None else ('p%d' % page_num))
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url).groupdict()
|
||||
user_id = mobj['id']
|
||||
page_num = int_or_none(mobj.get('pnum'))
|
||||
subs = remove_start(mobj.get('sub') or '', '/').split('/')
|
||||
mobj = self._match_valid_url(url)
|
||||
user_id = mobj.group('id')
|
||||
page_num = int_or_none(mobj.groupdict().get('pnum'))
|
||||
return self.playlist_result(
|
||||
self._entries(user_id, subs, page_num, url=url), user_id,
|
||||
self._get_title(user_id, subs, page_num, url=url))
|
||||
self._entries(user_id, page_num, url=url), user_id,
|
||||
self._get_title(user_id, page_num, url=url))
|
||||
|
||||
|
||||
class XHamsterUserIE(XHamsterPlaylistIE):
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)(?P<sub>/favorites)?(?:/videos/(?P<pnum>\d+))?' % cls._DOMAINS)
|
||||
_VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)(?:/videos/(?P<pnum>\d+))?' % XHamsterIE._DOMAINS
|
||||
_PAGE_URL_TPL = 'https://xhamster.com/users/%s/videos/%s'
|
||||
_TESTS = [{
|
||||
# Paginated user profile
|
||||
@ -524,27 +513,10 @@ class XHamsterUserIE(XHamsterPlaylistIE):
|
||||
'url': 'https://xhamster.com/users/firatkaan/videos',
|
||||
'info_dict': {
|
||||
'id': 'firatkaan',
|
||||
'title': 'firatkaan (all)',
|
||||
},
|
||||
'playlist_mincount': 1,
|
||||
}, {
|
||||
# User with `favorites`
|
||||
'url': 'https://xhamster.com/users/cubafidel/videos/',
|
||||
'info_dict': {
|
||||
'id': 'cubafidel',
|
||||
'title': 'cubafidel (all)',
|
||||
},
|
||||
'playlist_maxcount': 300,
|
||||
}, {
|
||||
# Faves of user with `favorites`
|
||||
'url': 'https://xhamster.com/users/cubafidel/favorites/videos/',
|
||||
'info_dict': {
|
||||
'id': 'cubafidel',
|
||||
'title': 'cubafidel (favorites,all)',
|
||||
},
|
||||
'playlist_mincount': 400,
|
||||
}, {
|
||||
# below URL doesn't match but is redirected via generic
|
||||
# the below doesn't match but is redirected via generic
|
||||
# 'url': 'https://xhday.com/users/mobhunter',
|
||||
'url': 'https://xhvid.com/users/pelushe21',
|
||||
'only_matching': True,
|
||||
@ -553,13 +525,13 @@ class XHamsterUserIE(XHamsterPlaylistIE):
|
||||
|
||||
class XHamsterCreatorIE(XHamsterPlaylistIE):
|
||||
# `pornstars`, `celebrities` and `creators` share the same namespace
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
cls._VALID_URL_TPL % (
|
||||
cls._DOMAINS,
|
||||
'(?:(?:gay|shemale)/)?(?:creators|pornstars|celebrities)',
|
||||
r'(?:hd|4k|newest|full-length|exclusive|best(?:/(?:weekly|monthly|year-\d{4}))?)',
|
||||
))
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:.+?\.)?%s
|
||||
/(?:(?:gay|shemale)/)?(?:creators|pornstars|celebrities)
|
||||
/(?P<id>[^/?#]+)
|
||||
(?:(?P<sub>(?:/(?:hd|4k|newest|full-length|exclusive))+))?
|
||||
(?:/(?P<pnum>\d+))?(?:[/?#]|$)
|
||||
''' % XHamsterIE._DOMAINS
|
||||
_PAGE_URL_TPL = 'https://xhamster.com/creators/%s/%s'
|
||||
_TESTS = [{
|
||||
# Paginated creator profile
|
||||
@ -597,66 +569,24 @@ class XHamsterCreatorIE(XHamsterPlaylistIE):
|
||||
'playlist_maxcount': 30,
|
||||
}]
|
||||
|
||||
def _page_url(self, user_id, page_num, url):
|
||||
return self._fancy_page_url(user_id, page_num, url)
|
||||
|
||||
class XHamsterChannelBaseIE(XHamsterPlaylistIE):
|
||||
_NEXT_PAGE_RE = r'(<a\b[^>]+\bclass\s*=\s*("|\')(?:[\w-]+\s+)*?prev-next-list-link--next(?:\s+[\w-]+)*\2[^>]+>)'
|
||||
def _get_title(self, user_id, page_num, url):
|
||||
return self._fancy_get_title(user_id, page_num, url)
|
||||
|
||||
|
||||
class XHamsterChannelIE(XHamsterChannelBaseIE):
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
cls._VALID_URL_TPL % (
|
||||
cls._DOMAINS,
|
||||
'(?:(?:gay|shemale)/)?channels',
|
||||
r'(?:hd|4k|newest|full-length|best(?:/(?:weekly|monthly|year-\d{4}))?)',
|
||||
))
|
||||
_PAGE_URL_TPL = 'https://xhamster.com/channels/%s/%s'
|
||||
_TESTS = [{
|
||||
# Paginated channel
|
||||
'url': 'https://xhamster.com/channels/freeuse-fantasy',
|
||||
'info_dict': {
|
||||
'id': 'freeuse-fantasy',
|
||||
'title': 'freeuse-fantasy (all)',
|
||||
},
|
||||
'playlist_mincount': 90,
|
||||
}, {
|
||||
# Non-paginated channel (for now?)
|
||||
'url': 'https://xhamster.com/channels/oopsie',
|
||||
'info_dict': {
|
||||
'id': 'oopsie',
|
||||
'title': 'oopsie (all)',
|
||||
},
|
||||
'playlist_mincount': 30,
|
||||
'playlist_maxcount': 48,
|
||||
}, {
|
||||
# Channel filtered by path
|
||||
'url': 'https://xhamster.com/channels/freeuse-fantasy/best/year-2022',
|
||||
'info_dict': {
|
||||
'id': 'freeuse-fantasy',
|
||||
'title': 'freeuse-fantasy (best,year-2022,all)',
|
||||
},
|
||||
'playlist_count': 30,
|
||||
}, {
|
||||
# Channel filtered by query
|
||||
'url': 'https://xhamster.com/channels/freeuse-fantasy?min-duration=40',
|
||||
'info_dict': {
|
||||
'id': 'freeuse-fantasy',
|
||||
'title': 'freeuse-fantasy (min-duration=40,all)',
|
||||
},
|
||||
'playlist_maxcount': 10,
|
||||
}]
|
||||
|
||||
|
||||
class XHamsterCategoryIE(XHamsterChannelBaseIE):
|
||||
class XHamsterCategoryIE(XHamsterPlaylistIE):
|
||||
# `tags` and `categories` share the same namespace
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
cls._VALID_URL_TPL % (
|
||||
cls._DOMAINS,
|
||||
'(?:(?P<queer>gay|shemale)/)?(?:categories|tags|(?=hd))',
|
||||
r'(?:hd|4k|producer|creator|best(?:/(?:weekly|monthly|year-\d{4}))?)',
|
||||
))
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:.+?\.)?%s
|
||||
(?:(?P<queer>gay|shemale)/)?(?:/categories|/tags|(?=/hd))
|
||||
/(?P<id>[^/?#]+)
|
||||
(?P<sub>(?:/(?:hd|4k|producer|creator|best(?:/(?:weekly|monthly|year-\d{4}))?))+)?
|
||||
(?:/(?P<pnum>\d+))?(?:[/?#]|$)
|
||||
''' % XHamsterIE._DOMAINS
|
||||
_PAGE_URL_TPL = 'https://xhamster.com/categories/%s/%s'
|
||||
_NEXT_PAGE_RE = r'(<a\b[^>]+\bclass\s*=\s*("|\')(?:[\w-]+\s+)*?prev-next-list-link--next(?:\s+[\w-]+)*\2[^>]+>)'
|
||||
_TESTS = [{
|
||||
# Paginated category/tag
|
||||
'url': 'https://xhamster.com/tags/hawaiian',
|
||||
@ -694,28 +624,26 @@ class XHamsterCategoryIE(XHamsterChannelBaseIE):
|
||||
'playlist_maxcount': 20,
|
||||
}]
|
||||
|
||||
def _page_url(self, user_id, subs, page_num, url):
|
||||
queer = self._match_valid_url(url).group('queer')
|
||||
def _page_url(self, user_id, page_num, url):
|
||||
queer, sub = self._match_valid_url(url).group('queer', 'sub')
|
||||
n_url = self._PAGE_URL_TPL % (
|
||||
join_nonempty(queer, user_id, *subs, delim='/'), page_num)
|
||||
join_nonempty(queer, user_id, sub, delim='/'), page_num)
|
||||
return compat_urlparse.urljoin(n_url, url)
|
||||
|
||||
def _get_title(self, user_id, subs, page_num, url):
|
||||
queer = self._match_valid_url(url).group('queer')
|
||||
if queer:
|
||||
subs = [queer] + subs
|
||||
subs.extend((compat_urlparse.urlsplit(url).query or '').split('&'))
|
||||
subs.append('all' if page_num is None else ('p%d' % page_num))
|
||||
return '%s (%s)' % (user_id, join_nonempty(*subs, delim=','))
|
||||
def _get_title(self, user_id, page_num, url):
|
||||
queer, sub = self._match_valid_url(url).group('queer', 'sub')
|
||||
queer = [] if queer is None else [queer]
|
||||
sub = queer + (sub or '').split('/')
|
||||
sub.extend((compat_urlparse.urlsplit(url).query or '').split('&'))
|
||||
sub.append('all' if page_num is None else ('p%d' % page_num))
|
||||
return '%s (%s)' % (user_id, join_nonempty(*sub, delim=','))
|
||||
|
||||
|
||||
class XHamsterSearchIE(XHamsterPlaylistIE):
|
||||
_VALID_URL = classpropinit(
|
||||
lambda cls:
|
||||
r'''(?x)
|
||||
https?://(?:.+?\.)?%s
|
||||
/search/(?P<id>[^/?#]+)
|
||||
''' % cls._DOMAINS)
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:.+?\.)?%s
|
||||
/search/(?P<id>[^/?#]+)
|
||||
''' % XHamsterIE._DOMAINS
|
||||
_TESTS = [{
|
||||
# Single page result
|
||||
'url': 'https://xhamster.com/search/latvia',
|
||||
@ -744,20 +672,20 @@ class XHamsterSearchIE(XHamsterPlaylistIE):
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _page_url(user_id, subs, page_num, url):
|
||||
def _page_url(user_id, page_num, url):
|
||||
return url
|
||||
|
||||
def _get_title(self, user_id, subs, page_num, url=None):
|
||||
def _get_title(self, user_id, page_num, url=None):
|
||||
return super(XHamsterSearchIE, self)._get_title(
|
||||
user_id.replace('+', ' '), [], page_num, url)
|
||||
user_id.replace('+', ' '), page_num, url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
user_id = self._match_id(url)
|
||||
page_num = traverse_obj(url, (
|
||||
T(parse_qs), 'page', -1, T(int_or_none)))
|
||||
return self.playlist_result(
|
||||
self._entries(user_id, None, page_num, url=url), user_id,
|
||||
self._get_title(user_id, None, page_num))
|
||||
self._entries(user_id, page_num, url=url), user_id,
|
||||
self._get_title(user_id, page_num))
|
||||
|
||||
|
||||
class XHamsterSearchKeyIE(SearchInfoExtractor, XHamsterSearchIE):
|
||||
|
@ -6510,63 +6510,3 @@ def join_nonempty(*values, **kwargs):
|
||||
if from_dict is not None:
|
||||
values = (traverse_obj(from_dict, variadic(v)) for v in values)
|
||||
return delim.join(map(compat_str, filter(None, values)))
|
||||
|
||||
|
||||
# from yt-dlp
|
||||
class classproperty(object):
    """Descriptor giving property-style access to a class-level function,
    with optional per-class caching of the result.

    Usable as `@classproperty`, or as `@classproperty(cache=True)` to
    evaluate the function only once for each class through which the
    attribute is read.
    """

    def __new__(cls, *args, **kwargs):
        # the function may be passed as `func=...` or as the first
        # positional argument
        if 'func' in kwargs:
            target = kwargs.pop('func')
        elif args:
            target, args = args[0], args[1:]
        else:
            target = None
        if target:
            return super(classproperty, cls).__new__(cls)
        # no function yet (eg, `@classproperty(cache=True)`): return a
        # decorator that remembers the remaining arguments
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, **kwargs):
        functools.update_wrapper(self, func)
        self.func = func
        # `cache` is a kw-only argument; a dict keyed by class when
        # enabled, None when caching is off
        self._cache = {} if kwargs.get('cache', False) else None

    def __get__(self, n, cls):
        store = self._cache
        if store is None:
            # uncached: evaluate on every access
            return self.func(cls)
        if cls not in store:
            store[cls] = self.func(cls)
        return store[cls]
|
||||
|
||||
|
||||
class classpropinit(classproperty):
    """Class property that is replaced by its value on first access.

    Parent class vars are not in scope when the `class suite` is
    evaluated, which disallows `childvar = fn(parentvar)`. Instead, the
    parent class has to be mentioned redundantly and unmaintainably,
    since the current class isn't yet bound.

    This decorator evaluates a class method and assigns its result
    in place of the method.

        class child(parent):
            # before
            childvar = fn(parent.parentvar)
            # now
            @classpropinit
            def childvar(cls):
                return fn(cls.parentvar)
            # or
            childvar = classpropinit(lambda cls: fn(cls.parentvar))

    The attribute to overwrite is the one that this descriptor is actually
    bound to in the class or one of its bases, so the lambda form (whose
    function name is '<lambda>') is replaced too. The function's own name
    is only used if no such binding is found.
    """

    def __init__(self, func):
        functools.update_wrapper(self, func)
        self.name = func.__name__
        self.func = func

    def _target_name(self, cls):
        """Return the attribute name under which `self` is reachable from
        `cls`, searching the MRO; default to the wrapped function's name"""
        for klass in getattr(cls, '__mro__', (cls,)):
            for attr, value in vars(klass).items():
                if value is self:
                    return attr
        return self.name

    def __get__(self, _, cls):
        val = self.func(cls)
        # overwrite the descriptor on the accessing class, so that later
        # reads through that class get the plain value
        setattr(cls, self._target_name(cls), val)
        return val
|
||||
|
Loading…
Reference in New Issue
Block a user