Compare commits

...

2 Commits

Author SHA1 Message Date
Remita Amine
162bf9e10a [compat] add compat_SimpleCookie 2021-04-04 19:49:24 +01:00
Remita Amine
6beb1ac65b [extractor/common] keep support for non standard JSON-LD VideoObject author values 2021-04-04 19:16:17 +01:00
2 changed files with 18 additions and 8 deletions

View File

@ -73,6 +73,15 @@ try:
except ImportError: # Python 2 except ImportError: # Python 2
import Cookie as compat_cookies import Cookie as compat_cookies
if sys.version_info[0] == 2:
class compat_SimpleCookie(compat_cookies.SimpleCookie):
def load(self, rawdata):
if isinstance(rawdata, unicode):
rawdata = str(rawdata)
return super(compat_SimpleCookie, self).load(rawdata)
else:
compat_SimpleCookie = compat_cookies.SimpleCookie
try: try:
import html.entities as compat_html_entities import html.entities as compat_html_entities
except ImportError: # Python 2 except ImportError: # Python 2

View File

@ -17,13 +17,13 @@ import math
from ..compat import ( from ..compat import (
compat_cookiejar_Cookie, compat_cookiejar_Cookie,
compat_cookies,
compat_etree_Element, compat_etree_Element,
compat_etree_fromstring, compat_etree_fromstring,
compat_getpass, compat_getpass,
compat_integer_types, compat_integer_types,
compat_http_client, compat_http_client,
compat_os_name, compat_os_name,
compat_SimpleCookie,
compat_str, compat_str,
compat_urllib_error, compat_urllib_error,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
@ -70,7 +70,6 @@ from ..utils import (
str_or_none, str_or_none,
str_to_int, str_to_int,
strip_or_none, strip_or_none,
try_get,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
@ -1276,6 +1275,7 @@ class InfoExtractor(object):
def extract_video_object(e): def extract_video_object(e):
assert e['@type'] == 'VideoObject' assert e['@type'] == 'VideoObject'
author = e.get('author')
info.update({ info.update({
'url': url_or_none(e.get('contentUrl')), 'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')), 'title': unescapeHTML(e.get('name')),
@ -1283,7 +1283,11 @@ class InfoExtractor(object):
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')), 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')), 'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')), 'timestamp': unified_timestamp(e.get('uploadDate')),
'uploader': try_get(e, lambda x: x['author']['name'], compat_str), # author can be an instance of 'Organization' or 'Person' types.
# both types can have 'name' property(inherited from 'Thing' type). [1]
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
'filesize': float_or_none(e.get('contentSize')), 'filesize': float_or_none(e.get('contentSize')),
'tbr': int_or_none(e.get('bitrate')), 'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')), 'width': int_or_none(e.get('width')),
@ -2897,13 +2901,10 @@ class InfoExtractor(object):
self._downloader.cookiejar.set_cookie(cookie) self._downloader.cookiejar.set_cookie(cookie)
def _get_cookies(self, url): def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """ """ Return a compat_SimpleCookie with the cookies for the url """
req = sanitized_Request(url) req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req) self._downloader.cookiejar.add_cookie_header(req)
cookie = req.get_header('Cookie') return compat_SimpleCookie(req.get_header('Cookie'))
if cookie and sys.version_info[0] == 2:
cookie = str(cookie)
return compat_cookies.SimpleCookie(cookie)
def _apply_first_set_cookie_header(self, url_handle, cookie): def _apply_first_set_cookie_header(self, url_handle, cookie):
""" """