gh-74668: Fix support of bytes in urllib.parse.parse_qsl() (GH-115771)

The urllib.parse functions parse_qs() and parse_qsl() now support bytes
arguments containing raw and percent-encoded non-ASCII data.
This commit is contained in:
Serhiy Storchaka 2024-03-05 17:49:50 +02:00 committed by GitHub
parent f97f25ef5d
commit bdba8ef42b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 64 additions and 26 deletions

View File

@ -19,6 +19,10 @@ parse_qsl_test_cases = [
("=a", [('', 'a')]), ("=a", [('', 'a')]),
("a", [('a', '')]), ("a", [('a', '')]),
("a=", [('a', '')]), ("a=", [('a', '')]),
("a=b=c", [('a', 'b=c')]),
("a%3Db=c", [('a=b', 'c')]),
("a=b&c=d", [('a', 'b'), ('c', 'd')]),
("a=b%26c=d", [('a', 'b&c=d')]),
("&a=b", [('a', 'b')]), ("&a=b", [('a', 'b')]),
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
("a=1&a=2", [('a', '1'), ('a', '2')]), ("a=1&a=2", [('a', '1'), ('a', '2')]),
@ -29,6 +33,10 @@ parse_qsl_test_cases = [
(b"=a", [(b'', b'a')]), (b"=a", [(b'', b'a')]),
(b"a", [(b'a', b'')]), (b"a", [(b'a', b'')]),
(b"a=", [(b'a', b'')]), (b"a=", [(b'a', b'')]),
(b"a=b=c", [(b'a', b'b=c')]),
(b"a%3Db=c", [(b'a=b', b'c')]),
(b"a=b&c=d", [(b'a', b'b'), (b'c', b'd')]),
(b"a=b%26c=d", [(b'a', b'b&c=d')]),
(b"&a=b", [(b'a', b'b')]), (b"&a=b", [(b'a', b'b')]),
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
@ -36,6 +44,14 @@ parse_qsl_test_cases = [
("a=a+b;b=b+c", [('a', 'a b;b=b c')]), ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
(b";a=b", [(b';a', b'b')]), (b";a=b", [(b';a', b'b')]),
(b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
("\u0141=\xE9", [('\u0141', '\xE9')]),
("%C5%81=%C3%A9", [('\u0141', '\xE9')]),
("%81=%A9", [('\ufffd', '\ufffd')]),
(b"\xc5\x81=\xc3\xa9", [(b'\xc5\x81', b'\xc3\xa9')]),
(b"%C5%81=%C3%A9", [(b'\xc5\x81', b'\xc3\xa9')]),
(b"\x81=\xA9", [(b'\x81', b'\xa9')]),
(b"%81=%A9", [(b'\x81', b'\xa9')]),
] ]
# Each parse_qs testcase is a two-tuple that contains # Each parse_qs testcase is a two-tuple that contains
@ -49,6 +65,10 @@ parse_qs_test_cases = [
("=a", {'': ['a']}), ("=a", {'': ['a']}),
("a", {'a': ['']}), ("a", {'a': ['']}),
("a=", {'a': ['']}), ("a=", {'a': ['']}),
("a=b=c", {'a': ['b=c']}),
("a%3Db=c", {'a=b': ['c']}),
("a=b&c=d", {'a': ['b'], 'c': ['d']}),
("a=b%26c=d", {'a': ['b&c=d']}),
("&a=b", {'a': ['b']}), ("&a=b", {'a': ['b']}),
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
("a=1&a=2", {'a': ['1', '2']}), ("a=1&a=2", {'a': ['1', '2']}),
@ -59,6 +79,10 @@ parse_qs_test_cases = [
(b"=a", {b'': [b'a']}), (b"=a", {b'': [b'a']}),
(b"a", {b'a': [b'']}), (b"a", {b'a': [b'']}),
(b"a=", {b'a': [b'']}), (b"a=", {b'a': [b'']}),
(b"a=b=c", {b'a': [b'b=c']}),
(b"a%3Db=c", {b'a=b': [b'c']}),
(b"a=b&c=d", {b'a': [b'b'], b'c': [b'd']}),
(b"a=b%26c=d", {b'a': [b'b&c=d']}),
(b"&a=b", {b'a': [b'b']}), (b"&a=b", {b'a': [b'b']}),
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
(b"a=1&a=2", {b'a': [b'1', b'2']}), (b"a=1&a=2", {b'a': [b'1', b'2']}),
@ -66,6 +90,15 @@ parse_qs_test_cases = [
("a=a+b;b=b+c", {'a': ['a b;b=b c']}), ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
(b";a=b", {b';a': [b'b']}), (b";a=b", {b';a': [b'b']}),
(b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
(b"a=a%E2%80%99b", {b'a': [b'a\xe2\x80\x99b']}),
("\u0141=\xE9", {'\u0141': ['\xE9']}),
("%C5%81=%C3%A9", {'\u0141': ['\xE9']}),
("%81=%A9", {'\ufffd': ['\ufffd']}),
(b"\xc5\x81=\xc3\xa9", {b'\xc5\x81': [b'\xc3\xa9']}),
(b"%C5%81=%C3%A9", {b'\xc5\x81': [b'\xc3\xa9']}),
(b"\x81=\xA9", {b'\x81': [b'\xa9']}),
(b"%81=%A9", {b'\x81': [b'\xa9']}),
] ]
class UrlParseTestCase(unittest.TestCase): class UrlParseTestCase(unittest.TestCase):
@ -995,8 +1028,8 @@ class UrlParseTestCase(unittest.TestCase):
def test_parse_qsl_max_num_fields(self): def test_parse_qsl_max_num_fields(self):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10) urllib.parse.parse_qsl('&'.join(['a=a']*11), max_num_fields=10)
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10) urllib.parse.parse_qsl('&'.join(['a=a']*10), max_num_fields=10)
def test_parse_qs_separator(self): def test_parse_qs_separator(self):
parse_qs_semicolon_cases = [ parse_qs_semicolon_cases = [

View File

@ -763,42 +763,44 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
Returns a list, as G-d intended. Returns a list, as G-d intended.
""" """
qs, _coerce_result = _coerce_args(qs)
separator, _ = _coerce_args(separator)
if not separator or (not isinstance(separator, (str, bytes))): if not separator or not isinstance(separator, (str, bytes)):
raise ValueError("Separator must be of type string or bytes.") raise ValueError("Separator must be of type string or bytes.")
if isinstance(qs, str):
if not isinstance(separator, str):
separator = str(separator, 'ascii')
eq = '='
def _unquote(s):
return unquote_plus(s, encoding=encoding, errors=errors)
else:
qs = bytes(qs)
if isinstance(separator, str):
separator = bytes(separator, 'ascii')
eq = b'='
def _unquote(s):
return unquote_to_bytes(s.replace(b'+', b' '))
if not qs:
return []
# If max_num_fields is defined then check that the number of fields # If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS # is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields. # attack via post bodies with many fields.
if max_num_fields is not None: if max_num_fields is not None:
num_fields = 1 + qs.count(separator) if qs else 0 num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields: if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded') raise ValueError('Max number of fields exceeded')
r = [] r = []
query_args = qs.split(separator) if qs else [] for name_value in qs.split(separator):
for name_value in query_args: if name_value or strict_parsing:
if not name_value and not strict_parsing: name, has_eq, value = name_value.partition(eq)
continue if not has_eq and strict_parsing:
nv = name_value.split('=', 1)
if len(nv) != 2:
if strict_parsing:
raise ValueError("bad query field: %r" % (name_value,)) raise ValueError("bad query field: %r" % (name_value,))
# Handle case of a control-name with no equal sign if value or keep_blank_values:
if keep_blank_values: name = _unquote(name)
nv.append('') value = _unquote(value)
else: r.append((name, value))
continue
if len(nv[1]) or keep_blank_values:
name = nv[0].replace('+', ' ')
name = unquote(name, encoding=encoding, errors=errors)
name = _coerce_result(name)
value = nv[1].replace('+', ' ')
value = unquote(value, encoding=encoding, errors=errors)
value = _coerce_result(value)
r.append((name, value))
return r return r
def unquote_plus(string, encoding='utf-8', errors='replace'): def unquote_plus(string, encoding='utf-8', errors='replace'):

View File

@ -0,0 +1,3 @@
The :mod:`urllib.parse` functions :func:`~urllib.parse.parse_qs` and
:func:`~urllib.parse.parse_qsl` now support bytes arguments containing raw
and percent-encoded non-ASCII data.