Merged revisions 82510 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r82510 | senthil.kumaran | 2010-07-03 23:18:22 +0530 (Sat, 03 Jul 2010) | 4 lines Fix Issue5468 - urlencode to handle bytes and other alternate encodings. (Extensive tests provided). Patch by Dan Mahn. ........
This commit is contained in:
parent
8e42fb7ada
commit
fe1ad15b4b
|
@ -307,23 +307,29 @@ The :mod:`urllib.parse` module defines the following functions:
|
|||
``b'a&\xef'``.
|
||||
|
||||
|
||||
.. function:: urlencode(query, doseq=False)
|
||||
.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
|
||||
|
||||
Convert a mapping object or a sequence of two-element tuples to a
|
||||
"url-encoded" string, suitable to pass to :func:`urlopen` above as the
|
||||
optional *data* argument. This is useful to pass a dictionary of form
|
||||
fields to a ``POST`` request. The resulting string is a series of
|
||||
``key=value`` pairs separated by ``'&'`` characters, where both *key* and
|
||||
*value* are quoted using :func:`quote_plus` above. When a sequence of
|
||||
two-element tuples is used as the *query* argument, the first element of
|
||||
each tuple is a key and the second is a value. The value element in itself
|
||||
can be a sequence and in that case, if the optional parameter *doseq* is
|
||||
evaluates to *True*, individual ``key=value`` pairs separated by ``'&'``are
|
||||
generated for each element of the value sequence for the key. The order of
|
||||
parameters in the encoded string will match the order of parameter tuples in
|
||||
the sequence. This module provides the functions :func:`parse_qs` and
|
||||
:func:`parse_qsl` which are used to parse query strings into Python data
|
||||
structures.
|
||||
Convert a mapping object or a sequence of two-element tuples, which may
|
||||
contain :class:`str` or :class:`bytes` objects, to a "url-encoded" string,
|
||||
suitable to pass to :func:`urlopen` above as the optional *data* argument.
|
||||
This is useful to pass a dictionary of form fields to a ``POST`` request.
|
||||
The resulting string is a series of ``key=value`` pairs separated by ``'&'``
|
||||
characters, where both *key* and *value* are quoted using :func:`quote_plus`
|
||||
above. When a sequence of two-element tuples is used as the *query*
|
||||
argument, the first element of each tuple is a key and the second is a
|
||||
value. The value element in itself can be a sequence and in that case, if
|
||||
the optional parameter *doseq* evaluates to *True*, individual
|
||||
``key=value`` pairs separated by ``'&'`` are generated for each element of
|
||||
the value sequence for the key. The order of parameters in the encoded
|
||||
string will match the order of parameter tuples in the sequence. This module
|
||||
provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used
|
||||
to parse query strings into Python data structures.
|
||||
|
||||
When a key or value in *query* is a :class:`str`, the *safe*, *encoding* and
|
||||
*errors* parameters are passed to :func:`quote_plus` for encoding.
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
query parameter supports bytes and string.
|
||||
|
||||
|
||||
.. seealso::
|
||||
|
|
|
@ -797,6 +797,116 @@ class urlencode_Tests(unittest.TestCase):
|
|||
self.assertEqual("a=a&a=b",
|
||||
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
|
||||
|
||||
def test_urlencode_encoding(self):
|
||||
# ASCII encoding. Expect %3F with errors="replace'
|
||||
given = (('\u00a0', '\u00c1'),)
|
||||
expect = '%3F=%3F'
|
||||
result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# Default is UTF-8 encoding.
|
||||
given = (('\u00a0', '\u00c1'),)
|
||||
expect = '%C2%A0=%C3%81'
|
||||
result = urllib.parse.urlencode(given)
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# Latin-1 encoding.
|
||||
given = (('\u00a0', '\u00c1'),)
|
||||
expect = '%A0=%C1'
|
||||
result = urllib.parse.urlencode(given, encoding="latin-1")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
def test_urlencode_encoding_doseq(self):
|
||||
# ASCII Encoding. Expect %3F with errors="replace'
|
||||
given = (('\u00a0', '\u00c1'),)
|
||||
expect = '%3F=%3F'
|
||||
result = urllib.parse.urlencode(given, doseq=True,
|
||||
encoding="ASCII", errors="replace")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# ASCII Encoding. On a sequence of values.
|
||||
given = (("\u00a0", (1, "\u00c1")),)
|
||||
expect = '%3F=1&%3F=%3F'
|
||||
result = urllib.parse.urlencode(given, True,
|
||||
encoding="ASCII", errors="replace")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# Utf-8
|
||||
given = (("\u00a0", "\u00c1"),)
|
||||
expect = '%C2%A0=%C3%81'
|
||||
result = urllib.parse.urlencode(given, True)
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
given = (("\u00a0", (42, "\u00c1")),)
|
||||
expect = '%C2%A0=42&%C2%A0=%C3%81'
|
||||
result = urllib.parse.urlencode(given, True)
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# latin-1
|
||||
given = (("\u00a0", "\u00c1"),)
|
||||
expect = '%A0=%C1'
|
||||
result = urllib.parse.urlencode(given, True, encoding="latin-1")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
given = (("\u00a0", (42, "\u00c1")),)
|
||||
expect = '%A0=42&%A0=%C1'
|
||||
result = urllib.parse.urlencode(given, True, encoding="latin-1")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
def test_urlencode_bytes(self):
|
||||
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||
expect = '%A0%24=%C1%24'
|
||||
result = urllib.parse.urlencode(given)
|
||||
self.assertEqual(expect, result)
|
||||
result = urllib.parse.urlencode(given, True)
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# Sequence of values
|
||||
given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
|
||||
expect = '%A0%24=42&%A0%24=%C1%24'
|
||||
result = urllib.parse.urlencode(given, True)
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
def test_urlencode_encoding_safe_parameter(self):
|
||||
|
||||
# Send '$' (\x24) as safe character
|
||||
# Default utf-8 encoding
|
||||
|
||||
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||
result = urllib.parse.urlencode(given, safe=":$")
|
||||
expect = '%A0$=%C1$'
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||
result = urllib.parse.urlencode(given, doseq=True, safe=":$")
|
||||
expect = '%A0$=%C1$'
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# Safe parameter in sequence
|
||||
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
|
||||
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
|
||||
result = urllib.parse.urlencode(given, True, safe=":$")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
# Test all above in latin-1 encoding
|
||||
|
||||
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||
result = urllib.parse.urlencode(given, safe=":$",
|
||||
encoding="latin-1")
|
||||
expect = '%A0$=%C1$'
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
given = ((b'\xa0\x24', b'\xc1\x24'),)
|
||||
expect = '%A0$=%C1$'
|
||||
result = urllib.parse.urlencode(given, doseq=True, safe=":$",
|
||||
encoding="latin-1")
|
||||
|
||||
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
|
||||
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
|
||||
result = urllib.parse.urlencode(given, True, safe=":$",
|
||||
encoding="latin-1")
|
||||
self.assertEqual(expect, result)
|
||||
|
||||
class Pathname_Tests(unittest.TestCase):
|
||||
"""Test pathname2url() and url2pathname()"""
|
||||
|
||||
|
|
|
@ -533,7 +533,7 @@ def quote_from_bytes(bs, safe='/'):
|
|||
_safe_quoters[cachekey] = quoter
|
||||
return ''.join([quoter[char] for char in bs])
|
||||
|
||||
def urlencode(query, doseq=False):
|
||||
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
|
||||
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
|
||||
|
||||
If any values in the query arg are sequences and doseq is true, each
|
||||
|
@ -542,6 +542,10 @@ def urlencode(query, doseq=False):
|
|||
If the query arg is a sequence of two-element tuples, the order of the
|
||||
parameters in the output will match the order of parameters in the
|
||||
input.
|
||||
|
||||
The components of a query arg may each be either a string or a bytes type.
|
||||
When a component is a string, the safe, encoding and errors parameters are
|
||||
passed to quote_plus for encoding.
|
||||
"""
|
||||
|
||||
if hasattr(query, "items"):
|
||||
|
@ -566,14 +570,28 @@ def urlencode(query, doseq=False):
|
|||
l = []
|
||||
if not doseq:
|
||||
for k, v in query:
|
||||
k = quote_plus(str(k))
|
||||
v = quote_plus(str(v))
|
||||
if isinstance(k, bytes):
|
||||
k = quote_plus(k, safe)
|
||||
else:
|
||||
k = quote_plus(str(k), safe, encoding, errors)
|
||||
|
||||
if isinstance(v, bytes):
|
||||
v = quote_plus(v, safe)
|
||||
else:
|
||||
v = quote_plus(str(v), safe, encoding, errors)
|
||||
l.append(k + '=' + v)
|
||||
else:
|
||||
for k, v in query:
|
||||
k = quote_plus(str(k))
|
||||
if isinstance(v, str):
|
||||
v = quote_plus(v)
|
||||
if isinstance(k, bytes):
|
||||
k = quote_plus(k, safe)
|
||||
else:
|
||||
k = quote_plus(str(k), safe, encoding, errors)
|
||||
|
||||
if isinstance(v, bytes):
|
||||
v = quote_plus(v, safe)
|
||||
l.append(k + '=' + v)
|
||||
elif isinstance(v, str):
|
||||
v = quote_plus(v, safe, encoding, errors)
|
||||
l.append(k + '=' + v)
|
||||
else:
|
||||
try:
|
||||
|
@ -581,12 +599,16 @@ def urlencode(query, doseq=False):
|
|||
x = len(v)
|
||||
except TypeError:
|
||||
# not a sequence
|
||||
v = quote_plus(str(v))
|
||||
v = quote_plus(str(v), safe, encoding, errors)
|
||||
l.append(k + '=' + v)
|
||||
else:
|
||||
# loop over the sequence
|
||||
for elt in v:
|
||||
l.append(k + '=' + quote_plus(str(elt)))
|
||||
if isinstance(elt, bytes):
|
||||
elt = quote_plus(elt, safe)
|
||||
else:
|
||||
elt = quote_plus(str(elt), safe, encoding, errors)
|
||||
l.append(k + '=' + elt)
|
||||
return '&'.join(l)
|
||||
|
||||
# Utilities to parse URLs (most of these return None for missing parts):
|
||||
|
|
|
@ -75,6 +75,9 @@ C-API
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #5468: urlencode to handle bytes type and other encodings in its query
|
||||
parameter. Patch by Dan Mahn.
|
||||
|
||||
- Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
|
||||
module, ensure that the input string length is a multiple of the frame size
|
||||
|
||||
|
|
Loading…
Reference in New Issue