Merged revisions 82510 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r82510 | senthil.kumaran | 2010-07-03 23:18:22 +0530 (Sat, 03 Jul 2010) | 4 lines

  Fix Issue5468 - urlencode to handle bytes and other alternate encodings.
  (Extensive tests provided). Patch by Dan Mahn.
........
This commit is contained in:
Senthil Kumaran 2010-07-03 17:55:41 +00:00
parent 8e42fb7ada
commit fe1ad15b4b
4 changed files with 165 additions and 24 deletions

View File

@ -307,23 +307,29 @@ The :mod:`urllib.parse` module defines the following functions:
``b'a&\xef'``.
.. function:: urlencode(query, doseq=False)
.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
Convert a mapping object or a sequence of two-element tuples to a
"url-encoded" string, suitable to pass to :func:`urlopen` above as the
optional *data* argument. This is useful to pass a dictionary of form
fields to a ``POST`` request. The resulting string is a series of
``key=value`` pairs separated by ``'&'`` characters, where both *key* and
*value* are quoted using :func:`quote_plus` above. When a sequence of
two-element tuples is used as the *query* argument, the first element of
each tuple is a key and the second is a value. The value element in itself
can be a sequence and in that case, if the optional parameter *doseq* is
evaluates to *True*, individual ``key=value`` pairs separated by ``'&'``are
generated for each element of the value sequence for the key. The order of
parameters in the encoded string will match the order of parameter tuples in
the sequence. This module provides the functions :func:`parse_qs` and
:func:`parse_qsl` which are used to parse query strings into Python data
structures.
Convert a mapping object or a sequence of two-element, which may either be a
:class:`str` or a :class:`bytes` tuples, to a "url-encoded" string,
suitable to pass to :func:`urlopen` above as the optional *data* argument.
This is useful to pass a dictionary of form fields to a ``POST`` request.
The resulting string is a series of ``key=value`` pairs separated by ``'&'``
characters, where both *key* and *value* are quoted using :func:`quote_plus`
above. When a sequence of two-element tuples is used as the *query*
argument, the first element of each tuple is a key and the second is a
value. The value element in itself can be a sequence and in that case, if
the optional parameter *doseq* is evaluates to *True*, individual
``key=value`` pairs separated by ``'&'`` are generated for each element of
the value sequence for the key. The order of parameters in the encoded
string will match the order of parameter tuples in the sequence. This module
provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used
to parse query strings into Python data structures.
When *query* parameter is a :class:`str`, the *safe*, *encoding* and *error*
parameters are sent the :func:`quote_plus` for encoding.
.. versionchanged:: 3.2
query paramater supports bytes and string.
.. seealso::

View File

@ -797,6 +797,116 @@ class urlencode_Tests(unittest.TestCase):
self.assertEqual("a=a&a=b",
urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
def test_urlencode_encoding(self):
# ASCII encoding. Expect %3F with errors="replace'
given = (('\u00a0', '\u00c1'),)
expect = '%3F=%3F'
result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# Default is UTF-8 encoding.
given = (('\u00a0', '\u00c1'),)
expect = '%C2%A0=%C3%81'
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
# Latin-1 encoding.
given = (('\u00a0', '\u00c1'),)
expect = '%A0=%C1'
result = urllib.parse.urlencode(given, encoding="latin-1")
self.assertEqual(expect, result)
def test_urlencode_encoding_doseq(self):
# ASCII Encoding. Expect %3F with errors="replace'
given = (('\u00a0', '\u00c1'),)
expect = '%3F=%3F'
result = urllib.parse.urlencode(given, doseq=True,
encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# ASCII Encoding. On a sequence of values.
given = (("\u00a0", (1, "\u00c1")),)
expect = '%3F=1&%3F=%3F'
result = urllib.parse.urlencode(given, True,
encoding="ASCII", errors="replace")
self.assertEqual(expect, result)
# Utf-8
given = (("\u00a0", "\u00c1"),)
expect = '%C2%A0=%C3%81'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
given = (("\u00a0", (42, "\u00c1")),)
expect = '%C2%A0=42&%C2%A0=%C3%81'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# latin-1
given = (("\u00a0", "\u00c1"),)
expect = '%A0=%C1'
result = urllib.parse.urlencode(given, True, encoding="latin-1")
self.assertEqual(expect, result)
given = (("\u00a0", (42, "\u00c1")),)
expect = '%A0=42&%A0=%C1'
result = urllib.parse.urlencode(given, True, encoding="latin-1")
self.assertEqual(expect, result)
def test_urlencode_bytes(self):
given = ((b'\xa0\x24', b'\xc1\x24'),)
expect = '%A0%24=%C1%24'
result = urllib.parse.urlencode(given)
self.assertEqual(expect, result)
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
# Sequence of values
given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
expect = '%A0%24=42&%A0%24=%C1%24'
result = urllib.parse.urlencode(given, True)
self.assertEqual(expect, result)
def test_urlencode_encoding_safe_parameter(self):
# Send '$' (\x24) as safe character
# Default utf-8 encoding
given = ((b'\xa0\x24', b'\xc1\x24'),)
result = urllib.parse.urlencode(given, safe=":$")
expect = '%A0$=%C1$'
self.assertEqual(expect, result)
given = ((b'\xa0\x24', b'\xc1\x24'),)
result = urllib.parse.urlencode(given, doseq=True, safe=":$")
expect = '%A0$=%C1$'
self.assertEqual(expect, result)
# Safe parameter in sequence
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
result = urllib.parse.urlencode(given, True, safe=":$")
self.assertEqual(expect, result)
# Test all above in latin-1 encoding
given = ((b'\xa0\x24', b'\xc1\x24'),)
result = urllib.parse.urlencode(given, safe=":$",
encoding="latin-1")
expect = '%A0$=%C1$'
self.assertEqual(expect, result)
given = ((b'\xa0\x24', b'\xc1\x24'),)
expect = '%A0$=%C1$'
result = urllib.parse.urlencode(given, doseq=True, safe=":$",
encoding="latin-1")
given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
expect = '%A0$=%C1$&%A0$=13&%A0$=42'
result = urllib.parse.urlencode(given, True, safe=":$",
encoding="latin-1")
self.assertEqual(expect, result)
class Pathname_Tests(unittest.TestCase):
"""Test pathname2url() and url2pathname()"""

View File

@ -533,7 +533,7 @@ def quote_from_bytes(bs, safe='/'):
_safe_quoters[cachekey] = quoter
return ''.join([quoter[char] for char in bs])
def urlencode(query, doseq=False):
def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each
@ -542,6 +542,10 @@ def urlencode(query, doseq=False):
If the query arg is a sequence of two-element tuples, the order of the
parameters in the output will match the order of parameters in the
input.
The query arg may be either a string or a bytes type. When query arg is a
string, the safe, encoding and error parameters are sent the quote_plus for
encoding.
"""
if hasattr(query, "items"):
@ -566,14 +570,28 @@ def urlencode(query, doseq=False):
l = []
if not doseq:
for k, v in query:
k = quote_plus(str(k))
v = quote_plus(str(v))
if isinstance(k, bytes):
k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
else:
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
for k, v in query:
k = quote_plus(str(k))
if isinstance(v, str):
v = quote_plus(v)
if isinstance(k, bytes):
k = quote_plus(k, safe)
else:
k = quote_plus(str(k), safe, encoding, errors)
if isinstance(v, bytes):
v = quote_plus(v, safe)
l.append(k + '=' + v)
elif isinstance(v, str):
v = quote_plus(v, safe, encoding, errors)
l.append(k + '=' + v)
else:
try:
@ -581,12 +599,16 @@ def urlencode(query, doseq=False):
x = len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v))
v = quote_plus(str(v), safe, encoding, errors)
l.append(k + '=' + v)
else:
# loop over the sequence
for elt in v:
l.append(k + '=' + quote_plus(str(elt)))
if isinstance(elt, bytes):
elt = quote_plus(elt, safe)
else:
elt = quote_plus(str(elt), safe, encoding, errors)
l.append(k + '=' + elt)
return '&'.join(l)
# Utilities to parse URLs (most of these return None for missing parts):

View File

@ -75,6 +75,9 @@ C-API
Library
-------
- Issue #5468: urlencode to handle bytes type and other encodings in its query
parameter. Patch by Dan Mahn.
- Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
module, ensure that the input string length is a multiple of the frame size