bpo-29979: Rewrite cgi.parse_multipart to make it consistent with FieldStorage (#991)
This commit is contained in:
parent
f34c685020
commit
cc3fa204d3
|
@ -294,19 +294,20 @@ algorithms implemented in this module in other circumstances.
|
|||
This function is deprecated in this module. Use :func:`urllib.parse.parse_qsl`
|
||||
instead. It is maintained here only for backward compatibility.
|
||||
|
||||
.. function:: parse_multipart(fp, pdict)
|
||||
.. function:: parse_multipart(fp, pdict, encoding="utf-8")
|
||||
|
||||
Parse input of type :mimetype:`multipart/form-data` (for file uploads).
|
||||
Arguments are *fp* for the input file and *pdict* for a dictionary containing
|
||||
other parameters in the :mailheader:`Content-Type` header.
|
||||
Arguments are *fp* for the input file, *pdict* for a dictionary containing
|
||||
other parameters in the :mailheader:`Content-Type` header, and *encoding*,
|
||||
the request encoding.
|
||||
|
||||
Returns a dictionary just like :func:`urllib.parse.parse_qs` keys are the field names, each
|
||||
value is a list of values for that field. This is easy to use but not much good
|
||||
if you are expecting megabytes to be uploaded --- in that case, use the
|
||||
:class:`FieldStorage` class instead which is much more flexible.
|
||||
Returns a dictionary just like :func:`urllib.parse.parse_qs`: keys are the
|
||||
field names, each value is a list of values for that field. For non-file
|
||||
fields, the value is a list of strings.
|
||||
|
||||
Note that this does not parse nested multipart parts --- use
|
||||
:class:`FieldStorage` for that.
|
||||
This is easy to use but not much good if you are expecting megabytes to be
|
||||
uploaded --- in that case, use the :class:`FieldStorage` class instead
|
||||
which is much more flexible.
|
||||
|
||||
|
||||
.. function:: parse_header(string)
|
||||
|
|
|
@ -95,6 +95,14 @@ New Modules
|
|||
Improved Modules
|
||||
================
|
||||
|
||||
cgi
|
||||
---
|
||||
|
||||
:func:`~cgi.parse_multipart` returns the same results as
|
||||
:class:`~FieldStorage` : for non-file fields, the value associated to a key
|
||||
is a list of strings, not bytes.
|
||||
(Contributed by Pierre Quentel in :issue:`29979`.)
|
||||
|
||||
binascii
|
||||
--------
|
||||
|
||||
|
|
105
Lib/cgi.py
105
Lib/cgi.py
|
@ -198,105 +198,28 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
|
|||
DeprecationWarning, 2)
|
||||
return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing)
|
||||
|
||||
def parse_multipart(fp, pdict):
|
||||
def parse_multipart(fp, pdict, encoding="utf-8"):
|
||||
"""Parse multipart input.
|
||||
|
||||
Arguments:
|
||||
fp : input file
|
||||
pdict: dictionary containing other parameters of content-type header
|
||||
encoding: request encoding
|
||||
|
||||
Returns a dictionary just like parse_qs(): keys are the field names, each
|
||||
value is a list of values for that field. This is easy to use but not
|
||||
much good if you are expecting megabytes to be uploaded -- in that case,
|
||||
use the FieldStorage class instead which is much more flexible. Note
|
||||
that content-type is the raw, unparsed contents of the content-type
|
||||
header.
|
||||
|
||||
XXX This does not parse nested multipart parts -- use FieldStorage for
|
||||
that.
|
||||
|
||||
XXX This should really be subsumed by FieldStorage altogether -- no
|
||||
point in having two implementations of the same parsing algorithm.
|
||||
Also, FieldStorage protects itself better against certain DoS attacks
|
||||
by limiting the size of the data read in one chunk. The API here
|
||||
does not support that kind of protection. This also affects parse()
|
||||
since it can call parse_multipart().
|
||||
|
||||
value is a list of values for that field. For non-file fields, the value
|
||||
is a list of strings.
|
||||
"""
|
||||
import http.client
|
||||
|
||||
boundary = b""
|
||||
if 'boundary' in pdict:
|
||||
boundary = pdict['boundary']
|
||||
if not valid_boundary(boundary):
|
||||
raise ValueError('Invalid boundary in multipart form: %r'
|
||||
% (boundary,))
|
||||
|
||||
nextpart = b"--" + boundary
|
||||
lastpart = b"--" + boundary + b"--"
|
||||
partdict = {}
|
||||
terminator = b""
|
||||
|
||||
while terminator != lastpart:
|
||||
bytes = -1
|
||||
data = None
|
||||
if terminator:
|
||||
# At start of next part. Read headers first.
|
||||
headers = http.client.parse_headers(fp)
|
||||
clength = headers.get('content-length')
|
||||
if clength:
|
||||
try:
|
||||
bytes = int(clength)
|
||||
except ValueError:
|
||||
pass
|
||||
if bytes > 0:
|
||||
if maxlen and bytes > maxlen:
|
||||
raise ValueError('Maximum content length exceeded')
|
||||
data = fp.read(bytes)
|
||||
else:
|
||||
data = b""
|
||||
# Read lines until end of part.
|
||||
lines = []
|
||||
while 1:
|
||||
line = fp.readline()
|
||||
if not line:
|
||||
terminator = lastpart # End outer loop
|
||||
break
|
||||
if line.startswith(b"--"):
|
||||
terminator = line.rstrip()
|
||||
if terminator in (nextpart, lastpart):
|
||||
break
|
||||
lines.append(line)
|
||||
# Done with part.
|
||||
if data is None:
|
||||
continue
|
||||
if bytes < 0:
|
||||
if lines:
|
||||
# Strip final line terminator
|
||||
line = lines[-1]
|
||||
if line[-2:] == b"\r\n":
|
||||
line = line[:-2]
|
||||
elif line[-1:] == b"\n":
|
||||
line = line[:-1]
|
||||
lines[-1] = line
|
||||
data = b"".join(lines)
|
||||
line = headers['content-disposition']
|
||||
if not line:
|
||||
continue
|
||||
key, params = parse_header(line)
|
||||
if key != 'form-data':
|
||||
continue
|
||||
if 'name' in params:
|
||||
name = params['name']
|
||||
else:
|
||||
continue
|
||||
if name in partdict:
|
||||
partdict[name].append(data)
|
||||
else:
|
||||
partdict[name] = [data]
|
||||
|
||||
return partdict
|
||||
|
||||
# RFC 2026, Section 5.1 : The "multipart" boundary delimiters are always
|
||||
# represented as 7bit US-ASCII.
|
||||
boundary = pdict['boundary'].decode('ascii')
|
||||
ctype = "multipart/form-data; boundary={}".format(boundary)
|
||||
headers = Message()
|
||||
headers.set_type(ctype)
|
||||
headers['Content-Length'] = pdict['CONTENT-LENGTH']
|
||||
fs = FieldStorage(fp, headers=headers, encoding=encoding,
|
||||
environ={'REQUEST_METHOD': 'POST'})
|
||||
return {k: fs.getlist(k) for k in fs}
|
||||
|
||||
def _parseparam(s):
|
||||
while s[:1] == ';':
|
||||
|
|
|
@ -126,8 +126,8 @@ class CgiTests(unittest.TestCase):
|
|||
env = {'boundary': BOUNDARY.encode('latin1'),
|
||||
'CONTENT-LENGTH': '558'}
|
||||
result = cgi.parse_multipart(fp, env)
|
||||
expected = {'submit': [b' Add '], 'id': [b'1234'],
|
||||
'file': [b'Testing 123.\n'], 'title': [b'']}
|
||||
expected = {'submit': [' Add '], 'id': ['1234'],
|
||||
'file': [b'Testing 123.\n'], 'title': ['']}
|
||||
self.assertEqual(result, expected)
|
||||
|
||||
def test_fieldstorage_properties(self):
|
||||
|
|
|
@ -317,6 +317,10 @@ Extension Modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and
|
||||
making its results consistent with those of FieldStorage for
|
||||
multipart/form-data requests. Patch by Pierre Quentel.
|
||||
|
||||
- bpo-30243: Removed the __init__ methods of _json's scanner and encoder.
|
||||
Misusing them could cause memory leaks or crashes. Now scanner and encoder
|
||||
objects are completely initialized in the __new__ methods.
|
||||
|
|
Loading…
Reference in New Issue