From e8f968dcde520160bea7c98d298e58128f9abaa4 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Mon, 14 May 2018 15:20:06 -0700 Subject: [PATCH] bpo-33497: Add errors param to cgi.parse_multipart and make an encoding in FieldStorage use the given errors (GH-6804) (GH-6837) (cherry picked from commit 545c955be997efd6b3827b981024e6b9945d82d1) Co-authored-by: Amber Brown --- Doc/library/cgi.rst | 6 +++--- Lib/cgi.py | 10 ++++++---- Lib/test/test_cgi.py | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst index 17386b83183..0b1aead9ddf 100644 --- a/Doc/library/cgi.rst +++ b/Doc/library/cgi.rst @@ -296,7 +296,7 @@ algorithms implemented in this module in other circumstances. instead. It is maintained here only for backward compatibility. -.. function:: parse_multipart(fp, pdict, encoding="utf-8") +.. function:: parse_multipart(fp, pdict, encoding="utf-8", errors="replace") Parse input of type :mimetype:`multipart/form-data` (for file uploads). Arguments are *fp* for the input file, *pdict* for a dictionary containing @@ -312,8 +312,8 @@ algorithms implemented in this module in other circumstances. which is much more flexible. .. versionchanged:: 3.7 - Added the *encoding* parameter. For non-file fields, the value is now - a list of strings, not bytes. + Added the *encoding* and *errors* parameters. For non-file fields, the + value is now a list of strings, not bytes. .. function:: parse_header(string) diff --git a/Lib/cgi.py b/Lib/cgi.py index f5e85aa263b..f82cc6c8bd5 100755 --- a/Lib/cgi.py +++ b/Lib/cgi.py @@ -198,13 +198,14 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): DeprecationWarning, 2) return urllib.parse.parse_qsl(qs, keep_blank_values, strict_parsing) -def parse_multipart(fp, pdict, encoding="utf-8"): +def parse_multipart(fp, pdict, encoding="utf-8", errors="replace"): """Parse multipart input. Arguments: fp : input file pdict: dictionary containing other parameters of content-type header - encoding: request encoding + encoding, errors: request encoding and error handler, passed to + FieldStorage Returns a dictionary just like parse_qs(): keys are the field names, each value is a list of values for that field. For non-file fields, the value @@ -217,7 +218,7 @@ def parse_multipart(fp, pdict, encoding="utf-8"): headers = Message() headers.set_type(ctype) headers['Content-Length'] = pdict['CONTENT-LENGTH'] - fs = FieldStorage(fp, headers=headers, encoding=encoding, + fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors, environ={'REQUEST_METHOD': 'POST'}) return {k: fs.getlist(k) for k in fs} @@ -458,7 +459,8 @@ class FieldStorage: self.type = ctype self.type_options = pdict if 'boundary' in pdict: - self.innerboundary = pdict['boundary'].encode(self.encoding) + self.innerboundary = pdict['boundary'].encode(self.encoding, + self.errors) else: self.innerboundary = b"" diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py index 903d0731f97..4f2bba14a1b 100644 --- a/Lib/test/test_cgi.py +++ b/Lib/test/test_cgi.py @@ -130,6 +130,24 @@ class CgiTests(unittest.TestCase): 'file': [b'Testing 123.\n'], 'title': ['']} self.assertEqual(result, expected) + def test_parse_multipart_invalid_encoding(self): + BOUNDARY = "JfISa01" + POSTDATA = """--JfISa01 +Content-Disposition: form-data; name="submit-name" +Content-Length: 3 + +\u2603 +--JfISa01""" + fp = BytesIO(POSTDATA.encode('utf8')) + env = {'boundary': BOUNDARY.encode('latin1'), + 'CONTENT-LENGTH': str(len(POSTDATA.encode('utf8')))} + result = cgi.parse_multipart(fp, env, encoding="ascii", + errors="surrogateescape") + expected = {'submit-name': ["\udce2\udc98\udc83"]} + self.assertEqual(result, expected) + self.assertEqual("\u2603".encode('utf8'), + result["submit-name"][0].encode('utf8', 'surrogateescape')) + def test_fieldstorage_properties(self): fs = cgi.FieldStorage() self.assertFalse(fs)