From f83fa0b9eb1c11a4ef53275d77c603b97335eb23 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Wed, 11 Oct 2023 04:44:06 -0700 Subject: [PATCH] gh-84489: Properly handle trailing spaces in Py_BuildValue() format strings (GH-21158) The docs state that the space, tab, colon, and comma characters are ignored in Py_BuildValue() format strings. Co-authored-by: Serhiy Storchaka --- Lib/test/test_capi/test_misc.py | 24 ++++++--- .../2020-06-25-09-44-59.bpo-40309.CuoGoQ.rst | 1 + Python/modsupport.c | 50 +++++++++---------- 3 files changed, 40 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2020-06-25-09-44-59.bpo-40309.CuoGoQ.rst diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index ef790c2a531..b3f32d860ee 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -319,6 +319,7 @@ class CAPITest(unittest.TestCase): self.assertEqual(buildvalue('((OO))', 1, 2), ((1, 2),)) self.assertEqual(buildvalue(' \t,:'), None) + self.assertEqual(buildvalue('O,', 1), 1) self.assertEqual(buildvalue(' O ', 1), 1) self.assertEqual(buildvalue('\tO\t', 1), 1) self.assertEqual(buildvalue('O,O', 1, 2), (1, 2)) @@ -327,17 +328,18 @@ class CAPITest(unittest.TestCase): self.assertEqual(buildvalue('O O', 1, 2), (1, 2)) self.assertEqual(buildvalue('O\tO', 1, 2), (1, 2)) self.assertEqual(buildvalue('(O,O)', 1, 2), (1, 2)) - self.assertEqual(buildvalue('(O, O)', 1, 2), (1, 2)) - self.assertEqual(buildvalue(' ( O O) ', 1, 2), (1, 2)) - self.assertEqual(buildvalue('\t(\tO\tO)\t', 1, 2), (1, 2)) + self.assertEqual(buildvalue('(O, O,)', 1, 2), (1, 2)) + self.assertEqual(buildvalue(' ( O O ) ', 1, 2), (1, 2)) + self.assertEqual(buildvalue('\t(\tO\tO\t)\t', 1, 2), (1, 2)) self.assertEqual(buildvalue('[O,O]', 1, 2), [1, 2]) - self.assertEqual(buildvalue('[O, O]', 1, 2), [1, 2]) - self.assertEqual(buildvalue(' [ O O] ', 1, 2), [1, 2]) + self.assertEqual(buildvalue('[O, O,]', 1, 2), [1, 2]) + self.assertEqual(buildvalue(' [ O O ] ', 1, 2), [1, 2]) + self.assertEqual(buildvalue(' [\tO\tO\t] ', 1, 2), [1, 2]) self.assertEqual(buildvalue('{O:O}', 1, 2), {1: 2}) self.assertEqual(buildvalue('{O:O,O:O}', 1, 2, 3, 4), {1: 2, 3: 4}) - self.assertEqual(buildvalue('{O: O, O: O}', 1, 2, 3, 4), {1: 2, 3: 4}) - self.assertEqual(buildvalue(' { O O O O} ', 1, 2, 3, 4), {1: 2, 3: 4}) - self.assertEqual(buildvalue('\t{\tO\tO\tO\tO}\t', 1, 2, 3, 4), {1: 2, 3: 4}) + self.assertEqual(buildvalue('{O: O, O: O,}', 1, 2, 3, 4), {1: 2, 3: 4}) + self.assertEqual(buildvalue(' { O O O O } ', 1, 2, 3, 4), {1: 2, 3: 4}) + self.assertEqual(buildvalue('\t{\tO\tO\tO\tO\t}\t', 1, 2, 3, 4), {1: 2, 3: 4}) self.assertRaises(SystemError, buildvalue, 'O', NULL) self.assertRaises(SystemError, buildvalue, '(O)', NULL) @@ -378,6 +380,12 @@ class CAPITest(unittest.TestCase): self.assertEqual(buildvalue('C', sys.maxunicode), chr(sys.maxunicode)) self.assertRaises(ValueError, buildvalue, 'C', -1) self.assertRaises(ValueError, buildvalue, 'C', sys.maxunicode+1) + + # gh-84489 + self.assertRaises(ValueError, buildvalue, '(C )i', -1, 2) + self.assertRaises(ValueError, buildvalue, '[C ]i', -1, 2) + self.assertRaises(ValueError, buildvalue, '{Ci }i', -1, 2, 3) + def test_buildvalue_N(self): _testcapi.test_buildvalue_N() diff --git a/Misc/NEWS.d/next/C API/2020-06-25-09-44-59.bpo-40309.CuoGoQ.rst b/Misc/NEWS.d/next/C API/2020-06-25-09-44-59.bpo-40309.CuoGoQ.rst new file mode 100644 index 00000000000..4d420f85343 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-06-25-09-44-59.bpo-40309.CuoGoQ.rst @@ -0,0 +1 @@ +Properly handle trailing spaces before closing parenthesis in :c:func:`Py_BuildValue` format strings. diff --git a/Python/modsupport.c b/Python/modsupport.c index 18b3322ae81..e9abf304e65 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -88,6 +88,24 @@ static PyObject *do_mklist(const char**, va_list *, char, Py_ssize_t); static PyObject *do_mkdict(const char**, va_list *, char, Py_ssize_t); static PyObject *do_mkvalue(const char**, va_list *); +static int +check_end(const char **p_format, char endchar) +{ + const char *f = *p_format; + while (*f != endchar) { + if (*f != ' ' && *f != '\t' && *f != ',' && *f != ':') { + PyErr_SetString(PyExc_SystemError, + "Unmatched paren in format"); + return 0; + } + f++; + } + if (endchar) { + f++; + } + *p_format = f; + return 1; +} static void do_ignore(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) @@ -108,14 +126,9 @@ do_ignore(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) } } Py_XDECREF(v); - if (**p_format != endchar) { - PyErr_SetString(PyExc_SystemError, - "Unmatched paren in format"); + if (!check_end(p_format, endchar)) { return; } - if (endchar) { - ++*p_format; - } } static PyObject * @@ -157,14 +170,10 @@ do_mkdict(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) Py_DECREF(k); Py_DECREF(v); } - if (**p_format != endchar) { + if (!check_end(p_format, endchar)) { Py_DECREF(d); - PyErr_SetString(PyExc_SystemError, - "Unmatched paren in format"); return NULL; } - if (endchar) - ++*p_format; return d; } @@ -191,14 +200,10 @@ do_mklist(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) } PyList_SET_ITEM(v, i, w); } - if (**p_format != endchar) { + if (!check_end(p_format, endchar)) { Py_DECREF(v); - PyErr_SetString(PyExc_SystemError, - "Unmatched paren in format"); return NULL; } - if (endchar) - ++*p_format; return v; } @@ -221,14 +226,9 @@ do_mkstack(PyObject **stack, const char **p_format, va_list *p_va, } stack[i] = w; } - if (**p_format != endchar) { - PyErr_SetString(PyExc_SystemError, - "Unmatched paren in format"); + if (!check_end(p_format, endchar)) { goto error; } - if (endchar) { - ++*p_format; - } return 0; error: @@ -261,14 +261,10 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n) } PyTuple_SET_ITEM(v, i, w); } - if (**p_format != endchar) { + if (!check_end(p_format, endchar)) { Py_DECREF(v); - PyErr_SetString(PyExc_SystemError, - "Unmatched paren in format"); return NULL; } - if (endchar) - ++*p_format; return v; }