gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241)

Fix double-free bug mentioned at https://github.com/python/cpython/issues/99240,
by moving memory clean up out of "exit" label.

Automerge-Triggered-By: GH:erlend-aasland
This commit is contained in:
colorfulappl 2022-11-24 22:01:26 +08:00 committed by GitHub
parent 69f6cc77d0
commit 8dbe08eb7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 201 additions and 25 deletions

View File

@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t
goto exit;
}
return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length);
/* Post parse cleanup for a */
PyMem_FREE(a);
/* Post parse cleanup for b */
PyMem_FREE(b);
/* Post parse cleanup for c */
PyMem_FREE(c);
/* Post parse cleanup for d */
PyMem_FREE(d);
/* Post parse cleanup for e */
PyMem_FREE(e);
exit:
/* Cleanup for a */
if (a) {
PyMem_FREE(a);
}
/* Cleanup for b */
if (b) {
PyMem_FREE(b);
}
/* Cleanup for c */
if (c) {
PyMem_FREE(c);
}
/* Cleanup for d */
if (d) {
PyMem_FREE(d);
}
/* Cleanup for e */
if (e) {
PyMem_FREE(e);
}
return return_value;
}
@ -1770,7 +1759,7 @@ static PyObject *
test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
char *d, Py_ssize_t d_length, char *e,
Py_ssize_t e_length)
/*[clinic end generated code: output=8acb886a3843f3bc input=eb4c38e1f898f402]*/
/*[clinic end generated code: output=999c1deecfa15b0a input=eb4c38e1f898f402]*/
/*[clinic input]

View File

@ -1045,6 +1045,17 @@ class ClinicFunctionalTest(unittest.TestCase):
self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c'))
self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c'))
def test_str_converter_encoding(self):
with self.assertRaises(TypeError):
ac_tester.str_converter_encoding(1)
self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c'))
with self.assertRaises(TypeError):
ac_tester.str_converter_encoding('a', b'b\0b', 'c')
self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c'))
self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])),
('a', 'b', 'c\x00c'))
self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c'))
def test_py_buffer_converter(self):
with self.assertRaises(TypeError):
ac_tester.py_buffer_converter('a', 'b')
@ -1225,6 +1236,10 @@ class ClinicFunctionalTest(unittest.TestCase):
arg_refcount_after = sys.getrefcount(arg)
self.assertEqual(arg_refcount_origin, arg_refcount_after)
def test_gh_99240_double_free(self):
expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str'
with self.assertRaisesRegex(TypeError, expected_error):
ac_tester.gh_99240_double_free('a', '\0b')
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,2 @@
Fix double-free bug in Argument Clinic ``str_converter`` by
extracting memory clean up to a new ``post_parsing`` section.

View File

@ -551,6 +551,64 @@ error:
}
/*[clinic input]
str_converter_encoding
a: str(encoding="idna")
b: str(encoding="idna", accept={bytes, bytearray, str})
c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True)
/
[clinic start generated code]*/
static PyObject *
str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
Py_ssize_t c_length)
/*[clinic end generated code: output=af68766049248a1c input=0c5cf5159d0e870d]*/
{
assert(!PyErr_Occurred());
PyObject *out[3] = {NULL,};
int i = 0;
PyObject *arg;
arg = PyUnicode_FromString(a);
assert(arg || PyErr_Occurred());
if (!arg) {
goto error;
}
out[i++] = arg;
arg = PyUnicode_FromString(b);
assert(arg || PyErr_Occurred());
if (!arg) {
goto error;
}
out[i++] = arg;
arg = PyUnicode_FromStringAndSize(c, c_length);
assert(arg || PyErr_Occurred());
if (!arg) {
goto error;
}
out[i++] = arg;
PyObject *tuple = PyTuple_New(3);
if (!tuple) {
goto error;
}
for (int j = 0; j < 3; j++) {
PyTuple_SET_ITEM(tuple, j, out[j]);
}
return tuple;
error:
for (int j = 0; j < i; j++) {
Py_DECREF(out[j]);
}
return NULL;
}
static PyObject *
bytes_from_buffer(Py_buffer *buf)
{
@ -927,6 +985,25 @@ gh_99233_refcount_impl(PyObject *module, PyObject *args)
}
/*[clinic input]
gh_99240_double_free
a: str(encoding="idna")
b: str(encoding="idna")
/
Proof-of-concept of GH-99240 double-free bug.
[clinic start generated code]*/
static PyObject *
gh_99240_double_free_impl(PyObject *module, char *a, char *b)
/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/
{
Py_RETURN_NONE;
}
static PyMethodDef tester_methods[] = {
TEST_EMPTY_FUNCTION_METHODDEF
OBJECTS_CONVERTER_METHODDEF
@ -951,6 +1028,7 @@ static PyMethodDef tester_methods[] = {
DOUBLE_CONVERTER_METHODDEF
PY_COMPLEX_CONVERTER_METHODDEF
STR_CONVERTER_METHODDEF
STR_CONVERTER_ENCODING_METHODDEF
PY_BUFFER_CONVERTER_METHODDEF
KEYWORDS_METHODDEF
KEYWORDS_KWONLY_METHODDEF
@ -970,6 +1048,7 @@ static PyMethodDef tester_methods[] = {
KEYWORD_ONLY_PARAMETER_METHODDEF
VARARG_AND_POSONLY_METHODDEF
GH_99233_REFCOUNT_METHODDEF
GH_99240_DOUBLE_FREE_METHODDEF
{NULL, NULL}
};

View File

@ -1165,6 +1165,43 @@ exit:
return return_value;
}
PyDoc_STRVAR(str_converter_encoding__doc__,
"str_converter_encoding($module, a, b, c, /)\n"
"--\n"
"\n");
#define STR_CONVERTER_ENCODING_METHODDEF \
{"str_converter_encoding", _PyCFunction_CAST(str_converter_encoding), METH_FASTCALL, str_converter_encoding__doc__},
static PyObject *
str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c,
Py_ssize_t c_length);
static PyObject *
str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
char *a = NULL;
char *b = NULL;
char *c = NULL;
Py_ssize_t c_length;
if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding",
"idna", &a, "idna", &b, "idna", &c, &c_length)) {
goto exit;
}
return_value = str_converter_encoding_impl(module, a, b, c, c_length);
/* Post parse cleanup for a */
PyMem_FREE(a);
/* Post parse cleanup for b */
PyMem_FREE(b);
/* Post parse cleanup for c */
PyMem_FREE(c);
exit:
return return_value;
}
PyDoc_STRVAR(py_buffer_converter__doc__,
"py_buffer_converter($module, a, b, /)\n"
"--\n"
@ -2353,4 +2390,37 @@ exit:
Py_XDECREF(__clinic_args);
return return_value;
}
/*[clinic end generated code: output=a5c9f181f3a32d85 input=a9049054013a1b77]*/
PyDoc_STRVAR(gh_99240_double_free__doc__,
"gh_99240_double_free($module, a, b, /)\n"
"--\n"
"\n"
"Proof-of-concept of GH-99240 double-free bug.");
#define GH_99240_DOUBLE_FREE_METHODDEF \
{"gh_99240_double_free", _PyCFunction_CAST(gh_99240_double_free), METH_FASTCALL, gh_99240_double_free__doc__},
static PyObject *
gh_99240_double_free_impl(PyObject *module, char *a, char *b);
static PyObject *
gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
char *a = NULL;
char *b = NULL;
if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free",
"idna", &a, "idna", &b)) {
goto exit;
}
return_value = gh_99240_double_free_impl(module, a, b);
/* Post parse cleanup for a */
PyMem_FREE(a);
/* Post parse cleanup for b */
PyMem_FREE(b);
exit:
return return_value;
}
/*[clinic end generated code: output=49dced2c99bcd0fb input=a9049054013a1b77]*/

View File

@ -348,6 +348,12 @@ class CRenderData:
# "goto exit" if there are any.
self.return_conversion = []
# The C statements required to do some operations
# after the end of parsing but before cleaning up.
# These operations may be, for example, memory deallocations which
# can only be done without any error happening during argument parsing.
self.post_parsing = []
# The C statements required to clean up after the impl call.
self.cleanup = []
@ -820,6 +826,7 @@ class CLanguage(Language):
{modifications}
{return_value} = {c_basename}_impl({impl_arguments});
{return_conversion}
{post_parsing}
{exit_label}
{cleanup}
@ -1460,6 +1467,7 @@ class CLanguage(Language):
template_dict['impl_parameters'] = ", ".join(data.impl_parameters)
template_dict['impl_arguments'] = ", ".join(data.impl_arguments)
template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip())
template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip())
template_dict['cleanup'] = format_escape("".join(data.cleanup))
template_dict['return_value'] = data.return_value
@ -1484,6 +1492,7 @@ class CLanguage(Language):
return_conversion=template_dict['return_conversion'],
initializers=template_dict['initializers'],
modifications=template_dict['modifications'],
post_parsing=template_dict['post_parsing'],
cleanup=template_dict['cleanup'],
)
@ -2725,6 +2734,10 @@ class CConverter(metaclass=CConverterAutoRegister):
# parse_arguments
self.parse_argument(data.parse_arguments)
# post_parsing
if post_parsing := self.post_parsing():
data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n')
# cleanup
cleanup = self.cleanup()
if cleanup:
@ -2820,6 +2833,14 @@ class CConverter(metaclass=CConverterAutoRegister):
"""
return ""
def post_parsing(self):
"""
The C statements required to do some operations after the end of parsing but before cleaning up.
Return a string containing this code indented at column 0.
If no operation is necessary, return an empty string.
"""
return ""
def cleanup(self):
"""
The C statements required to clean up after this variable.
@ -3416,10 +3437,10 @@ class str_converter(CConverter):
if NoneType in accept and self.c_default == "Py_None":
self.c_default = "NULL"
def cleanup(self):
def post_parsing(self):
if self.encoding:
name = self.name
return "".join(["if (", name, ") {\n PyMem_FREE(", name, ");\n}\n"])
return f"PyMem_FREE({name});\n"
def parse_arg(self, argname, displayname):
if self.format_unit == 's':