From db72e58ea5940c3942ede9f70cb897510b52fc36 Mon Sep 17 00:00:00 2001
From: Ammar Askar
Date: Wed, 6 Oct 2021 19:22:09 -0400
Subject: [PATCH] bpo-29505: Add fuzzer for ast.literal_eval (GH-28777)

This supersedes https://github.com/python/cpython/pull/3437
and fuzzes the method we recommend for unsafe inputs, `ast.literal_eval`.
This should exercise the tokenizer and parser.
---
 Modules/_xxtestfuzz/fuzz_tests.txt |  1 +
 Modules/_xxtestfuzz/fuzzer.c       | 64 ++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index 053b77b41b1..4e046ecf6d8 100644
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -6,3 +6,4 @@ fuzz_sre_compile
 fuzz_sre_match
 fuzz_csv_reader
 fuzz_struct_unpack
+fuzz_ast_literal_eval
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index e1256f59cc9..366e81a5451 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -393,6 +393,59 @@ static int fuzz_csv_reader(const char* data, size_t size) {
     return 0;
 }
 
+#define MAX_AST_LITERAL_EVAL_TEST_SIZE 0x10000
+PyObject* ast_literal_eval_method = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization.
+   Looks up ast.literal_eval and caches it in ast_literal_eval_method.
+   Returns 1 on success and 0 on failure; on failure the caller is
+   expected to report the pending Python exception. */
+static int init_ast_literal_eval(void) {
+    PyObject* ast_module = PyImport_ImportModule("ast");
+    if (ast_module == NULL) {
+        return 0;
+    }
+    ast_literal_eval_method = PyObject_GetAttrString(ast_module, "literal_eval");
+    /* PyImport_ImportModule returned a new reference; only the cached
+       method is needed from here on, so release the module. */
+    Py_DECREF(ast_module);
+    return ast_literal_eval_method != NULL;
+}
+/* Fuzz ast.literal_eval(x).
+   Always returns 0. Exceptions that are not cleared below are still
+   set when this function returns. */
+static int fuzz_ast_literal_eval(const char* data, size_t size) {
+    if (size > MAX_AST_LITERAL_EVAL_TEST_SIZE) {
+        return 0;
+    }
+    /* Ignore non null-terminated strings since ast can't handle
+       embedded nulls; PyUnicode_FromString also needs the terminator */
+    if (memchr(data, '\0', size) == NULL) {
+        return 0;
+    }
+
+    PyObject* s = PyUnicode_FromString(data);
+    /* Ignore exceptions until we have a valid string */
+    if (s == NULL) {
+        PyErr_Clear();
+        return 0;
+    }
+
+    PyObject* literal = PyObject_CallOneArg(ast_literal_eval_method, s);
+    /* Ignore some common errors thrown by ast.literal_eval */
+    if (literal == NULL && (PyErr_ExceptionMatches(PyExc_ValueError) ||
+                            PyErr_ExceptionMatches(PyExc_TypeError) ||
+                            PyErr_ExceptionMatches(PyExc_SyntaxError) ||
+                            PyErr_ExceptionMatches(PyExc_MemoryError) ||
+                            PyErr_ExceptionMatches(PyExc_RecursionError))
+    ) {
+        PyErr_Clear();
+    }
+
+    Py_XDECREF(literal);
+    Py_DECREF(s);
+    return 0;
+}
+
 /* Run fuzzer and abort on failure. */
 static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     int rv = fuzzer((const char*) data, size);
@@ -507,6 +560,17 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     }
 
     rv |= _run_fuzz(data, size, fuzz_csv_reader);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_ast_literal_eval)
+    static int AST_LITERAL_EVAL_INITIALIZED = 0;
+    if (!AST_LITERAL_EVAL_INITIALIZED && !init_ast_literal_eval()) {
+        PyErr_Print();
+        abort();
+    } else {
+        AST_LITERAL_EVAL_INITIALIZED = 1;
+    }
+
+    rv |= _run_fuzz(data, size, fuzz_ast_literal_eval);
 #endif
   return rv;
 }