diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py
index 532f5fe72aa..15924aaeff3 100644
--- a/Lib/test/test_xxtestfuzz.py
+++ b/Lib/test/test_xxtestfuzz.py
@@ -16,6 +16,8 @@ class TestFuzzer(unittest.TestCase):
_xxtestfuzz.run(b" ")
_xxtestfuzz.run(b"x")
_xxtestfuzz.run(b"1")
+ _xxtestfuzz.run(b"AAAAAAA")
+ _xxtestfuzz.run(b"AAAAAA\0")
if __name__ == "__main__":
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
new file mode 100644
index 00000000000..961306a8790
--- /dev/null
+++ b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
@@ -0,0 +1,219 @@
+"?"
+"abc"
+"()"
+"[]"
+"abc|def"
+"abc|def|ghi"
+"^xxx$"
+"ab\\b\\d\\bcd"
+"\\w|\\d"
+"a*?"
+"abc+"
+"abc+?"
+"xyz?"
+"xyz??"
+"xyz{0,1}"
+"xyz{0,1}?"
+"xyz{93}"
+"xyz{1,32}"
+"xyz{1,32}?"
+"xyz{1,}"
+"xyz{1,}?"
+"a\\fb\\nc\\rd\\te\\vf"
+"a\\nb\\bc"
+"(?:foo)"
+"(?: foo )"
+"foo|(bar|baz)|quux"
+"foo(?=bar)baz"
+"foo(?!bar)baz"
+"foo(?<=bar)baz"
+"foo(?)"
+"(?.)"
+"(?.)\\k"
diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
new file mode 100644
index 00000000000..8b7887d0f1d
Binary files /dev/null and b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv differ
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
new file mode 100644
index 00000000000..d99247ccadf
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
@@ -0,0 +1 @@
+XX]
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
new file mode 100644
index 00000000000..0c67ee7dfc1
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
@@ -0,0 +1 @@
+XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
new file mode 100644
index 00000000000..cce8919e728
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
@@ -0,0 +1 @@
+XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
new file mode 100644
index 00000000000..1e2efc51103
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
@@ -0,0 +1 @@
+XX(\+1|1)?[ \-\.]?\(?(?[0-9]{3})\)?[ \-\.]?(?[0-9]{3})[ \-\.]?(?[0-9]{4})[ \.]*(ext|x)?[ \.]*(?[0-9]{0,5})
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index f0121291eaa..9d330a668ee 100644
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -2,3 +2,6 @@ fuzz_builtin_float
fuzz_builtin_int
fuzz_builtin_unicode
fuzz_json_loads
+fuzz_sre_compile
+fuzz_sre_match
+fuzz_csv_reader
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index e862a99cfb3..16104e492ab 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -81,8 +81,17 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
#define MAX_JSON_TEST_SIZE 0x10000
-/* Initialized in LLVMFuzzerTestOneInput */
PyObject* json_loads_method = NULL;
+ /* Called by LLVMFuzzerTestOneInput for initialization.
+    Looks up json.loads and stores it in json_loads_method.
+    Returns 1 on success. Returns 0, with a Python exception set, if the
+    import or the attribute lookup fails. */
+ static int init_json_loads(void) {
+     /* Import json.loads */
+     PyObject* json_module = PyImport_ImportModule("json");
+     if (json_module == NULL) {
+         return 0;
+     }
+     json_loads_method = PyObject_GetAttrString(json_module, "loads");
+     /* PyObject_GetAttrString returned its own reference to the function
+        (or NULL), so the module reference from the import can be dropped. */
+     Py_DECREF(json_module);
+     return json_loads_method != NULL;
+ }
/* Fuzz json.loads(x) */
static int fuzz_json_loads(const char* data, size_t size) {
/* Since python supports arbitrarily large ints in JSON,
@@ -96,25 +105,230 @@ static int fuzz_json_loads(const char* data, size_t size) {
return 0;
}
PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
- /* Ignore ValueError as the fuzzer will more than likely
- generate some invalid json and values */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
- PyErr_Clear();
- }
- /* Ignore RecursionError as the fuzzer generates long sequences of
- arrays such as `[[[...` */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
- PyErr_Clear();
- }
- /* Ignore unicode errors, invalid byte sequences are common */
- if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
- PyErr_Clear();
+ if (parsed == NULL) {
+ /* Ignore ValueError as the fuzzer will more than likely
+ generate some invalid json and values */
+ if (PyErr_ExceptionMatches(PyExc_ValueError) ||
+ /* Ignore RecursionError as the fuzzer generates long sequences of
+ arrays such as `[[[...` */
+ PyErr_ExceptionMatches(PyExc_RecursionError) ||
+ /* Ignore unicode errors, invalid byte sequences are common */
+ PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
+ ) {
+ PyErr_Clear();
+ }
}
Py_DECREF(input_bytes);
Py_XDECREF(parsed);
return 0;
}
+ #define MAX_RE_TEST_SIZE 0x10000
+
+ PyObject* sre_compile_method = NULL;
+ PyObject* sre_error_exception = NULL;
+ int SRE_FLAG_DEBUG = 0;
+ /* Called by LLVMFuzzerTestOneInput for initialization.
+    Caches sre_compile.compile in sre_compile_method, sre_constants.error in
+    sre_error_exception and the integer value of sre_constants.SRE_FLAG_DEBUG
+    in SRE_FLAG_DEBUG.
+    Returns 1 on success. Returns 0, with a Python exception set, if any
+    import, attribute lookup or integer conversion fails. */
+ static int init_sre_compile(void) {
+     int ok = 0;
+     PyObject* sre_constants = NULL;
+     PyObject* debug_flag = NULL;
+     long debug_value;
+
+     /* Import sre_compile.compile and sre.error */
+     PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
+     if (sre_compile_module == NULL) {
+         goto done;
+     }
+     sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
+     if (sre_compile_method == NULL) {
+         goto done;
+     }
+
+     sre_constants = PyImport_ImportModule("sre_constants");
+     if (sre_constants == NULL) {
+         goto done;
+     }
+     sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
+     if (sre_error_exception == NULL) {
+         goto done;
+     }
+     debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
+     if (debug_flag == NULL) {
+         goto done;
+     }
+     /* PyLong_AsLong signals failure with -1 plus a pending exception, so
+        both have to be checked to tell an error from a real -1. */
+     debug_value = PyLong_AsLong(debug_flag);
+     if (debug_value == -1 && PyErr_Occurred()) {
+         goto done;
+     }
+     SRE_FLAG_DEBUG = (int) debug_value;
+     ok = 1;
+
+ done:
+     /* The cached globals own their references; the temporaries used to
+        reach them are released on every path. */
+     Py_XDECREF(debug_flag);
+     Py_XDECREF(sre_constants);
+     Py_XDECREF(sre_compile_module);
+     return ok;
+ }
+ /* Fuzz sre_compile.compile(pattern, flags).
+    The first two bytes of the input supply the flags and the remaining
+    bytes are the pattern, passed as a bytes object.
+    Always returns 0. Exceptions other than the expected ones cleared below
+    are left set. */
+ static int fuzz_sre_compile(const char* data, size_t size) {
+     /* Ignore really long regex patterns that will timeout the fuzzer */
+     if (size > MAX_RE_TEST_SIZE) {
+         return 0;
+     }
+     /* We treat the first 2 bytes of the input as a number for the flags */
+     if (size < 2) {
+         return 0;
+     }
+     /* Copy the two bytes out instead of casting the pointer: data is a
+        char buffer with no guarantee of uint16_t alignment. */
+     uint16_t flags;
+     memcpy(&flags, data, sizeof(flags));
+     /* We remove the SRE_FLAG_DEBUG if present. This is because it
+        prints to stdout which greatly decreases fuzzing speed */
+     flags &= ~SRE_FLAG_DEBUG;
+
+     /* Pull the pattern from the remaining bytes */
+     PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
+     if (pattern_bytes == NULL) {
+         return 0;
+     }
+     PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
+     if (flags_obj == NULL) {
+         Py_DECREF(pattern_bytes);
+         return 0;
+     }
+
+     /* compiled = sre_compile.compile(data[2:], data[0:2]) */
+     PyObject* compiled = PyObject_CallFunctionObjArgs(
+         sre_compile_method, pattern_bytes, flags_obj, NULL);
+     if (compiled == NULL) {
+         /* Ignore ValueError as the fuzzer will more than likely
+            generate some invalid combination of flags */
+         if (PyErr_ExceptionMatches(PyExc_ValueError) ||
+             /* Ignore some common errors thrown by sre_parse:
+                Overflow, Assertion and Index */
+             PyErr_ExceptionMatches(PyExc_OverflowError) ||
+             PyErr_ExceptionMatches(PyExc_AssertionError) ||
+             PyErr_ExceptionMatches(PyExc_IndexError) ||
+             /* Ignore re.error */
+             PyErr_ExceptionMatches(sre_error_exception)
+         ) {
+             PyErr_Clear();
+         }
+     }
+
+     Py_DECREF(pattern_bytes);
+     Py_DECREF(flags_obj);
+     Py_XDECREF(compiled);
+     return 0;
+ }
+
+ /* Some random patterns used to test re.match.
+    Be careful not to add catastrophically slow regexes here, we want to
+    exercise the matching code without causing timeouts. */
+ static const char* regex_patterns[] = {
+     ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
+     "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
+     "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
+     "(?:a*)*", "a{1,2}?"
+ };
+ const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
+ PyObject** compiled_patterns = NULL;
+ /* Called by LLVMFuzzerTestOneInput for initialization.
+    Compiles every entry of regex_patterns with re.compile and stores the
+    results in compiled_patterns (NUM_PATTERNS entries).
+    Returns 1 on success. Returns 0, with a Python exception set, if the
+    import, the allocation or any compile fails; in that case
+    compiled_patterns is left NULL. */
+ static int init_sre_match(void) {
+     PyObject* re_module = PyImport_ImportModule("re");
+     if (re_module == NULL) {
+         return 0;
+     }
+     compiled_patterns = (PyObject**) PyMem_RawMalloc(
+         sizeof(PyObject*) * NUM_PATTERNS);
+     if (compiled_patterns == NULL) {
+         Py_DECREF(re_module);
+         PyErr_NoMemory();
+         return 0;
+     }
+
+     /* Precompile all the regex patterns on the first run for faster fuzzing */
+     for (size_t i = 0; i < NUM_PATTERNS; i++) {
+         PyObject* compiled = PyObject_CallMethod(
+             re_module, "compile", "y", regex_patterns[i]);
+         /* Bail if any of the patterns fail to compile */
+         if (compiled == NULL) {
+             /* Release the patterns compiled so far and the array itself
+                so a failed initialization leaves nothing behind. */
+             while (i > 0) {
+                 i--;
+                 Py_DECREF(compiled_patterns[i]);
+             }
+             PyMem_RawFree(compiled_patterns);
+             compiled_patterns = NULL;
+             Py_DECREF(re_module);
+             return 0;
+         }
+         compiled_patterns[i] = compiled;
+     }
+     /* Only the compiled pattern objects are kept; the module reference
+        taken by the import is no longer needed. */
+     Py_DECREF(re_module);
+     return 1;
+ }
+ /* Fuzz pattern.match(x) for one of the precompiled patterns.
+    The first input byte selects the entry of compiled_patterns (modulo
+    NUM_PATTERNS) and the remaining bytes are the bytes object to match.
+    Always returns 0. Any exception raised by the attribute lookup or the
+    match call is left set. */
+ static int fuzz_sre_match(const char* data, size_t size) {
+     if (size < 1 || size > MAX_RE_TEST_SIZE) {
+         return 0;
+     }
+     /* Use the first byte as a uint8_t specifying the index of the
+        regex to use */
+     unsigned char idx = (unsigned char) data[0];
+     idx = idx % NUM_PATTERNS;
+
+     /* Pull the string to match from the remaining bytes */
+     PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
+     if (to_match == NULL) {
+         return 0;
+     }
+
+     PyObject* pattern = compiled_patterns[idx];
+     PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
+     /* The lookup can fail; calling or Py_DECREF-ing NULL would crash the
+        harness itself rather than report a finding. */
+     if (match_callable == NULL) {
+         Py_DECREF(to_match);
+         return 0;
+     }
+
+     PyObject* matches = PyObject_CallFunctionObjArgs(match_callable, to_match, NULL);
+
+     Py_XDECREF(matches);
+     Py_DECREF(match_callable);
+     Py_DECREF(to_match);
+     return 0;
+ }
+
+ #define MAX_CSV_TEST_SIZE 0x10000
+ PyObject* csv_module = NULL;
+ PyObject* csv_error = NULL;
+ /* Called by LLVMFuzzerTestOneInput for initialization.
+    Stores the csv module in csv_module and csv.Error in csv_error.
+    Returns 1 when both are available, 0 otherwise. */
+ static int init_csv_reader() {
+     /* Import csv and csv.Error */
+     csv_module = PyImport_ImportModule("csv");
+     if (csv_module != NULL) {
+         /* The module reference is kept in the global on purpose. */
+         csv_error = PyObject_GetAttrString(csv_module, "Error");
+         if (csv_error != NULL) {
+             return 1;
+         }
+     }
+     return 0;
+ }
+ /* Fuzz csv.reader(lines), where lines is the input decoded as a string
+    and split on "\n".
+    Always returns 0. csv.Error is cleared; any other exception raised
+    while splitting, creating the reader or iterating it is left set. */
+ static int fuzz_csv_reader(const char* data, size_t size) {
+     if (size < 1 || size > MAX_CSV_TEST_SIZE) {
+         return 0;
+     }
+     /* Ignore non null-terminated strings since _csv can't handle
+        embedded nulls. PyUnicode_FromString below also needs a
+        terminator inside the buffer, as it reads up to the first one. */
+     if (memchr(data, '\0', size) == NULL) {
+         return 0;
+     }
+
+     PyObject* s = PyUnicode_FromString(data);
+     /* Ignore exceptions until we have a valid string */
+     if (s == NULL) {
+         PyErr_Clear();
+         return 0;
+     }
+
+     /* Split on \n so we can test multiple lines */
+     PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
+     if (lines == NULL) {
+         Py_DECREF(s);
+         return 0;
+     }
+
+     /* The "N" format hands our reference to lines over to the call, so
+        lines must not be released again below. */
+     PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
+     if (reader) {
+         /* Consume all of the reader as an iterator */
+         PyObject* parsed_line;
+         while ((parsed_line = PyIter_Next(reader))) {
+             Py_DECREF(parsed_line);
+         }
+     }
+
+     /* Ignore csv.Error because we're probably going to generate
+        some bad files (embedded new-lines, unterminated quotes etc).
+        PyErr_ExceptionMatches is documented as valid only while an
+        exception is set, so check for a pending one first. */
+     if (PyErr_Occurred() && PyErr_ExceptionMatches(csv_error)) {
+         PyErr_Clear();
+     }
+
+     Py_XDECREF(reader);
+     Py_DECREF(s);
+     return 0;
+ }
+
/* Run fuzzer and abort on failure. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
int rv = fuzzer((const char*) data, size);
@@ -152,12 +366,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
initialize CPython ourselves on the first run. */
Py_InitializeEx(0);
}
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
- if (json_loads_method == NULL) {
- PyObject* json_module = PyImport_ImportModule("json");
- json_loads_method = PyObject_GetAttrString(json_module, "loads");
- }
-#endif
int rv = 0;
@@ -171,7 +379,48 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+ static int JSON_LOADS_INITIALIZED = 0;
+ if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
+ PyErr_Print();
+ abort();
+ } else {
+ JSON_LOADS_INITIALIZED = 1;
+ }
+
rv |= _run_fuzz(data, size, fuzz_json_loads);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
+ static int SRE_COMPILE_INITIALIZED = 0;
+ if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
+ PyErr_Print();
+ abort();
+ } else {
+ SRE_COMPILE_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_sre_compile);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
+ static int SRE_MATCH_INITIALIZED = 0;
+ if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
+ PyErr_Print();
+ abort();
+ } else {
+ SRE_MATCH_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_sre_match);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
+ static int CSV_READER_INITIALIZED = 0;
+ if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
+ PyErr_Print();
+ abort();
+ } else {
+ CSV_READER_INITIALIZED = 1;
+ }
+
+ rv |= _run_fuzz(data, size, fuzz_csv_reader);
#endif
return rv;
}