bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878)

Add basic fuzz tests for a few common builtin functions.

This is an easy place to start, and these functions are probably safe.
We'll want to add more fuzz tests later.  Let's bootstrap using these.

While the fuzz tests are included in CPython and compiled / tested on a
very basic level inside CPython itself, the actual fuzzing happens as
part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to
include the tests in CPython is to make sure that they're maintained
as part of the CPython project, especially when (as some eventually
will) they use internal implementation details in the test.

(This will be necessary sometimes because e.g. the fuzz test should
never enter Python's interpreter loop, whereas some APIs only expose
themselves publicly as Python functions.)

This particular set of changes is part of testing Python's builtins,
tracked internally at Google by b/37562550.

The _xxtestfuzz module that this change adds need not be shipped with binary distributions of Python.
This commit is contained in:
Devin Jeanpierre 2017-09-06 11:15:35 -07:00 committed by Gregory P. Smith
parent 5fcd5e64ee
commit c5bace2bf7
7 changed files with 252 additions and 0 deletions

View File

@ -0,0 +1,23 @@
import faulthandler
import test.support
import unittest
_xxtestfuzz = test.support.import_module('_xxtestfuzz')
class TestFuzzer(unittest.TestCase):
    """To keep our https://github.com/google/oss-fuzz API working."""

    def test_sample_input_smoke_test(self):
        """This is only a regression test: Check that it doesn't crash."""
        # A handful of tiny byte strings fed to the fuzz entry point one
        # after another; the test passes as long as no call raises.
        sample_inputs = (b"", b"\0", b"{", b" ", b"x", b"1")
        for sample in sample_inputs:
            _xxtestfuzz.run(sample)
# When run as a script, turn on faulthandler before running the tests so that
# a hard crash inside the C extension still produces a Python traceback.
if __name__ == "__main__":
    faulthandler.enable()
    unittest.main()

View File

@ -0,0 +1 @@
Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz.

View File

@ -0,0 +1,46 @@
Fuzz Tests for CPython
======================
These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
oss-fuzz works against a library exposing a function of the form
``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide
that library (``fuzzer.c``), and include a ``_xxtestfuzz`` module for testing with
some toy values -- no fuzzing occurs in Python's test suite.
oss-fuzz will regularly pull from CPython, discover all the tests in
``fuzz_tests.txt``, and run them -- so adding a new test here means it will
automatically be run in oss-fuzz, while also being smoke-tested as part of
CPython's test suite.
Adding a new fuzz test
----------------------
Add the test name on a new line in ``fuzz_tests.txt``.
In ``fuzzer.c``, add a function to be run::
int $test_name (const char* data, size_t size) {
...
return 0;
}
And invoke it from ``LLVMFuzzerTestOneInput``::
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
rv |= _run_fuzz(data, size, fuzz_builtin_float);
#endif
``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
``fuzz_tests.txt`` run separately.
What makes a good fuzz test
---------------------------
Libraries written in C that might handle untrusted data are worthwhile. The
more complex the logic (e.g. parsing), the more likely this is to be a useful
fuzz test. See the existing examples for reference, and refer to the
`oss-fuzz`_ docs.
.. _oss-fuzz: https://github.com/google/oss-fuzz

View File

@ -0,0 +1,53 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdlib.h>
#include <inttypes.h>
/* Fuzz entry point. It is only declared here; the definition is in fuzzer.c,
   which is compiled into the same extension module. */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
/* Python-callable wrapper around LLVMFuzzerTestOneInput.

   Parses a single argument with the "s#" format, hands the resulting buffer
   to the fuzz entry point, and returns None on success.  Returns NULL with an
   exception set if argument parsing fails, if the fuzz entry point left a
   Python exception pending, or (as RuntimeError) if it returned nonzero. */
static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
    const char* input;
    Py_ssize_t input_len;
    if (PyArg_ParseTuple(args, "s#", &input, &input_len) == 0) {
        return NULL;
    }

    int status = LLVMFuzzerTestOneInput((const uint8_t*)input, input_len);

    /* A pending exception takes priority over the return code. */
    if (PyErr_Occurred() != NULL) {
        return NULL;
    }
    if (status == 0) {
        Py_RETURN_NONE;
    }

    // Nonzero return codes are reserved for future use.
    PyErr_Format(
        PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", status);
    return NULL;
}
/* Method table for the extension module: a single function, run(), with an
   empty docstring, followed by the terminating sentinel entry. */
static PyMethodDef module_methods[] = {
    {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""},
    {NULL, NULL, 0, NULL},
};
static struct PyModuleDef _fuzzmodule = {
PyModuleDef_HEAD_INIT,
"_fuzz",
NULL,
0,
module_methods,
NULL,
NULL,
NULL,
NULL
};
/* Module initialization function: creates the module object from
   _fuzzmodule and returns it, or NULL if creation failed. */
PyMODINIT_FUNC
PyInit__xxtestfuzz(void)
{
    /* PyModule_Create's result (module or NULL) is exactly what we return. */
    return PyModule_Create(&_fuzzmodule);
}

View File

@ -0,0 +1,3 @@
fuzz_builtin_float
fuzz_builtin_int
fuzz_builtin_unicode

View File

@ -0,0 +1,120 @@
/* A fuzz test for CPython.
The only exposed function is LLVMFuzzerTestOneInput, which is called by
fuzzers and by the _xxtestfuzz module for smoke tests.
To build exactly one fuzz test, as when running in oss-fuzz etc.,
build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
-D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
See the source code for LLVMFuzzerTestOneInput for details. */
#include <Python.h>
#include <stdlib.h>
#include <inttypes.h>
/* Fuzz PyFloat_FromString as a proxy for float(str).

   Wraps the input in a bytes object and tries to parse it as a float.
   A ValueError from the parser is an expected outcome and is cleared; any
   other pending exception is left set for the caller to notice.
   Always returns 0. */
static int fuzz_builtin_float(const char* data, size_t size) {
    PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
    if (input_bytes == NULL) {
        return 0;
    }

    PyObject* parsed = PyFloat_FromString(input_bytes);
    if (PyErr_Occurred() != NULL) {
        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
            PyErr_Clear();
        }
    }

    /* parsed is NULL when parsing failed, hence the X variant. */
    Py_XDECREF(parsed);
    Py_DECREF(input_bytes);
    return 0;
}
/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
static int fuzz_builtin_int(const char* data, size_t size) {
/* Pick a random valid base. (When the fuzzed function takes extra
parameters, it's somewhat normal to hash the input to generate those
parameters. We want to exercise all code paths, so we do so here.) */
int base = _Py_HashBytes(data, size) % 37;
if (base == 1) {
// 1 is the only number between 0 and 36 that is not a valid base.
base = 0;
}
if (base == -1) {
return 0; // An error occurred, bail early.
}
if (base < 0) {
base = -base;
}
PyObject* s = PyUnicode_FromStringAndSize(data, size);
if (s == NULL) {
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
PyErr_Clear();
}
return 0;
}
PyObject* l = PyLong_FromUnicodeObject(s, base);
if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
}
PyErr_Clear();
Py_XDECREF(l);
Py_DECREF(s);
return 0;
}
/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str).

   Tries to build a str object from the raw input.  A UnicodeDecodeError is
   an expected outcome and is cleared; any other pending exception is left
   set for the caller to notice.  Always returns 0. */
static int fuzz_builtin_unicode(const char* data, size_t size) {
    PyObject* decoded = PyUnicode_FromStringAndSize(data, size);
    if (decoded != NULL) {
        /* Success: we only cared that decoding did not crash. */
        Py_DECREF(decoded);
        return 0;
    }
    if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
        PyErr_Clear();
    }
    return 0;
}
/* Run one fuzz test on the given input and abort the process on failure.

   "Failure" means the fuzz test returned with a Python exception still
   pending: the exception is printed and abort() is called.  Otherwise the
   fuzz test's own return value is passed back to the caller. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
    const char* bytes = (const char*) data;
    int result = fuzzer(bytes, size);
    if (!PyErr_Occurred()) {
        /* Someday the return value might mean something, propagate it. */
        return result;
    }
    /* Fuzz tests should handle expected errors for themselves.
       This is last-ditch check in case they didn't. */
    PyErr_Print();
    abort();
}
/* CPython generates a lot of leak warnings for whatever reason, so this
   always reports 1.
   NOTE(review): presumably the LeakSanitizer hook that disables leak
   checking when it returns nonzero -- confirm against the LSan interface. */
int __lsan_is_turned_off(void)
{
    return 1;
}
/* Fuzz test interface.
   This returns the bitwise or of all fuzz test's return values.
   All fuzz tests must return 0, as all nonzero return codes are reserved for
   future use -- we propagate the return values for that future case.
   (And we bitwise or when running multiple tests to verify that normally we
   only return 0.)

   Each test is compiled in when _Py_FUZZ_ONE is not defined (run everything)
   or when its own _Py_FUZZ_<test_name> macro is defined (run just that one).
   The conditions are spelled out in each #if rather than hidden behind a
   helper macro: a macro that expands to `defined(...)` inside #if has
   undefined behavior in standard C and is not portable across compilers. */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
    if (!Py_IsInitialized()) {
        /* LLVMFuzzerTestOneInput is called repeatedly from the same process,
           with no separate initialization phase, sadly, so we need to
           initialize CPython ourselves on the first run. */
        Py_InitializeEx(0);
    }

    int rv = 0;

#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
    rv |= _run_fuzz(data, size, fuzz_builtin_float);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
    rv |= _run_fuzz(data, size, fuzz_builtin_int);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
    rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
#endif

    return rv;
}

View File

@ -715,6 +715,12 @@ class PyBuildExt(build_ext):
# syslog daemon interface
exts.append( Extension('syslog', ['syslogmodule.c']) )
# Fuzz tests.
exts.append( Extension(
'_xxtestfuzz',
['_xxtestfuzz/_xxtestfuzz.c', '_xxtestfuzz/fuzzer.c'])
)
#
# Here ends the simple stuff. From here on, modules need certain
# libraries, are platform-specific, or present other surprises.