cpython/Modules/_xxtestfuzz/fuzzer.c

/* A fuzz test for CPython.

  The only exposed function is LLVMFuzzerTestOneInput, which is called by
  fuzzers and by the _fuzz module for smoke tests.

  To build exactly one fuzz test, as when running in oss-fuzz etc.,
  build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
  LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
      -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.

  See the source code for LLVMFuzzerTestOneInput for details. */

#include <Python.h>
#include <stdlib.h>
#include <inttypes.h>

/* Fuzz PyFloat_FromString as a stand-in for float(str).

   The raw input is wrapped in a bytes object and passed to
   PyFloat_FromString. A ValueError is the expected result for input that
   does not parse and is cleared here; any other pending exception is left
   set. Always returns 0. */
static int fuzz_builtin_float(const char* data, size_t size) {
    PyObject* raw = PyBytes_FromStringAndSize(data, size);
    if (raw == NULL) {
        /* Could not even build the argument; nothing to fuzz. */
        return 0;
    }

    PyObject* parsed = PyFloat_FromString(raw);
    if (PyErr_Occurred()) {
        /* Only the anticipated failure mode is swallowed. */
        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
            PyErr_Clear();
        }
    }

    /* parsed is NULL when the conversion failed, hence the X variant. */
    Py_XDECREF(parsed);
    Py_DECREF(raw);
    return 0;
}

/* Fuzz PyLong_FromUnicodeObject as a stand-in for int(str).

   The numeric base is derived from a hash of the input, so different inputs
   exercise different bases (hashing the input is a common way to produce
   extra parameters for a fuzzed function). Always returns 0. */
static int fuzz_builtin_int(const char* data, size_t size) {
    /* Remainder of the input hash modulo 37; the handling below allows for
       it being negative. */
    int remainder = (int) (_Py_HashBytes(data, size) % 37);
    if (remainder == -1) {
        /* This value is treated as an error indicator and the input is
           skipped. NOTE(review): the test is on the remainder, not on the
           raw hash -- confirm that this is the intended check. */
        return 0;
    }

    /* Fold negative remainders onto 0..36. */
    int base = (remainder < 0) ? -remainder : remainder;
    if (base == 1) {
        /* 1 is the only value in 0..36 that is not a valid base; use 0. */
        base = 0;
    }

    PyObject* text = PyUnicode_FromStringAndSize(data, size);
    if (text == NULL) {
        /* Input that fails to decode is expected; other errors stay set. */
        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
            PyErr_Clear();
        }
        return 0;
    }

    PyObject* number = PyLong_FromUnicodeObject(text, base);
    /* ValueError is the expected failure here, but the error indicator is
       cleared unconditionally, so any other exception raised by the
       conversion is discarded as well. */
    PyErr_Clear();

    /* number is NULL when the conversion failed, hence the X variant. */
    Py_XDECREF(number);
    Py_DECREF(text);
    return 0;
}

/* Fuzz PyUnicode_FromStringAndSize as a stand-in for unicode(str).

   A UnicodeDecodeError is the expected result for input that does not
   decode and is cleared here; any other pending exception is left set.
   Always returns 0. */
static int fuzz_builtin_unicode(const char* data, size_t size) {
    PyObject* decoded = PyUnicode_FromStringAndSize(data, size);
    if (decoded != NULL) {
        /* Success: the object itself is of no further interest. */
        Py_DECREF(decoded);
        return 0;
    }

    if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
        PyErr_Clear();
    }
    return 0;
}

/* Invoke one fuzz test on the given input and abort the process if it
   leaves a Python exception pending.

   data/size: the raw fuzzer input, forwarded to the test as const char*.
   fuzzer:    the fuzz test to run.
   Returns whatever the fuzz test returned. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
    const char* bytes = (const char*) data;
    int status = fuzzer(bytes, size);

    if (!PyErr_Occurred()) {
        /* The return value may carry meaning someday, so pass it along. */
        return status;
    }

    /* Each fuzz test is expected to deal with the errors it anticipates;
       reaching this point means one slipped through. Report it and stop. */
    PyErr_Print();
    abort();
}

/* Always reports 1. The name is the hook LeakSanitizer consults to decide
   whether leak checking is disabled; it is defined here because CPython
   produces a large number of leak warnings. */
int __lsan_is_turned_off(void) {
    return 1;
}

/* Entry point called by the fuzzer.

   Runs every enabled fuzz test on the same input and returns the bitwise OR
   of their return values. A test is enabled when _Py_FUZZ_ONE is not
   defined, or when its own _Py_FUZZ_<test_name> macro is defined.

   Every fuzz test must return 0: nonzero codes are reserved for future use,
   and the results are still propagated for that future case. OR-ing them
   together also makes it visible if any test returns something other
   than 0. */
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
    int combined = 0;

    /* This function is called over and over within one process and there is
       no separate setup step, so CPython is initialized here on the first
       call. */
    if (!Py_IsInitialized()) {
        Py_InitializeEx(0);
    }

#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
    combined |= _run_fuzz(data, size, fuzz_builtin_float);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
    combined |= _run_fuzz(data, size, fuzz_builtin_int);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
    combined |= _run_fuzz(data, size, fuzz_builtin_unicode);
#endif

    return combined;
}
bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878) Add basic fuzz tests for a few common builtin functions. This is an easy place to start, and these functions are probably safe. We'll want to add more fuzz tests later. Lets bootstrap using these. While the fuzz tests are included in CPython and compiled / tested on a very basic level inside CPython itself, the actual fuzzing happens as part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to include the tests in CPython is to make sure that they're maintained as part of the CPython project, especially when (as some eventually will) they use internal implementation details in the test. (This will be necessary sometimes because e.g. the fuzz test should never enter Python's interpreter loop, whereas some APIs only expose themselves publicly as Python functions.) This particular set of changes is part of testing Python's builtins, tracked internally at Google by b/37562550. The _xxtestfuzz module that this change adds need not be shipped with binary distributions of Python. 2017-09-06 15:15:35 -03:00			`/* A fuzz test for CPython.`

			`The only exposed function is LLVMFuzzerTestOneInput, which is called by`
			`fuzzers and by the _fuzz module for smoke tests.`

			`To build exactly one fuzz test, as when running in oss-fuzz etc.,`
			`build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build`
			`LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with`
			`-D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.`

			`See the source code for LLVMFuzzerTestOneInput for details. */`

			`#include <Python.h>`
			`#include <stdlib.h>`
			`#include <inttypes.h>`

			`/* Fuzz PyFloat_FromString as a proxy for float(str). */`
			`static int fuzz_builtin_float(const char* data, size_t size) {`
			`PyObject* s = PyBytes_FromStringAndSize(data, size);`
			`if (s == NULL) return 0;`
			`PyObject* f = PyFloat_FromString(s);`
			`if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {`
			`PyErr_Clear();`
			`}`

			`Py_XDECREF(f);`
			`Py_DECREF(s);`
			`return 0;`
			`}`

			`/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */`
			`static int fuzz_builtin_int(const char* data, size_t size) {`
			`/* Pick a random valid base. (When the fuzzed function takes extra`
			`parameters, it's somewhat normal to hash the input to generate those`
			`parameters. We want to exercise all code paths, so we do so here.) */`
			`int base = _Py_HashBytes(data, size) % 37;`
			`if (base == 1) {`
			`// 1 is the only number between 0 and 36 that is not a valid base.`
			`base = 0;`
			`}`
			`if (base == -1) {`
			`return 0; // An error occurred, bail early.`
			`}`
			`if (base < 0) {`
			`base = -base;`
			`}`

			`PyObject* s = PyUnicode_FromStringAndSize(data, size);`
			`if (s == NULL) {`
			`if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {`
			`PyErr_Clear();`
			`}`
			`return 0;`
			`}`
			`PyObject* l = PyLong_FromUnicodeObject(s, base);`
			`if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {`
			`PyErr_Clear();`
			`}`
			`PyErr_Clear();`
			`Py_XDECREF(l);`
			`Py_DECREF(s);`
			`return 0;`
			`}`

			`/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */`
			`static int fuzz_builtin_unicode(const char* data, size_t size) {`
			`PyObject* s = PyUnicode_FromStringAndSize(data, size);`
			`if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {`
			`PyErr_Clear();`
			`}`
			`Py_XDECREF(s);`
			`return 0;`
			`}`

			`/* Run fuzzer and abort on failure. */`
			`static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {`
			`int rv = fuzzer((const char*) data, size);`
			`if (PyErr_Occurred()) {`
			`/* Fuzz tests should handle expected errors for themselves.`
			`This is last-ditch check in case they didn't. */`
			`PyErr_Print();`
			`abort();`
			`}`
			`/* Someday the return value might mean something, propagate it. */`
			`return rv;`
			`}`

			`/* CPython generates a lot of leak warnings for whatever reason. */`
			`int __lsan_is_turned_off(void) { return 1; }`

			`/* Fuzz test interface.`
			`This returns the bitwise or of all fuzz test's return values.`

			`All fuzz tests must return 0, as all nonzero return codes are reserved for`
			`future use -- we propagate the return values for that future case.`
			`(And we bitwise or when running multiple tests to verify that normally we`
			`only return 0.) */`
			`int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {`
			`if (!Py_IsInitialized()) {`
			`/* LLVMFuzzerTestOneInput is called repeatedly from the same process,`
			`with no separate initialization phase, sadly, so we need to`
			`initialize CPython ourselves on the first run. */`
			`Py_InitializeEx(0);`
			`}`

			`int rv = 0;`

Avoid UB in test selection macro. (#3407) This fixes the gcc "warning: this use of "defined" may not be portable [-Wexpansion-to-defined]" See discussion in http://bugs.python.org/issue29505 2017-09-06 22:00:47 -03:00			`#if !defined(_Py_FUZZ_ONE) \|\| defined(_Py_FUZZ_fuzz_builtin_float)`
bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878) Add basic fuzz tests for a few common builtin functions. This is an easy place to start, and these functions are probably safe. We'll want to add more fuzz tests later. Lets bootstrap using these. While the fuzz tests are included in CPython and compiled / tested on a very basic level inside CPython itself, the actual fuzzing happens as part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to include the tests in CPython is to make sure that they're maintained as part of the CPython project, especially when (as some eventually will) they use internal implementation details in the test. (This will be necessary sometimes because e.g. the fuzz test should never enter Python's interpreter loop, whereas some APIs only expose themselves publicly as Python functions.) This particular set of changes is part of testing Python's builtins, tracked internally at Google by b/37562550. The _xxtestfuzz module that this change adds need not be shipped with binary distributions of Python. 2017-09-06 15:15:35 -03:00			`rv \|= _run_fuzz(data, size, fuzz_builtin_float);`
			`#endif`
Avoid UB in test selection macro. (#3407) This fixes the gcc "warning: this use of "defined" may not be portable [-Wexpansion-to-defined]" See discussion in http://bugs.python.org/issue29505 2017-09-06 22:00:47 -03:00			`#if !defined(_Py_FUZZ_ONE) \|\| defined(_Py_FUZZ_fuzz_builtin_int)`
bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878) Add basic fuzz tests for a few common builtin functions. This is an easy place to start, and these functions are probably safe. We'll want to add more fuzz tests later. Lets bootstrap using these. While the fuzz tests are included in CPython and compiled / tested on a very basic level inside CPython itself, the actual fuzzing happens as part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to include the tests in CPython is to make sure that they're maintained as part of the CPython project, especially when (as some eventually will) they use internal implementation details in the test. (This will be necessary sometimes because e.g. the fuzz test should never enter Python's interpreter loop, whereas some APIs only expose themselves publicly as Python functions.) This particular set of changes is part of testing Python's builtins, tracked internally at Google by b/37562550. The _xxtestfuzz module that this change adds need not be shipped with binary distributions of Python. 2017-09-06 15:15:35 -03:00			`rv \|= _run_fuzz(data, size, fuzz_builtin_int);`
			`#endif`
Avoid UB in test selection macro. (#3407) This fixes the gcc "warning: this use of "defined" may not be portable [-Wexpansion-to-defined]" See discussion in http://bugs.python.org/issue29505 2017-09-06 22:00:47 -03:00			`#if !defined(_Py_FUZZ_ONE) \|\| defined(_Py_FUZZ_fuzz_builtin_unicode)`
bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878) Add basic fuzz tests for a few common builtin functions. This is an easy place to start, and these functions are probably safe. We'll want to add more fuzz tests later. Lets bootstrap using these. While the fuzz tests are included in CPython and compiled / tested on a very basic level inside CPython itself, the actual fuzzing happens as part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to include the tests in CPython is to make sure that they're maintained as part of the CPython project, especially when (as some eventually will) they use internal implementation details in the test. (This will be necessary sometimes because e.g. the fuzz test should never enter Python's interpreter loop, whereas some APIs only expose themselves publicly as Python functions.) This particular set of changes is part of testing Python's builtins, tracked internally at Google by b/37562550. The _xxtestfuzz module that this change adds need not be shipped with binary distributions of Python. 2017-09-06 15:15:35 -03:00			`rv \|= _run_fuzz(data, size, fuzz_builtin_unicode);`
			`#endif`
			`return rv;`
			`}`