diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst
index ea897a823bd..1c49c8b3c84 100644
--- a/Doc/c-api/arg.rst
+++ b/Doc/c-api/arg.rst
@@ -125,6 +125,8 @@ Unless otherwise stated, buffers are not NUL-terminated.
    pointer variable, which will be filled with the pointer to an existing
    Unicode buffer. Please note that the width of a :ctype:`Py_UNICODE`
    character depends on compilation options (it is either 16 or 32 bits).
+   The Python string must not contain embedded NUL characters; if it does,
+   a :exc:`TypeError` exception is raised.
 
    .. note::
       Since ``u`` doesn't give you back the length of the string, and it
diff --git a/Lib/test/test_getargs2.py b/Lib/test/test_getargs2.py
index baf70d16321..7186f55ed0b 100644
--- a/Lib/test/test_getargs2.py
+++ b/Lib/test/test_getargs2.py
@@ -293,8 +293,136 @@ class Keywords_TestCase(unittest.TestCase):
         else:
             self.fail('TypeError should have been raised')
 
+class Bytes_TestCase(unittest.TestCase):  # "s", "z" and "y" format families
+    def test_s(self):  # "s": str only (UTF-8 encoded); embedded NUL rejected
+        from _testcapi import getargs_s
+        self.assertEqual(getargs_s('abc\xe9'), b'abc\xc3\xa9')
+        self.assertRaises(TypeError, getargs_s, 'nul:\0')
+        self.assertRaises(TypeError, getargs_s, b'bytes')
+        self.assertRaises(TypeError, getargs_s, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_s, memoryview(b'memoryview'))
+        self.assertRaises(TypeError, getargs_s, None)
+
+    def test_s_star(self):  # "s*": str/bytes/bytearray/memoryview; NUL ok
+        from _testcapi import getargs_s_star
+        self.assertEqual(getargs_s_star('abc\xe9'), b'abc\xc3\xa9')
+        self.assertEqual(getargs_s_star('nul:\0'), b'nul:\0')
+        self.assertEqual(getargs_s_star(b'bytes'), b'bytes')
+        self.assertEqual(getargs_s_star(bytearray(b'bytearray')), b'bytearray')
+        self.assertEqual(getargs_s_star(memoryview(b'memoryview')), b'memoryview')
+        self.assertRaises(TypeError, getargs_s_star, None)
+
+    def test_s_hash(self):  # "s#": str or bytes; NUL ok
+        from _testcapi import getargs_s_hash
+        self.assertEqual(getargs_s_hash('abc\xe9'), b'abc\xc3\xa9')
+        self.assertEqual(getargs_s_hash('nul:\0'), b'nul:\0')
+        self.assertEqual(getargs_s_hash(b'bytes'), b'bytes')
+        self.assertRaises(TypeError, getargs_s_hash, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_s_hash, memoryview(b'memoryview'))
+        self.assertRaises(TypeError, getargs_s_hash, None)
+
+    def test_z(self):  # "z": str, bytes or None; embedded NUL in str rejected
+        from _testcapi import getargs_z
+        self.assertEqual(getargs_z('abc\xe9'), b'abc\xc3\xa9')
+        self.assertRaises(TypeError, getargs_z, 'nul:\0')
+        self.assertEqual(getargs_z(b'bytes'), b'bytes')
+        self.assertRaises(TypeError, getargs_z, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_z, memoryview(b'memoryview'))
+        self.assertIsNone(getargs_z(None))
+
+    def test_z_star(self):  # "z*": same inputs as "s*", plus None
+        from _testcapi import getargs_z_star
+        self.assertEqual(getargs_z_star('abc\xe9'), b'abc\xc3\xa9')
+        self.assertEqual(getargs_z_star('nul:\0'), b'nul:\0')
+        self.assertEqual(getargs_z_star(b'bytes'), b'bytes')
+        self.assertEqual(getargs_z_star(bytearray(b'bytearray')), b'bytearray')
+        self.assertEqual(getargs_z_star(memoryview(b'memoryview')), b'memoryview')
+        self.assertIsNone(getargs_z_star(None))
+
+    def test_z_hash(self):  # "z#": same inputs as "s#", plus None
+        from _testcapi import getargs_z_hash
+        self.assertEqual(getargs_z_hash('abc\xe9'), b'abc\xc3\xa9')
+        self.assertEqual(getargs_z_hash('nul:\0'), b'nul:\0')
+        self.assertEqual(getargs_z_hash(b'bytes'), b'bytes')
+        self.assertRaises(TypeError, getargs_z_hash, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_z_hash, memoryview(b'memoryview'))
+        self.assertIsNone(getargs_z_hash(None))
+
+    def test_y(self):  # "y": bytes only; embedded NUL rejected
+        from _testcapi import getargs_y
+        self.assertRaises(TypeError, getargs_y, 'abc\xe9')
+        self.assertEqual(getargs_y(b'bytes'), b'bytes')
+        self.assertRaises(TypeError, getargs_y, b'nul:\0')
+        self.assertRaises(TypeError, getargs_y, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_y, memoryview(b'memoryview'))
+        self.assertRaises(TypeError, getargs_y, None)
+
+    def test_y_star(self):  # "y*": bytes/bytearray/memoryview; NUL ok
+        from _testcapi import getargs_y_star
+        self.assertRaises(TypeError, getargs_y_star, 'abc\xe9')
+        self.assertEqual(getargs_y_star(b'bytes'), b'bytes')
+        self.assertEqual(getargs_y_star(b'nul:\0'), b'nul:\0')
+        self.assertEqual(getargs_y_star(bytearray(b'bytearray')), b'bytearray')
+        self.assertEqual(getargs_y_star(memoryview(b'memoryview')), b'memoryview')
+        self.assertRaises(TypeError, getargs_y_star, None)
+
+    def test_y_hash(self):  # "y#": bytes only; NUL ok
+        from _testcapi import getargs_y_hash
+        self.assertRaises(TypeError, getargs_y_hash, 'abc\xe9')
+        self.assertEqual(getargs_y_hash(b'bytes'), b'bytes')
+        self.assertEqual(getargs_y_hash(b'nul:\0'), b'nul:\0')
+        self.assertRaises(TypeError, getargs_y_hash, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_y_hash, memoryview(b'memoryview'))
+        self.assertRaises(TypeError, getargs_y_hash, None)
+
+
+class Unicode_TestCase(unittest.TestCase):  # "u" and "Z" format families
+    def test_u(self):  # "u": str only; embedded NUL rejected
+        from _testcapi import getargs_u
+        self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
+        self.assertRaises(TypeError, getargs_u, 'nul:\0')
+        self.assertRaises(TypeError, getargs_u, b'bytes')
+        self.assertRaises(TypeError, getargs_u, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
+        self.assertRaises(TypeError, getargs_u, None)
+
+    def test_u_hash(self):  # "u#": str only; NUL ok
+        from _testcapi import getargs_u_hash
+        self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
+        self.assertEqual(getargs_u_hash('nul:\0'), 'nul:\0')
+        self.assertRaises(TypeError, getargs_u_hash, b'bytes')
+        self.assertRaises(TypeError, getargs_u_hash, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
+        self.assertRaises(TypeError, getargs_u_hash, None)
+
+    def test_Z(self):  # "Z": str or None; embedded NUL rejected
+        from _testcapi import getargs_Z
+        self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
+        self.assertRaises(TypeError, getargs_Z, 'nul:\0')
+        self.assertRaises(TypeError, getargs_Z, b'bytes')
+        self.assertRaises(TypeError, getargs_Z, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
+        self.assertIsNone(getargs_Z(None))
+
+    def test_Z_hash(self):  # "Z#": str or None; NUL ok
+        from _testcapi import getargs_Z_hash
+        self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')
+        self.assertEqual(getargs_Z_hash('nul:\0'), 'nul:\0')
+        self.assertRaises(TypeError, getargs_Z_hash, b'bytes')
+        self.assertRaises(TypeError, getargs_Z_hash, bytearray(b'bytearray'))
+        self.assertRaises(TypeError, getargs_Z_hash, memoryview(b'memoryview'))
+        self.assertIsNone(getargs_Z_hash(None))
+
+
 def test_main():
-    tests = [Signed_TestCase, Unsigned_TestCase, Tuple_TestCase, Keywords_TestCase]
+    tests = [
+        Signed_TestCase,
+        Unsigned_TestCase,
+        Tuple_TestCase,
+        Keywords_TestCase,
+        Bytes_TestCase,
+        Unicode_TestCase,
+    ]
     try:
         from _testcapi import getargs_L, getargs_K
     except ImportError:
diff --git a/Misc/NEWS b/Misc/NEWS
index 65ab474c86f..6855696d320 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's New in Python 3.2 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #8592: PyArg_Parse*() functions raise a TypeError for "y", "u" and "Z"
+  formats if the string contains a null byte/character. Write unit tests for
+  string formats.
+
 - Issue #7490: to facilitate sharing of doctests between 2.x and 3.x test
   suites, the IGNORE_EXCEPTION_DETAIL directive now also ignores the module
   location of the raised exception.
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c
index e1dc5f4aa9c..7bcc1d8cecb 100644
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -1011,6 +1011,161 @@ test_k_code(PyObject *self)
     return Py_None;
 }
 
+/* Helpers for Lib/test/test_getargs2.py.  Each getargs_X() parses a single
+   argument with exactly one PyArg_ParseTuple() format code and returns what
+   was parsed as a new bytes/str object (or None when the format yields
+   NULL), so the format's behaviour can be checked from Python. */
+static PyObject *
+getargs_s(PyObject *self, PyObject *args)
+{
+    char *str;
+    if (!PyArg_ParseTuple(args, "s", &str))
+        return NULL;
+    return PyBytes_FromString(str);
+}
+
+static PyObject *
+getargs_s_star(PyObject *self, PyObject *args)
+{
+    Py_buffer buffer;
+    PyObject *bytes;
+    if (!PyArg_ParseTuple(args, "s*", &buffer))
+        return NULL;
+    bytes = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
+    PyBuffer_Release(&buffer);
+    return bytes;
+}
+
+static PyObject *
+getargs_s_hash(PyObject *self, PyObject *args)
+{
+    char *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "s#", &str, &size))
+        return NULL;
+    return PyBytes_FromStringAndSize(str, size);
+}
+
+static PyObject *
+getargs_z(PyObject *self, PyObject *args)
+{
+    char *str;
+    if (!PyArg_ParseTuple(args, "z", &str))
+        return NULL;
+    if (str != NULL)
+        return PyBytes_FromString(str);
+    else
+        Py_RETURN_NONE;
+}
+
+static PyObject *
+getargs_z_star(PyObject *self, PyObject *args)
+{
+    Py_buffer buffer;
+    PyObject *bytes;
+    if (!PyArg_ParseTuple(args, "z*", &buffer))
+        return NULL;
+    if (buffer.buf != NULL)
+        bytes = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
+    else {  /* buf is NULL (e.g. None was passed): report it as None */
+        Py_INCREF(Py_None);
+        bytes = Py_None;
+    }
+    PyBuffer_Release(&buffer);
+    return bytes;
+}
+
+static PyObject *
+getargs_z_hash(PyObject *self, PyObject *args)
+{
+    char *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "z#", &str, &size))
+        return NULL;
+    if (str != NULL)
+        return PyBytes_FromStringAndSize(str, size);
+    else
+        Py_RETURN_NONE;
+}
+
+static PyObject *
+getargs_y(PyObject *self, PyObject *args)
+{
+    char *str;
+    if (!PyArg_ParseTuple(args, "y", &str))
+        return NULL;
+    return PyBytes_FromString(str);
+}
+
+static PyObject *
+getargs_y_star(PyObject *self, PyObject *args)
+{
+    Py_buffer buffer;
+    PyObject *bytes;
+    if (!PyArg_ParseTuple(args, "y*", &buffer))
+        return NULL;
+    bytes = PyBytes_FromStringAndSize(buffer.buf, buffer.len);
+    PyBuffer_Release(&buffer);
+    return bytes;
+}
+
+static PyObject *
+getargs_y_hash(PyObject *self, PyObject *args)
+{
+    char *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "y#", &str, &size))
+        return NULL;
+    return PyBytes_FromStringAndSize(str, size);
+}
+
+static PyObject *
+getargs_u(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "u", &str))
+        return NULL;
+    size = Py_UNICODE_strlen(str);
+    return PyUnicode_FromUnicode(str, size);
+}
+
+static PyObject *
+getargs_u_hash(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "u#", &str, &size))
+        return NULL;
+    return PyUnicode_FromUnicode(str, size);
+}
+
+static PyObject *
+getargs_Z(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "Z", &str))
+        return NULL;
+    if (str != NULL) {
+        size = Py_UNICODE_strlen(str);
+        return PyUnicode_FromUnicode(str, size);
+    } else
+        Py_RETURN_NONE;
+}
+
+static PyObject *
+getargs_Z_hash(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *str;
+    Py_ssize_t size;
+    if (!PyArg_ParseTuple(args, "Z#", &str, &size))
+        return NULL;
+    if (str != NULL)
+        return PyUnicode_FromUnicode(str, size);
+    else
+        Py_RETURN_NONE;
+}
 
 /* Test the s and z codes for PyArg_ParseTuple.
 */
@@ -2062,11 +2217,24 @@ static PyMethodDef TestMethods[] = {
     {"test_long_long_and_overflow",
         (PyCFunction)test_long_long_and_overflow, METH_NOARGS},
     {"test_L_code", (PyCFunction)test_L_code, METH_NOARGS},
+#endif
+    {"getargs_s", getargs_s, METH_VARARGS},
+    {"getargs_s_star", getargs_s_star, METH_VARARGS},
+    {"getargs_s_hash", getargs_s_hash, METH_VARARGS},
+    {"getargs_z", getargs_z, METH_VARARGS},
+    {"getargs_z_star", getargs_z_star, METH_VARARGS},
+    {"getargs_z_hash", getargs_z_hash, METH_VARARGS},
+    {"getargs_y", getargs_y, METH_VARARGS},
+    {"getargs_y_star", getargs_y_star, METH_VARARGS},
+    {"getargs_y_hash", getargs_y_hash, METH_VARARGS},
+    {"getargs_u", getargs_u, METH_VARARGS},
+    {"getargs_u_hash", getargs_u_hash, METH_VARARGS},
+    {"getargs_Z", getargs_Z, METH_VARARGS},
+    {"getargs_Z_hash", getargs_Z_hash, METH_VARARGS},
     {"codec_incrementalencoder",
         (PyCFunction)codec_incrementalencoder, METH_VARARGS},
     {"codec_incrementaldecoder",
         (PyCFunction)codec_incrementaldecoder, METH_VARARGS},
-#endif
     {"test_s_code", (PyCFunction)test_s_code, METH_NOARGS},
     {"test_u_code", (PyCFunction)test_u_code, METH_NOARGS},
     {"test_Z_code", (PyCFunction)test_Z_code, METH_NOARGS},
diff --git a/Python/getargs.c b/Python/getargs.c
index 127b1473a67..d4d8d8417b9 100644
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -935,10 +935,18 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
         count = convertbuffer(arg, p, &buf);
         if (count < 0)
             return converterr(buf, arg, msgbuf, bufsize);
-        else if (*format == '#') {
+        if (*format == '#') {
             FETCH_SIZE;
             STORE_SIZE(count);
             format++;
+        } else {
+            /* No length is returned to the caller, so reject embedded
+               NULs.  count >= 0 was checked above, which makes the
+               size_t cast safe. */
+            if (strlen(*p) != (size_t)count)
+                return converterr(
+                    "bytes without null bytes",
+                    arg, msgbuf, bufsize);
         }
         break;
     }
@@ -1045,9 +1053,14 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 
             if (arg == Py_None)
                 *p = 0;
-            else if (PyUnicode_Check(arg))
+            else if (PyUnicode_Check(arg)) {
                 *p = PyUnicode_AS_UNICODE(arg);
-            else
+                /* No length is returned, so reject embedded NULs. */
+                if (Py_UNICODE_strlen(*p) != (size_t)PyUnicode_GET_SIZE(arg))
+                    return converterr(
+                        "str without null character or None",
+                        arg, msgbuf, bufsize);
+            } else
                 return converterr("str or None", arg, msgbuf, bufsize);
         }
         break;
@@ -1227,6 +1240,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
             FETCH_SIZE;
             STORE_SIZE(PyUnicode_GET_SIZE(arg));
             format++;
+        } else {
+            /* No length is returned, so reject embedded NULs. */
+            if (Py_UNICODE_strlen(*p) != (size_t)PyUnicode_GET_SIZE(arg))
+                return converterr(
+                    "str without null character",
+                    arg, msgbuf, bufsize);
         }
         break;
     }