diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index a0672ca2573..261a43c2785 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -641,7 +641,7 @@ APIs: Copy the string *u* into a UCS4 buffer, including a null character, if *copy_null* is set. Returns *NULL* and sets an exception on error (in - particular, a :exc:`ValueError` if *buflen* is smaller than the length of + particular, a :exc:`SystemError` if *buflen* is smaller than the length of *u*). *buffer* is returned on success. .. versionadded:: 3.3 diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 7cb19eb5a3c..2c496eaada4 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -749,7 +749,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( #endif /* Copy the string into a UCS4 buffer including the null character if copy_null - is set. Return NULL and raise an exception on error. Raise a ValueError if + is set. Return NULL and raise an exception on error. Raise a SystemError if the buffer is smaller than the string. Return buffer on success. buflen is the length of the buffer in (Py_UCS4) characters. 
*/ diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 082f6b59da3..6086bb81c65 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -2687,6 +2687,23 @@ class CAPITest(unittest.TestCase): self.assertEqual(size, nchar) self.assertEqual(wchar, nonbmp + '\0') + # Test PyUnicode_AsUCS4() + @support.cpython_only + def test_asucs4(self): + from _testcapi import unicode_asucs4 + for s in ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600', + 'a\ud800b\udfffc', '\ud834\udd1e']: + l = len(s) + self.assertEqual(unicode_asucs4(s, l, 1), s+'\0') + self.assertEqual(unicode_asucs4(s, l, 0), s+'\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 1), s+'\0\uffff') + self.assertEqual(unicode_asucs4(s, l+1, 0), s+'\0\uffff') + self.assertRaises(SystemError, unicode_asucs4, s, l-1, 1) + self.assertRaises(SystemError, unicode_asucs4, s, l-2, 0) + s = '\0'.join([s, s]) + self.assertEqual(unicode_asucs4(s, len(s), 1), s+'\0') + self.assertEqual(unicode_asucs4(s, len(s), 0), s+'\uffff') + @support.cpython_only def test_encode_decimal(self): from _testcapi import unicode_encodedecimal diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 81f5b1d2354..f4a1e97c396 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1829,6 +1829,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args) return Py_BuildValue("(Nn)", result, size); } +static PyObject * +unicode_asucs4(PyObject *self, PyObject *args) +{ + PyObject *unicode, *result; + Py_UCS4 *buffer; + int copy_null; + Py_ssize_t str_len, buf_len; + + if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) { + return NULL; + } + + buf_len = str_len + 1; + buffer = PyMem_NEW(Py_UCS4, buf_len); + if (buffer == NULL) { + return PyErr_NoMemory(); + } + memset(buffer, 0, sizeof(Py_UCS4)*buf_len); + buffer[str_len] = 0xffffU; + + if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) { + PyMem_FREE(buffer); + return NULL; + } + + result = 
PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len); + PyMem_FREE(buffer); + return result; +} + static PyObject * unicode_encodedecimal(PyObject *self, PyObject *args) { @@ -3884,6 +3914,7 @@ static PyMethodDef TestMethods[] = { {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, + {"unicode_asucs4", unicode_asucs4, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},