Issue #28295: Fixed the documentation and added tests for PyUnicode_AsUCS4().

Original patch by Xiang Zhang.
This commit is contained in:
Serhiy Storchaka 2016-10-02 21:29:26 +03:00
parent 63b5b6fd45
commit cc164232aa
4 changed files with 50 additions and 2 deletions

View File

@ -641,7 +641,7 @@ APIs:
Copy the string *u* into a UCS4 buffer, including a null character, if Copy the string *u* into a UCS4 buffer, including a null character, if
*copy_null* is set. Returns *NULL* and sets an exception on error (in *copy_null* is set. Returns *NULL* and sets an exception on error (in
particular, a :exc:`ValueError` if *buflen* is smaller than the length of particular, a :exc:`SystemError` if *buflen* is smaller than the length of
*u*). *buffer* is returned on success. *u*). *buffer* is returned on success.
.. versionadded:: 3.3 .. versionadded:: 3.3

View File

@ -749,7 +749,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
#endif #endif
/* Copy the string into a UCS4 buffer including the null character if copy_null /* Copy the string into a UCS4 buffer including the null character if copy_null
is set. Return NULL and raise an exception on error. Raise a ValueError if is set. Return NULL and raise an exception on error. Raise a SystemError if
the buffer is smaller than the string. Return buffer on success. the buffer is smaller than the string. Return buffer on success.
buflen is the length of the buffer in (Py_UCS4) characters. */ buflen is the length of the buffer in (Py_UCS4) characters. */

View File

@ -2687,6 +2687,23 @@ class CAPITest(unittest.TestCase):
self.assertEqual(size, nchar) self.assertEqual(size, nchar)
self.assertEqual(wchar, nonbmp + '\0') self.assertEqual(wchar, nonbmp + '\0')
# Test PyUnicode_AsUCS4()
@support.cpython_only
def test_asucs4(self):
    # Exercised through the _testcapi.unicode_asucs4(str, length, copy_null)
    # helper.  The expected values below show it returning one code point
    # more than the requested length, with '\uffff' marking a trailing slot
    # that the copy did not overwrite.
    from _testcapi import unicode_asucs4

    # Samples: ASCII, Latin-1, BMP, non-BMP, lone surrogates, and a
    # surrogate pair written as two separate code points.
    samples = (
        'abc',
        '\xa1\xa2',
        '\u4f60\u597d',
        'a\U0001f600',
        'a\ud800b\udfffc',
        '\ud834\udd1e',
    )
    for text in samples:
        size = len(text)

        # Requested length equals the string length: the extra slot holds
        # the terminator when copy_null is true, the marker otherwise.
        self.assertEqual(unicode_asucs4(text, size, 1), text + '\0')
        self.assertEqual(unicode_asucs4(text, size, 0), text + '\uffff')

        # One spare slot: the result is the same for either copy_null value.
        self.assertEqual(unicode_asucs4(text, size + 1, 1), text + '\0\uffff')
        self.assertEqual(unicode_asucs4(text, size + 1, 0), text + '\0\uffff')

        # Too little room for the requested copy must raise SystemError.
        with self.assertRaises(SystemError):
            unicode_asucs4(text, size - 1, 1)
        with self.assertRaises(SystemError):
            unicode_asucs4(text, size - 2, 0)

        # An embedded null character must not cut the copy short.
        joined = text + '\0' + text
        self.assertEqual(unicode_asucs4(joined, len(joined), 1), joined + '\0')
        self.assertEqual(unicode_asucs4(joined, len(joined), 0), joined + '\uffff')
@support.cpython_only @support.cpython_only
def test_encode_decimal(self): def test_encode_decimal(self):
from _testcapi import unicode_encodedecimal from _testcapi import unicode_encodedecimal

View File

@ -1829,6 +1829,36 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
return Py_BuildValue("(Nn)", result, size); return Py_BuildValue("(Nn)", result, size);
} }
/* Test helper exposing PyUnicode_AsUCS4() to Python code.

   Arguments (parsed with "Unp"): a str object, a Py_ssize_t str_len and a
   truth value copy_null.

   A scratch buffer of str_len + 1 Py_UCS4 slots is zero-filled and its last
   slot is set to the marker 0xFFFF.  The string is then copied with
   PyUnicode_AsUCS4(unicode, buffer, str_len + 1, copy_null), and the whole
   buffer, last slot included, is returned as a new str so the caller can see
   which slots the copy wrote.

   Returns NULL with an exception set on failure: ValueError if str_len is
   negative or too large, MemoryError if the buffer cannot be allocated, or
   the exception left by PyUnicode_AsUCS4(). */
static PyObject *
unicode_asucs4(PyObject *self, PyObject *args)
{
    PyObject *unicode, *result;
    Py_UCS4 *buffer;
    int copy_null;
    Py_ssize_t str_len, buf_len;

    if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, &copy_null)) {
        return NULL;
    }

    /* str_len comes straight from the caller.  A negative value would make
       buffer[str_len] below write in front of the allocation, and
       PY_SSIZE_T_MAX would overflow str_len + 1, so refuse both before
       doing any arithmetic or allocation. */
    if (str_len < 0 || str_len == PY_SSIZE_T_MAX) {
        PyErr_SetString(PyExc_ValueError,
                        "unicode_asucs4: str_len out of range");
        return NULL;
    }

    buf_len = str_len + 1;
    buffer = PyMem_NEW(Py_UCS4, buf_len);
    if (buffer == NULL) {
        return PyErr_NoMemory();
    }
    memset(buffer, 0, sizeof(*buffer) * (size_t)buf_len);
    /* Marker in the extra slot: it survives only if the copy does not
       reach the end of the buffer. */
    buffer[str_len] = 0xffffU;

    if (!PyUnicode_AsUCS4(unicode, buffer, buf_len, copy_null)) {
        PyMem_FREE(buffer);
        return NULL;
    }

    /* Return the whole buffer, not just the copied prefix. */
    result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buffer, buf_len);
    PyMem_FREE(buffer);
    return result;
}
static PyObject * static PyObject *
unicode_encodedecimal(PyObject *self, PyObject *args) unicode_encodedecimal(PyObject *self, PyObject *args)
{ {
@ -3884,6 +3914,7 @@ static PyMethodDef TestMethods[] = {
{"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS},
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS}, {"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
{"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS}, {"unicode_aswidecharstring",unicode_aswidecharstring, METH_VARARGS},
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
{"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS}, {"unicode_encodedecimal", unicode_encodedecimal, METH_VARARGS},
{"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS}, {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
{"unicode_legacy_string", unicode_legacy_string, METH_VARARGS}, {"unicode_legacy_string", unicode_legacy_string, METH_VARARGS},