mirror of https://github.com/python/cpython
gh-99593: Add tests for Unicode C API (part 2) (#99868)
Add tests for lower-level functions. Co-authored-by: Oleg Iarygin <oleg@arhadthedev.net>
This commit is contained in:
parent
b7a0a52196
commit
2ba931ff72
|
@ -17,6 +17,287 @@ class Str(str):
|
|||
|
||||
class CAPITest(unittest.TestCase):
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_new(self):
    """Check PyUnicode_New() via the _testcapi.unicode_new wrapper."""
    from _testcapi import unicode_new as new

    # Valid maxchar values: empty strings stay empty, longer ones compare
    # equal to chr(maxchar) repeated `size` times.
    for code_point in (0, 0x61, 0xa1, 0x4f60, 0x1f600, 0x10ffff):
        self.assertEqual(new(0, code_point), '')
        self.assertEqual(new(5, code_point), 5 * chr(code_point))

    # An out-of-range maxchar is accepted for size 0 but not for size 5.
    self.assertEqual(new(0, 0x110000), '')
    with self.assertRaises(SystemError):
        new(5, 0x110000)
    # Negative sizes are rejected.
    with self.assertRaises(SystemError):
        new(-1, 0)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fill(self):
    """Check PyUnicode_Fill() via the _testcapi.unicode_fill wrapper."""
    from _testcapi import unicode_fill as fill

    samples = [
        # all strings have exactly 5 characters
        'abcde', '\xa1\xa2\xa3\xa4\xa5',
        '\u4f60\u597d\u4e16\u754c\uff01',
        '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
    ]
    fill_chars = [0x78, 0xa9, 0x20ac, 0x1f638]

    for rank, code_point in enumerate(fill_chars):
        same_width = samples[rank]
        # wide -> narrow: exceed maxchar limitation
        for target in samples[:rank]:
            with self.assertRaises(ValueError):
                fill(target, 0, 0, code_point)
        for target in samples[rank:]:
            for start in range(7):
                for length in range(-1, 7 - start):
                    # Number of characters expected to be overwritten.
                    count = max(min(length, 5 - start), 0)
                    if count == 5 and target != same_width:
                        # narrow -> wide
                        # Tests omitted since this creates invalid strings.
                        continue
                    stop = start + count
                    expected = (target[:start]
                                + chr(code_point) * count
                                + target[stop:])
                    self.assertEqual(
                        fill(target, start, length, code_point),
                        (expected, count))

    first = samples[0]
    with self.assertRaises(IndexError):
        fill(first, -1, 0, 0x78)
    with self.assertRaises(ValueError):
        fill(first, 0, 0, 0x110000)
    with self.assertRaises(SystemError):
        fill(b'abc', 0, 0, 0x78)
    with self.assertRaises(SystemError):
        fill([], 0, 0, 0x78)
    # CRASHES fill(s, 0, NULL, 0, 0)
    # CRASHES fill(NULL, 0, 0, 0x78)
    # TODO: Test PyUnicode_Fill() with non-modifiable unicode.
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_writechar(self):
    """Test PyUnicode_WriteChar()"""
    from _testcapi import unicode_writechar as writechar

    strings = [
        # one string for every kind
        'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
        '\U0001f600\U0001f601\U0001f602'
    ]
    # one character for every kind + out of range code
    chars = [0x78, 0xa9, 0x20ac, 0x1f638, 0x110000]
    for i, s in enumerate(strings):
        for j, c in enumerate(chars):
            if j <= i:
                # The character fits into the string: index 1 is replaced
                # and the wrapper reports result code 0.
                self.assertEqual(writechar(s, 1, c),
                                 (s[:1] + chr(c) + s[2:], 0))
            else:
                # The character is wider than the string can hold, or it
                # is not a valid code point.
                self.assertRaises(ValueError, writechar, s, 1, c)

    self.assertRaises(IndexError, writechar, 'abc', 3, 0x78)
    self.assertRaises(IndexError, writechar, 'abc', -1, 0x78)
    self.assertRaises(TypeError, writechar, b'abc', 0, 0x78)
    self.assertRaises(TypeError, writechar, [], 0, 0x78)
    # CRASHES writechar(NULL, 0, 0x78)
    # TODO: Test PyUnicode_WriteChar() with non-modifiable and legacy
    # unicode.
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_resize(self):
    """Check PyUnicode_Resize() via the _testcapi.unicode_resize wrapper."""
    from _testcapi import unicode_resize as resize

    samples = [
        # all strings have exactly 3 characters
        'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
        '\U0001f600\U0001f601\U0001f602'
    ]
    for text in samples:
        # Same size, shrink, grow (the wrapper zero-fills the tail), empty.
        self.assertEqual(resize(text, 3), (text, 0))
        self.assertEqual(resize(text, 2), (text[:2], 0))
        self.assertEqual(resize(text, 4), (text + '\0', 0))
        self.assertEqual(resize(text, 0), ('', 0))

    # Non-str objects and NULL are rejected.
    for bad in (b'abc', [], NULL):
        with self.assertRaises(SystemError):
            resize(bad, 0)
    # TODO: Test PyUnicode_Resize() with non-modifiable and legacy unicode
    # and with NULL as the address.
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_append(self):
    """Check PyUnicode_Append() via the _testcapi.unicode_append wrapper."""
    from _testcapi import unicode_append as append

    samples = [
        'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
        '\U0001f600\U0001f601\U0001f602'
    ]
    for sample in samples:
        # The left operand is the reversed sample.
        left = sample[::-1]
        for right in samples:
            self.assertEqual(append(left, right), left + right)

    # Any non-str or NULL operand on either side is rejected.
    bad_pairs = (
        ('abc', b'abc'),
        (b'abc', 'abc'),
        (b'abc', b'abc'),
        ('abc', []),
        ([], 'abc'),
        ([], []),
        (NULL, 'abc'),
        ('abc', NULL),
    )
    for left, right in bad_pairs:
        with self.assertRaises(SystemError):
            append(left, right)
    # TODO: Test PyUnicode_Append() with modifiable unicode
    # and with NULL as the address.
    # TODO: Check reference counts.
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_appendanddel(self):
    """Check PyUnicode_AppendAndDel() via _testcapi.unicode_appendanddel."""
    from _testcapi import unicode_appendanddel as appendanddel

    samples = [
        'abc', '\xa1\xa2\xa3', '\u4f60\u597d\u4e16',
        '\U0001f600\U0001f601\U0001f602'
    ]
    for sample in samples:
        # The left operand is the reversed sample.
        left = sample[::-1]
        for right in samples:
            expected = left + right
            self.assertEqual(appendanddel(left, right), expected)

    # Any non-str or NULL operand on either side is rejected.
    bad_pairs = (
        ('abc', b'abc'),
        (b'abc', 'abc'),
        (b'abc', b'abc'),
        ('abc', []),
        ([], 'abc'),
        ([], []),
        (NULL, 'abc'),
        ('abc', NULL),
    )
    for left, right in bad_pairs:
        with self.assertRaises(SystemError):
            appendanddel(left, right)
    # TODO: Test PyUnicode_AppendAndDel() with modifiable unicode
    # and with NULL as the address.
    # TODO: Check reference counts.
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fromstringandsize(self):
    """Check PyUnicode_FromStringAndSize() via its _testcapi wrapper.

    When the size argument is omitted the wrapper passes the length of
    the bytes buffer.
    """
    from _testcapi import unicode_fromstringandsize as fromstringandsize

    # Successful decoding of UTF-8 data.
    decoded = (
        ((b'abc',), 'abc'),
        ((b'abc', 2), 'ab'),
        ((b'abc\0def',), 'abc\0def'),
        ((b'\xc2\xa1\xc2\xa2',), '\xa1\xa2'),
        ((b'\xe4\xbd\xa0',), '\u4f60'),
        ((b'\xf0\x9f\x98\x80',), '\U0001f600'),
    )
    for call_args, expected in decoded:
        self.assertEqual(fromstringandsize(*call_args), expected)

    # Truncated and invalid sequences.
    with self.assertRaises(UnicodeDecodeError):
        fromstringandsize(b'\xc2\xa1', 1)
    with self.assertRaises(UnicodeDecodeError):
        fromstringandsize(b'\xa1', 1)

    # Empty input, with and without a buffer.
    self.assertEqual(fromstringandsize(b'', 0), '')
    self.assertEqual(fromstringandsize(NULL, 0), '')

    with self.assertRaises(SystemError):
        fromstringandsize(b'abc', -1)
    # TODO: Test PyUnicode_FromStringAndSize(NULL, size) for size != 0
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fromstring(self):
    """Check PyUnicode_FromString() via _testcapi.unicode_fromstring."""
    from _testcapi import unicode_fromstring as fromstring

    # Well-formed UTF-8 of increasing sequence length.
    decoded = (
        (b'abc', 'abc'),
        (b'\xc2\xa1\xc2\xa2', '\xa1\xa2'),
        (b'\xe4\xbd\xa0', '\u4f60'),
        (b'\xf0\x9f\x98\x80', '\U0001f600'),
    )
    for raw, expected in decoded:
        self.assertEqual(fromstring(raw), expected)

    # Truncated sequence and stray continuation byte.
    for raw in (b'\xc2', b'\xa1'):
        with self.assertRaises(UnicodeDecodeError):
            fromstring(raw)

    self.assertEqual(fromstring(b''), '')

    # CRASHES fromstring(NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fromkindanddata(self):
    """Check PyUnicode_FromKindAndData() via its _testcapi wrapper.

    The wrapper takes a byte buffer and divides its size by `kind` to get
    the number of characters.
    """
    from _testcapi import unicode_fromkindanddata as fromkindanddata

    samples = [
        'abcde', '\xa1\xa2\xa3\xa4\xa5',
        '\u4f60\u597d\u4e16\u754c\uff01',
        '\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
    ]
    little = sys.byteorder == 'little'
    enc1 = 'latin1'
    enc2 = 'utf-16le' if little else 'utf-16be'
    enc4 = 'utf-32le' if little else 'utf-32be'

    # (kind, native encoding, samples representable in that kind)
    plan = (
        (1, enc1, samples[:2]),
        (2, enc2, samples[:3]),
        (4, enc4, samples),
    )
    for kind, encoding, subset in plan:
        for text in subset:
            self.assertEqual(fromkindanddata(kind, text.encode(encoding)),
                             text)

    # UTF-16 data read as 2-byte units comes back as a surrogate pair.
    self.assertEqual(fromkindanddata(2, '\U0001f600'.encode(enc2)),
                     '\ud83d\ude00')

    for kind in (1, 2, 4):
        self.assertEqual(fromkindanddata(kind, b''), '')
        self.assertEqual(fromkindanddata(kind, b'\0'*kind), '\0')
        self.assertEqual(fromkindanddata(kind, NULL, 0), '')

    # Unsupported kinds.
    for kind in (-1, 0, 3, 5, 8):
        with self.assertRaises(SystemError):
            fromkindanddata(kind, b'')
    # Negative sizes.
    with self.assertRaises(ValueError):
        fromkindanddata(1, b'abc', -1)
    with self.assertRaises(ValueError):
        fromkindanddata(1, NULL, -1)
    # CRASHES fromkindanddata(1, NULL, 1)
    # CRASHES fromkindanddata(4, b'\xff\xff\xff\xff')
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_substring(self):
    """Check PyUnicode_Substring() against Python slicing."""
    from _testcapi import unicode_substring as substring

    samples = [
        'ab', 'ab\xa1\xa2',
        'ab\xa1\xa2\u4f60\u597d',
        'ab\xa1\xa2\u4f60\u597d\U0001f600\U0001f601'
    ]
    for text in samples:
        # Go slightly past the end of the string on both bounds.
        limit = len(text) + 2
        for begin in range(limit):
            for stop in range(max(begin - 1, 0), limit):
                self.assertEqual(substring(text, begin, stop),
                                 text[begin:stop])

    # Negative indices are rejected.
    with self.assertRaises(IndexError):
        substring('abc', -1, 0)
    with self.assertRaises(IndexError):
        substring('abc', 0, -1)
    # CRASHES substring(b'abc', 0, 0)
    # CRASHES substring([], 0, 0)
    # CRASHES substring(NULL, 0, 0)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_getlength(self):
    """Check PyUnicode_GetLength() against len()."""
    from _testcapi import unicode_getlength as getlength

    samples = ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
               'a\ud800b\udfffc', '\ud834\udd1e']
    for text in samples:
        self.assertEqual(getlength(text), len(text))

    # Non-str arguments are rejected.
    for bad in (b'abc', []):
        with self.assertRaises(TypeError):
            getlength(bad)
    # CRASHES getlength(NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_readchar(self):
    """Check PyUnicode_ReadChar() against ord() of each character."""
    from _testcapi import unicode_readchar as readchar

    samples = ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
               'a\ud800b\udfffc', '\ud834\udd1e']
    for text in samples:
        for position, char in enumerate(text):
            self.assertEqual(readchar(text, position), ord(char))
        # One past the end and negative indices are out of range.
        with self.assertRaises(IndexError):
            readchar(text, len(text))
        with self.assertRaises(IndexError):
            readchar(text, -1)

    # Non-str arguments are rejected.
    for bad in (b'abc', []):
        with self.assertRaises(TypeError):
            readchar(bad, 0)
    # CRASHES readchar(NULL, 0)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fromobject(self):
|
||||
|
@ -293,13 +574,70 @@ class CAPITest(unittest.TestCase):
|
|||
self.assertRaisesRegex(SystemError, 'invalid format string',
|
||||
PyUnicode_FromFormat, b'%+i', c_int(10))
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_interninplace(self):
    """Check PyUnicode_InternInPlace() via its _testcapi wrapper."""
    from _testcapi import unicode_interninplace as interninplace

    # Build the string at run time by decoding bytes.
    text = b'abc'.decode()
    interned = interninplace(text)
    self.assertEqual(interned, 'abc')

    # CRASHES interninplace(b'abc')
    # CRASHES interninplace(NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_internfromstring(self):
    """Check PyUnicode_InternFromString() via its _testcapi wrapper."""
    from _testcapi import unicode_internfromstring as internfromstring

    # Well-formed UTF-8 input.
    for raw, expected in ((b'abc', 'abc'),
                          (b'\xf0\x9f\x98\x80', '\U0001f600')):
        self.assertEqual(internfromstring(raw), expected)

    # Truncated sequence and stray continuation byte.
    for raw in (b'\xc2', b'\xa1'):
        with self.assertRaises(UnicodeDecodeError):
            internfromstring(raw)

    self.assertEqual(internfromstring(b''), '')

    # CRASHES internfromstring(NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fromwidechar(self):
    """Test PyUnicode_FromWideChar()"""
    from _testcapi import unicode_fromwidechar as fromwidechar
    from _testcapi import SIZEOF_WCHAR_T

    # Pick the codec whose code units match wchar_t in native byte order.
    if SIZEOF_WCHAR_T == 2:
        encoding = 'utf-16le' if sys.byteorder == 'little' else 'utf-16be'
    elif SIZEOF_WCHAR_T == 4:
        encoding = 'utf-32le' if sys.byteorder == 'little' else 'utf-32be'
    else:
        # Without this branch `encoding` would be unbound and the test
        # would die with an unrelated NameError further down.
        self.fail('unsupported SIZEOF_WCHAR_T: %r' % (SIZEOF_WCHAR_T,))

    for s in '', 'abc', '\xa1\xa2', '\u4f60', '\U0001f600':
        b = s.encode(encoding)
        # Size taken from the buffer by the wrapper.
        self.assertEqual(fromwidechar(b), s)
        # Size -1: the data must be terminated by a null wide character.
        self.assertEqual(fromwidechar(b + b'\0'*SIZEOF_WCHAR_T, -1), s)
    for s in '\ud83d', '\ude00':
        # Lone surrogates need 'surrogatepass' to be encodable.
        b = s.encode(encoding, 'surrogatepass')
        self.assertEqual(fromwidechar(b), s)
        self.assertEqual(fromwidechar(b + b'\0'*SIZEOF_WCHAR_T, -1), s)

    self.assertEqual(fromwidechar('abc'.encode(encoding), 2), 'ab')
    if SIZEOF_WCHAR_T == 2:
        # Cutting a UTF-16 surrogate pair in half yields a lone surrogate.
        self.assertEqual(fromwidechar('a\U0001f600'.encode(encoding), 2), 'a\ud83d')

    self.assertRaises(SystemError, fromwidechar, b'\0'*SIZEOF_WCHAR_T, -2)
    self.assertEqual(fromwidechar(NULL, 0), '')
    self.assertRaises(SystemError, fromwidechar, NULL, 1)
    self.assertRaises(SystemError, fromwidechar, NULL, -1)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_aswidechar(self):
|
||||
"""Test PyUnicode_AsWideChar()"""
|
||||
from _testcapi import unicode_aswidechar
|
||||
import_helper.import_module('ctypes')
|
||||
from ctypes import c_wchar, sizeof
|
||||
from _testcapi import unicode_aswidechar_null
|
||||
from _testcapi import SIZEOF_WCHAR_T
|
||||
|
||||
wchar, size = unicode_aswidechar('abcdef', 2)
|
||||
self.assertEqual(size, 2)
|
||||
|
@ -308,6 +646,8 @@ class CAPITest(unittest.TestCase):
|
|||
wchar, size = unicode_aswidechar('abc', 3)
|
||||
self.assertEqual(size, 3)
|
||||
self.assertEqual(wchar, 'abc')
|
||||
self.assertEqual(unicode_aswidechar_null('abc', 10), 4)
|
||||
self.assertEqual(unicode_aswidechar_null('abc', 0), 4)
|
||||
|
||||
wchar, size = unicode_aswidechar('abc', 4)
|
||||
self.assertEqual(size, 3)
|
||||
|
@ -320,60 +660,113 @@ class CAPITest(unittest.TestCase):
|
|||
wchar, size = unicode_aswidechar('abc\0def', 20)
|
||||
self.assertEqual(size, 7)
|
||||
self.assertEqual(wchar, 'abc\0def\0')
|
||||
self.assertEqual(unicode_aswidechar_null('abc\0def', 20), 8)
|
||||
|
||||
nonbmp = chr(0x10ffff)
|
||||
if sizeof(c_wchar) == 2:
|
||||
buflen = 3
|
||||
if SIZEOF_WCHAR_T == 2:
|
||||
nchar = 2
|
||||
else: # sizeof(c_wchar) == 4
|
||||
buflen = 2
|
||||
else: # SIZEOF_WCHAR_T == 4
|
||||
nchar = 1
|
||||
wchar, size = unicode_aswidechar(nonbmp, buflen)
|
||||
wchar, size = unicode_aswidechar(nonbmp, 10)
|
||||
self.assertEqual(size, nchar)
|
||||
self.assertEqual(wchar, nonbmp + '\0')
|
||||
self.assertEqual(unicode_aswidechar_null(nonbmp, 10), nchar + 1)
|
||||
|
||||
self.assertRaises(TypeError, unicode_aswidechar, b'abc', 10)
|
||||
self.assertRaises(TypeError, unicode_aswidechar, [], 10)
|
||||
self.assertRaises(SystemError, unicode_aswidechar, NULL, 10)
|
||||
self.assertRaises(TypeError, unicode_aswidechar_null, b'abc', 10)
|
||||
self.assertRaises(TypeError, unicode_aswidechar_null, [], 10)
|
||||
self.assertRaises(SystemError, unicode_aswidechar_null, NULL, 10)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_aswidecharstring(self):
    """Test PyUnicode_AsWideCharString()"""
    from _testcapi import unicode_aswidecharstring
    from _testcapi import unicode_aswidecharstring_null
    from _testcapi import SIZEOF_WCHAR_T

    wchar, size = unicode_aswidecharstring('abc')
    self.assertEqual(size, 3)
    self.assertEqual(wchar, 'abc\0')
    self.assertEqual(unicode_aswidecharstring_null('abc'), 'abc')

    # An embedded null character is accepted by unicode_aswidecharstring
    # but rejected by the *_null variant.
    wchar, size = unicode_aswidecharstring('abc\0def')
    self.assertEqual(size, 7)
    self.assertEqual(wchar, 'abc\0def\0')
    self.assertRaises(ValueError, unicode_aswidecharstring_null, 'abc\0def')

    # A non-BMP character takes two wchar_t units when wchar_t is 2 bytes
    # wide and a single unit otherwise.
    nonbmp = chr(0x10ffff)
    if SIZEOF_WCHAR_T == 2:
        nchar = 2
    else: # SIZEOF_WCHAR_T == 4
        nchar = 1
    wchar, size = unicode_aswidecharstring(nonbmp)
    self.assertEqual(size, nchar)
    self.assertEqual(wchar, nonbmp + '\0')
    self.assertEqual(unicode_aswidecharstring_null(nonbmp), nonbmp)

    self.assertRaises(TypeError, unicode_aswidecharstring, b'abc')
    self.assertRaises(TypeError, unicode_aswidecharstring, [])
    self.assertRaises(SystemError, unicode_aswidecharstring, NULL)
    self.assertRaises(TypeError, unicode_aswidecharstring_null, b'abc')
    self.assertRaises(TypeError, unicode_aswidecharstring_null, [])
    self.assertRaises(SystemError, unicode_aswidecharstring_null, NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_asucs4(self):
    """Check PyUnicode_AsUCS4() via the _testcapi.unicode_asucs4 wrapper."""
    from _testcapi import unicode_asucs4

    samples = ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
               'a\ud800b\udfffc', '\ud834\udd1e']
    # The checks run once with bool flags and once with int flags.
    flag_pairs = ((True, False), (1, 0))

    for text in samples:
        size = len(text)
        for with_null, without_null in flag_pairs:
            self.assertEqual(unicode_asucs4(text, size, with_null),
                             text+'\0')
            self.assertEqual(unicode_asucs4(text, size, without_null),
                             text+'\uffff')
            self.assertEqual(unicode_asucs4(text, size+1, with_null),
                             text+'\0\uffff')
            self.assertEqual(unicode_asucs4(text, size+1, without_null),
                             text+'\0\uffff')
            # Buffers that are too small are rejected.
            with self.assertRaises(SystemError):
                unicode_asucs4(text, size-1, with_null)
            with self.assertRaises(SystemError):
                unicode_asucs4(text, size-2, without_null)

        # Same string twice, separated by an embedded null character.
        joined = '\0'.join([text, text])
        total = len(joined)
        for with_null, without_null in flag_pairs:
            self.assertEqual(unicode_asucs4(joined, total, with_null),
                             joined+'\0')
            self.assertEqual(unicode_asucs4(joined, total, without_null),
                             joined+'\uffff')

    # CRASHES unicode_asucs4(b'abc', 1, 0)
    # CRASHES unicode_asucs4(b'abc', 1, 1)
    # CRASHES unicode_asucs4([], 1, 1)
    # CRASHES unicode_asucs4(NULL, 1, 0)
    # CRASHES unicode_asucs4(NULL, 1, 1)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_asucs4copy(self):
    """Check PyUnicode_AsUCS4Copy() via _testcapi.unicode_asucs4copy."""
    from _testcapi import unicode_asucs4copy as asucs4copy

    samples = ['abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600',
               'a\ud800b\udfffc', '\ud834\udd1e']
    for text in samples:
        # The wrapper's result carries a trailing null character.
        self.assertEqual(asucs4copy(text), text+'\0')
        # Same string twice, separated by an embedded null character.
        joined = '\0'.join([text, text])
        self.assertEqual(asucs4copy(joined), joined+'\0')

    # CRASHES asucs4copy(b'abc')
    # CRASHES asucs4copy([])
    # CRASHES asucs4copy(NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_fromordinal(self):
    """Check PyUnicode_FromOrdinal() via _testcapi.unicode_fromordinal."""
    from _testcapi import unicode_fromordinal as fromordinal

    # Valid code points of increasing magnitude.
    valid = (
        (0x61, 'a'),
        (0x20ac, '\u20ac'),
        (0x1f600, '\U0001f600'),
    )
    for ordinal, expected in valid:
        self.assertEqual(fromordinal(ordinal), expected)

    # Values outside the code point range.
    for ordinal in (0x110000, -1):
        with self.assertRaises(ValueError):
            fromordinal(ordinal)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
|
@ -381,29 +774,62 @@ class CAPITest(unittest.TestCase):
|
|||
"""Test PyUnicode_AsUTF8()"""
|
||||
from _testcapi import unicode_asutf8
|
||||
|
||||
bmp = '\u0100'
|
||||
bmp2 = '\uffff'
|
||||
nonbmp = chr(0x10ffff)
|
||||
self.assertEqual(unicode_asutf8('abc', 4), b'abc\0')
|
||||
self.assertEqual(unicode_asutf8('абв', 7), b'\xd0\xb0\xd0\xb1\xd0\xb2\0')
|
||||
self.assertEqual(unicode_asutf8('\U0001f600', 5), b'\xf0\x9f\x98\x80\0')
|
||||
self.assertEqual(unicode_asutf8('abc\0def', 8), b'abc\0def\0')
|
||||
|
||||
self.assertEqual(unicode_asutf8(bmp), b'\xc4\x80')
|
||||
self.assertEqual(unicode_asutf8(bmp2), b'\xef\xbf\xbf')
|
||||
self.assertEqual(unicode_asutf8(nonbmp), b'\xf4\x8f\xbf\xbf')
|
||||
self.assertRaises(UnicodeEncodeError, unicode_asutf8, 'a\ud800b\udfffc')
|
||||
self.assertRaises(UnicodeEncodeError, unicode_asutf8, '\ud8ff', 0)
|
||||
self.assertRaises(TypeError, unicode_asutf8, b'abc', 0)
|
||||
self.assertRaises(TypeError, unicode_asutf8, [], 0)
|
||||
# CRASHES unicode_asutf8(NULL, 0)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_asutf8andsize(self):
    """Check PyUnicode_AsUTF8AndSize() via its _testcapi wrappers."""
    from _testcapi import unicode_asutf8andsize
    from _testcapi import unicode_asutf8andsize_null

    bmp = '\u0100'
    bmp2 = '\uffff'
    nonbmp = chr(0x10ffff)

    # Calls with an explicit second argument.
    sized = (
        ('abc', 4, (b'abc\0', 3)),
        ('абв', 7, (b'\xd0\xb0\xd0\xb1\xd0\xb2\0', 6)),
        ('\U0001f600', 5, (b'\xf0\x9f\x98\x80\0', 4)),
        ('abc\0def', 8, (b'abc\0def\0', 7)),
    )
    for text, buflen, expected in sized:
        self.assertEqual(unicode_asutf8andsize(text, buflen), expected)
    self.assertEqual(unicode_asutf8andsize_null('abc', 4), b'abc\0')
    self.assertEqual(unicode_asutf8andsize_null('abc\0def', 8), b'abc\0def\0')

    # NOTE(review): the calls below pass a single argument while the ones
    # above pass two; they may be pre-change lines of the diff -- confirm
    # against the wrapper's signature.
    unsized = (
        (bmp, (b'\xc4\x80', 2)),
        (bmp2, (b'\xef\xbf\xbf', 3)),
        (nonbmp, (b'\xf4\x8f\xbf\xbf', 4)),
    )
    for text, expected in unsized:
        self.assertEqual(unicode_asutf8andsize(text), expected)
    with self.assertRaises(UnicodeEncodeError):
        unicode_asutf8andsize('a\ud800b\udfffc')

    with self.assertRaises(UnicodeEncodeError):
        unicode_asutf8andsize('\ud8ff', 0)
    for bad in (b'abc', []):
        with self.assertRaises(TypeError):
            unicode_asutf8andsize(bad, 0)
    # CRASHES unicode_asutf8andsize(NULL, 0)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_getdefaultencoding(self):
    """Check that PyUnicode_GetDefaultEncoding() reports UTF-8."""
    from _testcapi import unicode_getdefaultencoding as getdefaultencoding

    # The wrapper returns the encoding name as bytes.
    encoding_name = getdefaultencoding()
    self.assertEqual(encoding_name, b'utf-8')
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
def test_transform_decimal_and_space(self):
    """Check _PyUnicode_TransformDecimalAndSpaceToASCII()."""
    from _testcapi import unicode_transformdecimalandspacetoascii as transform_decimal

    # (input, expected output)
    cases = (
        ('123', '123'),
        ('\u0663.\u0661\u0664', '3.14'),
        ("\N{EM SPACE}3.14\N{EN SPACE}", " 3.14 "),
        ('12\u20ac3', '12?'),
        ('', ''),
    )
    for source, expected in cases:
        self.assertEqual(transform_decimal(source), expected)

    # Non-str arguments are rejected.
    for bad in (b'123', []):
        with self.assertRaises(SystemError):
            transform_decimal(bad)
    # CRASHES transform_decimal(NULL)
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
|
@ -858,6 +1284,7 @@ class CAPITest(unittest.TestCase):
|
|||
from _testcapi import unicode_copycharacters
|
||||
|
||||
strings = [
|
||||
# all strings have exactly 5 characters
|
||||
'abcde', '\xa1\xa2\xa3\xa4\xa5',
|
||||
'\u4f60\u597d\u4e16\u754c\uff01',
|
||||
'\U0001f600\U0001f601\U0001f602\U0001f603\U0001f604'
|
||||
|
@ -894,6 +1321,10 @@ class CAPITest(unittest.TestCase):
|
|||
self.assertRaises(SystemError, unicode_copycharacters, s, 1, s, 0, 5)
|
||||
self.assertRaises(SystemError, unicode_copycharacters, s, 0, s, 0, -1)
|
||||
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)
|
||||
self.assertRaises(SystemError, unicode_copycharacters, s, 0, [], 0, 0)
|
||||
# CRASHES unicode_copycharacters(s, 0, NULL, 0, 0)
|
||||
# TODO: Test PyUnicode_CopyCharacters() with non-unicode and
|
||||
# non-modifiable unicode as "to".
|
||||
|
||||
@support.cpython_only
|
||||
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
|
||||
|
|
|
@ -102,6 +102,278 @@ test_widechar(PyObject *self, PyObject *Py_UNUSED(ignored))
|
|||
|
||||
/* Replace an argument that is Py_None with a C NULL pointer, in place.
 * Presumably this is how the Python tests hand NULL to the wrapped C API;
 * confirm against the test module's definition of NULL.
 * NOTE(review): the expansion already ends in ';', so `NULLABLE(x);` yields an
 * extra empty statement and would break inside an unbraced if/else. Check
 * every call site before dropping the trailing semicolon. */
#define NULLABLE(x) do { if (x == Py_None) x = NULL; } while (0);
|
||||
|
||||
/* Return a newly allocated str holding the same characters as `unicode`.
 *
 * NULL is passed through as NULL without setting an exception, and an
 * object that is not a str is returned unchanged as a new reference.
 * On allocation or copy failure NULL is returned. */
static PyObject *
unicode_copy(PyObject *unicode)
{
    if (unicode == NULL) {
        return NULL;
    }
    if (!PyUnicode_Check(unicode)) {
        Py_INCREF(unicode);
        return unicode;
    }

    /* Allocate with the same length and maximum character as the source. */
    PyObject *duplicate = PyUnicode_New(PyUnicode_GET_LENGTH(unicode),
                                        PyUnicode_MAX_CHAR_VALUE(unicode));
    if (duplicate == NULL) {
        return NULL;
    }

    Py_ssize_t copied = PyUnicode_CopyCharacters(
        duplicate, 0, unicode, 0, PyUnicode_GET_LENGTH(unicode));
    if (copied < 0) {
        Py_DECREF(duplicate);
        return NULL;
    }
    return duplicate;
}
|
||||
|
||||
/* Test PyUnicode_New() */
|
||||
static PyObject *
|
||||
unicode_new(PyObject *self, PyObject *args)
|
||||
{
|
||||
Py_ssize_t size;
|
||||
unsigned int maxchar;
|
||||
PyObject *result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "nI", &size, &maxchar)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = PyUnicode_New(size, (Py_UCS4)maxchar);
|
||||
if (!result) {
|
||||
return NULL;
|
||||
}
|
||||
if (size > 0 && maxchar <= 0x10ffff &&
|
||||
PyUnicode_Fill(result, 0, size, (Py_UCS4)maxchar) < 0)
|
||||
{
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_Fill() */
|
||||
static PyObject *
|
||||
unicode_fill(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *to, *to_copy;
|
||||
Py_ssize_t start, length, filled;
|
||||
unsigned int fill_char;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OnnI", &to, &start, &length, &fill_char)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(to);
|
||||
if (!(to_copy = unicode_copy(to)) && to) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
filled = PyUnicode_Fill(to_copy, start, length, (Py_UCS4)fill_char);
|
||||
if (filled == -1 && PyErr_Occurred()) {
|
||||
Py_DECREF(to_copy);
|
||||
return NULL;
|
||||
}
|
||||
return Py_BuildValue("(Nn)", to_copy, filled);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_WriteChar() */
|
||||
static PyObject *
|
||||
unicode_writechar(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *to, *to_copy;
|
||||
Py_ssize_t index;
|
||||
unsigned int character;
|
||||
int result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OnI", &to, &index, &character)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(to);
|
||||
if (!(to_copy = unicode_copy(to)) && to) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = PyUnicode_WriteChar(to_copy, index, (Py_UCS4)character);
|
||||
if (result == -1 && PyErr_Occurred()) {
|
||||
Py_DECREF(to_copy);
|
||||
return NULL;
|
||||
}
|
||||
return Py_BuildValue("(Ni)", to_copy, result);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_Resize() */
|
||||
static PyObject *
|
||||
unicode_resize(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *obj, *copy;
|
||||
Py_ssize_t length;
|
||||
int result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "On", &obj, &length)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(obj);
|
||||
if (!(copy = unicode_copy(obj)) && obj) {
|
||||
return NULL;
|
||||
}
|
||||
result = PyUnicode_Resize(©, length);
|
||||
if (result == -1 && PyErr_Occurred()) {
|
||||
Py_XDECREF(copy);
|
||||
return NULL;
|
||||
}
|
||||
if (obj && PyUnicode_Check(obj) && length > PyUnicode_GET_LENGTH(obj)) {
|
||||
if (PyUnicode_Fill(copy, PyUnicode_GET_LENGTH(obj), length, 0U) < 0) {
|
||||
Py_DECREF(copy);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return Py_BuildValue("(Ni)", copy, result);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_Append() */
|
||||
static PyObject *
|
||||
unicode_append(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *left, *right, *left_copy;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OO", &left, &right))
|
||||
return NULL;
|
||||
|
||||
NULLABLE(left);
|
||||
NULLABLE(right);
|
||||
if (!(left_copy = unicode_copy(left)) && left) {
|
||||
return NULL;
|
||||
}
|
||||
PyUnicode_Append(&left_copy, right);
|
||||
return left_copy;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AppendAndDel() */
|
||||
static PyObject *
|
||||
unicode_appendanddel(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *left, *right, *left_copy;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OO", &left, &right))
|
||||
return NULL;
|
||||
|
||||
NULLABLE(left);
|
||||
NULLABLE(right);
|
||||
if (!(left_copy = unicode_copy(left)) && left) {
|
||||
return NULL;
|
||||
}
|
||||
Py_XINCREF(right);
|
||||
PyUnicode_AppendAndDel(&left_copy, right);
|
||||
return left_copy;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_FromStringAndSize() */
|
||||
static PyObject *
|
||||
unicode_fromstringandsize(PyObject *self, PyObject *args)
|
||||
{
|
||||
const char *s;
|
||||
Py_ssize_t bsize;
|
||||
Py_ssize_t size = -100;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (size == -100) {
|
||||
size = bsize;
|
||||
}
|
||||
return PyUnicode_FromStringAndSize(s, size);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_FromString() */
|
||||
static PyObject *
|
||||
unicode_fromstring(PyObject *self, PyObject *arg)
|
||||
{
|
||||
const char *s;
|
||||
Py_ssize_t size;
|
||||
|
||||
if (!PyArg_Parse(arg, "z#", &s, &size)) {
|
||||
return NULL;
|
||||
}
|
||||
return PyUnicode_FromString(s);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_FromKindAndData() */
|
||||
static PyObject *
|
||||
unicode_fromkindanddata(PyObject *self, PyObject *args)
|
||||
{
|
||||
int kind;
|
||||
void *buffer;
|
||||
Py_ssize_t bsize;
|
||||
Py_ssize_t size = -100;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "iz#|n", &kind, &buffer, &bsize, &size)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (size == -100) {
|
||||
size = bsize;
|
||||
}
|
||||
if (kind && size % kind) {
|
||||
PyErr_SetString(PyExc_AssertionError,
|
||||
"invalid size in unicode_fromkindanddata()");
|
||||
return NULL;
|
||||
}
|
||||
return PyUnicode_FromKindAndData(kind, buffer, kind ? size / kind : 0);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_Substring() */
|
||||
static PyObject *
|
||||
unicode_substring(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *str;
|
||||
Py_ssize_t start, end;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "Onn", &str, &start, &end)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(str);
|
||||
return PyUnicode_Substring(str, start, end);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_GetLength() */
|
||||
static PyObject *
|
||||
unicode_getlength(PyObject *self, PyObject *arg)
|
||||
{
|
||||
Py_ssize_t result;
|
||||
|
||||
NULLABLE(arg);
|
||||
result = PyUnicode_GetLength(arg);
|
||||
if (result == -1)
|
||||
return NULL;
|
||||
return PyLong_FromSsize_t(result);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_ReadChar() */
|
||||
static PyObject *
|
||||
unicode_readchar(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode;
|
||||
Py_ssize_t index;
|
||||
Py_UCS4 result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "On", &unicode, &index)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(unicode);
|
||||
result = PyUnicode_ReadChar(unicode, index);
|
||||
if (result == (Py_UCS4)-1)
|
||||
return NULL;
|
||||
return PyLong_FromUnsignedLong(result);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_FromObject() */
|
||||
static PyObject *
|
||||
unicode_fromobject(PyObject *self, PyObject *arg)
|
||||
|
@ -110,6 +382,51 @@ unicode_fromobject(PyObject *self, PyObject *arg)
|
|||
return PyUnicode_FromObject(arg);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_InternInPlace() */
|
||||
static PyObject *
|
||||
unicode_interninplace(PyObject *self, PyObject *arg)
|
||||
{
|
||||
NULLABLE(arg);
|
||||
Py_XINCREF(arg);
|
||||
PyUnicode_InternInPlace(&arg);
|
||||
return arg;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_InternFromString() */
|
||||
static PyObject *
|
||||
unicode_internfromstring(PyObject *self, PyObject *arg)
|
||||
{
|
||||
const char *s;
|
||||
Py_ssize_t size;
|
||||
|
||||
if (!PyArg_Parse(arg, "z#", &s, &size)) {
|
||||
return NULL;
|
||||
}
|
||||
return PyUnicode_InternFromString(s);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_FromWideChar() */
|
||||
static PyObject *
|
||||
unicode_fromwidechar(PyObject *self, PyObject *args)
|
||||
{
|
||||
const char *s;
|
||||
Py_ssize_t bsize;
|
||||
Py_ssize_t size = -100;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "z#|n", &s, &bsize, &size)) {
|
||||
return NULL;
|
||||
}
|
||||
if (size == -100) {
|
||||
if (bsize % SIZEOF_WCHAR_T) {
|
||||
PyErr_SetString(PyExc_AssertionError,
|
||||
"invalid size in unicode_fromwidechar()");
|
||||
return NULL;
|
||||
}
|
||||
size = bsize / SIZEOF_WCHAR_T;
|
||||
}
|
||||
return PyUnicode_FromWideChar((const wchar_t *)s, size);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsWideChar() */
|
||||
static PyObject *
|
||||
unicode_aswidechar(PyObject *self, PyObject *args)
|
||||
|
@ -118,8 +435,9 @@ unicode_aswidechar(PyObject *self, PyObject *args)
|
|||
Py_ssize_t buflen, size;
|
||||
wchar_t *buffer;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "Un", &unicode, &buflen))
|
||||
if (!PyArg_ParseTuple(args, "On", &unicode, &buflen))
|
||||
return NULL;
|
||||
NULLABLE(unicode);
|
||||
buffer = PyMem_New(wchar_t, buflen);
|
||||
if (buffer == NULL)
|
||||
return PyErr_NoMemory();
|
||||
|
@ -142,17 +460,35 @@ unicode_aswidechar(PyObject *self, PyObject *args)
|
|||
return Py_BuildValue("(Nn)", result, size);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsWideCharString() with NULL as buffer */
|
||||
static PyObject *
|
||||
unicode_aswidechar_null(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode;
|
||||
Py_ssize_t buflen, size;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "On", &unicode, &buflen))
|
||||
return NULL;
|
||||
NULLABLE(unicode);
|
||||
size = PyUnicode_AsWideChar(unicode, NULL, buflen);
|
||||
if (size == -1) {
|
||||
return NULL;
|
||||
}
|
||||
return PyLong_FromSsize_t(size);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsWideCharString() */
|
||||
static PyObject *
|
||||
unicode_aswidecharstring(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode, *result;
|
||||
Py_ssize_t size;
|
||||
Py_ssize_t size = 100;
|
||||
wchar_t *buffer;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "U", &unicode))
|
||||
if (!PyArg_ParseTuple(args, "O", &unicode))
|
||||
return NULL;
|
||||
|
||||
NULLABLE(unicode);
|
||||
buffer = PyUnicode_AsWideCharString(unicode, &size);
|
||||
if (buffer == NULL)
|
||||
return NULL;
|
||||
|
@ -164,6 +500,28 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
|
|||
return Py_BuildValue("(Nn)", result, size);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsWideCharString() with NULL as the size address */
|
||||
static PyObject *
|
||||
unicode_aswidecharstring_null(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode, *result;
|
||||
wchar_t *buffer;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O", &unicode))
|
||||
return NULL;
|
||||
|
||||
NULLABLE(unicode);
|
||||
buffer = PyUnicode_AsWideCharString(unicode, NULL);
|
||||
if (buffer == NULL)
|
||||
return NULL;
|
||||
|
||||
result = PyUnicode_FromWideChar(buffer, -1);
|
||||
PyMem_Free(buffer);
|
||||
if (result == NULL)
|
||||
return NULL;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsUCS4() */
|
||||
static PyObject *
|
||||
unicode_asucs4(PyObject *self, PyObject *args)
|
||||
|
@ -173,10 +531,11 @@ unicode_asucs4(PyObject *self, PyObject *args)
|
|||
int copy_null;
|
||||
Py_ssize_t str_len, buf_len;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "Unp:unicode_asucs4", &unicode, &str_len, ©_null)) {
|
||||
if (!PyArg_ParseTuple(args, "Onp:unicode_asucs4", &unicode, &str_len, ©_null)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(unicode);
|
||||
buf_len = str_len + 1;
|
||||
buffer = PyMem_NEW(Py_UCS4, buf_len);
|
||||
if (buffer == NULL) {
|
||||
|
@ -195,48 +554,117 @@ unicode_asucs4(PyObject *self, PyObject *args)
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsUCS4Copy() */
|
||||
static PyObject *
|
||||
unicode_asucs4copy(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode;
|
||||
Py_UCS4 *buffer;
|
||||
PyObject *result;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O", &unicode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(unicode);
|
||||
buffer = PyUnicode_AsUCS4Copy(unicode);
|
||||
if (buffer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
|
||||
buffer,
|
||||
PyUnicode_GET_LENGTH(unicode) + 1);
|
||||
PyMem_FREE(buffer);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Test PyUnicode_FromOrdinal() */
|
||||
static PyObject *
|
||||
unicode_fromordinal(PyObject *self, PyObject *args)
|
||||
{
|
||||
int ordinal;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "i", &ordinal))
|
||||
return NULL;
|
||||
|
||||
return PyUnicode_FromOrdinal(ordinal);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsUTF8() */
|
||||
static PyObject *
|
||||
unicode_asutf8(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode;
|
||||
const char *buffer;
|
||||
Py_ssize_t buflen;
|
||||
const char *s;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "U", &unicode)) {
|
||||
if (!PyArg_ParseTuple(args, "On", &unicode, &buflen))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffer = PyUnicode_AsUTF8(unicode);
|
||||
if (buffer == NULL) {
|
||||
NULLABLE(unicode);
|
||||
s = PyUnicode_AsUTF8(unicode);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return PyBytes_FromString(buffer);
|
||||
return PyBytes_FromStringAndSize(s, buflen);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsUTF8AndSize() */
|
||||
static PyObject *
|
||||
unicode_asutf8andsize(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode, *result;
|
||||
const char *buffer;
|
||||
Py_ssize_t utf8_len;
|
||||
PyObject *unicode;
|
||||
Py_ssize_t buflen;
|
||||
const char *s;
|
||||
Py_ssize_t size = -100;
|
||||
|
||||
if(!PyArg_ParseTuple(args, "U", &unicode)) {
|
||||
if (!PyArg_ParseTuple(args, "On", &unicode, &buflen))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buffer = PyUnicode_AsUTF8AndSize(unicode, &utf8_len);
|
||||
if (buffer == NULL) {
|
||||
NULLABLE(unicode);
|
||||
s = PyUnicode_AsUTF8AndSize(unicode, &size);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = PyBytes_FromString(buffer);
|
||||
if (result == NULL) {
|
||||
return Py_BuildValue("(y#n)", s, buflen, size);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_AsUTF8AndSize() with NULL as the size address */
|
||||
static PyObject *
|
||||
unicode_asutf8andsize_null(PyObject *self, PyObject *args)
|
||||
{
|
||||
PyObject *unicode;
|
||||
Py_ssize_t buflen;
|
||||
const char *s;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "On", &unicode, &buflen))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return Py_BuildValue("(Nn)", result, utf8_len);
|
||||
NULLABLE(unicode);
|
||||
s = PyUnicode_AsUTF8AndSize(unicode, NULL);
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
|
||||
return PyBytes_FromStringAndSize(s, buflen);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_GetDefaultEncoding() */
|
||||
static PyObject *
|
||||
unicode_getdefaultencoding(PyObject *self, PyObject *Py_UNUSED(ignored))
|
||||
{
|
||||
const char *s = PyUnicode_GetDefaultEncoding();
|
||||
if (s == NULL)
|
||||
return NULL;
|
||||
|
||||
return PyBytes_FromString(s);
|
||||
}
|
||||
|
||||
/* Test _PyUnicode_TransformDecimalAndSpaceToASCII() */
|
||||
static PyObject *
|
||||
unicode_transformdecimalandspacetoascii(PyObject *self, PyObject *arg)
|
||||
{
|
||||
NULLABLE(arg);
|
||||
return _PyUnicode_TransformDecimalAndSpaceToASCII(arg);
|
||||
}
|
||||
|
||||
/* Test PyUnicode_DecodeUTF8() */
|
||||
|
@ -470,11 +898,11 @@ unicode_findchar(PyObject *self, PyObject *args)
|
|||
Py_ssize_t result;
|
||||
Py_ssize_t start, end;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "UInni:unicode_findchar", &str, &ch,
|
||||
if (!PyArg_ParseTuple(args, "OInni:unicode_findchar", &str, &ch,
|
||||
&start, &end, &direction)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(str);
|
||||
result = PyUnicode_FindChar(str, (Py_UCS4)ch, start, end, direction);
|
||||
if (result == -2)
|
||||
return NULL;
|
||||
|
@ -612,11 +1040,12 @@ unicode_copycharacters(PyObject *self, PyObject *args)
|
|||
PyObject *from, *to, *to_copy;
|
||||
Py_ssize_t from_start, to_start, how_many, copied;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "UnOnn:unicode_copycharacters", &to, &to_start,
|
||||
if (!PyArg_ParseTuple(args, "UnOnn", &to, &to_start,
|
||||
&from, &from_start, &how_many)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NULLABLE(from);
|
||||
if (!(to_copy = PyUnicode_New(PyUnicode_GET_LENGTH(to),
|
||||
PyUnicode_MAX_CHAR_VALUE(to)))) {
|
||||
return NULL;
|
||||
|
@ -626,8 +1055,9 @@ unicode_copycharacters(PyObject *self, PyObject *args)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if ((copied = PyUnicode_CopyCharacters(to_copy, to_start, from,
|
||||
from_start, how_many)) < 0) {
|
||||
copied = PyUnicode_CopyCharacters(to_copy, to_start, from,
|
||||
from_start, how_many);
|
||||
if (copied == -1 && PyErr_Occurred()) {
|
||||
Py_DECREF(to_copy);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1053,14 +1483,36 @@ static PyMethodDef TestMethods[] = {
|
|||
test_unicode_compare_with_ascii, METH_NOARGS},
|
||||
{"test_string_from_format", test_string_from_format, METH_NOARGS},
|
||||
{"test_widechar", test_widechar, METH_NOARGS},
|
||||
{"unicode_new", unicode_new, METH_VARARGS},
|
||||
{"unicode_fill", unicode_fill, METH_VARARGS},
|
||||
{"unicode_writechar", unicode_writechar, METH_VARARGS},
|
||||
{"unicode_resize", unicode_resize, METH_VARARGS},
|
||||
{"unicode_append", unicode_append, METH_VARARGS},
|
||||
{"unicode_appendanddel", unicode_appendanddel, METH_VARARGS},
|
||||
{"unicode_fromstringandsize",unicode_fromstringandsize, METH_VARARGS},
|
||||
{"unicode_fromstring", unicode_fromstring, METH_O},
|
||||
{"unicode_fromkindanddata", unicode_fromkindanddata, METH_VARARGS},
|
||||
{"unicode_substring", unicode_substring, METH_VARARGS},
|
||||
{"unicode_getlength", unicode_getlength, METH_O},
|
||||
{"unicode_readchar", unicode_readchar, METH_VARARGS},
|
||||
{"unicode_fromobject", unicode_fromobject, METH_O},
|
||||
{"unicode_interninplace", unicode_interninplace, METH_O},
|
||||
{"unicode_internfromstring", unicode_internfromstring, METH_O},
|
||||
{"unicode_fromwidechar", unicode_fromwidechar, METH_VARARGS},
|
||||
{"unicode_aswidechar", unicode_aswidechar, METH_VARARGS},
|
||||
{"unicode_aswidechar_null", unicode_aswidechar_null, METH_VARARGS},
|
||||
{"unicode_aswidecharstring", unicode_aswidecharstring, METH_VARARGS},
|
||||
{"unicode_aswidecharstring_null",unicode_aswidecharstring_null,METH_VARARGS},
|
||||
{"unicode_asucs4", unicode_asucs4, METH_VARARGS},
|
||||
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
|
||||
{"unicode_fromordinal", unicode_fromordinal, METH_VARARGS},
|
||||
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
|
||||
{"unicode_asutf8andsize", unicode_asutf8andsize, METH_VARARGS},
|
||||
{"unicode_asutf8andsize_null",unicode_asutf8andsize_null, METH_VARARGS},
|
||||
{"unicode_decodeutf8", unicode_decodeutf8, METH_VARARGS},
|
||||
{"unicode_decodeutf8stateful",unicode_decodeutf8stateful, METH_VARARGS},
|
||||
{"unicode_getdefaultencoding",unicode_getdefaultencoding, METH_NOARGS},
|
||||
{"unicode_transformdecimalandspacetoascii", unicode_transformdecimalandspacetoascii, METH_O},
|
||||
{"unicode_concat", unicode_concat, METH_VARARGS},
|
||||
{"unicode_splitlines", unicode_splitlines, METH_VARARGS},
|
||||
{"unicode_split", unicode_split, METH_VARARGS},
|
||||
|
|
|
@ -4247,6 +4247,7 @@ PyInit__testcapi(void)
|
|||
PyModule_AddObject(m, "ULLONG_MAX", PyLong_FromUnsignedLongLong(ULLONG_MAX));
|
||||
PyModule_AddObject(m, "PY_SSIZE_T_MAX", PyLong_FromSsize_t(PY_SSIZE_T_MAX));
|
||||
PyModule_AddObject(m, "PY_SSIZE_T_MIN", PyLong_FromSsize_t(PY_SSIZE_T_MIN));
|
||||
PyModule_AddObject(m, "SIZEOF_WCHAR_T", PyLong_FromSsize_t(sizeof(wchar_t)));
|
||||
PyModule_AddObject(m, "SIZEOF_TIME_T", PyLong_FromSsize_t(sizeof(time_t)));
|
||||
PyModule_AddObject(m, "Py_Version", PyLong_FromUnsignedLong(Py_Version));
|
||||
Py_INCREF(&PyInstanceMethod_Type);
|
||||
|
|
Loading…
Reference in New Issue