libpython.py: fix support of non-BMP unicode characters
Forward port some code from Python3: * join surrogate pairs if sizeof(Py_UNICODE)==2 * Enable non-BMP test on narrow builds using u"\U0001D121" instead of unichr(0x1D121)
This commit is contained in:
parent
8f692275e9
commit
b1556c537d
|
@ -243,14 +243,8 @@ class PrettyPrintTests(DebuggerTests):
|
|||
# This is:
|
||||
# UTF-8: 0xF0 0x9D 0x84 0xA1
|
||||
# UTF-16: 0xD834 0xDD21
|
||||
try:
|
||||
# This will only work on wide-unicode builds:
|
||||
self.assertGdbRepr(unichr(0x1D121))
|
||||
except ValueError, e:
|
||||
# We're probably on a narrow-unicode build; if we're seeing a
|
||||
# different problem, then re-raise it:
|
||||
if e.args != ('unichr() arg not in range(0x10000) (narrow Python build)',):
|
||||
raise e
|
||||
# The literal works on both narrow- and wide-unicode builds (no unichr() call):
|
||||
self.assertGdbRepr(u"\U0001D121")
|
||||
|
||||
def test_sets(self):
|
||||
'Verify the pretty-printing of sets'
|
||||
|
|
|
@ -1013,6 +1013,10 @@ class PyTypeObjectPtr(PyObjectPtr):
|
|||
class PyUnicodeObjectPtr(PyObjectPtr):
|
||||
_typename = 'PyUnicodeObject'
|
||||
|
||||
def char_width(self):
    """Return the size reported by gdb for the 'Py_UNICODE' type.

    The value is the `sizeof` attribute of the type object returned by
    gdb.lookup_type('Py_UNICODE').
    """
    return gdb.lookup_type('Py_UNICODE').sizeof
|
||||
|
||||
def proxyval(self, visited):
|
||||
# From unicodeobject.h:
|
||||
# Py_ssize_t length; /* Length of raw Unicode data in buffer */
|
||||
|
@ -1029,6 +1033,30 @@ class PyUnicodeObjectPtr(PyObjectPtr):
|
|||
result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
|
||||
return result
|
||||
|
||||
def write_repr(self, out, visited):
    """Write the repr() of this unicode string's proxy value to *out*.

    When char_width() reports 2, adjacent high/low surrogate units in the
    proxy value are joined into a single code point before repr() is
    taken.

    out -- object whose write() method receives the repr text
    visited -- passed through unchanged to proxyval()
    """
    proxy = self.proxyval(visited)
    if self.char_width() == 2:
        # sizeof(Py_UNICODE)==2: join surrogates
        joined = []
        length = len(proxy)
        i = 0
        while i < length:
            ch = proxy[i]
            i += 1
            if (i < length
                    and 0xD800 <= ord(ch) < 0xDC00
                    and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                # Get code point from surrogate pair
                ch2 = proxy[i]
                i += 1
                code = (ord(ch) & 0x03FF) << 10
                code |= ord(ch2) & 0x03FF
                code += 0x00010000
                try:
                    ch = unichr(code)
                except ValueError:
                    # unichr() rejects values >= 0x10000 when the Python
                    # running this script is a narrow-unicode build; keep
                    # the two surrogate units instead of failing.
                    ch = ch + ch2
            joined.append(ch)
        proxy = u''.join(joined)
    out.write(repr(proxy))
|
||||
|
||||
|
||||
def int_from_int(gdbval):
    """Return *gdbval* as a Python integer, parsed from its str() form."""
    text = str(gdbval)
    return int(text)
|
||||
|
|
Loading…
Reference in New Issue