diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py index 15d8034c172..e5d1222608e 100644 --- a/Lib/test/test_gdb.py +++ b/Lib/test/test_gdb.py @@ -8,6 +8,7 @@ import re import subprocess import sys import unittest +import locale from test.support import run_unittest, findfile @@ -177,7 +178,7 @@ class PrettyPrintTests(DebuggerTests): def assertGdbRepr(self, val, exp_repr=None, cmds_after_breakpoint=None): # Ensure that gdb's rendering of the value in a debugged process # matches repr(value) in this process: - gdb_repr, gdb_output = self.get_gdb_repr('id(' + repr(val) + ')', + gdb_repr, gdb_output = self.get_gdb_repr('id(' + ascii(val) + ')', cmds_after_breakpoint) if not exp_repr: exp_repr = repr(val) @@ -226,31 +227,35 @@ class PrettyPrintTests(DebuggerTests): def test_strings(self): 'Verify the pretty-printing of unicode strings' + encoding = locale.getpreferredencoding() + def check_repr(text): + try: + text.encode(encoding) + printable = True + except UnicodeEncodeError: + self.assertGdbRepr(text, ascii(text)) + else: + self.assertGdbRepr(text) + self.assertGdbRepr('') self.assertGdbRepr('And now for something hopefully the same') self.assertGdbRepr('string with embedded NUL here \0 and then some more text') # Test printing a single character: # U+2620 SKULL AND CROSSBONES - self.assertGdbRepr('\u2620') + check_repr('\u2620') # Test printing a Japanese unicode string # (I believe this reads "mojibake", using 3 characters from the CJK # Unified Ideographs area, followed by U+3051 HIRAGANA LETTER KE) - self.assertGdbRepr('\u6587\u5b57\u5316\u3051') + check_repr('\u6587\u5b57\u5316\u3051') # Test a character outside the BMP: # U+1D121 MUSICAL SYMBOL C CLEF # This is: # UTF-8: 0xF0 0x9D 0x84 0xA1 # UTF-16: 0xD834 0xDD21 - if sys.maxunicode == 0x10FFFF: - # wide unicode: - self.assertGdbRepr(chr(0x1D121)) - else: - # narrow unicode: - self.assertGdbRepr(chr(0x1D121), - "'\\U0000d834\\U0000dd21'") + check_repr(chr(0x1D121)) def test_tuples(self): 'Verify the pretty-printing of tuples' diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 426fb7ba63b..6ed608721f2 100644 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -42,6 +42,7 @@ The module also extends gdb with some python-specific commands. ''' from __future__ import with_statement import gdb +import locale # Look up the gdb.Type for some standard types: _type_char_ptr = gdb.lookup_type('char').pointer() # char* @@ -69,6 +70,7 @@ MAX_OUTPUT_LEN=1024 hexdigits = "0123456789abcdef" +ENCODING = locale.getpreferredencoding() class NullPyObjectPtr(RuntimeError): pass @@ -1128,53 +1130,68 @@ class PyUnicodeObjectPtr(PyObjectPtr): # Non-ASCII characters else: - ucs = ch; - - if self.char_width == 2: - ch2 = 0 + ucs = ch + orig_ucs = None + if self.char_width() == 2: # Get code point from surrogate pair - if i < len(proxy): + if (i < len(proxy) + and 0xD800 <= ord(ch) < 0xDC00 \ + and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): ch2 = proxy[i] - if (ord(ch) >= 0xD800 and ord(ch) < 0xDC00 - and ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF): - ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; - i += 1 + code = (ord(ch) & 0x03FF) << 10 + code |= ord(ch2) & 0x03FF + code += 0x00010000 + orig_ucs = ucs + ucs = unichr(code) + i += 1 + else: + ch2 = None + + printable = _unichr_is_printable(ucs) + if printable: + try: + ucs.encode(ENCODING) + except UnicodeEncodeError: + printable = False + if orig_ucs is not None: + ucs = orig_ucs + i -= 1 # Map Unicode whitespace and control characters # (categories Z* and C* except ASCII space) - if not _unichr_is_printable(ucs): + if not printable: # Unfortuately, Python 2's unicode type doesn't seem # to expose the "isprintable" method + code = ord(ucs) # Map 8-bit characters to '\\xhh' - if ucs <= 0xff: + if code <= 0xff: out.write('\\x') - out.write(hexdigits[(ord(ucs) >> 4) & 0x000F]) - out.write(hexdigits[ord(ucs) & 0x000F]) + out.write(hexdigits[(code >> 4) & 0x000F]) + out.write(hexdigits[code & 0x000F]) # Map 21-bit characters to '\U00xxxxxx' - elif ucs >= 0x10000: + elif code >= 0x10000: out.write('\\U') - out.write(hexdigits[(ord(ucs) >> 28) & 0x0000000F]) - out.write(hexdigits[(ord(ucs) >> 24) & 0x0000000F]) - out.write(hexdigits[(ord(ucs) >> 20) & 0x0000000F]) - out.write(hexdigits[(ord(ucs) >> 16) & 0x0000000F]) - out.write(hexdigits[(ord(ucs) >> 12) & 0x0000000F]) - out.write(hexdigits[(ord(ucs) >> 8) & 0x0000000F]) - out.write(hexdigits[(ord(ucs) >> 4) & 0x0000000F]) - out.write(hexdigits[ord(ucs) & 0x0000000F]) + out.write(hexdigits[(code >> 28) & 0x0000000F]) + out.write(hexdigits[(code >> 24) & 0x0000000F]) + out.write(hexdigits[(code >> 20) & 0x0000000F]) + out.write(hexdigits[(code >> 16) & 0x0000000F]) + out.write(hexdigits[(code >> 12) & 0x0000000F]) + out.write(hexdigits[(code >> 8) & 0x0000000F]) + out.write(hexdigits[(code >> 4) & 0x0000000F]) + out.write(hexdigits[code & 0x0000000F]) # Map 16-bit characters to '\uxxxx' else: out.write('\\u') - out.write(hexdigits[(ord(ucs) >> 12) & 0x000F]) - out.write(hexdigits[(ord(ucs) >> 8) & 0x000F]) - out.write(hexdigits[(ord(ucs) >> 4) & 0x000F]) - out.write(hexdigits[ord(ucs) & 0x000F]) + out.write(hexdigits[(code >> 12) & 0x000F]) + out.write(hexdigits[(code >> 8) & 0x000F]) + out.write(hexdigits[(code >> 4) & 0x000F]) + out.write(hexdigits[code & 0x000F]) else: # Copy characters as-is out.write(ch) - if self.char_width == 2: - if ord(ucs) >= 0x10000: - out.write(ch2) + if self.char_width() == 2 and (ch2 is not None): + out.write(ch2) out.write(quote)