Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
UnicodeEncodeError.
This commit is contained in:
parent
44588b45d2
commit
13d49ee7d6
|
@ -99,13 +99,39 @@ always available.
|
||||||
|
|
||||||
.. function:: displayhook(value)
|
.. function:: displayhook(value)
|
||||||
|
|
||||||
If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves
|
If *value* is not ``None``, this function prints ``repr(value)`` to
|
||||||
it in ``builtins._``.
|
``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` is
|
||||||
|
not encodable to ``sys.stdout.encoding`` with the ``sys.stdout.errors`` error
|
||||||
|
handler (which is probably ``'strict'``), encode it to
|
||||||
|
``sys.stdout.encoding`` with the ``'backslashreplace'`` error handler.
|
||||||
|
|
||||||
``sys.displayhook`` is called on the result of evaluating an :term:`expression`
|
``sys.displayhook`` is called on the result of evaluating an :term:`expression`
|
||||||
entered in an interactive Python session. The display of these values can be
|
entered in an interactive Python session. The display of these values can be
|
||||||
customized by assigning another one-argument function to ``sys.displayhook``.
|
customized by assigning another one-argument function to ``sys.displayhook``.
|
||||||
|
|
||||||
|
Pseudo-code::
|
||||||
|
|
||||||
|
def displayhook(value):
    # Nothing is printed or stored for None.
    if value is None:
        return
    # Set '_' to None to avoid recursion
    builtins._ = None
    text = repr(value)
    try:
        sys.stdout.write(text)
    except UnicodeEncodeError:
        # repr(value) cannot be encoded with sys.stdout's own error
        # handler: escape the offending characters instead of failing.
        encoded = text.encode(sys.stdout.encoding, 'backslashreplace')
        if hasattr(sys.stdout, 'buffer'):
            # Write the already-encoded bytes to the underlying buffer.
            sys.stdout.buffer.write(encoded)
        else:
            # No binary buffer: decode the escaped bytes back to text,
            # which is now encodable, and write that.
            text = encoded.decode(sys.stdout.encoding, 'strict')
            sys.stdout.write(text)
    sys.stdout.write("\n")
    builtins._ = value
|
||||||
|
|
||||||
|
.. versionchanged:: 3.2
|
||||||
|
Use the ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
|
||||||
|
|
||||||
|
|
||||||
.. function:: excepthook(type, value, traceback)
|
.. function:: excepthook(type, value, traceback)
|
||||||
|
|
||||||
|
|
|
@ -221,6 +221,24 @@ class CmdLineTest(unittest.TestCase):
|
||||||
self.assertIn(path1.encode('ascii'), out)
|
self.assertIn(path1.encode('ascii'), out)
|
||||||
self.assertIn(path2.encode('ascii'), out)
|
self.assertIn(path2.encode('ascii'), out)
|
||||||
|
|
||||||
|
def test_displayhook_unencodable(self):
    # For each stdout encoding, evaluate a string in an interactive
    # interpreter and check that its repr, encoded with the
    # 'backslashreplace' error handler, appears in the output.

    # non-ascii, surrogate, non-BMP printable, non-BMP unprintable
    sample = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
    # The expression is sent as a pure ASCII literal so that it does not
    # depend on the encoding of the child's stdin.
    statement = ascii(sample).encode('ascii') + b"\n"

    for encoding in ('ascii', 'latin1', 'utf8'):
        child_env = dict(os.environ, PYTHONIOENCODING=encoding)
        child = subprocess.Popen([sys.executable, '-i'],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 env=child_env)
        child.stdin.write(statement)
        child.stdin.write(b'exit()\n')
        output = kill_python(child)
        expected = repr(sample).encode(encoding, 'backslashreplace')
        self.assertIn(expected, output)
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test.support.run_unittest(CmdLineTest)
|
test.support.run_unittest(CmdLineTest)
|
||||||
|
|
|
@ -49,6 +49,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
|
||||||
|
UnicodeEncodeError.
|
||||||
|
|
||||||
- Add the "display" and "undisplay" pdb commands.
|
- Add the "display" and "undisplay" pdb commands.
|
||||||
|
|
||||||
- Issue #7245: Add a SIGINT handler in pdb that allows to break a program
|
- Issue #7245: Add a SIGINT handler in pdb that allows to break a program
|
||||||
|
|
|
@ -65,6 +65,68 @@ PySys_SetObject(const char *name, PyObject *v)
|
||||||
return PyDict_SetItemString(sd, name, v);
|
return PyDict_SetItemString(sd, name, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Write repr(o) to sys.stdout using sys.stdout.encoding and 'backslashreplace'
|
||||||
|
error handler. If sys.stdout has a buffer attribute, use
|
||||||
|
sys.stdout.buffer.write(encoded), otherwise redecode the string and use
|
||||||
|
sys.stdout.write(redecoded).
|
||||||
|
|
||||||
|
Helper function for sys_displayhook(). */
|
||||||
|
static int
|
||||||
|
sys_displayhook_unencodable(PyObject *outf, PyObject *o)
|
||||||
|
{
|
||||||
|
PyObject *stdout_encoding = NULL;
|
||||||
|
PyObject *encoded, *escaped_str, *repr_str, *buffer, *result;
|
||||||
|
char *stdout_encoding_str;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
stdout_encoding = PyObject_GetAttrString(outf, "encoding");
|
||||||
|
if (stdout_encoding == NULL)
|
||||||
|
goto error;
|
||||||
|
stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
|
||||||
|
if (stdout_encoding_str == NULL)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
repr_str = PyObject_Repr(o);
|
||||||
|
if (repr_str == NULL)
|
||||||
|
goto error;
|
||||||
|
encoded = PyUnicode_AsEncodedString(repr_str,
|
||||||
|
stdout_encoding_str,
|
||||||
|
"backslashreplace");
|
||||||
|
Py_DECREF(repr_str);
|
||||||
|
if (encoded == NULL)
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
buffer = PyObject_GetAttrString(outf, "buffer");
|
||||||
|
if (buffer) {
|
||||||
|
result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
|
||||||
|
Py_DECREF(buffer);
|
||||||
|
Py_DECREF(encoded);
|
||||||
|
if (result == NULL)
|
||||||
|
goto error;
|
||||||
|
Py_DECREF(result);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyErr_Clear();
|
||||||
|
escaped_str = PyUnicode_FromEncodedObject(encoded,
|
||||||
|
stdout_encoding_str,
|
||||||
|
"strict");
|
||||||
|
Py_DECREF(encoded);
|
||||||
|
if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0) {
|
||||||
|
Py_DECREF(escaped_str);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
Py_DECREF(escaped_str);
|
||||||
|
}
|
||||||
|
ret = 0;
|
||||||
|
goto finally;
|
||||||
|
|
||||||
|
error:
|
||||||
|
ret = -1;
|
||||||
|
finally:
|
||||||
|
Py_XDECREF(stdout_encoding);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
sys_displayhook(PyObject *self, PyObject *o)
|
sys_displayhook(PyObject *self, PyObject *o)
|
||||||
{
|
{
|
||||||
|
@ -72,6 +134,7 @@ sys_displayhook(PyObject *self, PyObject *o)
|
||||||
PyInterpreterState *interp = PyThreadState_GET()->interp;
|
PyInterpreterState *interp = PyThreadState_GET()->interp;
|
||||||
PyObject *modules = interp->modules;
|
PyObject *modules = interp->modules;
|
||||||
PyObject *builtins = PyDict_GetItemString(modules, "builtins");
|
PyObject *builtins = PyDict_GetItemString(modules, "builtins");
|
||||||
|
int err;
|
||||||
|
|
||||||
if (builtins == NULL) {
|
if (builtins == NULL) {
|
||||||
PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
|
PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
|
||||||
|
@ -92,8 +155,19 @@ sys_displayhook(PyObject *self, PyObject *o)
|
||||||
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
|
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (PyFile_WriteObject(o, outf, 0) != 0)
|
if (PyFile_WriteObject(o, outf, 0) != 0) {
|
||||||
return NULL;
|
if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
|
||||||
|
/* repr(o) is not encodable to sys.stdout.encoding with
|
||||||
|
* sys.stdout.errors error handler (which is probably 'strict') */
|
||||||
|
PyErr_Clear();
|
||||||
|
err = sys_displayhook_unencodable(outf, o);
|
||||||
|
if (err)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (PyFile_WriteString("\n", outf) != 0)
|
if (PyFile_WriteString("\n", outf) != 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (PyObject_SetAttrString(builtins, "_", o) != 0)
|
if (PyObject_SetAttrString(builtins, "_", o) != 0)
|
||||||
|
|
Loading…
Reference in New Issue