Issue #10601: sys.displayhook uses 'backslashreplace' error handler on

UnicodeEncodeError.
This commit is contained in:
Victor Stinner 2010-12-04 17:24:33 +00:00
parent 44588b45d2
commit 13d49ee7d6
4 changed files with 125 additions and 4 deletions

View File

@ -99,13 +99,39 @@ always available.
.. function:: displayhook(value) .. function:: displayhook(value)
If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves If *value* is not ``None``, this function prints ``repr(value)`` to
it in ``builtins._``. ``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` is
not encodable to ``sys.stdout.encoding`` with the ``sys.stdout.errors`` error
handler (which is probably ``'strict'``), it is encoded to
``sys.stdout.encoding`` with the ``'backslashreplace'`` error handler instead.
``sys.displayhook`` is called on the result of evaluating an :term:`expression` ``sys.displayhook`` is called on the result of evaluating an :term:`expression`
entered in an interactive Python session. The display of these values can be entered in an interactive Python session. The display of these values can be
customized by assigning another one-argument function to ``sys.displayhook``. customized by assigning another one-argument function to ``sys.displayhook``.
Pseudo-code::
def displayhook(value):
    """Print repr(value) to sys.stdout and remember value in builtins._.

    Nothing is printed or stored when value is None.  If writing the repr
    raises UnicodeEncodeError, the repr is re-encoded to sys.stdout.encoding
    with the 'backslashreplace' error handler and written that way.
    """
    if value is not None:
        # Set '_' to None to avoid recursion
        builtins._ = None
        shown = repr(value)
        try:
            sys.stdout.write(shown)
        except UnicodeEncodeError:
            escaped = shown.encode(sys.stdout.encoding, 'backslashreplace')
            if not hasattr(sys.stdout, 'buffer'):
                # Text-only stream: turn the escaped bytes back into a str.
                shown = escaped.decode(sys.stdout.encoding, 'strict')
                sys.stdout.write(shown)
            else:
                sys.stdout.buffer.write(escaped)
        sys.stdout.write("\n")
        builtins._ = value
.. versionchanged:: 3.2
Use the ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
.. function:: excepthook(type, value, traceback) .. function:: excepthook(type, value, traceback)

View File

@ -221,6 +221,24 @@ class CmdLineTest(unittest.TestCase):
self.assertIn(path1.encode('ascii'), out) self.assertIn(path1.encode('ascii'), out)
self.assertIn(path2.encode('ascii'), out) self.assertIn(path2.encode('ascii'), out)
def test_displayhook_unencodable(self):
    """The interactive displayhook must escape characters stdout cannot encode.

    For several PYTHONIOENCODING values, feed an expression to an
    interactive interpreter and check that its output contains the repr
    encoded with the 'backslashreplace' error handler.
    """
    # non-ascii, surrogate, non-BMP printable, non-BMP unprintable
    text = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
    for encoding in ('ascii', 'latin1', 'utf8'):
        child_env = os.environ.copy()
        child_env['PYTHONIOENCODING'] = encoding
        proc = subprocess.Popen(
            [sys.executable, '-i'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=child_env)
        # Send the string as a pure-ASCII literal so stdin encoding is moot.
        proc.stdin.write(ascii(text).encode('ascii') + b"\n")
        proc.stdin.write(b'exit()\n')
        output = kill_python(proc)
        expected = repr(text).encode(encoding, 'backslashreplace')
        self.assertIn(expected, output)
def test_main(): def test_main():
test.support.run_unittest(CmdLineTest) test.support.run_unittest(CmdLineTest)

View File

@ -49,6 +49,9 @@ Core and Builtins
Library Library
------- -------
- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
UnicodeEncodeError.
- Add the "display" and "undisplay" pdb commands. - Add the "display" and "undisplay" pdb commands.
- Issue #7245: Add a SIGINT handler in pdb that allows to break a program - Issue #7245: Add a SIGINT handler in pdb that allows to break a program

View File

@ -65,6 +65,68 @@ PySys_SetObject(const char *name, PyObject *v)
return PyDict_SetItemString(sd, name, v); return PyDict_SetItemString(sd, name, v);
} }
/* Write repr(o) to outf, escaping the characters that outf cannot encode.

   repr(o) is encoded using outf.encoding and the 'backslashreplace' error
   handler.  If outf has a "buffer" attribute, the encoded bytes are written
   with outf.buffer.write(encoded); otherwise the bytes are decoded again
   with outf.encoding ('strict') and the resulting string is written to outf
   with PyFile_WriteObject().

   Return 0 on success and -1 on failure; on failure the exception raised by
   the failing call is left pending.

   Helper function for sys_displayhook(). */
static int
sys_displayhook_unencodable(PyObject *outf, PyObject *o)
{
    PyObject *stdout_encoding = NULL;
    PyObject *encoded, *escaped_str, *repr_str, *buffer, *result;
    char *stdout_encoding_str;
    int ret;

    stdout_encoding = PyObject_GetAttrString(outf, "encoding");
    if (stdout_encoding == NULL)
        goto error;
    /* The returned pointer is used only while stdout_encoding is alive;
       the reference is released in the "finally" section below. */
    stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
    if (stdout_encoding_str == NULL)
        goto error;

    repr_str = PyObject_Repr(o);
    if (repr_str == NULL)
        goto error;
    encoded = PyUnicode_AsEncodedString(repr_str,
                                        stdout_encoding_str,
                                        "backslashreplace");
    Py_DECREF(repr_str);
    if (encoded == NULL)
        goto error;

    buffer = PyObject_GetAttrString(outf, "buffer");
    if (buffer) {
        result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
        Py_DECREF(buffer);
        Py_DECREF(encoded);
        if (result == NULL)
            goto error;
        Py_DECREF(result);
    }
    else {
        /* No usable "buffer" attribute: discard the lookup error and fall
           back to writing a re-decoded string through outf itself. */
        PyErr_Clear();
        escaped_str = PyUnicode_FromEncodedObject(encoded,
                                                  stdout_encoding_str,
                                                  "strict");
        Py_DECREF(encoded);
        /* Decoding can fail (e.g. out of memory); without this check NULL
           would be passed to PyFile_WriteObject() and Py_DECREF(). */
        if (escaped_str == NULL)
            goto error;
        if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0) {
            Py_DECREF(escaped_str);
            goto error;
        }
        Py_DECREF(escaped_str);
    }
    ret = 0;
    goto finally;

error:
    ret = -1;
finally:
    Py_XDECREF(stdout_encoding);
    return ret;
}
static PyObject * static PyObject *
sys_displayhook(PyObject *self, PyObject *o) sys_displayhook(PyObject *self, PyObject *o)
{ {
@ -72,6 +134,7 @@ sys_displayhook(PyObject *self, PyObject *o)
PyInterpreterState *interp = PyThreadState_GET()->interp; PyInterpreterState *interp = PyThreadState_GET()->interp;
PyObject *modules = interp->modules; PyObject *modules = interp->modules;
PyObject *builtins = PyDict_GetItemString(modules, "builtins"); PyObject *builtins = PyDict_GetItemString(modules, "builtins");
int err;
if (builtins == NULL) { if (builtins == NULL) {
PyErr_SetString(PyExc_RuntimeError, "lost builtins module"); PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
@ -92,8 +155,19 @@ sys_displayhook(PyObject *self, PyObject *o)
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout"); PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
return NULL; return NULL;
} }
if (PyFile_WriteObject(o, outf, 0) != 0) if (PyFile_WriteObject(o, outf, 0) != 0) {
if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
/* repr(o) is not encodable to sys.stdout.encoding with
* sys.stdout.errors error handler (which is probably 'strict') */
PyErr_Clear();
err = sys_displayhook_unencodable(outf, o);
if (err)
return NULL; return NULL;
}
else {
return NULL;
}
}
if (PyFile_WriteString("\n", outf) != 0) if (PyFile_WriteString("\n", outf) != 0)
return NULL; return NULL;
if (PyObject_SetAttrString(builtins, "_", o) != 0) if (PyObject_SetAttrString(builtins, "_", o) != 0)