Issue #10601: sys.displayhook uses 'backslashreplace' error handler on

UnicodeEncodeError.
This commit is contained in:
Victor Stinner 2010-12-04 17:24:33 +00:00
parent 44588b45d2
commit 13d49ee7d6
4 changed files with 125 additions and 4 deletions

View File

@ -99,13 +99,39 @@ always available.
.. function:: displayhook(value)
If *value* is not ``None``, this function prints it to ``sys.stdout``, and saves
it in ``builtins._``.
If *value* is not ``None``, this function prints ``repr(value)`` to
``sys.stdout``, and saves *value* in ``builtins._``. If ``repr(value)`` cannot
be encoded to ``sys.stdout.encoding`` with the ``sys.stdout.errors`` error
handler (which is probably ``'strict'``), it is encoded to
``sys.stdout.encoding`` with the ``'backslashreplace'`` error handler instead.
``sys.displayhook`` is called on the result of evaluating an :term:`expression`
entered in an interactive Python session. The display of these values can be
customized by assigning another one-argument function to ``sys.displayhook``.
Pseudo-code::
def displayhook(value):
    # Nothing is printed or stored for None.
    if value is None:
        return
    # Set '_' to None to avoid recursion
    builtins._ = None
    text = repr(value)
    try:
        sys.stdout.write(text)
    except UnicodeEncodeError:
        # repr(value) cannot be encoded with sys.stdout's own error handler:
        # escape the offending characters instead of failing.
        encoded = text.encode(sys.stdout.encoding, 'backslashreplace')
        if hasattr(sys.stdout, 'buffer'):
            # Write the already-encoded bytes to the underlying binary stream.
            sys.stdout.buffer.write(encoded)
        else:
            # No binary layer: decode the escaped bytes back to text, which
            # is now encodable, and write it through the text stream.
            text = encoded.decode(sys.stdout.encoding, 'strict')
            sys.stdout.write(text)
    sys.stdout.write("\n")
    builtins._ = value
.. versionchanged:: 3.2
Use the ``'backslashreplace'`` error handler on :exc:`UnicodeEncodeError`.
.. function:: excepthook(type, value, traceback)

View File

@ -221,6 +221,24 @@ class CmdLineTest(unittest.TestCase):
self.assertIn(path1.encode('ascii'), out)
self.assertIn(path2.encode('ascii'), out)
def test_displayhook_unencodable(self):
    # Start an interactive interpreter once per stdout encoding, evaluate a
    # string literal at the prompt and check that the echoed repr appears in
    # the output, escaped with 'backslashreplace' where needed.
    # Sample: non-ascii, surrogate, non-BMP printable, non-BMP unprintable
    sample = "a=\xe9 b=\uDC80 c=\U00010000 d=\U0010FFFF"
    # The expression is sent as a pure-ASCII literal so that stdin decoding
    # does not depend on the encoding under test.
    statement = ascii(sample).encode('ascii') + b"\n"
    for encoding in ('ascii', 'latin1', 'utf8'):
        child_env = dict(os.environ, PYTHONIOENCODING=encoding)
        child = subprocess.Popen(
            [sys.executable, '-i'],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=child_env)
        child.stdin.write(statement)
        child.stdin.write(b'exit()\n')
        output = kill_python(child)
        expected = repr(sample).encode(encoding, 'backslashreplace')
        self.assertIn(expected, output)
def test_main():
test.support.run_unittest(CmdLineTest)

View File

@ -49,6 +49,9 @@ Core and Builtins
Library
-------
- Issue #10601: sys.displayhook uses 'backslashreplace' error handler on
UnicodeEncodeError.
- Add the "display" and "undisplay" pdb commands.
- Issue #7245: Add a SIGINT handler in pdb that allows to break a program

View File

@ -65,6 +65,68 @@ PySys_SetObject(const char *name, PyObject *v)
return PyDict_SetItemString(sd, name, v);
}
/* Write repr(o) to sys.stdout using sys.stdout.encoding and the
   'backslashreplace' error handler. If sys.stdout has a buffer attribute, use
   sys.stdout.buffer.write(encoded), otherwise redecode the string and use
   sys.stdout.write(redecoded).

   outf is the sys.stdout object and o is the object to display.
   Return 0 on success, -1 on failure.

   Helper function for sys_displayhook(). */
static int
sys_displayhook_unencodable(PyObject *outf, PyObject *o)
{
    PyObject *stdout_encoding = NULL;
    PyObject *encoded, *escaped_str, *repr_str, *buffer, *result;
    char *stdout_encoding_str;
    int ret;

    stdout_encoding = PyObject_GetAttrString(outf, "encoding");
    if (stdout_encoding == NULL)
        goto error;
    /* stdout_encoding_str is used until the end of the function, so the
       reference to stdout_encoding is only released in the finally block. */
    stdout_encoding_str = _PyUnicode_AsString(stdout_encoding);
    if (stdout_encoding_str == NULL)
        goto error;

    repr_str = PyObject_Repr(o);
    if (repr_str == NULL)
        goto error;
    encoded = PyUnicode_AsEncodedString(repr_str,
                                        stdout_encoding_str,
                                        "backslashreplace");
    Py_DECREF(repr_str);
    if (encoded == NULL)
        goto error;

    buffer = PyObject_GetAttrString(outf, "buffer");
    if (buffer) {
        /* Binary layer available: write the encoded bytes directly. */
        result = PyObject_CallMethod(buffer, "write", "(O)", encoded);
        Py_DECREF(buffer);
        Py_DECREF(encoded);
        if (result == NULL)
            goto error;
        Py_DECREF(result);
    }
    else {
        /* No buffer attribute: discard the lookup error and fall back to
           writing the escaped text through outf itself. */
        PyErr_Clear();
        escaped_str = PyUnicode_FromEncodedObject(encoded,
                                                  stdout_encoding_str,
                                                  "strict");
        Py_DECREF(encoded);
        /* Decoding can fail; bail out before the NULL result reaches
           PyFile_WriteObject() and Py_DECREF(). */
        if (escaped_str == NULL)
            goto error;
        if (PyFile_WriteObject(escaped_str, outf, Py_PRINT_RAW) != 0) {
            Py_DECREF(escaped_str);
            goto error;
        }
        Py_DECREF(escaped_str);
    }
    ret = 0;
    goto finally;

error:
    ret = -1;
finally:
    Py_XDECREF(stdout_encoding);
    return ret;
}
static PyObject *
sys_displayhook(PyObject *self, PyObject *o)
{
@ -72,6 +134,7 @@ sys_displayhook(PyObject *self, PyObject *o)
PyInterpreterState *interp = PyThreadState_GET()->interp;
PyObject *modules = interp->modules;
PyObject *builtins = PyDict_GetItemString(modules, "builtins");
int err;
if (builtins == NULL) {
PyErr_SetString(PyExc_RuntimeError, "lost builtins module");
@ -92,8 +155,19 @@ sys_displayhook(PyObject *self, PyObject *o)
PyErr_SetString(PyExc_RuntimeError, "lost sys.stdout");
return NULL;
}
if (PyFile_WriteObject(o, outf, 0) != 0)
return NULL;
if (PyFile_WriteObject(o, outf, 0) != 0) {
if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) {
/* repr(o) is not encodable to sys.stdout.encoding with
* sys.stdout.errors error handler (which is probably 'strict') */
PyErr_Clear();
err = sys_displayhook_unencodable(outf, o);
if (err)
return NULL;
}
else {
return NULL;
}
}
if (PyFile_WriteString("\n", outf) != 0)
return NULL;
if (PyObject_SetAttrString(builtins, "_", o) != 0)