diff --git a/Lib/test/test_warnings.py b/Lib/test/test_warnings.py index 3c54c5a2100..52bbaf9a55e 100644 --- a/Lib/test/test_warnings.py +++ b/Lib/test/test_warnings.py @@ -331,6 +331,18 @@ class WarnTests(BaseTest): warning_tests.__name__ = module_name sys.argv = argv + def test_warn_explicit_non_ascii_filename(self): + with original_warnings.catch_warnings(record=True, + module=self.module) as w: + self.module.resetwarnings() + self.module.filterwarnings("always", category=UserWarning) + + self.module.warn_explicit("text", UserWarning, "nonascii\xe9\u20ac", 1) + self.assertEqual(w[-1].filename, "nonascii\xe9\u20ac") + + self.module.warn_explicit("text", UserWarning, "surrogate\udc80", 1) + self.assertEqual(w[-1].filename, "surrogate\udc80") + def test_warn_explicit_type_errors(self): # warn_explicit() should error out gracefully if it is given objects # of the wrong types. diff --git a/Misc/NEWS b/Misc/NEWS index 64325647099..ab0893fd1cd 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -31,6 +31,9 @@ Core and Builtins Library ------- +- Issue #19424: Fix the warnings module to accept filenames containing surrogate + characters. + - Issue #19227: Remove pthread_atfork() handler. The handler was added to solve #18747 but has caused issues. diff --git a/Python/_warnings.c b/Python/_warnings.c index 23b3f5c02d3..cbc64e3b944 100644 --- a/Python/_warnings.c +++ b/Python/_warnings.c @@ -99,7 +99,7 @@ get_default_action(void) /* The item is a borrowed reference. 
*/ -static const char * +static PyObject* get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno, PyObject *module, PyObject **item) { @@ -152,13 +152,12 @@ get_filter(PyObject *category, PyObject *text, Py_ssize_t lineno, return NULL; if (good_msg && is_subclass && good_mod && (ln == 0 || lineno == ln)) - return _PyUnicode_AsString(action); + return action; } action = get_default_action(); - if (action != NULL) { - return _PyUnicode_AsString(action); - } + if (action != NULL) + return action; PyErr_SetString(PyExc_ValueError, MODULE_NAME ".defaultaction not found"); @@ -192,23 +191,26 @@ static PyObject * normalize_module(PyObject *filename) { PyObject *module; - const char *mod_str; + int kind; + void *data; Py_ssize_t len; - int rc = PyObject_IsTrue(filename); - if (rc == -1) - return NULL; - else if (rc == 0) - return PyUnicode_FromString(""); - - mod_str = _PyUnicode_AsString(filename); - if (mod_str == NULL) - return NULL; len = PyUnicode_GetLength(filename); if (len < 0) return NULL; + + if (len == 0) + return PyUnicode_FromString(""); + + kind = PyUnicode_KIND(filename); + data = PyUnicode_DATA(filename); + + /* if filename.endswith(".py"): */ if (len >= 3 && - strncmp(mod_str + (len - 3), ".py", 3) == 0) { + PyUnicode_READ(kind, data, len-3) == '.' 
&& + PyUnicode_READ(kind, data, len-2) == 'p' && + PyUnicode_READ(kind, data, len-1) == 'y') + { module = PyUnicode_Substring(filename, 0, len-3); } else { @@ -273,19 +275,37 @@ show_warning(PyObject *filename, int lineno, PyObject *text, PyObject /* Print " source_line\n" */ if (sourceline) { - char *source_line_str = _PyUnicode_AsString(sourceline); - if (source_line_str == NULL) - return; - while (*source_line_str == ' ' || *source_line_str == '\t' || - *source_line_str == '\014') - source_line_str++; + int kind; + void *data; + Py_ssize_t i, len; + Py_UCS4 ch; + PyObject *truncated; - PyFile_WriteString(source_line_str, f_stderr); + if (PyUnicode_READY(sourceline) < 0) /* PyUnicode_READY() returns 0 on success, -1 on failure */ + goto error; + + kind = PyUnicode_KIND(sourceline); + data = PyUnicode_DATA(sourceline); + len = PyUnicode_GET_LENGTH(sourceline); + for (i=0; i