diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
index 3f69e5e8935..c44e2b1d110 100644
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@@ -6,6 +6,7 @@ import sys
 import unittest
 import re
 from test.support import run_unittest, is_jython, Error, captured_output
+from test.support import TESTFN, unlink
 
 import traceback
 
@@ -90,6 +91,70 @@ class SyntaxTracebackCases(unittest.TestCase):
         err = traceback.format_exception_only(None, None)
         self.assertEqual(err, ['None\n'])
 
+    def test_encoded_file(self):
+        # Test that tracebacks are correctly printed for encoded source files:
+        # - correct line number (Issue2384)
+        # - respect file encoding (Issue3975)
+        import tempfile, sys, subprocess, os
+
+        # The spawned subprocess has its stdout redirected to a PIPE, and its
+        # encoding may be different from the current interpreter, on Windows
+        # at least.
+        process = subprocess.Popen([sys.executable, "-c",
+                                    "import sys; print(sys.stdout.encoding)"],
+                                   stdout=subprocess.PIPE,
+                                   stderr=subprocess.STDOUT)
+        stdout, stderr = process.communicate()
+        output_encoding = str(stdout, 'ascii').splitlines()[0]
+
+        def do_test(firstlines, message, charset, lineno):
+            # Raise the message in a subprocess, and catch the output
+            try:
+                output = open(TESTFN, "w", encoding=charset)
+                output.write("""{0}if 1:
+                    import traceback;
+                    raise RuntimeError('{1}')
+                    """.format(firstlines, message))
+                output.close()
+                process = subprocess.Popen([sys.executable, TESTFN],
+                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+                stdout, stderr = process.communicate()
+                stdout = stdout.decode(output_encoding).splitlines()
+            finally:
+                unlink(TESTFN)
+
+            # The source lines are encoded with the 'backslashreplace' handler
+            encoded_message = message.encode(output_encoding,
+                                             'backslashreplace')
+            # and we just decoded them with the output_encoding.
+            message_ascii = encoded_message.decode(output_encoding)
+
+            err_line = "raise RuntimeError('{0}')".format(message_ascii)
+            err_msg = "RuntimeError: {0}".format(message_ascii)
+
+            self.assert_(("line %s" % lineno) in stdout[1],
+                "Invalid line number: {0!r} instead of {1}".format(
+                    stdout[1], lineno))
+            self.assert_(stdout[2].endswith(err_line),
+                "Invalid traceback line: {0!r} instead of {1!r}".format(
+                    stdout[2], err_line))
+            self.assert_(stdout[3] == err_msg,
+                "Invalid error message: {0!r} instead of {1!r}".format(
+                    stdout[3], err_msg))
+
+        do_test("", "foo", "ascii", 3)
+        for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
+            if charset == "ascii":
+                text = "foo"
+            elif charset == "GBK":
+                text = "\u4E02\u5100"
+            else:
+                text = "h\xe9 ho"
+            do_test("# coding: {0}\n".format(charset),
+                    text, charset, 4)
+            do_test("#!shebang\n# coding: {0}\n".format(charset),
+                    text, charset, 5)
+
 
 class TracebackFormatTests(unittest.TestCase):
 
diff --git a/Misc/NEWS b/Misc/NEWS
index 2505bfa56a7..7bad53ce84a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,6 +15,10 @@ What's New in Python 3.0 beta 5
 Core and Builtins
 -----------------
 
+- Issues #2384 and #3975: Tracebacks were not correctly printed when the
+  source file contains a ``coding:`` header: the wrong line was displayed, and
+  the encoding was not respected.
+
 - Issue #3740: Null-initialize module state.
 
 - Issue #3946: PyObject_CheckReadBuffer crashed on a memoryview object.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 18815aef452..4edf6d07e44 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -461,6 +461,14 @@ fp_setreadl(struct tok_state *tok, const char* enc)
 	readline = PyObject_GetAttrString(stream, "readline");
 	tok->decoding_readline = readline;
 
+	/* The file has been reopened; parsing will restart from
+	 * the beginning of the file, we have to reset the line number.
+	 * But this function has been called from inside tok_nextc() which
+	 * will increment lineno before it returns. So we set it -1 so that
+	 * the next call to tok_nextc() will start with tok->lineno == 0.
+	 */
+	tok->lineno = -1;
+
   cleanup:
 	Py_XDECREF(stream);
 	Py_XDECREF(io);
diff --git a/Python/traceback.c b/Python/traceback.c
index dffce35b445..63ecc3cb16e 100644
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -8,9 +8,15 @@
 #include "structmember.h"
 #include "osdefs.h"
 #include "traceback.h"
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
 
 #define OFF(x) offsetof(PyTracebackObject, x)
 
+/* Method from Parser/tokenizer.c */
+extern char * PyTokenizer_FindEncoding(int);
+
 static PyObject *
 tb_dir(PyTracebackObject *self)
 {
@@ -128,102 +134,156 @@ PyTraceBack_Here(PyFrameObject *frame)
 	return 0;
 }
 
+static int
+_Py_FindSourceFile(const char* filename, char* namebuf, size_t namelen, int open_flags)
+{
+	int i;
+	int fd = -1;
+	PyObject *v;
+	Py_ssize_t _npath;
+	int npath;
+	size_t taillen;
+	PyObject *syspath;
+	const char* path;
+	const char* tail;
+	Py_ssize_t len;
+
+	/* Search tail of filename in sys.path before giving up */
+	tail = strrchr(filename, SEP);
+	if (tail == NULL)
+		tail = filename;
+	else
+		tail++;
+	taillen = strlen(tail);
+
+	syspath = PySys_GetObject("path");
+	if (syspath == NULL || !PyList_Check(syspath))
+		return -1;
+	_npath = PyList_Size(syspath);
+	npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
+
+	for (i = 0; i < npath; i++) {
+		v = PyList_GetItem(syspath, i);
+		if (v == NULL) {
+			PyErr_Clear();
+			break;
+		}
+		if (!PyUnicode_Check(v))
+			continue;
+		path = _PyUnicode_AsStringAndSize(v, &len);
+		if (len + 1 + taillen >= (Py_ssize_t)namelen - 1)
+			continue; /* Too long */
+		strcpy(namebuf, path);
+		if (strlen(namebuf) != len)
+			continue; /* v contains '\0' */
+		if (len > 0 && namebuf[len-1] != SEP)
+			namebuf[len++] = SEP;
+		strcpy(namebuf+len, tail);
+		Py_BEGIN_ALLOW_THREADS
+		fd = open(namebuf, open_flags);
+		Py_END_ALLOW_THREADS
+		if (0 <= fd) {
+			return fd;
+		}
+	}
+	return -1;
+}
+
 int
 _Py_DisplaySourceLine(PyObject *f, const char *filename, int lineno, int indent)
 {
 	int err = 0;
-	FILE *xfp = NULL;
-	char linebuf[2000];
+	int fd;
 	int i;
-	char namebuf[MAXPATHLEN+1];
+	char *found_encoding;
+	char *encoding;
+	PyObject *fob = NULL;
+	PyObject *lineobj = NULL;
+#ifdef O_BINARY
+	const int open_flags = O_RDONLY | O_BINARY;   /* necessary for Windows */
+#else
+	const int open_flags = O_RDONLY;
+#endif
+	char buf[MAXPATHLEN+1];
+	Py_UNICODE *u, *p;
+	Py_ssize_t len;
 
+	/* open the file */
 	if (filename == NULL)
-		return -1;
-	xfp = fopen(filename, "r" PY_STDIOTEXTMODE);
-	if (xfp == NULL) {
-		/* Search tail of filename in sys.path before giving up */
-		PyObject *path;
-		const char *tail = strrchr(filename, SEP);
-		if (tail == NULL)
-			tail = filename;
-		else
-			tail++;
-		path = PySys_GetObject("path");
-		if (path != NULL && PyList_Check(path)) {
-			Py_ssize_t _npath = PyList_Size(path);
-			int npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
-			size_t taillen = strlen(tail);
-			for (i = 0; i < npath; i++) {
-				PyObject *v = PyList_GetItem(path, i);
-				if (v == NULL) {
-					PyErr_Clear();
-					break;
-				}
-				if (PyBytes_Check(v)) {
-					size_t len;
-					len = PyBytes_GET_SIZE(v);
-					if (len + 1 + taillen >= MAXPATHLEN)
-						continue; /* Too long */
-					strcpy(namebuf, PyBytes_AsString(v));
-					if (strlen(namebuf) != len)
-						continue; /* v contains '\0' */
-					if (len > 0 && namebuf[len-1] != SEP)
-						namebuf[len++] = SEP;
-					strcpy(namebuf+len, tail);
-					xfp = fopen(namebuf, "r" PY_STDIOTEXTMODE);
-					if (xfp != NULL) {
-						filename = namebuf;
-						break;
-					}
-				}
-			}
-		}
+		return 0;
+	Py_BEGIN_ALLOW_THREADS
+	fd = open(filename, open_flags);
+	Py_END_ALLOW_THREADS
+	if (fd < 0) {
+		fd = _Py_FindSourceFile(filename, buf, sizeof(buf), open_flags);
+		if (fd < 0)
+			return 0;
+		filename = buf;
 	}
 
-	if (xfp == NULL)
-		return err;
-	if (err != 0) {
-		fclose(xfp);
-		return err;
-	}
+	/* use the right encoding to decode the file as unicode */
+	found_encoding = PyTokenizer_FindEncoding(fd);
+	encoding = (found_encoding != NULL) ? found_encoding :
+		(char*)PyUnicode_GetDefaultEncoding();
+	lseek(fd, 0, 0); /* Reset position */
+	fob = PyFile_FromFd(fd, (char*)filename, "r", -1, (char*)encoding,
+		NULL, NULL, 1);
+	PyMem_FREE(found_encoding);
+	if (fob == NULL) {
+		PyErr_Clear();
+		close(fd);
+		return 0;
+	}
 
+	/* get the line number lineno */
 	for (i = 0; i < lineno; i++) {
-		char* pLastChar = &linebuf[sizeof(linebuf)-2];
-		do {
-			*pLastChar = '\0';
-			if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, xfp, NULL) == NULL)
-				break;
-			/* fgets read *something*; if it didn't get as
-			   far as pLastChar, it must have found a newline
-			   or hit the end of the file; if pLastChar is \n,
-			   it obviously found a newline; else we haven't
-			   yet seen a newline, so must continue */
-		} while (*pLastChar != '\0' && *pLastChar != '\n');
-	}
-	if (i == lineno) {
-		char buf[11];
-		char *p = linebuf;
-		while (*p == ' ' || *p == '\t' || *p == '\014')
-			p++;
-
-		/* Write some spaces before the line */
-		strcpy(buf, "          ");
-		assert (strlen(buf) == 10);
-		while (indent > 0) {
-			if(indent < 10)
-				buf[indent] = '\0';
-			err = PyFile_WriteString(buf, f);
-			if (err != 0)
-				break;
-			indent -= 10;
+		Py_XDECREF(lineobj);
+		lineobj = PyFile_GetLine(fob, -1);
+		if (!lineobj) {
+			err = -1;
+			break;
 		}
-
-		if (err == 0)
-			err = PyFile_WriteString(p, f);
-		if (err == 0 && strchr(p, '\n') == NULL)
-			err = PyFile_WriteString("\n", f);
 	}
-	fclose(xfp);
+	Py_DECREF(fob);
+	if (!lineobj || !PyUnicode_Check(lineobj)) {
+		Py_XDECREF(lineobj);
+		return err;
+	}
+
+	/* remove the indentation of the line */
+	u = PyUnicode_AS_UNICODE(lineobj);
+	len = PyUnicode_GET_SIZE(lineobj);
+	for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
+		len--;
+	if (u != p) {
+		PyObject *truncated;
+		truncated = PyUnicode_FromUnicode(p, len);
+		if (truncated) {
+			Py_DECREF(lineobj);
+			lineobj = truncated;
+		} else {
+			PyErr_Clear();
+		}
+	}
+
+	/* Write some spaces before the line */
+	strcpy(buf, "          ");
+	assert (strlen(buf) == 10);
+	while (indent > 0) {
+		if(indent < 10)
+			buf[indent] = '\0';
+		err = PyFile_WriteString(buf, f);
+		if (err != 0)
+			break;
+		indent -= 10;
+	}
+
+	/* finally display the line */
+	if (err == 0)
+		err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
+	Py_DECREF(lineobj);
+	if (err == 0)
+		err = PyFile_WriteString("\n", f);
 	return err;
 }
 