Issues #2384 and #3975: Tracebacks were not correctly printed when the source file

contains a ``coding:`` header: the wrong line was displayed, and the encoding was not respected. Patch by Victor Stinner.
2008-10-09 23:37:48 +00:00 · 2008-10-09 23:37:48 +00:00 · cf8016a8d6
parent 76e5538749
commit cf8016a8d6
4 changed files with 220 additions and 83 deletions
--- a/Lib/test/test_traceback.py
+++ b/Lib/test/test_traceback.py
@ -6,6 +6,7 @@ import sys
 import unittest
 import re
 from test.support import run_unittest, is_jython, Error, captured_output
 from test.support import TESTFN, unlink
 import traceback
@ -90,6 +91,70 @@ class SyntaxTracebackCases(unittest.TestCase):
        err = traceback.format_exception_only(None, None)
        self.assertEqual(err, ['None\n'])
    def test_encoded_file(self):
        """Check tracebacks printed for source files with a coding header.

        A small script that raises RuntimeError is written to TESTFN in
        several encodings and run in a subprocess.  The captured traceback
        must:
        - report the correct line number (Issue2384)
        - respect the file encoding (Issue3975)
        """
        import subprocess
        import sys

        # The spawned subprocess has its stdout redirected to a PIPE, and its
        # encoding may be different from the current interpreter, on Windows
        # at least.
        process = subprocess.Popen([sys.executable, "-c",
                                    "import sys; print(sys.stdout.encoding)"],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
        stdout, stderr = process.communicate()
        output_encoding = str(stdout, 'ascii').splitlines()[0]

        def do_test(firstlines, message, charset, lineno):
            """Run a script raising *message* and check its traceback.

            *firstlines* is prepended verbatim to the script (shebang and/or
            coding header), *charset* is the encoding the script is written
            in, and *lineno* is the line expected in the traceback.
            """
            # Raise the message in a subprocess, and catch the output
            try:
                # The context manager closes the file even if write() fails,
                # so the unlink() below never races with an open handle.
                with open(TESTFN, "w", encoding=charset) as output:
                    output.write("""{0}if 1:
                    import traceback;
                    raise RuntimeError('{1}')
                    """.format(firstlines, message))

                process = subprocess.Popen([sys.executable, TESTFN],
                    stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                stdout, stderr = process.communicate()
                stdout = stdout.decode(output_encoding).splitlines()
            finally:
                unlink(TESTFN)

            # The source lines are encoded with the 'backslashreplace' handler
            encoded_message = message.encode(output_encoding,
                                             'backslashreplace')
            # and we just decoded them with the output_encoding.
            message_ascii = encoded_message.decode(output_encoding)

            err_line = "raise RuntimeError('{0}')".format(message_ascii)
            err_msg = "RuntimeError: {0}".format(message_ascii)

            # Fail with the captured output instead of an IndexError when the
            # subprocess printed fewer lines than a traceback needs.
            self.assertTrue(len(stdout) >= 4,
                "Unexpected traceback output: {0!r}".format(stdout))
            self.assertTrue(("line %s" % lineno) in stdout[1],
                "Invalid line number: {0!r} instead of {1}".format(
                    stdout[1], lineno))
            self.assertTrue(stdout[2].endswith(err_line),
                "Invalid traceback line: {0!r} instead of {1!r}".format(
                    stdout[2], err_line))
            self.assertTrue(stdout[3] == err_msg,
                "Invalid error message: {0!r} instead of {1!r}".format(
                    stdout[3], err_msg))

        # No header at all: the raise statement is on line 3 of the script.
        do_test("", "foo", "ascii", 3)
        for charset in ("ascii", "iso-8859-1", "utf-8", "GBK"):
            if charset == "ascii":
                text = "foo"
            elif charset == "GBK":
                text = "\u4E02\u5100"
            else:
                text = "h\xe9 ho"
            # Each extra header line shifts the raise statement down by one.
            do_test("# coding: {0}\n".format(charset),
                    text, charset, 4)
            do_test("#!shebang\n# coding: {0}\n".format(charset),
                    text, charset, 5)
 class TracebackFormatTests(unittest.TestCase):
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -15,6 +15,10 @@ What's New in Python 3.0 beta 5
 Core and Builtins
 -----------------
 - Issues #2384 and #3975: Tracebacks were not correctly printed when the
  source file contains a ``coding:`` header: the wrong line was displayed, and
  the encoding was not respected.
 - Issue #3740: Null-initialize module state.
 - Issue #3946: PyObject_CheckReadBuffer crashed on a memoryview object.
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@ -461,6 +461,14 @@ fp_setreadl(struct tok_state *tok, const char* enc)
 	readline = PyObject_GetAttrString(stream, "readline");
 	tok->decoding_readline = readline;
 	/* The file has been reopened; parsing will restart from
 	 * the beginning of the file, we have to reset the line number.
 	 * But this function has been called from inside tok_nextc() which
 	 * will increment lineno before it returns. So we set it to -1 so that
 	 * the next call to tok_nextc() will start with tok->lineno == 0.
 	 */
 	tok->lineno = -1;
  cleanup:
 	Py_XDECREF(stream);
 	Py_XDECREF(io);
--- a/Python/traceback.c
+++ b/Python/traceback.c
@ -8,9 +8,15 @@
 #include "structmember.h"
 #include "osdefs.h"
 #include "traceback.h"
 #ifdef HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
 #define OFF(x) offsetof(PyTracebackObject, x)
 /* Method from Parser/tokenizer.c */
 extern char * PyTokenizer_FindEncoding(int);
 static PyObject *
 tb_dir(PyTracebackObject *self)
 {
@ -128,102 +134,156 @@ PyTraceBack_Here(PyFrameObject *frame)
 	return 0;
 }
 /* Look for the base name of `filename` in the directories of sys.path.
  *
  * The last path component of `filename` (everything after the final SEP,
  * or the whole string when there is no SEP) is appended to each unicode
  * entry of sys.path in turn.  The candidate path is built in `namebuf`, a
  * caller-supplied buffer of `namelen` bytes, and opened with `open_flags`.
  *
  * Returns the descriptor of the first candidate that opens, with its path
  * left in `namebuf`.  Returns -1 when sys.path is missing or not a list, or
  * when no candidate could be opened.  Errors raised while examining an
  * entry are cleared.
  */
 static int
 _Py_FindSourceFile(const char* filename, char* namebuf, size_t namelen, int open_flags)
 {
 	int i;
 	int fd = -1;
 	PyObject *v;
 	Py_ssize_t _npath;
 	int npath;
 	size_t taillen;
 	PyObject *syspath;
 	const char* path;
 	const char* tail;
 	Py_ssize_t len;
 	/* Search tail of filename in sys.path before giving up */
 	tail = strrchr(filename, SEP);
 	if (tail == NULL)
 		tail = filename;
 	else
 		tail++;
 	taillen = strlen(tail);
 	syspath = PySys_GetObject("path");
 	if (syspath == NULL || !PyList_Check(syspath))
 		return -1;
 	_npath = PyList_Size(syspath);
 	npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
 	for (i = 0; i < npath; i++) {
 		v = PyList_GetItem(syspath, i);
 		if (v == NULL) {
 			PyErr_Clear();
 			break;
 		}
 		if (!PyUnicode_Check(v))
 			continue;
 		path = _PyUnicode_AsStringAndSize(v, &len);
 		if (path == NULL) {
 			/* The entry could not be converted; `len` is not
 			 * valid either.  Skip it rather than hand NULL to
 			 * strcpy(). */
 			PyErr_Clear();
 			continue;
 		}
 		/* Room is needed for the entry, a separator, the tail and
 		 * the terminating NUL.  Adding on the left (instead of
 		 * subtracting from namelen) cannot wrap when namelen is 0. */
 		if ((size_t)len + 1 + taillen + 1 >= namelen)
 			continue; /* Too long */
 		strcpy(namebuf, path);
 		if (strlen(namebuf) != (size_t)len)
 			continue; /* v contains '\0' */
 		if (len > 0 && namebuf[len-1] != SEP)
 			namebuf[len++] = SEP;
 		strcpy(namebuf+len, tail);
 		/* open() may block, so let other threads run meanwhile */
 		Py_BEGIN_ALLOW_THREADS
 		fd = open(namebuf, open_flags);
 		Py_END_ALLOW_THREADS
 		if (0 <= fd) {
 			return fd;
 		}
 	}
 	return -1;
 }
 /* Write source line `lineno` of `filename` to the file object `f`.
  *
  * NOTE(review): this span is a rendered diff.  Lines starting with '-' are
  * the removed stdio-based implementation, lines starting with '+' are its
  * replacement, and some unmarked lines below belong to only one of the two
  * versions.  Read it as a change description, not as compilable C.
  *
  * As written on the new side: the file is opened directly, falling back to
  * _Py_FindSourceFile() when that fails; its encoding comes from
  * PyTokenizer_FindEncoding() or else PyUnicode_GetDefaultEncoding(); lines
  * are read through a file object until line `lineno` is reached.  The
  * line's leading spaces, tabs and form feeds are dropped, then `indent`
  * spaces, the line and a newline are written to `f`.
  *
  * Returns 0 without writing anything when `filename` is NULL or no file
  * can be opened; otherwise returns `err`: -1 when a line cannot be read,
  * else the status of the writes to `f`.
  */
 int
 _Py_DisplaySourceLine(PyObject *f, const char *filename, int lineno, int indent)
 {
 	int err = 0;
-	FILE *xfp = NULL;
+	int fd;
 	char linebuf[2000];
 	int i;
-	char namebuf[MAXPATHLEN+1];
+	char *found_encoding;
 	char *encoding;
 	PyObject *fob = NULL;
 	PyObject *lineobj = NULL;
 #ifdef O_BINARY
 	const int open_flags = O_RDONLY | O_BINARY;   /* necessary for Windows */
 #else
 	const int open_flags = O_RDONLY;
 #endif
 	char buf[MAXPATHLEN+1];
 	Py_UNICODE *u, *p;
 	Py_ssize_t len;
 	/* open the file */
 	if (filename == NULL)
-		return -1;
+		return 0;
-	xfp = fopen(filename, "r" PY_STDIOTEXTMODE);
+	Py_BEGIN_ALLOW_THREADS
-	if (xfp == NULL) {
+	fd = open(filename, open_flags);
-		/* Search tail of filename in sys.path before giving up */
+	Py_END_ALLOW_THREADS
-		PyObject *path;
+	if (fd < 0) {
-		const char *tail = strrchr(filename, SEP);
+		fd = _Py_FindSourceFile(filename, buf, sizeof(buf), open_flags);
-		if (tail == NULL)
+		if (fd < 0)
-			tail = filename;
+			return 0;
-		else
+		filename = buf;
 			tail++;
 		path = PySys_GetObject("path");
 		if (path != NULL && PyList_Check(path)) {
 			Py_ssize_t _npath = PyList_Size(path);
 			int npath = Py_SAFE_DOWNCAST(_npath, Py_ssize_t, int);
 			size_t taillen = strlen(tail);
 			for (i = 0; i < npath; i++) {
 				PyObject *v = PyList_GetItem(path, i);
 				if (v == NULL) {
 					PyErr_Clear();
 					break;
 				}
 				if (PyBytes_Check(v)) {
 					size_t len;
 					len = PyBytes_GET_SIZE(v);
 					if (len + 1 + taillen >= MAXPATHLEN)
 						continue; /* Too long */
 					strcpy(namebuf, PyBytes_AsString(v));
 					if (strlen(namebuf) != len)
 						continue; /* v contains '\0' */
 					if (len > 0 && namebuf[len-1] != SEP)
 						namebuf[len++] = SEP;
 					strcpy(namebuf+len, tail);
 					xfp = fopen(namebuf, "r" PY_STDIOTEXTMODE);
 					if (xfp != NULL) {
 						filename = namebuf;
 						break;
 					}
 				}
 			}
 		}
 	}
-        if (xfp == NULL)
+	/* use the right encoding to decode the file as unicode */
-            return err;
+	found_encoding = PyTokenizer_FindEncoding(fd);
-        if (err != 0) {
+	encoding = (found_encoding != NULL) ? found_encoding :
-            fclose(xfp);
+		(char*)PyUnicode_GetDefaultEncoding();
-            return err;
+	lseek(fd, 0, 0); /* Reset position */
-        }
+	fob = PyFile_FromFd(fd, (char*)filename, "r", -1, (char*)encoding,
 		NULL, NULL, 1);
 	/* `encoding` may alias found_encoding, so it is released only after
 	 * the PyFile_FromFd() call that uses it */
 	PyMem_FREE(found_encoding);
 	if (fob == NULL) {
 		PyErr_Clear();
 		/* no file object was created to own fd; close it here */
 		close(fd);
 		return 0;
 	}
 	/* get the line number lineno */
 	for (i = 0; i < lineno; i++) {
-		char* pLastChar = &linebuf[sizeof(linebuf)-2];
+		Py_XDECREF(lineobj);
-		do {
+		lineobj = PyFile_GetLine(fob, -1);
-			*pLastChar = '\0';
+		if (!lineobj) {
-			if (Py_UniversalNewlineFgets(linebuf, sizeof linebuf, xfp, NULL) == NULL)
+			err = -1;
-				break;
+			break;
 			/* fgets read *something*; if it didn't get as
 			   far as pLastChar, it must have found a newline
 			   or hit the end of the file;	if pLastChar is \n,
 			   it obviously found a newline; else we haven't
 			   yet seen a newline, so must continue */
 		} while (*pLastChar != '\0' && *pLastChar != '\n');
 	}
 	if (i == lineno) {
 		char buf[11];
 		char *p = linebuf;
 		while (*p == ' ' || *p == '\t' || *p == '\014')
 			p++;
 		/* Write some spaces before the line */
 		strcpy(buf, "          ");
 		assert (strlen(buf) == 10);
 		while (indent > 0) {
 			if(indent < 10)
 				buf[indent] = '\0';
 			err = PyFile_WriteString(buf, f);
 			if (err != 0)
 				break;
 			indent -= 10;
 		}
 		if (err == 0)
 			err = PyFile_WriteString(p, f);
 		if (err == 0 && strchr(p, '\n') == NULL)
 			err = PyFile_WriteString("\n", f);
 	}
-	fclose(xfp);
 	/* NOTE(review): presumably dropping fob also closes fd, since
 	 * PyFile_FromFd() was called with a final argument of 1 -- confirm */
+	Py_DECREF(fob);
 	if (!lineobj || !PyUnicode_Check(lineobj)) {
 		Py_XDECREF(lineobj);
 		return err;
 	}
 	/* remove the indentation of the line */
 	u = PyUnicode_AS_UNICODE(lineobj);
 	len = PyUnicode_GET_SIZE(lineobj);
 	for (p=u; *p == ' ' || *p == '\t' || *p == '\014'; p++)
 		len--;
 	if (u != p) {
 		PyObject *truncated;
 		truncated = PyUnicode_FromUnicode(p, len);
 		if (truncated) {
 			Py_DECREF(lineobj);
 			lineobj = truncated;
 		} else {
 			/* best effort: keep the unstripped line */
 			PyErr_Clear();
 		}
 	}
 	/* Write some spaces before the line */
 	/* buf holds ten spaces, so the indent is written in chunks of at
 	 * most ten; the last chunk is cut to the remaining width */
 	strcpy(buf, "          ");
 	assert (strlen(buf) == 10);
 	while (indent > 0) {
 		if(indent < 10)
 			buf[indent] = '\0';
 		err = PyFile_WriteString(buf, f);
 		if (err != 0)
 			break;
 		indent -= 10;
 	}
 	/* finally display the line */
 	if (err == 0)
 		err = PyFile_WriteObject(lineobj, f, Py_PRINT_RAW);
 	Py_DECREF(lineobj);
 	if  (err == 0)
 		err = PyFile_WriteString("\n", f);
 	return err;
 }