diff --git a/.hgeol b/.hgeol index aad79c21136..2919f76a110 100644 --- a/.hgeol +++ b/.hgeol @@ -38,6 +38,8 @@ Lib/test/xmltestdata/* = BIN Lib/venv/scripts/nt/* = BIN +Lib/test/coding20731.py = BIN + # All other files (which presumably are human-editable) are "native". # This must be the last rule! diff --git a/Lib/test/coding20731.py b/Lib/test/coding20731.py new file mode 100644 index 00000000000..b0e227ad110 --- /dev/null +++ b/Lib/test/coding20731.py @@ -0,0 +1,4 @@ +#coding:latin1 + + + diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py index cd9d2b374c7..39b623e637f 100644 --- a/Lib/test/test_source_encoding.py +++ b/Lib/test/test_source_encoding.py @@ -5,6 +5,7 @@ from test.support import TESTFN, unlink, unload import importlib import os import sys +import subprocess class SourceEncodingTest(unittest.TestCase): @@ -58,6 +59,14 @@ class SourceEncodingTest(unittest.TestCase): # two bytes in common with the UTF-8 BOM self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20') + def test_20731(self): + sub = subprocess.Popen([sys.executable, + os.path.join(os.path.dirname(__file__), + 'coding20731.py')], + stderr=subprocess.PIPE) + err = sub.communicate()[1] + self.assertEquals(err, b'') + def test_error_message(self): compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec') compile(b'\xef\xbb\xbf\n', 'dummy', 'exec') diff --git a/Misc/NEWS b/Misc/NEWS index 3604a307a9e..c63054a1a72 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -8,6 +8,9 @@ What's New in Python 3.4.1? Core and Builtins ----------------- +- Issue #20731: Properly position in source code files even if they + are opened in text mode. Patch by Serhiy Storchaka. + - Issue #20637: Key-sharing now also works for instance dictionaries of subclasses. Patch by Peter Ingebretson. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index c32a3bfd1c8..7283058fc88 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -498,9 +498,13 @@ fp_setreadl(struct tok_state *tok, const char* enc) fd = fileno(tok->fp); /* Due to buffering the file offset for fd can be different from the file - * position of tok->fp. */ + * position of tok->fp. If tok->fp was opened in text mode on Windows, + * its file position counts CRLF as one char and can't be directly mapped + * to the file offset for fd. Instead we step back one byte and read to + * the end of line.*/ pos = ftell(tok->fp); - if (pos == -1 || lseek(fd, (off_t)pos, SEEK_SET) == (off_t)-1) { + if (pos == -1 || + lseek(fd, (off_t)(pos > 0 ? pos - 1 : pos), SEEK_SET) == (off_t)-1) { PyErr_SetFromErrnoWithFilename(PyExc_OSError, NULL); goto cleanup; } @@ -513,6 +517,12 @@ fp_setreadl(struct tok_state *tok, const char* enc) Py_XDECREF(tok->decoding_readline); readline = _PyObject_GetAttrId(stream, &PyId_readline); tok->decoding_readline = readline; + if (pos > 0) { + if (PyObject_CallObject(readline, NULL) == NULL) { + readline = NULL; + goto cleanup; + } + } cleanup: Py_XDECREF(stream);