Issue #9319: Include the filename in "Non-UTF-8 code ..." syntax error.

This commit is contained in:
Victor Stinner 2011-04-05 01:48:03 +02:00
parent 7f2fee3640
commit fe7c5b5bdf
6 changed files with 43 additions and 23 deletions

View File

@ -58,6 +58,12 @@ class ImportTests(unittest.TestCase):
with imp.find_module('module_' + mod, self.test_path)[0] as fd: with imp.find_module('module_' + mod, self.test_path)[0] as fd:
self.assertEqual(fd.encoding, encoding) self.assertEqual(fd.encoding, encoding)
path = [os.path.dirname(__file__)]
self.assertRaisesRegex(SyntaxError,
r"Non-UTF-8 code starting with '\\xf6'"
r" in file .*badsyntax_pep3120.py",
imp.find_module, 'badsyntax_pep3120', path)
def test_issue1267(self): def test_issue1267(self):
for mod, encoding, _ in self.test_strings: for mod, encoding, _ in self.test_strings:
fp, filename, info = imp.find_module('module_' + mod, fp, filename, info = imp.find_module('module_' + mod,

View File

@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #9319: Include the filename in "Non-UTF-8 code ..." syntax error.
- Issue #10785: Store the filename as Unicode in the Python parser. - Issue #10785: Store the filename as Unicode in the Python parser.
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes - Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes

View File

@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
return result; return result;
} }
/* Get -*- encoding -*- from a Python file. /* Get the encoding of a Python file. Check for the coding cookie and check if
the file starts with a BOM.
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
the first or second line of the file (in which case the encoding encoding in the first or second line of the file (in which case the encoding
should be assumed to be PyUnicode_GetDefaultEncoding()). should be assumed to be UTF-8).
The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
by the caller. */
The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
by the caller.
*/
char * char *
PyTokenizer_FindEncoding(int fd) PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{ {
struct tok_state *tok; struct tok_state *tok;
FILE *fp; FILE *fp;
@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
return NULL; return NULL;
} }
#ifndef PGEN #ifndef PGEN
tok->filename = PyUnicode_FromString("<string>"); if (filename != NULL) {
if (tok->filename == NULL) Py_INCREF(filename);
goto error; tok->filename = filename;
}
else {
tok->filename = PyUnicode_FromString("<string>");
if (tok->filename == NULL) {
fclose(fp);
PyTokenizer_Free(tok);
return encoding;
}
}
#endif #endif
while (tok->lineno < 2 && tok->done == E_OK) { while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end); PyTokenizer_Get(tok, &p_start, &p_end);
@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
if (encoding) if (encoding)
strcpy(encoding, tok->encoding); strcpy(encoding, tok->encoding);
} }
#ifndef PGEN
error:
#endif
PyTokenizer_Free(tok); PyTokenizer_Free(tok);
return encoding; return encoding;
} }
/* Variant of PyTokenizer_FindEncodingFilename() for callers that have no
   filename to report.

   A NULL filename makes PyTokenizer_FindEncodingFilename() fall back to the
   "<string>" placeholder.  The result is passed through unchanged: either
   NULL, or a string malloc'ed via PyMem_MALLOC() that the caller must
   free. */
char *
PyTokenizer_FindEncoding(int fd)
{
    char *encoding = PyTokenizer_FindEncodingFilename(fd, NULL);

    return encoding;
}
#ifdef Py_DEBUG #ifdef Py_DEBUG
void void

View File

@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
extern int PyTokenizer_Get(struct tok_state *, char **, char **); extern int PyTokenizer_Get(struct tok_state *, char **, char **);
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
int len, int *offset); int len, int *offset);
extern char * PyTokenizer_FindEncoding(int);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -124,12 +124,12 @@ static const Py_UNICODE PYC_TAG_UNICODE[] = {
/* See _PyImport_FixupExtensionObject() below */ /* See _PyImport_FixupExtensionObject() below */
static PyObject *extensions = NULL; static PyObject *extensions = NULL;
/* Function from Parser/tokenizer.c */
extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
/* This table is defined in config.c: */ /* This table is defined in config.c: */
extern struct _inittab _PyImport_Inittab[]; extern struct _inittab _PyImport_Inittab[];
/* Method from Parser/tokenizer.c */
extern char * PyTokenizer_FindEncoding(int);
struct _inittab *PyImport_Inittab = _PyImport_Inittab; struct _inittab *PyImport_Inittab = _PyImport_Inittab;
/* these tables define the module suffixes that Python recognizes */ /* these tables define the module suffixes that Python recognizes */
@ -3540,9 +3540,9 @@ call_find_module(PyObject *name, PyObject *path_list)
} }
if (fd != -1) { if (fd != -1) {
if (strchr(fdp->mode, 'b') == NULL) { if (strchr(fdp->mode, 'b') == NULL) {
/* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed /* PyTokenizer_FindEncodingFilename() returns PyMem_MALLOC'ed
memory. */ memory. */
found_encoding = PyTokenizer_FindEncoding(fd); found_encoding = PyTokenizer_FindEncodingFilename(fd, pathobj);
lseek(fd, 0, 0); /* Reset position */ lseek(fd, 0, 0); /* Reset position */
if (found_encoding == NULL && PyErr_Occurred()) { if (found_encoding == NULL && PyErr_Occurred()) {
Py_XDECREF(pathobj); Py_XDECREF(pathobj);

View File

@ -18,8 +18,8 @@
#define MAX_FRAME_DEPTH 100 #define MAX_FRAME_DEPTH 100
#define MAX_NTHREADS 100 #define MAX_NTHREADS 100
/* Method from Parser/tokenizer.c */ /* Function from Parser/tokenizer.c */
extern char * PyTokenizer_FindEncoding(int); extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
static PyObject * static PyObject *
tb_dir(PyTracebackObject *self) tb_dir(PyTracebackObject *self)
@ -251,7 +251,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
/* use the right encoding to decode the file as unicode */ /* use the right encoding to decode the file as unicode */
fd = PyObject_AsFileDescriptor(binary); fd = PyObject_AsFileDescriptor(binary);
found_encoding = PyTokenizer_FindEncoding(fd); found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
encoding = (found_encoding != NULL) ? found_encoding : "utf-8"; encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
lseek(fd, 0, 0); /* Reset position */ lseek(fd, 0, 0); /* Reset position */
fob = PyObject_CallMethod(io, "TextIOWrapper", "Os", binary, encoding); fob = PyObject_CallMethod(io, "TextIOWrapper", "Os", binary, encoding);