Issue #9319: Include the filename in "Non-UTF-8 code ..." syntax error.
This commit is contained in:
parent
7f2fee3640
commit
fe7c5b5bdf
|
@ -58,6 +58,12 @@ class ImportTests(unittest.TestCase):
|
|||
with imp.find_module('module_' + mod, self.test_path)[0] as fd:
|
||||
self.assertEqual(fd.encoding, encoding)
|
||||
|
||||
path = [os.path.dirname(__file__)]
|
||||
self.assertRaisesRegex(SyntaxError,
|
||||
r"Non-UTF-8 code starting with '\\xf6'"
|
||||
r" in file .*badsyntax_pep3120.py",
|
||||
imp.find_module, 'badsyntax_pep3120', path)
|
||||
|
||||
def test_issue1267(self):
|
||||
for mod, encoding, _ in self.test_strings:
|
||||
fp, filename, info = imp.find_module('module_' + mod,
|
||||
|
|
|
@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #9319: Include the filename in "Non-UTF-8 code ..." syntax error.
|
||||
|
||||
- Issue #10785: Store the filename as Unicode in the Python parser.
|
||||
|
||||
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
|
||||
|
|
|
@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
|
|||
return result;
|
||||
}
|
||||
|
||||
/* Get -*- encoding -*- from a Python file.
|
||||
/* Get the encoding of a Python file. Check for the coding cookie and check if
|
||||
the file starts with a BOM.
|
||||
|
||||
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
|
||||
the first or second line of the file (in which case the encoding
|
||||
should be assumed to be PyUnicode_GetDefaultEncoding()).
|
||||
PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
|
||||
encoding in the first or second line of the file (in which case the encoding
|
||||
should be assumed to be UTF-8).
|
||||
|
||||
The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
|
||||
by the caller.
|
||||
*/
|
||||
by the caller. */
|
||||
|
||||
char *
|
||||
PyTokenizer_FindEncoding(int fd)
|
||||
PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
|
||||
{
|
||||
struct tok_state *tok;
|
||||
FILE *fp;
|
||||
|
@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
|
|||
return NULL;
|
||||
}
|
||||
#ifndef PGEN
|
||||
if (filename != NULL) {
|
||||
Py_INCREF(filename);
|
||||
tok->filename = filename;
|
||||
}
|
||||
else {
|
||||
tok->filename = PyUnicode_FromString("<string>");
|
||||
if (tok->filename == NULL)
|
||||
goto error;
|
||||
if (tok->filename == NULL) {
|
||||
fclose(fp);
|
||||
PyTokenizer_Free(tok);
|
||||
return encoding;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
while (tok->lineno < 2 && tok->done == E_OK) {
|
||||
PyTokenizer_Get(tok, &p_start, &p_end);
|
||||
|
@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
|
|||
if (encoding)
|
||||
strcpy(encoding, tok->encoding);
|
||||
}
|
||||
#ifndef PGEN
|
||||
error:
|
||||
#endif
|
||||
PyTokenizer_Free(tok);
|
||||
return encoding;
|
||||
}
|
||||
|
||||
char *
|
||||
PyTokenizer_FindEncoding(int fd)
|
||||
{
|
||||
return PyTokenizer_FindEncodingFilename(fd, NULL);
|
||||
}
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
|
||||
void
|
||||
|
|
|
@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
|
|||
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
|
||||
extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
|
||||
int len, int *offset);
|
||||
extern char * PyTokenizer_FindEncoding(int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -124,12 +124,12 @@ static const Py_UNICODE PYC_TAG_UNICODE[] = {
|
|||
/* See _PyImport_FixupExtensionObject() below */
|
||||
static PyObject *extensions = NULL;
|
||||
|
||||
/* Function from Parser/tokenizer.c */
|
||||
extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
|
||||
|
||||
/* This table is defined in config.c: */
|
||||
extern struct _inittab _PyImport_Inittab[];
|
||||
|
||||
/* Method from Parser/tokenizer.c */
|
||||
extern char * PyTokenizer_FindEncoding(int);
|
||||
|
||||
struct _inittab *PyImport_Inittab = _PyImport_Inittab;
|
||||
|
||||
/* these tables define the module suffixes that Python recognizes */
|
||||
|
@ -3540,9 +3540,9 @@ call_find_module(PyObject *name, PyObject *path_list)
|
|||
}
|
||||
if (fd != -1) {
|
||||
if (strchr(fdp->mode, 'b') == NULL) {
|
||||
/* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed
|
||||
/* PyTokenizer_FindEncodingFilename() returns PyMem_MALLOC'ed
|
||||
memory. */
|
||||
found_encoding = PyTokenizer_FindEncoding(fd);
|
||||
found_encoding = PyTokenizer_FindEncodingFilename(fd, pathobj);
|
||||
lseek(fd, 0, 0); /* Reset position */
|
||||
if (found_encoding == NULL && PyErr_Occurred()) {
|
||||
Py_XDECREF(pathobj);
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
#define MAX_FRAME_DEPTH 100
|
||||
#define MAX_NTHREADS 100
|
||||
|
||||
/* Method from Parser/tokenizer.c */
|
||||
extern char * PyTokenizer_FindEncoding(int);
|
||||
/* Function from Parser/tokenizer.c */
|
||||
extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
|
||||
|
||||
static PyObject *
|
||||
tb_dir(PyTracebackObject *self)
|
||||
|
@ -251,7 +251,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
|
|||
|
||||
/* use the right encoding to decode the file as unicode */
|
||||
fd = PyObject_AsFileDescriptor(binary);
|
||||
found_encoding = PyTokenizer_FindEncoding(fd);
|
||||
found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
|
||||
encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
|
||||
lseek(fd, 0, 0); /* Reset position */
|
||||
fob = PyObject_CallMethod(io, "TextIOWrapper", "Os", binary, encoding);
|
||||
|
|
Loading…
Reference in New Issue