Issue #9319: Include the filename in "Non-UTF8 code ..." syntax error.
parent 7f2fee3640
commit fe7c5b5bdf
--- a/Lib/test/test_imp.py
+++ b/Lib/test/test_imp.py
@@ -58,6 +58,12 @@ class ImportTests(unittest.TestCase):
             with imp.find_module('module_' + mod, self.test_path)[0] as fd:
                 self.assertEqual(fd.encoding, encoding)
 
+        path = [os.path.dirname(__file__)]
+        self.assertRaisesRegex(SyntaxError,
+            r"Non-UTF-8 code starting with '\\xf6'"
+            r" in file .*badsyntax_pep3120.py",
+            imp.find_module, 'badsyntax_pep3120', path)
+
     def test_issue1267(self):
         for mod, encoding, _ in self.test_strings:
             fp, filename, info = imp.find_module('module_' + mod,
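For context, a standalone sketch of what the new test exercises, using a hypothetical throwaway module rather than the badsyntax_pep3120 fixture (exact message wording may vary):

import imp, os, tempfile

# Write a module whose source contains a stray latin-1 byte and no coding cookie.
tmpdir = tempfile.mkdtemp()
with open(os.path.join(tmpdir, "badenc.py"), "wb") as f:
    f.write(b"# stray latin-1 byte, no coding cookie: \xf6\n")

try:
    imp.find_module("badenc", [tmpdir])
except SyntaxError as exc:
    # Expected along the lines of:
    #   Non-UTF-8 code starting with '\xf6' in file /tmp/.../badenc.py ...
    print(exc)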
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #9319: Include the filename in "Non-UTF8 code ..." syntax error.
+
 - Issue #10785: Store the filename as Unicode in the Python parser.
 
 - Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
     return result;
 }
 
-/* Get -*- encoding -*- from a Python file.
+/* Get the encoding of a Python file. Check for the coding cookie and check if
+   the file starts with a BOM.
 
-   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
-   the first or second line of the file (in which case the encoding
-   should be assumed to be PyUnicode_GetDefaultEncoding()).
+   PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+   encoding in the first or second line of the file (in which case the encoding
+   should be assumed to be UTF-8).
 
-   The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
-   by the caller.
-*/
+   The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+   by the caller. */
+
 char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
 {
     struct tok_state *tok;
     FILE *fp;
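The rewritten comment describes the same rules that the pure-Python tokenize.detect_encoding() applies, which makes for a convenient illustration (not part of this diff): honour a coding cookie or a UTF-8 BOM in the first two lines, otherwise fall back to UTF-8.

import io, tokenize

# A coding cookie on the first line wins.
src = b"# -*- coding: latin-1 -*-\nname = '\xf6'\n"
encoding, lines_read = tokenize.detect_encoding(io.BytesIO(src).readline)
print(encoding)        # 'iso-8859-1' (the cookie, normalized)

# With no cookie and no BOM, the default is UTF-8.
encoding, _ = tokenize.detect_encoding(io.BytesIO(b"x = 1\n").readline)
print(encoding)        # 'utf-8'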
@@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
         return NULL;
     }
 #ifndef PGEN
-    tok->filename = PyUnicode_FromString("<string>");
-    if (tok->filename == NULL)
-        goto error;
+    if (filename != NULL) {
+        Py_INCREF(filename);
+        tok->filename = filename;
+    }
+    else {
+        tok->filename = PyUnicode_FromString("<string>");
+        if (tok->filename == NULL) {
+            fclose(fp);
+            PyTokenizer_Free(tok);
+            return encoding;
+        }
+    }
 #endif
     while (tok->lineno < 2 && tok->done == E_OK) {
         PyTokenizer_Get(tok, &p_start, &p_end);
@@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
         if (encoding)
             strcpy(encoding, tok->encoding);
     }
-#ifndef PGEN
-error:
-#endif
     PyTokenizer_Free(tok);
     return encoding;
 }
 
+char *
+PyTokenizer_FindEncoding(int fd)
+{
+    return PyTokenizer_FindEncodingFilename(fd, NULL);
+}
+
 #ifdef Py_DEBUG
 
 void
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -75,7 +75,6 @@ extern void PyTokenizer_Free(struct tok_state *);
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
                                           int len, int *offset);
-extern char * PyTokenizer_FindEncoding(int);
 
 #ifdef __cplusplus
 }
--- a/Python/import.c
+++ b/Python/import.c
@@ -124,12 +124,12 @@ static const Py_UNICODE PYC_TAG_UNICODE[] = {
 /* See _PyImport_FixupExtensionObject() below */
 static PyObject *extensions = NULL;
 
+/* Function from Parser/tokenizer.c */
+extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
+
 /* This table is defined in config.c: */
 extern struct _inittab _PyImport_Inittab[];
 
-/* Method from Parser/tokenizer.c */
-extern char * PyTokenizer_FindEncoding(int);
-
 struct _inittab *PyImport_Inittab = _PyImport_Inittab;
 
 /* these tables define the module suffixes that Python recognizes */
@@ -3540,9 +3540,9 @@ call_find_module(PyObject *name, PyObject *path_list)
     }
     if (fd != -1) {
         if (strchr(fdp->mode, 'b') == NULL) {
-            /* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed
+            /* PyTokenizer_FindEncodingFilename() returns PyMem_MALLOC'ed
                memory. */
-            found_encoding = PyTokenizer_FindEncoding(fd);
+            found_encoding = PyTokenizer_FindEncodingFilename(fd, pathobj);
             lseek(fd, 0, 0); /* Reset position */
             if (found_encoding == NULL && PyErr_Occurred()) {
                 Py_XDECREF(pathobj);
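As the test at the top of this diff relies on, call_find_module() uses the detected encoding to build the text-mode file object returned by imp.find_module(); roughly (an illustration, not part of the diff):

import imp

# For a source module found on sys.path, the returned file object is opened
# in text mode with the detected encoding (the coding cookie, or UTF-8 if none).
fd, pathname, description = imp.find_module("os")
with fd:
    print(fd.encoding)   # typically 'utf-8' for a stdlib module without a cookie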
--- a/Python/traceback.c
+++ b/Python/traceback.c
@@ -18,8 +18,8 @@
 #define MAX_FRAME_DEPTH 100
 #define MAX_NTHREADS 100
 
-/* Method from Parser/tokenizer.c */
-extern char * PyTokenizer_FindEncoding(int);
+/* Function from Parser/tokenizer.c */
+extern char * PyTokenizer_FindEncodingFilename(int, PyObject *);
 
 static PyObject *
 tb_dir(PyTracebackObject *self)
@@ -251,7 +251,7 @@ _Py_DisplaySourceLine(PyObject *f, PyObject *filename, int lineno, int indent)
 
     /* use the right encoding to decode the file as unicode */
     fd = PyObject_AsFileDescriptor(binary);
-    found_encoding = PyTokenizer_FindEncoding(fd);
+    found_encoding = PyTokenizer_FindEncodingFilename(fd, filename);
     encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
     lseek(fd, 0, 0); /* Reset position */
     fob = PyObject_CallMethod(io, "TextIOWrapper", "Os", binary, encoding);
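_Py_DisplaySourceLine() follows the same recipe as this pure-Python sketch (an illustration, not part of the diff): detect the source encoding, rewind, then wrap the binary stream in TextIOWrapper so the displayed lines decode correctly.

import io, tokenize

with open(__file__, "rb") as binary:
    encoding, _ = tokenize.detect_encoding(binary.readline)
    binary.seek(0)                            # reset position, like lseek(fd, 0, 0) above
    text = io.TextIOWrapper(binary, encoding=encoding)
    print(text.readline().rstrip())           # first source line, correctly decoded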