Issue #10785: Store the filename as Unicode in the Python parser.
This commit is contained in:
parent
9bdb43e43f
commit
7f2fee3640
|
@ -9,7 +9,10 @@ extern "C" {
|
|||
|
||||
typedef struct {
|
||||
int error;
|
||||
const char *filename; /* decoded from the filesystem encoding */
|
||||
#ifndef PGEN
|
||||
/* The filename is useless for pgen, see comment in tok_state structure */
|
||||
PyObject *filename;
|
||||
#endif
|
||||
int lineno;
|
||||
int offset;
|
||||
char *text; /* UTF-8-encoded string */
|
||||
|
@ -66,8 +69,10 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(
|
|||
perrdetail *err_ret,
|
||||
int *flags);
|
||||
|
||||
/* Note that he following function is defined in pythonrun.c not parsetok.c. */
|
||||
/* Note that the following functions are defined in pythonrun.c,
|
||||
not in parsetok.c */
|
||||
PyAPI_FUNC(void) PyParser_SetError(perrdetail *);
|
||||
PyAPI_FUNC(void) PyParser_ClearError(perrdetail *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -238,14 +238,13 @@ POBJS= \
|
|||
Parser/listnode.o \
|
||||
Parser/node.o \
|
||||
Parser/parser.o \
|
||||
Parser/parsetok.o \
|
||||
Parser/bitset.o \
|
||||
Parser/metagrammar.o \
|
||||
Parser/firstsets.o \
|
||||
Parser/grammar.o \
|
||||
Parser/pgen.o
|
||||
|
||||
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/tokenizer.o
|
||||
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
|
||||
|
||||
PGOBJS= \
|
||||
Objects/obmalloc.o \
|
||||
|
@ -254,10 +253,12 @@ PGOBJS= \
|
|||
Python/pyctype.o \
|
||||
Parser/tokenizer_pgen.o \
|
||||
Parser/printgrammar.o \
|
||||
Parser/parsetok_pgen.o \
|
||||
Parser/pgenmain.o
|
||||
|
||||
PARSER_HEADERS= \
|
||||
Parser/parser.h \
|
||||
Include/parsetok.h \
|
||||
Parser/tokenizer.h
|
||||
|
||||
PGENOBJS= $(PGENMAIN) $(POBJS) $(PGOBJS)
|
||||
|
@ -593,6 +594,7 @@ Parser/grammar.o: $(srcdir)/Parser/grammar.c \
|
|||
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
|
||||
|
||||
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
|
||||
Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
|
||||
|
||||
Parser/pgenmain.o: $(srcdir)/Include/parsetok.h
|
||||
|
||||
|
@ -700,7 +702,6 @@ PYTHON_HEADERS= \
|
|||
Include/objimpl.h \
|
||||
Include/opcode.h \
|
||||
Include/osdefs.h \
|
||||
Include/parsetok.h \
|
||||
Include/patchlevel.h \
|
||||
Include/pgen.h \
|
||||
Include/pgenheaders.h \
|
||||
|
|
|
@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #10785: Store the filename as Unicode in the Python parser.
|
||||
|
||||
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
|
||||
on Windows.
|
||||
|
||||
|
|
|
@ -584,6 +584,7 @@ parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
|
|||
else
|
||||
PyParser_SetError(&err);
|
||||
}
|
||||
PyParser_ClearError(&err);
|
||||
return (res);
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
/* Forward */
|
||||
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
|
||||
static void initerr(perrdetail *err_ret, const char* filename);
|
||||
static int initerr(perrdetail *err_ret, const char* filename);
|
||||
|
||||
/* Parse input coming from a string. Return error code, print some errors. */
|
||||
node *
|
||||
|
@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
|
|||
struct tok_state *tok;
|
||||
int exec_input = start == file_input;
|
||||
|
||||
initerr(err_ret, filename);
|
||||
if (initerr(err_ret, filename) < 0)
|
||||
return NULL;
|
||||
|
||||
if (*flags & PyPARSE_IGNORE_COOKIE)
|
||||
tok = PyTokenizer_FromUTF8(s, exec_input);
|
||||
|
@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
tok->filename = filename ? filename : "<string>";
|
||||
#ifndef PGEN
|
||||
Py_INCREF(err_ret->filename);
|
||||
tok->filename = err_ret->filename;
|
||||
#endif
|
||||
return parsetok(tok, g, start, err_ret, flags);
|
||||
}
|
||||
|
||||
|
@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
|
|||
{
|
||||
struct tok_state *tok;
|
||||
|
||||
initerr(err_ret, filename);
|
||||
if (initerr(err_ret, filename) < 0)
|
||||
return NULL;
|
||||
|
||||
if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
|
||||
err_ret->error = E_NOMEM;
|
||||
return NULL;
|
||||
}
|
||||
tok->filename = filename;
|
||||
#ifndef PGEN
|
||||
Py_INCREF(err_ret->filename);
|
||||
tok->filename = err_ret->filename;
|
||||
#endif
|
||||
return parsetok(tok, g, start, err_ret, flags);
|
||||
}
|
||||
|
||||
|
@ -267,14 +275,24 @@ done:
|
|||
return n;
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
initerr(perrdetail *err_ret, const char *filename)
|
||||
{
|
||||
err_ret->error = E_OK;
|
||||
err_ret->filename = filename;
|
||||
err_ret->lineno = 0;
|
||||
err_ret->offset = 0;
|
||||
err_ret->text = NULL;
|
||||
err_ret->token = -1;
|
||||
err_ret->expected = -1;
|
||||
#ifndef PGEN
|
||||
if (filename)
|
||||
err_ret->filename = PyUnicode_DecodeFSDefault(filename);
|
||||
else
|
||||
err_ret->filename = PyUnicode_FromString("<string>");
|
||||
if (err_ret->filename == NULL) {
|
||||
err_ret->error = E_ERROR;
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
#define PGEN
|
||||
#include "parsetok.c"
|
|
@ -128,7 +128,6 @@ tok_new(void)
|
|||
tok->prompt = tok->nextprompt = NULL;
|
||||
tok->lineno = 0;
|
||||
tok->level = 0;
|
||||
tok->filename = NULL;
|
||||
tok->altwarning = 1;
|
||||
tok->alterror = 1;
|
||||
tok->alttabsize = 1;
|
||||
|
@ -140,6 +139,7 @@ tok_new(void)
|
|||
tok->encoding = NULL;
|
||||
tok->cont_line = 0;
|
||||
#ifndef PGEN
|
||||
tok->filename = NULL;
|
||||
tok->decoding_readline = NULL;
|
||||
tok->decoding_buffer = NULL;
|
||||
#endif
|
||||
|
@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
|
|||
{
|
||||
char *line = NULL;
|
||||
int badchar = 0;
|
||||
PyObject *filename;
|
||||
for (;;) {
|
||||
if (tok->decoding_state == STATE_NORMAL) {
|
||||
/* We already have a codec associated with
|
||||
|
@ -586,16 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
|
|||
if (badchar) {
|
||||
/* Need to add 1 to the line number, since this line
|
||||
has not been counted, yet. */
|
||||
filename = PyUnicode_DecodeFSDefault(tok->filename);
|
||||
if (filename != NULL) {
|
||||
PyErr_Format(PyExc_SyntaxError,
|
||||
"Non-UTF-8 code starting with '\\x%.2x' "
|
||||
"in file %U on line %i, "
|
||||
"but no encoding declared; "
|
||||
"see http://python.org/dev/peps/pep-0263/ for details",
|
||||
badchar, filename, tok->lineno + 1);
|
||||
Py_DECREF(filename);
|
||||
}
|
||||
badchar, tok->filename, tok->lineno + 1);
|
||||
return error_ret(tok);
|
||||
}
|
||||
#endif
|
||||
|
@ -853,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok)
|
|||
#ifndef PGEN
|
||||
Py_XDECREF(tok->decoding_readline);
|
||||
Py_XDECREF(tok->decoding_buffer);
|
||||
Py_XDECREF(tok->filename);
|
||||
#endif
|
||||
if (tok->fp != NULL && tok->buf != NULL)
|
||||
PyMem_FREE(tok->buf);
|
||||
|
@ -1247,8 +1243,13 @@ indenterror(struct tok_state *tok)
|
|||
return 1;
|
||||
}
|
||||
if (tok->altwarning) {
|
||||
PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
|
||||
#ifdef PGEN
|
||||
PySys_WriteStderr("inconsistent use of tabs and spaces "
|
||||
"in indentation\n");
|
||||
#else
|
||||
PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
|
||||
"in indentation\n", tok->filename);
|
||||
#endif
|
||||
tok->altwarning = 0;
|
||||
}
|
||||
return 0;
|
||||
|
@ -1718,6 +1719,11 @@ PyTokenizer_FindEncoding(int fd)
|
|||
fclose(fp);
|
||||
return NULL;
|
||||
}
|
||||
#ifndef PGEN
|
||||
tok->filename = PyUnicode_FromString("<string>");
|
||||
if (tok->filename == NULL)
|
||||
goto error;
|
||||
#endif
|
||||
while (tok->lineno < 2 && tok->done == E_OK) {
|
||||
PyTokenizer_Get(tok, &p_start, &p_end);
|
||||
}
|
||||
|
@ -1727,6 +1733,9 @@ PyTokenizer_FindEncoding(int fd)
|
|||
if (encoding)
|
||||
strcpy(encoding, tok->encoding);
|
||||
}
|
||||
#ifndef PGEN
|
||||
error:
|
||||
#endif
|
||||
PyTokenizer_Free(tok);
|
||||
return encoding;
|
||||
}
|
||||
|
|
|
@ -40,7 +40,13 @@ struct tok_state {
|
|||
int level; /* () [] {} Parentheses nesting level */
|
||||
/* Used to allow free continuations inside them */
|
||||
/* Stuff for checking on different tab sizes */
|
||||
const char *filename; /* encoded to the filesystem encoding */
|
||||
#ifndef PGEN
|
||||
/* pgen doesn't have access to Python codecs, it cannot decode the input
|
||||
filename. The bytes filename might be kept, but it is only used by
|
||||
indenterror() and it is not really needed: pgen only compiles one file
|
||||
(Grammar/Grammar). */
|
||||
PyObject *filename;
|
||||
#endif
|
||||
int altwarning; /* Issue warning if alternate tabs don't match */
|
||||
int alterror; /* Issue error if alternate tabs don't match */
|
||||
int alttabsize; /* Alternate tab spacing */
|
||||
|
|
|
@ -62,6 +62,7 @@ static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *,
|
|||
static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
|
||||
PyCompilerFlags *);
|
||||
static void err_input(perrdetail *);
|
||||
static void err_free(perrdetail *);
|
||||
static void initsigs(void);
|
||||
static void call_py_exitfuncs(void);
|
||||
static void wait_for_thread_shutdown(void);
|
||||
|
@ -1887,12 +1888,13 @@ PyParser_ASTFromString(const char *s, const char *filename, int start,
|
|||
flags->cf_flags |= iflags & PyCF_MASK;
|
||||
mod = PyAST_FromNode(n, flags, filename, arena);
|
||||
PyNode_Free(n);
|
||||
return mod;
|
||||
}
|
||||
else {
|
||||
err_input(&err);
|
||||
return NULL;
|
||||
mod = NULL;
|
||||
}
|
||||
err_free(&err);
|
||||
return mod;
|
||||
}
|
||||
|
||||
mod_ty
|
||||
|
@ -1917,14 +1919,15 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc,
|
|||
flags->cf_flags |= iflags & PyCF_MASK;
|
||||
mod = PyAST_FromNode(n, flags, filename, arena);
|
||||
PyNode_Free(n);
|
||||
return mod;
|
||||
}
|
||||
else {
|
||||
err_input(&err);
|
||||
if (errcode)
|
||||
*errcode = err.error;
|
||||
return NULL;
|
||||
mod = NULL;
|
||||
}
|
||||
err_free(&err);
|
||||
return mod;
|
||||
}
|
||||
|
||||
/* Simplified interface to parsefile -- return node or set exception */
|
||||
|
@ -1938,6 +1941,7 @@ PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int fla
|
|||
start, NULL, NULL, &err, flags);
|
||||
if (n == NULL)
|
||||
err_input(&err);
|
||||
err_free(&err);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
@ -1952,6 +1956,7 @@ PyParser_SimpleParseStringFlags(const char *str, int start, int flags)
|
|||
start, &err, flags);
|
||||
if (n == NULL)
|
||||
err_input(&err);
|
||||
err_free(&err);
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -1964,6 +1969,7 @@ PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename,
|
|||
&_PyParser_Grammar, start, &err, flags);
|
||||
if (n == NULL)
|
||||
err_input(&err);
|
||||
err_free(&err);
|
||||
return n;
|
||||
}
|
||||
|
||||
|
@ -1976,12 +1982,24 @@ PyParser_SimpleParseStringFilename(const char *str, const char *filename, int st
|
|||
/* May want to move a more generalized form of this to parsetok.c or
|
||||
even parser modules. */
|
||||
|
||||
void
|
||||
PyParser_ClearError(perrdetail *err)
|
||||
{
|
||||
err_free(err);
|
||||
}
|
||||
|
||||
void
|
||||
PyParser_SetError(perrdetail *err)
|
||||
{
|
||||
err_input(err);
|
||||
}
|
||||
|
||||
static void
|
||||
err_free(perrdetail *err)
|
||||
{
|
||||
Py_CLEAR(err->filename);
|
||||
}
|
||||
|
||||
/* Set the error appropriate to the given input error code (see errcode.h) */
|
||||
|
||||
static void
|
||||
|
@ -1989,7 +2007,6 @@ err_input(perrdetail *err)
|
|||
{
|
||||
PyObject *v, *w, *errtype, *errtext;
|
||||
PyObject *msg_obj = NULL;
|
||||
PyObject *filename;
|
||||
char *msg = NULL;
|
||||
|
||||
errtype = PyExc_SyntaxError;
|
||||
|
@ -2075,17 +2092,8 @@ err_input(perrdetail *err)
|
|||
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
|
||||
"replace");
|
||||
}
|
||||
if (err->filename != NULL)
|
||||
filename = PyUnicode_DecodeFSDefault(err->filename);
|
||||
else {
|
||||
Py_INCREF(Py_None);
|
||||
filename = Py_None;
|
||||
}
|
||||
if (filename != NULL)
|
||||
v = Py_BuildValue("(NiiN)", filename,
|
||||
v = Py_BuildValue("(OiiN)", err->filename,
|
||||
err->lineno, err->offset, errtext);
|
||||
else
|
||||
v = NULL;
|
||||
if (v != NULL) {
|
||||
if (msg_obj)
|
||||
w = Py_BuildValue("(OO)", msg_obj, v);
|
||||
|
|
Loading…
Reference in New Issue