Issue #10785: Store the filename as Unicode in the Python parser.

This commit is contained in:
Victor Stinner 2011-04-05 00:39:01 +02:00
parent 9bdb43e43f
commit 7f2fee3640
9 changed files with 94 additions and 42 deletions

View File

@ -9,7 +9,10 @@ extern "C" {
typedef struct { typedef struct {
int error; int error;
const char *filename; /* decoded from the filesystem encoding */ #ifndef PGEN
/* The filename is useless for pgen, see comment in tok_state structure */
PyObject *filename;
#endif
int lineno; int lineno;
int offset; int offset;
char *text; /* UTF-8-encoded string */ char *text; /* UTF-8-encoded string */
@ -66,8 +69,10 @@ PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(
perrdetail *err_ret, perrdetail *err_ret,
int *flags); int *flags);
/* Note that he following function is defined in pythonrun.c not parsetok.c. */ /* Note that the following functions are defined in pythonrun.c,
not in parsetok.c */
PyAPI_FUNC(void) PyParser_SetError(perrdetail *); PyAPI_FUNC(void) PyParser_SetError(perrdetail *);
PyAPI_FUNC(void) PyParser_ClearError(perrdetail *);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -238,14 +238,13 @@ POBJS= \
Parser/listnode.o \ Parser/listnode.o \
Parser/node.o \ Parser/node.o \
Parser/parser.o \ Parser/parser.o \
Parser/parsetok.o \
Parser/bitset.o \ Parser/bitset.o \
Parser/metagrammar.o \ Parser/metagrammar.o \
Parser/firstsets.o \ Parser/firstsets.o \
Parser/grammar.o \ Parser/grammar.o \
Parser/pgen.o Parser/pgen.o
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/tokenizer.o PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
PGOBJS= \ PGOBJS= \
Objects/obmalloc.o \ Objects/obmalloc.o \
@ -254,10 +253,12 @@ PGOBJS= \
Python/pyctype.o \ Python/pyctype.o \
Parser/tokenizer_pgen.o \ Parser/tokenizer_pgen.o \
Parser/printgrammar.o \ Parser/printgrammar.o \
Parser/parsetok_pgen.o \
Parser/pgenmain.o Parser/pgenmain.o
PARSER_HEADERS= \ PARSER_HEADERS= \
Parser/parser.h \ Parser/parser.h \
Include/parsetok.h \
Parser/tokenizer.h Parser/tokenizer.h
PGENOBJS= $(PGENMAIN) $(POBJS) $(PGOBJS) PGENOBJS= $(PGENMAIN) $(POBJS) $(PGOBJS)
@ -593,6 +594,7 @@ Parser/grammar.o: $(srcdir)/Parser/grammar.c \
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
Parser/pgenmain.o: $(srcdir)/Include/parsetok.h Parser/pgenmain.o: $(srcdir)/Include/parsetok.h
@ -700,7 +702,6 @@ PYTHON_HEADERS= \
Include/objimpl.h \ Include/objimpl.h \
Include/opcode.h \ Include/opcode.h \
Include/osdefs.h \ Include/osdefs.h \
Include/parsetok.h \
Include/patchlevel.h \ Include/patchlevel.h \
Include/pgen.h \ Include/pgen.h \
Include/pgenheaders.h \ Include/pgenheaders.h \

View File

@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #10785: Store the filename as Unicode in the Python parser.
- Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes - Issue #11619: _PyImport_LoadDynamicModule() doesn't encode the path to bytes
on Windows. on Windows.

View File

@ -584,6 +584,7 @@ parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
else else
PyParser_SetError(&err); PyParser_SetError(&err);
} }
PyParser_ClearError(&err);
return (res); return (res);
} }

View File

@ -13,7 +13,7 @@
/* Forward */ /* Forward */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
static void initerr(perrdetail *err_ret, const char* filename); static int initerr(perrdetail *err_ret, const char* filename);
/* Parse input coming from a string. Return error code, print some errors. */ /* Parse input coming from a string. Return error code, print some errors. */
node * node *
@ -48,7 +48,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
struct tok_state *tok; struct tok_state *tok;
int exec_input = start == file_input; int exec_input = start == file_input;
initerr(err_ret, filename); if (initerr(err_ret, filename) < 0)
return NULL;
if (*flags & PyPARSE_IGNORE_COOKIE) if (*flags & PyPARSE_IGNORE_COOKIE)
tok = PyTokenizer_FromUTF8(s, exec_input); tok = PyTokenizer_FromUTF8(s, exec_input);
@ -59,7 +60,10 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
return NULL; return NULL;
} }
tok->filename = filename ? filename : "<string>"; #ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags); return parsetok(tok, g, start, err_ret, flags);
} }
@ -90,13 +94,17 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
{ {
struct tok_state *tok; struct tok_state *tok;
initerr(err_ret, filename); if (initerr(err_ret, filename) < 0)
return NULL;
if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) { if ((tok = PyTokenizer_FromFile(fp, (char *)enc, ps1, ps2)) == NULL) {
err_ret->error = E_NOMEM; err_ret->error = E_NOMEM;
return NULL; return NULL;
} }
tok->filename = filename; #ifndef PGEN
Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags); return parsetok(tok, g, start, err_ret, flags);
} }
@ -267,14 +275,24 @@ done:
return n; return n;
} }
static void static int
initerr(perrdetail *err_ret, const char *filename) initerr(perrdetail *err_ret, const char *filename)
{ {
err_ret->error = E_OK; err_ret->error = E_OK;
err_ret->filename = filename;
err_ret->lineno = 0; err_ret->lineno = 0;
err_ret->offset = 0; err_ret->offset = 0;
err_ret->text = NULL; err_ret->text = NULL;
err_ret->token = -1; err_ret->token = -1;
err_ret->expected = -1; err_ret->expected = -1;
#ifndef PGEN
if (filename)
err_ret->filename = PyUnicode_DecodeFSDefault(filename);
else
err_ret->filename = PyUnicode_FromString("<string>");
if (err_ret->filename == NULL) {
err_ret->error = E_ERROR;
return -1;
}
#endif
return 0;
} }

2
Parser/parsetok_pgen.c Normal file
View File

@ -0,0 +1,2 @@
/* Compile parsetok.c with PGEN defined, so that the code guarded by
   #ifndef PGEN in parsetok.c (the PyObject *filename handling) is left out
   of this build of the file. */
#define PGEN
#include "parsetok.c"

View File

@ -128,7 +128,6 @@ tok_new(void)
tok->prompt = tok->nextprompt = NULL; tok->prompt = tok->nextprompt = NULL;
tok->lineno = 0; tok->lineno = 0;
tok->level = 0; tok->level = 0;
tok->filename = NULL;
tok->altwarning = 1; tok->altwarning = 1;
tok->alterror = 1; tok->alterror = 1;
tok->alttabsize = 1; tok->alttabsize = 1;
@ -140,6 +139,7 @@ tok_new(void)
tok->encoding = NULL; tok->encoding = NULL;
tok->cont_line = 0; tok->cont_line = 0;
#ifndef PGEN #ifndef PGEN
tok->filename = NULL;
tok->decoding_readline = NULL; tok->decoding_readline = NULL;
tok->decoding_buffer = NULL; tok->decoding_buffer = NULL;
#endif #endif
@ -545,7 +545,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
{ {
char *line = NULL; char *line = NULL;
int badchar = 0; int badchar = 0;
PyObject *filename;
for (;;) { for (;;) {
if (tok->decoding_state == STATE_NORMAL) { if (tok->decoding_state == STATE_NORMAL) {
/* We already have a codec associated with /* We already have a codec associated with
@ -586,16 +585,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
if (badchar) { if (badchar) {
/* Need to add 1 to the line number, since this line /* Need to add 1 to the line number, since this line
has not been counted, yet. */ has not been counted, yet. */
filename = PyUnicode_DecodeFSDefault(tok->filename); PyErr_Format(PyExc_SyntaxError,
if (filename != NULL) { "Non-UTF-8 code starting with '\\x%.2x' "
PyErr_Format(PyExc_SyntaxError, "in file %U on line %i, "
"Non-UTF-8 code starting with '\\x%.2x' " "but no encoding declared; "
"in file %U on line %i, " "see http://python.org/dev/peps/pep-0263/ for details",
"but no encoding declared; " badchar, tok->filename, tok->lineno + 1);
"see http://python.org/dev/peps/pep-0263/ for details",
badchar, filename, tok->lineno + 1);
Py_DECREF(filename);
}
return error_ret(tok); return error_ret(tok);
} }
#endif #endif
@ -853,6 +848,7 @@ PyTokenizer_Free(struct tok_state *tok)
#ifndef PGEN #ifndef PGEN
Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_readline);
Py_XDECREF(tok->decoding_buffer); Py_XDECREF(tok->decoding_buffer);
Py_XDECREF(tok->filename);
#endif #endif
if (tok->fp != NULL && tok->buf != NULL) if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf); PyMem_FREE(tok->buf);
@ -1247,8 +1243,13 @@ indenterror(struct tok_state *tok)
return 1; return 1;
} }
if (tok->altwarning) { if (tok->altwarning) {
PySys_WriteStderr("%s: inconsistent use of tabs and spaces " #ifdef PGEN
PySys_WriteStderr("inconsistent use of tabs and spaces "
"in indentation\n");
#else
PySys_FormatStderr("%U: inconsistent use of tabs and spaces "
"in indentation\n", tok->filename); "in indentation\n", tok->filename);
#endif
tok->altwarning = 0; tok->altwarning = 0;
} }
return 0; return 0;
@ -1718,6 +1719,11 @@ PyTokenizer_FindEncoding(int fd)
fclose(fp); fclose(fp);
return NULL; return NULL;
} }
#ifndef PGEN
tok->filename = PyUnicode_FromString("<string>");
if (tok->filename == NULL)
goto error;
#endif
while (tok->lineno < 2 && tok->done == E_OK) { while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end); PyTokenizer_Get(tok, &p_start, &p_end);
} }
@ -1727,6 +1733,9 @@ PyTokenizer_FindEncoding(int fd)
if (encoding) if (encoding)
strcpy(encoding, tok->encoding); strcpy(encoding, tok->encoding);
} }
#ifndef PGEN
error:
#endif
PyTokenizer_Free(tok); PyTokenizer_Free(tok);
return encoding; return encoding;
} }

View File

@ -40,7 +40,13 @@ struct tok_state {
int level; /* () [] {} Parentheses nesting level */ int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */ /* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */ /* Stuff for checking on different tab sizes */
const char *filename; /* encoded to the filesystem encoding */ #ifndef PGEN
/* pgen doesn't have access to Python codecs, it cannot decode the input
filename. The bytes filename might be kept, but it is only used by
indenterror() and it is not really needed: pgen only compiles one file
(Grammar/Grammar). */
PyObject *filename;
#endif
int altwarning; /* Issue warning if alternate tabs don't match */ int altwarning; /* Issue warning if alternate tabs don't match */
int alterror; /* Issue error if alternate tabs don't match */ int alterror; /* Issue error if alternate tabs don't match */
int alttabsize; /* Alternate tab spacing */ int alttabsize; /* Alternate tab spacing */

View File

@ -62,6 +62,7 @@ static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *,
static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *, static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
PyCompilerFlags *); PyCompilerFlags *);
static void err_input(perrdetail *); static void err_input(perrdetail *);
static void err_free(perrdetail *);
static void initsigs(void); static void initsigs(void);
static void call_py_exitfuncs(void); static void call_py_exitfuncs(void);
static void wait_for_thread_shutdown(void); static void wait_for_thread_shutdown(void);
@ -1887,12 +1888,13 @@ PyParser_ASTFromString(const char *s, const char *filename, int start,
flags->cf_flags |= iflags & PyCF_MASK; flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNode(n, flags, filename, arena); mod = PyAST_FromNode(n, flags, filename, arena);
PyNode_Free(n); PyNode_Free(n);
return mod;
} }
else { else {
err_input(&err); err_input(&err);
return NULL; mod = NULL;
} }
err_free(&err);
return mod;
} }
mod_ty mod_ty
@ -1917,14 +1919,15 @@ PyParser_ASTFromFile(FILE *fp, const char *filename, const char* enc,
flags->cf_flags |= iflags & PyCF_MASK; flags->cf_flags |= iflags & PyCF_MASK;
mod = PyAST_FromNode(n, flags, filename, arena); mod = PyAST_FromNode(n, flags, filename, arena);
PyNode_Free(n); PyNode_Free(n);
return mod;
} }
else { else {
err_input(&err); err_input(&err);
if (errcode) if (errcode)
*errcode = err.error; *errcode = err.error;
return NULL; mod = NULL;
} }
err_free(&err);
return mod;
} }
/* Simplified interface to parsefile -- return node or set exception */ /* Simplified interface to parsefile -- return node or set exception */
@ -1938,6 +1941,7 @@ PyParser_SimpleParseFileFlags(FILE *fp, const char *filename, int start, int fla
start, NULL, NULL, &err, flags); start, NULL, NULL, &err, flags);
if (n == NULL) if (n == NULL)
err_input(&err); err_input(&err);
err_free(&err);
return n; return n;
} }
@ -1952,6 +1956,7 @@ PyParser_SimpleParseStringFlags(const char *str, int start, int flags)
start, &err, flags); start, &err, flags);
if (n == NULL) if (n == NULL)
err_input(&err); err_input(&err);
err_free(&err);
return n; return n;
} }
@ -1964,6 +1969,7 @@ PyParser_SimpleParseStringFlagsFilename(const char *str, const char *filename,
&_PyParser_Grammar, start, &err, flags); &_PyParser_Grammar, start, &err, flags);
if (n == NULL) if (n == NULL)
err_input(&err); err_input(&err);
err_free(&err);
return n; return n;
} }
@ -1976,12 +1982,24 @@ PyParser_SimpleParseStringFilename(const char *str, const char *filename, int st
/* May want to move a more generalized form of this to parsetok.c or /* May want to move a more generalized form of this to parsetok.c or
even parser modules. */ even parser modules. */
/* Exported wrapper around the file-local err_free(): drops the filename
   reference stored in *err. Callers invoke it once they are done with a
   perrdetail that was filled in by one of the parse functions. */
void
PyParser_ClearError(perrdetail *err)
{
err_free(err);
}
void void
PyParser_SetError(perrdetail *err) PyParser_SetError(perrdetail *err)
{ {
err_input(err); err_input(err);
} }
/* Drop the reference that *err holds on its filename object. */
static void
err_free(perrdetail *err)
{
/* Py_CLEAR accepts a NULL field and resets the field to NULL afterwards. */
Py_CLEAR(err->filename);
}
/* Set the error appropriate to the given input error code (see errcode.h) */ /* Set the error appropriate to the given input error code (see errcode.h) */
static void static void
@ -1989,7 +2007,6 @@ err_input(perrdetail *err)
{ {
PyObject *v, *w, *errtype, *errtext; PyObject *v, *w, *errtype, *errtext;
PyObject *msg_obj = NULL; PyObject *msg_obj = NULL;
PyObject *filename;
char *msg = NULL; char *msg = NULL;
errtype = PyExc_SyntaxError; errtype = PyExc_SyntaxError;
@ -2075,17 +2092,8 @@ err_input(perrdetail *err)
errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text),
"replace"); "replace");
} }
if (err->filename != NULL) v = Py_BuildValue("(OiiN)", err->filename,
filename = PyUnicode_DecodeFSDefault(err->filename); err->lineno, err->offset, errtext);
else {
Py_INCREF(Py_None);
filename = Py_None;
}
if (filename != NULL)
v = Py_BuildValue("(NiiN)", filename,
err->lineno, err->offset, errtext);
else
v = NULL;
if (v != NULL) { if (v != NULL) {
if (msg_obj) if (msg_obj)
w = Py_BuildValue("(OO)", msg_obj, v); w = Py_BuildValue("(OO)", msg_obj, v);