mirror of https://github.com/python/cpython
Patch #534304: Implement phase 1 of PEP 263.
This commit is contained in:
parent
a729daf2e4
commit
00f1e3f5a5
|
@ -7,11 +7,14 @@ chapter describes how the lexical analyzer breaks a file into tokens.
|
||||||
\index{parser}
|
\index{parser}
|
||||||
\index{token}
|
\index{token}
|
||||||
|
|
||||||
Python uses the 7-bit \ASCII{} character set for program text and string
|
Python uses the 7-bit \ASCII{} character set for program text.
|
||||||
literals. 8-bit characters may be used in string literals and comments
|
\versionadded[An encoding declaration can be used to indicate that
|
||||||
but their interpretation is platform dependent; the proper way to
|
string literals and comments use an encoding different from ASCII.]{2.3}
|
||||||
insert 8-bit characters in string literals is by using octal or
|
For compatibility with older versions, Python only warns if it finds
|
||||||
hexadecimal escape sequences.
|
8-bit characters; those warnings should be corrected by either declaring
|
||||||
|
an explicit encoding, or using escape sequences if those bytes are binary
|
||||||
|
data, instead of characters.
|
||||||
|
|
||||||
|
|
||||||
The run-time character set depends on the I/O devices connected to the
|
The run-time character set depends on the I/O devices connected to the
|
||||||
program but is generally a superset of \ASCII.
|
program but is generally a superset of \ASCII.
|
||||||
|
@ -69,6 +72,37 @@ Comments are ignored by the syntax; they are not tokens.
|
||||||
\index{hash character}
|
\index{hash character}
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{Encoding declarations\label{encodings}}
|
||||||
|
|
||||||
|
If a comment in the first or second line of the Python script matches
|
||||||
|
the regular expression "coding[=:]\s*([-\w.]+)", this comment is
|
||||||
|
processed as an encoding declaration; the first group of this
|
||||||
|
expression names the encoding of the source code file. The recommended
|
||||||
|
forms of this expression are
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
# -*- coding: <encoding-name> -*-
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
which is recognized also by GNU Emacs, and
|
||||||
|
|
||||||
|
\begin{verbatim}
|
||||||
|
# vim:fileencoding=<encoding-name>
|
||||||
|
\end{verbatim}
|
||||||
|
|
||||||
|
which is recognized by Bram Moolenaar's VIM. In addition, if the first
|
||||||
|
bytes of the file are the UTF-8 signature ($'\xef\xbb\xbf'$), the
|
||||||
|
declared file encoding is UTF-8 (this is supported, among others, by
|
||||||
|
Microsoft's notepad.exe).
|
||||||
|
|
||||||
|
If an encoding is declared, the encoding name must be recognized by
|
||||||
|
Python. % XXX there should be a list of supported encodings.
|
||||||
|
The encoding is used for all lexical analysis, in particular to find
|
||||||
|
the end of a string, and to interpret the contents of Unicode literals.
|
||||||
|
String literals are converted to Unicode for syntactical analysis,
|
||||||
|
then converted back to their original encoding before interpretation
|
||||||
|
starts.
|
||||||
|
|
||||||
\subsection{Explicit line joining\label{explicit-joining}}
|
\subsection{Explicit line joining\label{explicit-joining}}
|
||||||
|
|
||||||
Two or more physical lines may be joined into logical lines using
|
Two or more physical lines may be joined into logical lines using
|
||||||
|
|
|
@ -102,3 +102,6 @@ list_for: 'for' exprlist 'in' testlist_safe [list_iter]
|
||||||
list_if: 'if' test [list_iter]
|
list_if: 'if' test [list_iter]
|
||||||
|
|
||||||
testlist1: test (',' test)*
|
testlist1: test (',' test)*
|
||||||
|
|
||||||
|
# not used in grammar, but may appear in "node" passed from Parser to Compiler
|
||||||
|
encoding_decl: NAME
|
||||||
|
|
|
@ -25,6 +25,7 @@ extern "C" {
|
||||||
#define E_OVERFLOW 19 /* Node had too many children */
|
#define E_OVERFLOW 19 /* Node had too many children */
|
||||||
#define E_TOODEEP 20 /* Too many indentation levels */
|
#define E_TOODEEP 20 /* Too many indentation levels */
|
||||||
#define E_DEDENT 21 /* No matching outer block for dedent */
|
#define E_DEDENT 21 /* No matching outer block for dedent */
|
||||||
|
#define E_DECODE 22 /* Error in decoding into Unicode */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,3 +65,4 @@
|
||||||
#define list_for 320
|
#define list_for 320
|
||||||
#define list_if 321
|
#define list_if 321
|
||||||
#define testlist1 322
|
#define testlist1 322
|
||||||
|
#define encoding_decl 323
|
||||||
|
|
|
@ -190,15 +190,15 @@ POBJS= \
|
||||||
Parser/node.o \
|
Parser/node.o \
|
||||||
Parser/parser.o \
|
Parser/parser.o \
|
||||||
Parser/parsetok.o \
|
Parser/parsetok.o \
|
||||||
Parser/tokenizer.o \
|
|
||||||
Parser/bitset.o \
|
Parser/bitset.o \
|
||||||
Parser/metagrammar.o
|
Parser/metagrammar.o
|
||||||
|
|
||||||
PARSER_OBJS= $(POBJS) Parser/myreadline.o
|
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/tokenizer.o
|
||||||
|
|
||||||
PGOBJS= \
|
PGOBJS= \
|
||||||
Objects/obmalloc.o \
|
Objects/obmalloc.o \
|
||||||
Python/mysnprintf.o \
|
Python/mysnprintf.o \
|
||||||
|
Parser/tokenizer_pgen.o \
|
||||||
Parser/firstsets.o \
|
Parser/firstsets.o \
|
||||||
Parser/grammar.o \
|
Parser/grammar.o \
|
||||||
Parser/pgen.o \
|
Parser/pgen.o \
|
||||||
|
@ -434,6 +434,8 @@ Parser/grammar.o: $(srcdir)/Parser/grammar.c \
|
||||||
$(srcdir)/Include/grammar.h
|
$(srcdir)/Include/grammar.h
|
||||||
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
|
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
|
||||||
|
|
||||||
|
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
|
||||||
|
|
||||||
|
|
||||||
Python/compile.o Python/symtable.o: $(GRAMMAR_H)
|
Python/compile.o Python/symtable.o: $(GRAMMAR_H)
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,8 @@ Type/class unification and new-style classes
|
||||||
|
|
||||||
Core and builtins
|
Core and builtins
|
||||||
|
|
||||||
|
- Encoding declarations (PEP 263, phase 1) have been implemented.
|
||||||
|
|
||||||
- list.sort() has a new implementation. While cross-platform results
|
- list.sort() has a new implementation. While cross-platform results
|
||||||
may vary, and in data-dependent ways, this is much faster on many
|
may vary, and in data-dependent ways, this is much faster on many
|
||||||
kinds of partially ordered lists than the previous implementation,
|
kinds of partially ordered lists than the previous implementation,
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "parsetok.h"
|
#include "parsetok.h"
|
||||||
#include "errcode.h"
|
#include "errcode.h"
|
||||||
|
#include "graminit.h"
|
||||||
|
|
||||||
int Py_TabcheckFlag;
|
int Py_TabcheckFlag;
|
||||||
|
|
||||||
|
@ -45,8 +46,8 @@ PyParser_ParseStringFlagsFilename(char *s, char *filename,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tok->filename = filename ? filename : "<string>";
|
||||||
if (Py_TabcheckFlag || Py_VerboseFlag) {
|
if (Py_TabcheckFlag || Py_VerboseFlag) {
|
||||||
tok->filename = filename ? filename : "<string>";
|
|
||||||
tok->altwarning = (tok->filename != NULL);
|
tok->altwarning = (tok->filename != NULL);
|
||||||
if (Py_TabcheckFlag >= 2)
|
if (Py_TabcheckFlag >= 2)
|
||||||
tok->alterror++;
|
tok->alterror++;
|
||||||
|
@ -78,8 +79,8 @@ PyParser_ParseFileFlags(FILE *fp, char *filename, grammar *g, int start,
|
||||||
err_ret->error = E_NOMEM;
|
err_ret->error = E_NOMEM;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
tok->filename = filename;
|
||||||
if (Py_TabcheckFlag || Py_VerboseFlag) {
|
if (Py_TabcheckFlag || Py_VerboseFlag) {
|
||||||
tok->filename = filename;
|
|
||||||
tok->altwarning = (filename != NULL);
|
tok->altwarning = (filename != NULL);
|
||||||
if (Py_TabcheckFlag >= 2)
|
if (Py_TabcheckFlag >= 2)
|
||||||
tok->alterror++;
|
tok->alterror++;
|
||||||
|
@ -185,6 +186,13 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
||||||
err_ret->text[len] = '\0';
|
err_ret->text[len] = '\0';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (tok->encoding != NULL) {
|
||||||
|
node* r = PyNode_New(encoding_decl);
|
||||||
|
r->n_str = tok->encoding;
|
||||||
|
r->n_nchildren = 1;
|
||||||
|
r->n_child = n;
|
||||||
|
tok->encoding = NULL;
|
||||||
|
n = r;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyTokenizer_Free(tok);
|
PyTokenizer_Free(tok);
|
||||||
|
|
|
@ -5,10 +5,19 @@
|
||||||
#include "pgenheaders.h"
|
#include "pgenheaders.h"
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
#include "tokenizer.h"
|
#include "tokenizer.h"
|
||||||
#include "errcode.h"
|
#include "errcode.h"
|
||||||
|
|
||||||
|
#ifndef PGEN
|
||||||
|
#include "unicodeobject.h"
|
||||||
|
#include "stringobject.h"
|
||||||
|
#include "fileobject.h"
|
||||||
|
#include "codecs.h"
|
||||||
|
#include "abstract.h"
|
||||||
|
#endif /* PGEN */
|
||||||
|
|
||||||
extern char *PyOS_Readline(char *);
|
extern char *PyOS_Readline(char *);
|
||||||
/* Return malloc'ed string including trailing \n;
|
/* Return malloc'ed string including trailing \n;
|
||||||
empty malloc'ed string for EOF;
|
empty malloc'ed string for EOF;
|
||||||
|
@ -114,9 +123,416 @@ tok_new(void)
|
||||||
tok->alterror = 0;
|
tok->alterror = 0;
|
||||||
tok->alttabsize = 1;
|
tok->alttabsize = 1;
|
||||||
tok->altindstack[0] = 0;
|
tok->altindstack[0] = 0;
|
||||||
|
tok->decoding_state = 0;
|
||||||
|
tok->decoding_erred = 0;
|
||||||
|
tok->read_coding_spec = 0;
|
||||||
|
tok->issued_encoding_warning = 0;
|
||||||
|
tok->encoding = NULL;
|
||||||
|
tok->decoding_readline = NULL;
|
||||||
|
tok->decoding_buffer = NULL;
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef PGEN
|
||||||
|
|
||||||
|
static char *
|
||||||
|
decoding_fgets(char *s, int size, struct tok_state *tok)
|
||||||
|
{
|
||||||
|
return fgets(s, size, tok->fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
decoding_feof(struct tok_state *tok)
|
||||||
|
{
|
||||||
|
return feof(tok->fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PGEN build: strings are used as-is; STR is returned unchanged and
   TOK is not touched. */
static const char *
decode_str(const char *str, struct tok_state *tok)
{
	(void)tok;	/* unused when building pgen */
	return str;
}
|
||||||
|
|
||||||
|
#else /* PGEN */
|
||||||
|
|
||||||
|
static char *
|
||||||
|
error_ret(struct tok_state *tok) /* XXX */
|
||||||
|
{
|
||||||
|
tok->decoding_erred = 1;
|
||||||
|
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
|
||||||
|
PyMem_DEL(tok->buf);
|
||||||
|
tok->buf = NULL;
|
||||||
|
return NULL; /* as if it were EOF */
|
||||||
|
}
|
||||||
|
|
||||||
|
static char *
|
||||||
|
new_string(const char *s, int len)
|
||||||
|
{
|
||||||
|
char* result = PyMem_NEW(char, len + 1);
|
||||||
|
if (result != NULL) {
|
||||||
|
memcpy(result, s, len);
|
||||||
|
result[len] = '\0';
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Map the common spellings of UTF-8 and Latin-1 onto one canonical name.

   At most the first 12 characters of S are examined, lower-cased, with
   '_' replaced by '-'.  If the result is "utf-8" or starts with "utf-8-",
   the literal "utf-8" is returned; if it is "latin-1", "iso-8859-1" or
   "iso-latin-1" (optionally followed by "-..."), the literal "iso-8859-1"
   is returned.  Otherwise S itself is returned unchanged.

   The returned pointer is either S or a string literal; callers must not
   write through or free it in the latter case. */
static char *
get_normal_name(char *s)	/* for utf-8 and latin-1 */
{
	char buf[13];
	int i;

	for (i = 0; i < 12; i++) {
		/* <ctype.h> functions require a value representable as
		   unsigned char; a plain (possibly signed) char holding a
		   byte >= 0x80 would be undefined behaviour. */
		unsigned char c = (unsigned char)s[i];

		if (c == '\0')
			break;
		buf[i] = (c == '_') ? '-' : (char)tolower(c);
	}
	buf[i] = '\0';

	if (strcmp(buf, "utf-8") == 0 ||
	    strncmp(buf, "utf-8-", 6) == 0)
		return "utf-8";
	else if (strcmp(buf, "latin-1") == 0 ||
		 strcmp(buf, "iso-8859-1") == 0 ||
		 strcmp(buf, "iso-latin-1") == 0 ||
		 strncmp(buf, "latin-1-", 8) == 0 ||
		 strncmp(buf, "iso-8859-1-", 11) == 0 ||
		 strncmp(buf, "iso-latin-1-", 12) == 0)
		return "iso-8859-1";
	else
		return s;
}
|
||||||
|
|
||||||
|
/* Return the coding spec in S, or NULL if none is found.

   The first SIZE bytes of S are searched for "coding" immediately
   followed by ':' or '=', optional spaces/tabs, and a name made of
   alphanumerics, '-', '_' and '.'.  The name is passed through
   get_normal_name so that UTF-8 and Latin-1 aliases come back in
   canonical form.

   The result is a string allocated by new_string which the caller owns.
   NULL is also returned if memory for the result cannot be allocated. */

static char *
get_coding_spec(const char *s, int size)
{
	int i;

	for (i = 0; i < size - 6; i++) {	/* XXX inefficient search */
		const char *t = s + i;
		const char *begin;

		if (strncmp(t, "coding", 6) != 0)
			continue;
		t += 6;
		if (t[0] != ':' && t[0] != '=')
			continue;
		do {
			t++;
		} while (t[0] == '\x20' || t[0] == '\t');

		begin = t;
		/* Cast: isalnum is undefined for negative char values. */
		while (isalnum((unsigned char)t[0]) || t[0] == '-' ||
		       t[0] == '_' || t[0] == '.')
			t++;

		if (begin < t) {
			char *r = new_string(begin, (int)(t - begin));
			char *q;

			if (r == NULL)
				return NULL;	/* out of memory */
			q = get_normal_name(r);
			if (r != q) {
				/* The canonical name may be LONGER than the
				   alias (e.g. "latin-1" -> "iso-8859-1"), so
				   it cannot be copied over R in place;
				   allocate a fresh string instead. */
				PyMem_DEL(r);
				r = new_string(q, (int)strlen(q));
			}
			return r;
		}
	}
	return NULL;
}
|
||||||
|
|
||||||
|
/* Check whether the line contains a coding spec. If it does,
|
||||||
|
invoke the set_readline function for the new encoding.
|
||||||
|
This function receives the tok_state and the new encoding.
|
||||||
|
Return 1 on success, 0 on failure. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_coding_spec(const char* line, int size, struct tok_state *tok,
|
||||||
|
int set_readline(struct tok_state *, const char *))
|
||||||
|
{
|
||||||
|
int r = 1;
|
||||||
|
char* cs = get_coding_spec(line, size);
|
||||||
|
if (cs != NULL) {
|
||||||
|
tok->read_coding_spec = 1;
|
||||||
|
if (tok->encoding == NULL) {
|
||||||
|
assert(tok->decoding_state == 1); /* raw */
|
||||||
|
if (strcmp(cs, "utf-8") == 0 ||
|
||||||
|
strcmp(cs, "iso-8859-1") == 0) {
|
||||||
|
tok->encoding = cs;
|
||||||
|
} else {
|
||||||
|
r = set_readline(tok, cs);
|
||||||
|
if (r) {
|
||||||
|
tok->encoding = cs;
|
||||||
|
tok->decoding_state = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { /* then, compare cs with BOM */
|
||||||
|
r = (strcmp(tok->encoding, cs) == 0);
|
||||||
|
PyMem_DEL(cs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* See whether the file starts with a BOM. If it does,
|
||||||
|
invoke the set_readline function with the new encoding.
|
||||||
|
Return 1 on success, 0 on failure. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_bom(int get_char(struct tok_state *),
|
||||||
|
void unget_char(int, struct tok_state *),
|
||||||
|
int set_readline(struct tok_state *, const char *),
|
||||||
|
struct tok_state *tok)
|
||||||
|
{
|
||||||
|
int ch = get_char(tok);
|
||||||
|
tok->decoding_state = 1;
|
||||||
|
if (ch == EOF) {
|
||||||
|
return 1;
|
||||||
|
} else if (ch == 0xEF) {
|
||||||
|
ch = get_char(tok); if (ch != 0xBB) goto NON_BOM;
|
||||||
|
ch = get_char(tok); if (ch != 0xBF) goto NON_BOM;
|
||||||
|
#if 0
|
||||||
|
/* Disable support for UTF-16 BOMs until a decision
|
||||||
|
is made whether this needs to be supported. */
|
||||||
|
} else if (ch == 0xFE) {
|
||||||
|
ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
|
||||||
|
if (!set_readline(tok, "utf-16-be")) return 0;
|
||||||
|
tok->decoding_state = -1;
|
||||||
|
} else if (ch == 0xFF) {
|
||||||
|
ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
|
||||||
|
if (!set_readline(tok, "utf-16-le")) return 0;
|
||||||
|
tok->decoding_state = -1;
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
unget_char(ch, tok);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
|
||||||
|
return 1;
|
||||||
|
NON_BOM:
|
||||||
|
/* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
|
||||||
|
unget_char(0xFF, tok); /* XXX this will cause a syntax error */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read a line of text from TOK into S, using the stream in TOK.
|
||||||
|
Return NULL on failure, else S. */
|
||||||
|
|
||||||
|
static char *
|
||||||
|
fp_readl(char *s, int size, struct tok_state *tok)
|
||||||
|
{
|
||||||
|
PyObject* utf8;
|
||||||
|
PyObject* buf = tok->decoding_buffer;
|
||||||
|
if (buf == NULL) {
|
||||||
|
buf = PyObject_CallObject(tok->decoding_readline, NULL);
|
||||||
|
if (buf == NULL) return error_ret(tok);
|
||||||
|
} else {
|
||||||
|
tok->decoding_buffer = NULL;
|
||||||
|
}
|
||||||
|
utf8 = PyUnicode_AsUTF8String(buf);
|
||||||
|
Py_DECREF(buf);
|
||||||
|
if (utf8 == NULL) return error_ret(tok);
|
||||||
|
else {
|
||||||
|
const char* str = PyString_AsString(utf8);
|
||||||
|
assert(strlen(str) < size); /* XXX */
|
||||||
|
strcpy(s, str);
|
||||||
|
Py_DECREF(utf8);
|
||||||
|
if (s[0] == '\0') return NULL; /* EOF */
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the readline function for TOK to a StreamReader's
|
||||||
|
readline function. The StreamReader is named ENC.
|
||||||
|
|
||||||
|
This function is called from check_bom and check_coding_spec.
|
||||||
|
|
||||||
|
ENC is usually identical to the future value of tok->encoding,
|
||||||
|
except for the (currently unsupported) case of UTF-16.
|
||||||
|
|
||||||
|
Return 1 on success, 0 on failure. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
fp_setreadl(struct tok_state *tok, const char* enc)
|
||||||
|
{
|
||||||
|
PyObject *reader, *stream, *readline;
|
||||||
|
|
||||||
|
stream = PyFile_FromFile(tok->fp, tok->filename, "rb", NULL);
|
||||||
|
if (stream == NULL) return 0;
|
||||||
|
|
||||||
|
reader = PyCodec_StreamReader(enc, stream, NULL);
|
||||||
|
Py_DECREF(stream);
|
||||||
|
if (reader == NULL) return 0;
|
||||||
|
|
||||||
|
readline = PyObject_GetAttrString(reader, "readline");
|
||||||
|
Py_DECREF(reader);
|
||||||
|
if (readline == NULL) return 0;
|
||||||
|
|
||||||
|
tok->decoding_readline = readline;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fetch the next byte from TOK. */
|
||||||
|
|
||||||
|
static int fp_getc(struct tok_state *tok) {
|
||||||
|
return getc(tok->fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unfetch the last byte back into TOK. */
|
||||||
|
|
||||||
|
static void fp_ungetc(int c, struct tok_state *tok) {
|
||||||
|
ungetc(c, tok->fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Read a line of input from TOK. Determine encoding
|
||||||
|
if necessary. */
|
||||||
|
|
||||||
|
static char *
|
||||||
|
decoding_fgets(char *s, int size, struct tok_state *tok)
|
||||||
|
{
|
||||||
|
char *line;
|
||||||
|
int warn = 0, badchar = 0;
|
||||||
|
for (;;)
|
||||||
|
if (tok->decoding_state < 0) {
|
||||||
|
/* We already have a codec associated with
|
||||||
|
this input. */
|
||||||
|
line = fp_readl(s, size, tok);
|
||||||
|
break;
|
||||||
|
} else if (tok->decoding_state > 0) {
|
||||||
|
/* We want a 'raw' read. */
|
||||||
|
line = Py_UniversalNewlineFgets(s, size,
|
||||||
|
tok->fp, NULL);
|
||||||
|
warn = 1;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
/* We have not yet determined the encoding.
|
||||||
|
If an encoding is found, use the file-pointer
|
||||||
|
reader functions from now on. */
|
||||||
|
if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
|
||||||
|
return error_ret(tok);
|
||||||
|
assert(tok->decoding_state != 0);
|
||||||
|
}
|
||||||
|
if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
|
||||||
|
if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
|
||||||
|
return error_ret(tok);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifndef PGEN
|
||||||
|
if (warn && line && !tok->issued_encoding_warning && !tok->encoding) {
|
||||||
|
unsigned char *c;
|
||||||
|
for (c = line; *c; c++)
|
||||||
|
if (*c > 127) {
|
||||||
|
badchar = *c;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (badchar) {
|
||||||
|
char buf[200];
|
||||||
|
sprintf(buf, "Non-ASCII character '\\x%.2x', "
|
||||||
|
"but no declared encoding", badchar);
|
||||||
|
PyErr_WarnExplicit(PyExc_DeprecationWarning,
|
||||||
|
buf, tok->filename, tok->lineno,
|
||||||
|
NULL, NULL);
|
||||||
|
tok->issued_encoding_warning = 1;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return line;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
decoding_feof(struct tok_state *tok)
|
||||||
|
{
|
||||||
|
if (tok->decoding_state >= 0) {
|
||||||
|
return feof(tok->fp);
|
||||||
|
} else {
|
||||||
|
PyObject* buf = tok->decoding_buffer;
|
||||||
|
if (buf == NULL) {
|
||||||
|
buf = PyObject_CallObject(tok->decoding_readline, NULL);
|
||||||
|
if (buf == NULL) {
|
||||||
|
error_ret(tok);
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
tok->decoding_buffer = buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return PyObject_Length(buf) == 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fetch a byte from TOK, using the string buffer. */
|
||||||
|
|
||||||
|
static int buf_getc(struct tok_state *tok) {
|
||||||
|
return *tok->str++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Unfetch a byte from TOK, using the string buffer. */
|
||||||
|
|
||||||
|
static void buf_ungetc(int c, struct tok_state *tok) {
|
||||||
|
tok->str--;
|
||||||
|
assert(*tok->str == c); /* tok->cur may point to read-only segment */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the readline function for TOK to ENC. For the string-based
|
||||||
|
tokenizer, this means to just record the encoding. */
|
||||||
|
|
||||||
|
static int buf_setreadl(struct tok_state *tok, const char* enc) {
|
||||||
|
tok->enc = enc;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return a UTF-8 encoding Python string object from the
|
||||||
|
C byte string STR, which is encoded with ENC. */
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
translate_into_utf8(const char* str, const char* enc) {
|
||||||
|
PyObject *utf8;
|
||||||
|
PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
|
||||||
|
if (buf == NULL)
|
||||||
|
return NULL;
|
||||||
|
utf8 = PyUnicode_AsUTF8String(buf);
|
||||||
|
Py_DECREF(buf);
|
||||||
|
return utf8;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decode a byte string STR for use as the buffer of TOK.
|
||||||
|
Look for encoding declarations inside STR, and record them
|
||||||
|
inside TOK. */
|
||||||
|
|
||||||
|
static const char *
|
||||||
|
decode_str(const char *str, struct tok_state *tok)
|
||||||
|
{
|
||||||
|
PyObject* utf8 = NULL;
|
||||||
|
const char *s;
|
||||||
|
int lineno = 0;
|
||||||
|
tok->enc = NULL;
|
||||||
|
tok->str = str;
|
||||||
|
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
|
||||||
|
return NULL;
|
||||||
|
str = tok->str; /* string after BOM if any */
|
||||||
|
assert(r);
|
||||||
|
if (tok->enc != NULL) {
|
||||||
|
utf8 = translate_into_utf8(str, tok->enc);
|
||||||
|
if (utf8 == NULL)
|
||||||
|
return NULL;
|
||||||
|
str = PyString_AsString(utf8);
|
||||||
|
}
|
||||||
|
for (s = str;; s++) {
|
||||||
|
if (*s == '\0') break;
|
||||||
|
else if (*s == '\n') {
|
||||||
|
lineno++;
|
||||||
|
if (lineno == 2) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tok->enc = NULL;
|
||||||
|
if (!check_coding_spec(str, s - str, tok, buf_setreadl))
|
||||||
|
return NULL;
|
||||||
|
if (tok->enc != NULL) {
|
||||||
|
assert(utf8 == NULL);
|
||||||
|
utf8 = translate_into_utf8(str, tok->enc);
|
||||||
|
if (utf8 == NULL)
|
||||||
|
return NULL;
|
||||||
|
str = PyString_AsString(utf8);
|
||||||
|
}
|
||||||
|
assert(tok->decoding_buffer == NULL);
|
||||||
|
tok->decoding_buffer = utf8; /* CAUTION */
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* PGEN */
|
||||||
|
|
||||||
/* Set up tokenizer for string */
|
/* Set up tokenizer for string */
|
||||||
|
|
||||||
|
@ -126,6 +542,9 @@ PyTokenizer_FromString(char *str)
|
||||||
struct tok_state *tok = tok_new();
|
struct tok_state *tok = tok_new();
|
||||||
if (tok == NULL)
|
if (tok == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
str = (char *)decode_str(str, tok);
|
||||||
|
if (str == NULL)
|
||||||
|
return NULL;
|
||||||
tok->buf = tok->cur = tok->end = tok->inp = str;
|
tok->buf = tok->cur = tok->end = tok->inp = str;
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
@ -157,6 +576,10 @@ PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
|
||||||
void
|
void
|
||||||
PyTokenizer_Free(struct tok_state *tok)
|
PyTokenizer_Free(struct tok_state *tok)
|
||||||
{
|
{
|
||||||
|
if (tok->encoding != NULL)
|
||||||
|
PyMem_DEL(tok->encoding);
|
||||||
|
Py_XDECREF(tok->decoding_readline);
|
||||||
|
Py_XDECREF(tok->decoding_buffer);
|
||||||
if (tok->fp != NULL && tok->buf != NULL)
|
if (tok->fp != NULL && tok->buf != NULL)
|
||||||
PyMem_DEL(tok->buf);
|
PyMem_DEL(tok->buf);
|
||||||
PyMem_DEL(tok);
|
PyMem_DEL(tok);
|
||||||
|
@ -246,8 +669,8 @@ tok_nextc(register struct tok_state *tok)
|
||||||
}
|
}
|
||||||
tok->end = tok->buf + BUFSIZ;
|
tok->end = tok->buf + BUFSIZ;
|
||||||
}
|
}
|
||||||
if (Py_UniversalNewlineFgets(tok->buf, (int)(tok->end - tok->buf),
|
if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
|
||||||
tok->fp, NULL) == NULL) {
|
tok) == NULL) {
|
||||||
tok->done = E_EOF;
|
tok->done = E_EOF;
|
||||||
done = 1;
|
done = 1;
|
||||||
}
|
}
|
||||||
|
@ -259,7 +682,7 @@ tok_nextc(register struct tok_state *tok)
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
cur = tok->cur - tok->buf;
|
cur = tok->cur - tok->buf;
|
||||||
if (feof(tok->fp)) {
|
if (decoding_feof(tok)) {
|
||||||
tok->done = E_EOF;
|
tok->done = E_EOF;
|
||||||
done = 1;
|
done = 1;
|
||||||
}
|
}
|
||||||
|
@ -285,9 +708,9 @@ tok_nextc(register struct tok_state *tok)
|
||||||
tok->end = tok->buf + newsize;
|
tok->end = tok->buf + newsize;
|
||||||
tok->start = curstart < 0 ? NULL :
|
tok->start = curstart < 0 ? NULL :
|
||||||
tok->buf + curstart;
|
tok->buf + curstart;
|
||||||
if (Py_UniversalNewlineFgets(tok->inp,
|
if (decoding_fgets(tok->inp,
|
||||||
(int)(tok->end - tok->inp),
|
(int)(tok->end - tok->inp),
|
||||||
tok->fp, NULL) == NULL) {
|
tok) == NULL) {
|
||||||
/* Last line does not end in \n,
|
/* Last line does not end in \n,
|
||||||
fake one */
|
fake one */
|
||||||
strcpy(tok->inp, "\n");
|
strcpy(tok->inp, "\n");
|
||||||
|
@ -506,9 +929,8 @@ indenterror(struct tok_state *tok)
|
||||||
|
|
||||||
/* Get next token, after space stripping etc. */
|
/* Get next token, after space stripping etc. */
|
||||||
|
|
||||||
int
|
static int
|
||||||
PyTokenizer_Get(register struct tok_state *tok, char **p_start,
|
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
|
||||||
char **p_end)
|
|
||||||
{
|
{
|
||||||
register int c;
|
register int c;
|
||||||
int blankline;
|
int blankline;
|
||||||
|
@ -915,6 +1337,16 @@ PyTokenizer_Get(register struct tok_state *tok, char **p_start,
|
||||||
return PyToken_OneChar(c);
|
return PyToken_OneChar(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
|
{
|
||||||
|
int result = tok_get(tok, p_start, p_end);
|
||||||
|
if (tok->decoding_erred) {
|
||||||
|
result = ERRORTOKEN;
|
||||||
|
tok->done = E_DECODE;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "object.h"
|
||||||
|
|
||||||
/* Tokenizer interface */
|
/* Tokenizer interface */
|
||||||
|
|
||||||
|
@ -38,6 +39,16 @@ struct tok_state {
|
||||||
int alterror; /* Issue error if alternate tabs don't match */
|
int alterror; /* Issue error if alternate tabs don't match */
|
||||||
int alttabsize; /* Alternate tab spacing */
|
int alttabsize; /* Alternate tab spacing */
|
||||||
int altindstack[MAXINDENT]; /* Stack of alternate indents */
|
int altindstack[MAXINDENT]; /* Stack of alternate indents */
|
||||||
|
/* Stuff for PEP 0263 */
|
||||||
|
int decoding_state; /* -1:decoding, 0:init, 1:raw */
|
||||||
|
int decoding_erred; /* whether erred in decoding */
|
||||||
|
int read_coding_spec; /* whether 'coding:...' has been read */
|
||||||
|
int issued_encoding_warning; /* whether non-ASCII warning was issued */
|
||||||
|
char *encoding;
|
||||||
|
PyObject *decoding_readline; /* codecs.open(...).readline */
|
||||||
|
PyObject *decoding_buffer;
|
||||||
|
const char* enc;
|
||||||
|
const char* str;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern struct tok_state *PyTokenizer_FromString(char *);
|
extern struct tok_state *PyTokenizer_FromString(char *);
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
#define PGEN
|
||||||
|
#include "tokenizer.c"
|
123
Python/compile.c
123
Python/compile.c
|
@ -485,6 +485,7 @@ struct compiling {
|
||||||
int c_closure; /* Is nested w/freevars? */
|
int c_closure; /* Is nested w/freevars? */
|
||||||
struct symtable *c_symtable; /* pointer to module symbol table */
|
struct symtable *c_symtable; /* pointer to module symbol table */
|
||||||
PyFutureFeatures *c_future; /* pointer to module's __future__ */
|
PyFutureFeatures *c_future; /* pointer to module's __future__ */
|
||||||
|
char *c_encoding; /* source encoding (a borrowed reference) */
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -1181,6 +1182,23 @@ parsenumber(struct compiling *co, char *s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Consume the run of bytes with the high bit set that starts at *sPtr
   (stopping at END), decode that run as UTF-8 and return it re-encoded
   in ENCODING as a new string object.  *sPtr is advanced past the run
   before decoding is attempted.  Returns NULL if decoding fails, or
   whatever PyUnicode_AsEncodedString returns otherwise. */
static PyObject *
decode_utf8(char **sPtr, char *end, char* encoding)
{
	char *start = *sPtr;
	char *cursor = start;
	PyObject *unicode;
	PyObject *encoded;

	while (cursor < end && (*cursor & 0x80))
		cursor++;
	*sPtr = cursor;

	unicode = PyUnicode_DecodeUTF8(start, cursor - start, NULL);
	if (unicode == NULL)
		return NULL;
	encoded = PyUnicode_AsEncodedString(unicode, encoding, NULL);
	Py_DECREF(unicode);
	return encoded;
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
parsestr(struct compiling *com, char *s)
|
parsestr(struct compiling *com, char *s)
|
||||||
{
|
{
|
||||||
|
@ -1193,6 +1211,8 @@ parsestr(struct compiling *com, char *s)
|
||||||
int first = *s;
|
int first = *s;
|
||||||
int quote = first;
|
int quote = first;
|
||||||
int rawmode = 0;
|
int rawmode = 0;
|
||||||
|
char* encoding = ((com == NULL) ? NULL : com->c_encoding);
|
||||||
|
int need_encoding;
|
||||||
int unicode = 0;
|
int unicode = 0;
|
||||||
|
|
||||||
if (isalpha(quote) || quote == '_') {
|
if (isalpha(quote) || quote == '_') {
|
||||||
|
@ -1230,28 +1250,101 @@ parsestr(struct compiling *com, char *s)
|
||||||
}
|
}
|
||||||
#ifdef Py_USING_UNICODE
|
#ifdef Py_USING_UNICODE
|
||||||
if (unicode || Py_UnicodeFlag) {
|
if (unicode || Py_UnicodeFlag) {
|
||||||
|
PyObject *u, *w;
|
||||||
|
if (encoding == NULL) {
|
||||||
|
buf = s;
|
||||||
|
u = NULL;
|
||||||
|
} else if (strcmp(encoding, "iso-8859-1") == 0) {
|
||||||
|
buf = s;
|
||||||
|
u = NULL;
|
||||||
|
} else {
|
||||||
|
/* "\XX" may become "\u005c\uHHLL" (12 bytes) */
|
||||||
|
u = PyString_FromStringAndSize((char *)NULL, len * 4);
|
||||||
|
if (u == NULL)
|
||||||
|
return NULL;
|
||||||
|
p = buf = PyString_AsString(u);
|
||||||
|
end = s + len;
|
||||||
|
while (s < end) {
|
||||||
|
if (*s == '\\') {
|
||||||
|
*p++ = *s++;
|
||||||
|
if (*s & 0x80) {
|
||||||
|
strcpy(p, "u005c");
|
||||||
|
p += 5;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (*s & 0x80) { /* XXX inefficient */
|
||||||
|
char *r;
|
||||||
|
int rn, i;
|
||||||
|
w = decode_utf8(&s, end, "utf-16-be");
|
||||||
|
if (w == NULL) {
|
||||||
|
Py_DECREF(u);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
r = PyString_AsString(w);
|
||||||
|
rn = PyString_Size(w);
|
||||||
|
assert(rn % 2 == 0);
|
||||||
|
for (i = 0; i < rn; i += 2) {
|
||||||
|
sprintf(p, "\\u%02x%02x",
|
||||||
|
r[i + 0] & 0xFF,
|
||||||
|
r[i + 1] & 0xFF);
|
||||||
|
p += 6;
|
||||||
|
}
|
||||||
|
Py_DECREF(w);
|
||||||
|
} else {
|
||||||
|
*p++ = *s++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
len = p - buf;
|
||||||
|
}
|
||||||
if (rawmode)
|
if (rawmode)
|
||||||
v = PyUnicode_DecodeRawUnicodeEscape(
|
v = PyUnicode_DecodeRawUnicodeEscape(buf, len, NULL);
|
||||||
s, len, NULL);
|
|
||||||
else
|
else
|
||||||
v = PyUnicode_DecodeUnicodeEscape(
|
v = PyUnicode_DecodeUnicodeEscape(buf, len, NULL);
|
||||||
s, len, NULL);
|
Py_XDECREF(u);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
|
PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
|
||||||
return v;
|
return v;
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (rawmode || strchr(s, '\\') == NULL)
|
need_encoding = (encoding != NULL &&
|
||||||
return PyString_FromStringAndSize(s, len);
|
strcmp(encoding, "utf-8") != 0 &&
|
||||||
v = PyString_FromStringAndSize((char *)NULL, len);
|
strcmp(encoding, "iso-8859-1") != 0);
|
||||||
|
if (rawmode || strchr(s, '\\') == NULL) {
|
||||||
|
if (need_encoding) {
|
||||||
|
PyObject* u = PyUnicode_DecodeUTF8(s, len, NULL);
|
||||||
|
if (u == NULL)
|
||||||
|
return NULL;
|
||||||
|
v = PyUnicode_AsEncodedString(u, encoding, NULL);
|
||||||
|
Py_DECREF(u);
|
||||||
|
return v;
|
||||||
|
} else {
|
||||||
|
return PyString_FromStringAndSize(s, len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
|
||||||
|
need_encoding ? len * 4 : len);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
p = buf = PyString_AsString(v);
|
p = buf = PyString_AsString(v);
|
||||||
end = s + len;
|
end = s + len;
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
if (*s != '\\') {
|
if (*s != '\\') {
|
||||||
*p++ = *s++;
|
ORDINAL:
|
||||||
|
if (need_encoding && (*s & 0x80)) {
|
||||||
|
char *r;
|
||||||
|
int rn;
|
||||||
|
PyObject* w = decode_utf8(&s, end, encoding);
|
||||||
|
if (w == NULL)
|
||||||
|
return NULL;
|
||||||
|
r = PyString_AsString(w);
|
||||||
|
rn = PyString_Size(w);
|
||||||
|
memcpy(p, r, rn);
|
||||||
|
p += rn;
|
||||||
|
Py_DECREF(w);
|
||||||
|
} else {
|
||||||
|
*p++ = *s++;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
s++;
|
s++;
|
||||||
|
@ -1320,8 +1413,8 @@ parsestr(struct compiling *com, char *s)
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
*p++ = '\\';
|
*p++ = '\\';
|
||||||
*p++ = s[-1];
|
s--;
|
||||||
break;
|
goto ORDINAL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_PyString_Resize(&v, (int)(p - buf));
|
_PyString_Resize(&v, (int)(p - buf));
|
||||||
|
@ -4149,6 +4242,12 @@ jcompile(node *n, char *filename, struct compiling *base,
|
||||||
PyCodeObject *co;
|
PyCodeObject *co;
|
||||||
if (!com_init(&sc, filename))
|
if (!com_init(&sc, filename))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
if (TYPE(n) == encoding_decl) {
|
||||||
|
sc.c_encoding = STR(n);
|
||||||
|
n = CHILD(n, 0);
|
||||||
|
} else {
|
||||||
|
sc.c_encoding = NULL;
|
||||||
|
}
|
||||||
if (base) {
|
if (base) {
|
||||||
sc.c_private = base->c_private;
|
sc.c_private = base->c_private;
|
||||||
sc.c_symtable = base->c_symtable;
|
sc.c_symtable = base->c_symtable;
|
||||||
|
@ -4157,6 +4256,10 @@ jcompile(node *n, char *filename, struct compiling *base,
|
||||||
|| (sc.c_symtable->st_cur->ste_type == TYPE_FUNCTION))
|
|| (sc.c_symtable->st_cur->ste_type == TYPE_FUNCTION))
|
||||||
sc.c_nested = 1;
|
sc.c_nested = 1;
|
||||||
sc.c_flags |= base->c_flags & PyCF_MASK;
|
sc.c_flags |= base->c_flags & PyCF_MASK;
|
||||||
|
if (base->c_encoding != NULL) {
|
||||||
|
assert(sc.c_encoding == NULL);
|
||||||
|
sc.c_encoding = base->c_encoding;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
sc.c_private = NULL;
|
sc.c_private = NULL;
|
||||||
sc.c_future = PyNode_Future(n, filename);
|
sc.c_future = PyNode_Future(n, filename);
|
||||||
|
|
|
@ -1463,7 +1463,17 @@ static state states_66[2] = {
|
||||||
{1, arcs_66_0},
|
{1, arcs_66_0},
|
||||||
{2, arcs_66_1},
|
{2, arcs_66_1},
|
||||||
};
|
};
|
||||||
static dfa dfas[67] = {
|
static arc arcs_67_0[1] = {
|
||||||
|
{12, 1},
|
||||||
|
};
|
||||||
|
static arc arcs_67_1[1] = {
|
||||||
|
{0, 1},
|
||||||
|
};
|
||||||
|
static state states_67[2] = {
|
||||||
|
{1, arcs_67_0},
|
||||||
|
{1, arcs_67_1},
|
||||||
|
};
|
||||||
|
static dfa dfas[68] = {
|
||||||
{256, "single_input", 0, 3, states_0,
|
{256, "single_input", 0, 3, states_0,
|
||||||
"\004\030\001\000\000\000\124\360\213\011\162\000\002\000\140\210\244\005\001"},
|
"\004\030\001\000\000\000\124\360\213\011\162\000\002\000\140\210\244\005\001"},
|
||||||
{257, "file_input", 0, 2, states_1,
|
{257, "file_input", 0, 2, states_1,
|
||||||
|
@ -1598,8 +1608,10 @@ static dfa dfas[67] = {
|
||||||
"\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"},
|
"\000\000\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\000"},
|
||||||
{322, "testlist1", 0, 2, states_66,
|
{322, "testlist1", 0, 2, states_66,
|
||||||
"\000\020\001\000\000\000\000\000\000\000\000\000\002\000\140\210\244\005\000"},
|
"\000\020\001\000\000\000\000\000\000\000\000\000\002\000\140\210\244\005\000"},
|
||||||
|
{323, "encoding_decl", 0, 2, states_67,
|
||||||
|
"\000\020\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"},
|
||||||
};
|
};
|
||||||
static label labels[148] = {
|
static label labels[149] = {
|
||||||
{0, "EMPTY"},
|
{0, "EMPTY"},
|
||||||
{256, 0},
|
{256, 0},
|
||||||
{4, 0},
|
{4, 0},
|
||||||
|
@ -1748,10 +1760,11 @@ static label labels[148] = {
|
||||||
{318, 0},
|
{318, 0},
|
||||||
{319, 0},
|
{319, 0},
|
||||||
{321, 0},
|
{321, 0},
|
||||||
|
{323, 0},
|
||||||
};
|
};
|
||||||
grammar _PyParser_Grammar = {
|
grammar _PyParser_Grammar = {
|
||||||
67,
|
68,
|
||||||
dfas,
|
dfas,
|
||||||
{148, labels},
|
{149, labels},
|
||||||
256
|
256
|
||||||
};
|
};
|
||||||
|
|
|
@ -1221,6 +1221,7 @@ static void
|
||||||
err_input(perrdetail *err)
|
err_input(perrdetail *err)
|
||||||
{
|
{
|
||||||
PyObject *v, *w, *errtype;
|
PyObject *v, *w, *errtype;
|
||||||
|
PyObject* u = NULL;
|
||||||
char *msg = NULL;
|
char *msg = NULL;
|
||||||
errtype = PyExc_SyntaxError;
|
errtype = PyExc_SyntaxError;
|
||||||
v = Py_BuildValue("(ziiz)", err->filename,
|
v = Py_BuildValue("(ziiz)", err->filename,
|
||||||
|
@ -1272,12 +1273,24 @@ err_input(perrdetail *err)
|
||||||
errtype = PyExc_IndentationError;
|
errtype = PyExc_IndentationError;
|
||||||
msg = "too many levels of indentation";
|
msg = "too many levels of indentation";
|
||||||
break;
|
break;
|
||||||
|
case E_DECODE: { /* XXX */
|
||||||
|
PyThreadState* tstate = PyThreadState_Get();
|
||||||
|
PyObject* value = tstate->curexc_value;
|
||||||
|
if (value != NULL) {
|
||||||
|
u = PyObject_Repr(value);
|
||||||
|
if (u != NULL) {
|
||||||
|
msg = PyString_AsString(u);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "error=%d\n", err->error);
|
fprintf(stderr, "error=%d\n", err->error);
|
||||||
msg = "unknown parsing error";
|
msg = "unknown parsing error";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
w = Py_BuildValue("(sO)", msg, v);
|
w = Py_BuildValue("(sO)", msg, v);
|
||||||
|
Py_XDECREF(u);
|
||||||
Py_XDECREF(v);
|
Py_XDECREF(v);
|
||||||
PyErr_SetObject(errtype, w);
|
PyErr_SetObject(errtype, w);
|
||||||
Py_XDECREF(w);
|
Py_XDECREF(w);
|
||||||
|
|
Loading…
Reference in New Issue