closes bpo-39721: Fix constness of members of tok_state struct. (GH-18600)
The function PyTokenizer_FromUTF8 from Parser/tokenizer.c had a comment: /* XXX: constify members. */ This patch addresses that. In the tok_state struct: * end and start were non-const but could be made const * str and input were const but should have been non-const Changes to support this include: * decode_str() now returns a char * since it is allocated. * PyTokenizer_FromString() and PyTokenizer_FromUTF8() each creates a new char * for an allocated string instead of reusing the input const char *. * PyTokenizer_Get() and tok_get() now take const char ** arguments. * Various local vars are const or non-const accordingly. I was able to remove five casts that cast away constness.
This commit is contained in:
parent
766b7546a5
commit
384f3c536d
|
@ -240,7 +240,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
char *a, *b;
|
const char *a, *b;
|
||||||
int type;
|
int type;
|
||||||
size_t len;
|
size_t len;
|
||||||
char *str;
|
char *str;
|
||||||
|
@ -371,7 +371,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
|
||||||
buffer after parsing. Trailing whitespace and comments
|
buffer after parsing. Trailing whitespace and comments
|
||||||
are OK. */
|
are OK. */
|
||||||
if (err_ret->error == E_DONE && start == single_input) {
|
if (err_ret->error == E_DONE && start == single_input) {
|
||||||
char *cur = tok->cur;
|
const char *cur = tok->cur;
|
||||||
char c = *tok->cur;
|
char c = *tok->cur;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|
|
@ -59,7 +59,9 @@ tok_new(void)
|
||||||
sizeof(struct tok_state));
|
sizeof(struct tok_state));
|
||||||
if (tok == NULL)
|
if (tok == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
|
tok->buf = tok->cur = tok->inp = NULL;
|
||||||
|
tok->start = NULL;
|
||||||
|
tok->end = NULL;
|
||||||
tok->done = E_OK;
|
tok->done = E_OK;
|
||||||
tok->fp = NULL;
|
tok->fp = NULL;
|
||||||
tok->input = NULL;
|
tok->input = NULL;
|
||||||
|
@ -111,7 +113,9 @@ error_ret(struct tok_state *tok) /* XXX */
|
||||||
tok->decoding_erred = 1;
|
tok->decoding_erred = 1;
|
||||||
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
|
if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
|
||||||
PyMem_FREE(tok->buf);
|
PyMem_FREE(tok->buf);
|
||||||
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
|
tok->buf = tok->cur = tok->inp = NULL;
|
||||||
|
tok->start = NULL;
|
||||||
|
tok->end = NULL;
|
||||||
tok->done = E_DECODE;
|
tok->done = E_DECODE;
|
||||||
return NULL; /* as if it were EOF */
|
return NULL; /* as if it were EOF */
|
||||||
}
|
}
|
||||||
|
@ -664,11 +668,11 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
|
||||||
Look for encoding declarations inside STR, and record them
|
Look for encoding declarations inside STR, and record them
|
||||||
inside TOK. */
|
inside TOK. */
|
||||||
|
|
||||||
static const char *
|
static char *
|
||||||
decode_str(const char *input, int single, struct tok_state *tok)
|
decode_str(const char *input, int single, struct tok_state *tok)
|
||||||
{
|
{
|
||||||
PyObject* utf8 = NULL;
|
PyObject* utf8 = NULL;
|
||||||
const char *str;
|
char *str;
|
||||||
const char *s;
|
const char *s;
|
||||||
const char *newl[2] = {NULL, NULL};
|
const char *newl[2] = {NULL, NULL};
|
||||||
int lineno = 0;
|
int lineno = 0;
|
||||||
|
@ -726,16 +730,18 @@ struct tok_state *
|
||||||
PyTokenizer_FromString(const char *str, int exec_input)
|
PyTokenizer_FromString(const char *str, int exec_input)
|
||||||
{
|
{
|
||||||
struct tok_state *tok = tok_new();
|
struct tok_state *tok = tok_new();
|
||||||
|
char *decoded;
|
||||||
|
|
||||||
if (tok == NULL)
|
if (tok == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
str = decode_str(str, exec_input, tok);
|
decoded = decode_str(str, exec_input, tok);
|
||||||
if (str == NULL) {
|
if (decoded == NULL) {
|
||||||
PyTokenizer_Free(tok);
|
PyTokenizer_Free(tok);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX: constify members. */
|
tok->buf = tok->cur = tok->inp = decoded;
|
||||||
tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
|
tok->end = decoded;
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -743,17 +749,18 @@ struct tok_state *
|
||||||
PyTokenizer_FromUTF8(const char *str, int exec_input)
|
PyTokenizer_FromUTF8(const char *str, int exec_input)
|
||||||
{
|
{
|
||||||
struct tok_state *tok = tok_new();
|
struct tok_state *tok = tok_new();
|
||||||
|
char *translated;
|
||||||
if (tok == NULL)
|
if (tok == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
tok->input = str = translate_newlines(str, exec_input, tok);
|
tok->input = translated = translate_newlines(str, exec_input, tok);
|
||||||
if (str == NULL) {
|
if (translated == NULL) {
|
||||||
PyTokenizer_Free(tok);
|
PyTokenizer_Free(tok);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
tok->decoding_state = STATE_RAW;
|
tok->decoding_state = STATE_RAW;
|
||||||
tok->read_coding_spec = 1;
|
tok->read_coding_spec = 1;
|
||||||
tok->enc = NULL;
|
tok->enc = NULL;
|
||||||
tok->str = str;
|
tok->str = translated;
|
||||||
tok->encoding = (char *)PyMem_MALLOC(6);
|
tok->encoding = (char *)PyMem_MALLOC(6);
|
||||||
if (!tok->encoding) {
|
if (!tok->encoding) {
|
||||||
PyTokenizer_Free(tok);
|
PyTokenizer_Free(tok);
|
||||||
|
@ -761,8 +768,8 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
|
||||||
}
|
}
|
||||||
strcpy(tok->encoding, "utf-8");
|
strcpy(tok->encoding, "utf-8");
|
||||||
|
|
||||||
/* XXX: constify members. */
|
tok->buf = tok->cur = tok->inp = translated;
|
||||||
tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
|
tok->end = translated;
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -812,7 +819,7 @@ PyTokenizer_Free(struct tok_state *tok)
|
||||||
if (tok->fp != NULL && tok->buf != NULL)
|
if (tok->fp != NULL && tok->buf != NULL)
|
||||||
PyMem_FREE(tok->buf);
|
PyMem_FREE(tok->buf);
|
||||||
if (tok->input)
|
if (tok->input)
|
||||||
PyMem_FREE((char *)tok->input);
|
PyMem_FREE(tok->input);
|
||||||
PyMem_FREE(tok);
|
PyMem_FREE(tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1138,7 +1145,7 @@ tok_decimal_tail(struct tok_state *tok)
|
||||||
/* Get next token, after space stripping etc. */
|
/* Get next token, after space stripping etc. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
int blankline, nonascii;
|
int blankline, nonascii;
|
||||||
|
@ -1321,7 +1328,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
&& ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
|
&& ((unsigned char)ignore_end[0] >= 128 || Py_ISALNUM(ignore_end[0]))));
|
||||||
|
|
||||||
if (is_type_ignore) {
|
if (is_type_ignore) {
|
||||||
*p_start = (char *) ignore_end;
|
*p_start = ignore_end;
|
||||||
*p_end = tok->cur;
|
*p_end = tok->cur;
|
||||||
|
|
||||||
/* If this type ignore is the only thing on the line, consume the newline also. */
|
/* If this type ignore is the only thing on the line, consume the newline also. */
|
||||||
|
@ -1331,7 +1338,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
}
|
}
|
||||||
return TYPE_IGNORE;
|
return TYPE_IGNORE;
|
||||||
} else {
|
} else {
|
||||||
*p_start = (char *) type_start; /* after type_comment_prefix */
|
*p_start = type_start; /* after type_comment_prefix */
|
||||||
*p_end = tok->cur;
|
*p_end = tok->cur;
|
||||||
return TYPE_COMMENT;
|
return TYPE_COMMENT;
|
||||||
}
|
}
|
||||||
|
@ -1410,7 +1417,8 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
Look ahead one token to see if that is 'def'. */
|
Look ahead one token to see if that is 'def'. */
|
||||||
|
|
||||||
struct tok_state ahead_tok;
|
struct tok_state ahead_tok;
|
||||||
char *ahead_tok_start = NULL, *ahead_tok_end = NULL;
|
const char *ahead_tok_start = NULL;
|
||||||
|
const char *ahead_tok_end = NULL;
|
||||||
int ahead_tok_kind;
|
int ahead_tok_kind;
|
||||||
|
|
||||||
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
|
memcpy(&ahead_tok, tok, sizeof(ahead_tok));
|
||||||
|
@ -1798,7 +1806,7 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
|
PyTokenizer_Get(struct tok_state *tok, const char **p_start, const char **p_end)
|
||||||
{
|
{
|
||||||
int result = tok_get(tok, p_start, p_end);
|
int result = tok_get(tok, p_start, p_end);
|
||||||
if (tok->decoding_erred) {
|
if (tok->decoding_erred) {
|
||||||
|
@ -1823,7 +1831,9 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
|
||||||
{
|
{
|
||||||
struct tok_state *tok;
|
struct tok_state *tok;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
char *p_start =NULL , *p_end =NULL , *encoding = NULL;
|
const char *p_start = NULL;
|
||||||
|
const char *p_end = NULL;
|
||||||
|
char *encoding = NULL;
|
||||||
|
|
||||||
fd = _Py_dup(fd);
|
fd = _Py_dup(fd);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
|
|
|
@ -26,8 +26,8 @@ struct tok_state {
|
||||||
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
|
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
|
||||||
char *cur; /* Next character in buffer */
|
char *cur; /* Next character in buffer */
|
||||||
char *inp; /* End of data in buffer */
|
char *inp; /* End of data in buffer */
|
||||||
char *end; /* End of input buffer if buf != NULL */
|
const char *end; /* End of input buffer if buf != NULL */
|
||||||
char *start; /* Start of current token if not NULL */
|
const char *start; /* Start of current token if not NULL */
|
||||||
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
|
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
|
||||||
/* NB If done != E_OK, cur must be == inp!!! */
|
/* NB If done != E_OK, cur must be == inp!!! */
|
||||||
FILE *fp; /* Rest of input; NULL if tokenizing a string */
|
FILE *fp; /* Rest of input; NULL if tokenizing a string */
|
||||||
|
@ -60,8 +60,8 @@ struct tok_state {
|
||||||
PyObject *decoding_readline; /* open(...).readline */
|
PyObject *decoding_readline; /* open(...).readline */
|
||||||
PyObject *decoding_buffer;
|
PyObject *decoding_buffer;
|
||||||
const char* enc; /* Encoding for the current str. */
|
const char* enc; /* Encoding for the current str. */
|
||||||
const char* str;
|
char* str;
|
||||||
const char* input; /* Tokenizer's newline translated copy of the string. */
|
char* input; /* Tokenizer's newline translated copy of the string. */
|
||||||
|
|
||||||
int type_comments; /* Whether to look for type comments */
|
int type_comments; /* Whether to look for type comments */
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
|
||||||
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
|
extern struct tok_state *PyTokenizer_FromFile(FILE *, const char*,
|
||||||
const char *, const char *);
|
const char *, const char *);
|
||||||
extern void PyTokenizer_Free(struct tok_state *);
|
extern void PyTokenizer_Free(struct tok_state *);
|
||||||
extern int PyTokenizer_Get(struct tok_state *, char **, char **);
|
extern int PyTokenizer_Get(struct tok_state *, const char **, const char **);
|
||||||
|
|
||||||
#define tok_dump _Py_tok_dump
|
#define tok_dump _Py_tok_dump
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue