merge 3.3 (closes #18470)
This commit is contained in:
commit
fd9c0203de
|
@ -147,13 +147,15 @@ tok_new(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *
|
static char *
|
||||||
new_string(const char *s, Py_ssize_t len)
|
new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
|
||||||
{
|
{
|
||||||
char* result = (char *)PyMem_MALLOC(len + 1);
|
char* result = (char *)PyMem_MALLOC(len + 1);
|
||||||
if (result != NULL) {
|
if (!result) {
|
||||||
memcpy(result, s, len);
|
tok->done = E_NOMEM;
|
||||||
result[len] = '\0';
|
return NULL;
|
||||||
}
|
}
|
||||||
|
memcpy(result, s, len);
|
||||||
|
result[len] = '\0';
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -174,7 +176,7 @@ decoding_feof(struct tok_state *tok)
|
||||||
static char *
|
static char *
|
||||||
decode_str(const char *str, int exec_input, struct tok_state *tok)
|
decode_str(const char *str, int exec_input, struct tok_state *tok)
|
||||||
{
|
{
|
||||||
return new_string(str, strlen(str));
|
return new_string(str, strlen(str), tok);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* PGEN */
|
#else /* PGEN */
|
||||||
|
@ -221,17 +223,18 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
|
||||||
|
|
||||||
/* Return the coding spec in S, or NULL if none is found. */
|
/* Store the coding spec found in S in *SPEC (NULL if none is found). Return 1 on success, 0 on memory error. */
|
||||||
|
|
||||||
static char *
|
static int
|
||||||
get_coding_spec(const char *s, Py_ssize_t size)
|
get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
|
||||||
{
|
{
|
||||||
Py_ssize_t i;
|
Py_ssize_t i;
|
||||||
|
*spec = NULL;
|
||||||
/* Coding spec must be in a comment, and that comment must be
|
/* Coding spec must be in a comment, and that comment must be
|
||||||
* the only statement on the source code line. */
|
* the only statement on the source code line. */
|
||||||
for (i = 0; i < size - 6; i++) {
|
for (i = 0; i < size - 6; i++) {
|
||||||
if (s[i] == '#')
|
if (s[i] == '#')
|
||||||
break;
|
break;
|
||||||
if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
|
if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
|
||||||
return NULL;
|
return 1;
|
||||||
}
|
}
|
||||||
for (; i < size - 6; i++) { /* XXX inefficient search */
|
for (; i < size - 6; i++) { /* XXX inefficient search */
|
||||||
const char* t = s + i;
|
const char* t = s + i;
|
||||||
|
@ -250,17 +253,21 @@ get_coding_spec(const char *s, Py_ssize_t size)
|
||||||
t++;
|
t++;
|
||||||
|
|
||||||
if (begin < t) {
|
if (begin < t) {
|
||||||
char* r = new_string(begin, t - begin);
|
char* r = new_string(begin, t - begin, tok);
|
||||||
|
if (!r)
|
||||||
|
return 0;
|
||||||
char* q = get_normal_name(r);
|
char* q = get_normal_name(r);
|
||||||
if (r != q) {
|
if (r != q) {
|
||||||
PyMem_FREE(r);
|
PyMem_FREE(r);
|
||||||
r = new_string(q, strlen(q));
|
r = new_string(q, strlen(q), tok);
|
||||||
|
if (!r)
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
return r;
|
*spec = r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check whether the line contains a coding spec. If it does,
|
/* Check whether the line contains a coding spec. If it does,
|
||||||
|
@ -272,38 +279,39 @@ static int
|
||||||
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
|
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
|
||||||
int set_readline(struct tok_state *, const char *))
|
int set_readline(struct tok_state *, const char *))
|
||||||
{
|
{
|
||||||
char * cs;
|
char *cs;
|
||||||
int r = 1;
|
int r = 1;
|
||||||
|
|
||||||
if (tok->cont_line)
|
if (tok->cont_line)
|
||||||
/* It's a continuation line, so it can't be a coding spec. */
|
/* It's a continuation line, so it can't be a coding spec. */
|
||||||
return 1;
|
return 1;
|
||||||
cs = get_coding_spec(line, size);
|
if (!get_coding_spec(line, &cs, size, tok))
|
||||||
if (cs != NULL) {
|
return 0;
|
||||||
tok->read_coding_spec = 1;
|
if (!cs)
|
||||||
if (tok->encoding == NULL) {
|
return 1;
|
||||||
assert(tok->decoding_state == STATE_RAW);
|
tok->read_coding_spec = 1;
|
||||||
if (strcmp(cs, "utf-8") == 0) {
|
if (tok->encoding == NULL) {
|
||||||
|
assert(tok->decoding_state == STATE_RAW);
|
||||||
|
if (strcmp(cs, "utf-8") == 0) {
|
||||||
|
tok->encoding = cs;
|
||||||
|
} else {
|
||||||
|
r = set_readline(tok, cs);
|
||||||
|
if (r) {
|
||||||
tok->encoding = cs;
|
tok->encoding = cs;
|
||||||
} else {
|
tok->decoding_state = STATE_NORMAL;
|
||||||
r = set_readline(tok, cs);
|
|
||||||
if (r) {
|
|
||||||
tok->encoding = cs;
|
|
||||||
tok->decoding_state = STATE_NORMAL;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
PyErr_Format(PyExc_SyntaxError,
|
|
||||||
"encoding problem: %s", cs);
|
|
||||||
PyMem_FREE(cs);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else { /* then, compare cs with BOM */
|
else {
|
||||||
r = (strcmp(tok->encoding, cs) == 0);
|
|
||||||
if (!r)
|
|
||||||
PyErr_Format(PyExc_SyntaxError,
|
PyErr_Format(PyExc_SyntaxError,
|
||||||
"encoding problem: %s with BOM", cs);
|
"encoding problem: %s", cs);
|
||||||
PyMem_FREE(cs);
|
PyMem_FREE(cs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else { /* then, compare cs with BOM */
|
||||||
|
r = (strcmp(tok->encoding, cs) == 0);
|
||||||
|
if (!r)
|
||||||
|
PyErr_Format(PyExc_SyntaxError,
|
||||||
|
"encoding problem: %s with BOM", cs);
|
||||||
|
PyMem_FREE(cs);
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -367,7 +375,9 @@ check_bom(int get_char(struct tok_state *),
|
||||||
}
|
}
|
||||||
if (tok->encoding != NULL)
|
if (tok->encoding != NULL)
|
||||||
PyMem_FREE(tok->encoding);
|
PyMem_FREE(tok->encoding);
|
||||||
tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */
|
tok->encoding = new_string("utf-8", 5, tok);
|
||||||
|
if (!tok->encoding)
|
||||||
|
return 0;
|
||||||
/* No need to set_readline: input is already utf-8 */
|
/* No need to set_readline: input is already utf-8 */
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue