merge 3.3 (closes #18470)

This commit is contained in:
Benjamin Peterson 2013-07-15 20:47:47 -07:00
commit fd9c0203de
1 changed file with 46 additions and 36 deletions

View File

@ -147,13 +147,15 @@ tok_new(void)
} }
static char * static char *
new_string(const char *s, Py_ssize_t len) new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
{ {
char* result = (char *)PyMem_MALLOC(len + 1); char* result = (char *)PyMem_MALLOC(len + 1);
if (result != NULL) { if (!result) {
memcpy(result, s, len); tok->done = E_NOMEM;
result[len] = '\0'; return NULL;
} }
memcpy(result, s, len);
result[len] = '\0';
return result; return result;
} }
@ -174,7 +176,7 @@ decoding_feof(struct tok_state *tok)
static char * static char *
decode_str(const char *str, int exec_input, struct tok_state *tok) decode_str(const char *str, int exec_input, struct tok_state *tok)
{ {
return new_string(str, strlen(str)); return new_string(str, strlen(str), tok);
} }
#else /* PGEN */ #else /* PGEN */
@ -221,17 +223,18 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
/* Return the coding spec in S, or NULL if none is found. */ /* Return the coding spec in S, or NULL if none is found. */
static char * static int
get_coding_spec(const char *s, Py_ssize_t size) get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
{ {
Py_ssize_t i; Py_ssize_t i;
*spec = NULL;
/* Coding spec must be in a comment, and that comment must be /* Coding spec must be in a comment, and that comment must be
* the only statement on the source code line. */ * the only statement on the source code line. */
for (i = 0; i < size - 6; i++) { for (i = 0; i < size - 6; i++) {
if (s[i] == '#') if (s[i] == '#')
break; break;
if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
return NULL; return 1;
} }
for (; i < size - 6; i++) { /* XXX inefficient search */ for (; i < size - 6; i++) { /* XXX inefficient search */
const char* t = s + i; const char* t = s + i;
@ -250,17 +253,21 @@ get_coding_spec(const char *s, Py_ssize_t size)
t++; t++;
if (begin < t) { if (begin < t) {
char* r = new_string(begin, t - begin); char* r = new_string(begin, t - begin, tok);
if (!r)
return 0;
char* q = get_normal_name(r); char* q = get_normal_name(r);
if (r != q) { if (r != q) {
PyMem_FREE(r); PyMem_FREE(r);
r = new_string(q, strlen(q)); r = new_string(q, strlen(q), tok);
if (!r)
return 0;
} }
return r; *spec = r;
} }
} }
} }
return NULL; return 1;
} }
/* Check whether the line contains a coding spec. If it does, /* Check whether the line contains a coding spec. If it does,
@ -272,38 +279,39 @@ static int
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
int set_readline(struct tok_state *, const char *)) int set_readline(struct tok_state *, const char *))
{ {
char * cs; char *cs;
int r = 1; int r = 1;
if (tok->cont_line) if (tok->cont_line)
/* It's a continuation line, so it can't be a coding spec. */ /* It's a continuation line, so it can't be a coding spec. */
return 1; return 1;
cs = get_coding_spec(line, size); if (!get_coding_spec(line, &cs, size, tok))
if (cs != NULL) { return 0;
tok->read_coding_spec = 1; if (!cs)
if (tok->encoding == NULL) { return 1;
assert(tok->decoding_state == STATE_RAW); tok->read_coding_spec = 1;
if (strcmp(cs, "utf-8") == 0) { if (tok->encoding == NULL) {
assert(tok->decoding_state == STATE_RAW);
if (strcmp(cs, "utf-8") == 0) {
tok->encoding = cs;
} else {
r = set_readline(tok, cs);
if (r) {
tok->encoding = cs; tok->encoding = cs;
} else { tok->decoding_state = STATE_NORMAL;
r = set_readline(tok, cs);
if (r) {
tok->encoding = cs;
tok->decoding_state = STATE_NORMAL;
}
else {
PyErr_Format(PyExc_SyntaxError,
"encoding problem: %s", cs);
PyMem_FREE(cs);
}
} }
} else { /* then, compare cs with BOM */ else {
r = (strcmp(tok->encoding, cs) == 0);
if (!r)
PyErr_Format(PyExc_SyntaxError, PyErr_Format(PyExc_SyntaxError,
"encoding problem: %s with BOM", cs); "encoding problem: %s", cs);
PyMem_FREE(cs); PyMem_FREE(cs);
}
} }
} else { /* then, compare cs with BOM */
r = (strcmp(tok->encoding, cs) == 0);
if (!r)
PyErr_Format(PyExc_SyntaxError,
"encoding problem: %s with BOM", cs);
PyMem_FREE(cs);
} }
return r; return r;
} }
@ -367,7 +375,9 @@ check_bom(int get_char(struct tok_state *),
} }
if (tok->encoding != NULL) if (tok->encoding != NULL)
PyMem_FREE(tok->encoding); PyMem_FREE(tok->encoding);
tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ tok->encoding = new_string("utf-8", 5, tok);
if (!tok->encoding)
return 0;
/* No need to set_readline: input is already utf-8 */ /* No need to set_readline: input is already utf-8 */
return 1; return 1;
} }