merge 3.3 (closes #18470)

2013-07-15 20:47:47 -07:00 · 2013-07-15 20:47:47 -07:00 · fd9c0203de
parent eaaedcdd89 2dbfd88245
commit fd9c0203de
1 changed file with 46 additions and 36 deletions
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -147,13 +147,15 @@ tok_new(void)
 }

 static char *
-new_string(const char *s, Py_ssize_t len)
+new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
 {
    char* result = (char *)PyMem_MALLOC(len + 1);
-    if (result != NULL) {
-        memcpy(result, s, len);
-        result[len] = '\0';
+    if (!result) {
+        tok->done = E_NOMEM;
+        return NULL;
    }
+    memcpy(result, s, len);
+    result[len] = '\0';
    return result;
 }

@@ -174,7 +176,7 @@ decoding_feof(struct tok_state *tok)
 static char *
 decode_str(const char *str, int exec_input, struct tok_state *tok)
 {
-    return new_string(str, strlen(str));
+    return new_string(str, strlen(str), tok);
 }

 #else /* PGEN */
@@ -221,17 +223,18 @@ get_normal_name(char *s)        /* for utf-8 and latin-1 */

 /* Return the coding spec in S, or NULL if none is found.  */

-static char *
-get_coding_spec(const char *s, Py_ssize_t size)
+static int
+get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
 {
    Py_ssize_t i;
+    *spec = NULL;
    /* Coding spec must be in a comment, and that comment must be
     * the only statement on the source code line. */
    for (i = 0; i < size - 6; i++) {
        if (s[i] == '#')
            break;
        if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
-            return NULL;
+            return 1;
    }
    for (; i < size - 6; i++) { /* XXX inefficient search */
        const char* t = s + i;
@@ -250,17 +253,21 @@ get_coding_spec(const char *s, Py_ssize_t size)
                t++;

            if (begin < t) {
-                char* r = new_string(begin, t - begin);
+                char* r = new_string(begin, t - begin, tok);
+                if (!r)
+                    return 0;
                char* q = get_normal_name(r);
                if (r != q) {
                    PyMem_FREE(r);
-                    r = new_string(q, strlen(q));
+                    r = new_string(q, strlen(q), tok);
+                    if (!r)
+                        return 0;
                }
-                return r;
+                *spec = r;
            }
        }
    }
-    return NULL;
+    return 1;
 }

 /* Check whether the line contains a coding spec. If it does,
@@ -272,38 +279,39 @@ static int
 check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
                  int set_readline(struct tok_state *, const char *))
 {
-    char * cs;
+    char *cs;
    int r = 1;

    if (tok->cont_line)
        /* It's a continuation line, so it can't be a coding spec. */
        return 1;
-    cs = get_coding_spec(line, size);
-    if (cs != NULL) {
-        tok->read_coding_spec = 1;
-        if (tok->encoding == NULL) {
-            assert(tok->decoding_state == STATE_RAW);
-            if (strcmp(cs, "utf-8") == 0) {
+    if (!get_coding_spec(line, &cs, size, tok))
+        return 0;
+    if (!cs)
+        return 1;
+    tok->read_coding_spec = 1;
+    if (tok->encoding == NULL) {
+        assert(tok->decoding_state == STATE_RAW);
+        if (strcmp(cs, "utf-8") == 0) {
+            tok->encoding = cs;
+        } else {
+            r = set_readline(tok, cs);
+            if (r) {
                tok->encoding = cs;
-            } else {
-                r = set_readline(tok, cs);
-                if (r) {
-                    tok->encoding = cs;
-                    tok->decoding_state = STATE_NORMAL;
-                }
-                else {
-                    PyErr_Format(PyExc_SyntaxError,
-                                 "encoding problem: %s", cs);
-                    PyMem_FREE(cs);
-                }
+                tok->decoding_state = STATE_NORMAL;
            }
-        } else {                /* then, compare cs with BOM */
-            r = (strcmp(tok->encoding, cs) == 0);
-            if (!r)
+            else {
                PyErr_Format(PyExc_SyntaxError,
-                             "encoding problem: %s with BOM", cs);
-            PyMem_FREE(cs);
+                             "encoding problem: %s", cs);
+                PyMem_FREE(cs);
+            }
        }
+    } else {                /* then, compare cs with BOM */
+        r = (strcmp(tok->encoding, cs) == 0);
+        if (!r)
+            PyErr_Format(PyExc_SyntaxError,
+                         "encoding problem: %s with BOM", cs);
+        PyMem_FREE(cs);
    }
    return r;
 }
@@ -367,7 +375,9 @@ check_bom(int get_char(struct tok_state *),
    }
    if (tok->encoding != NULL)
        PyMem_FREE(tok->encoding);
-    tok->encoding = new_string("utf-8", 5);     /* resulting is in utf-8 */
+    tok->encoding = new_string("utf-8", 5, tok);
+    if (!tok->encoding)
+        return 0;
    /* No need to set_readline: input is already utf-8 */
    return 1;
 }