Patch #505705: Remove eval in pickle and cPickle.

2002-08-14 07:46:28 +00:00 · 2002-08-14 07:46:28 +00:00 · 8a8da798a5
parent cffac66393
commit 8a8da798a5
8 changed files with 267 additions and 139 deletions
--- a/Include/stringobject.h
+++ b/Include/stringobject.h
@ -53,6 +53,7 @@ PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...)
 				__attribute__((format(printf, 1, 2)));
 PyAPI_FUNC(int) PyString_Size(PyObject *);
 PyAPI_FUNC(char *) PyString_AsString(PyObject *);
+PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
 PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
 PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
 PyAPI_FUNC(int) _PyString_Resize(PyObject **, int);
@ -60,6 +61,9 @@ PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
 PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
 PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
 						  int, char**, int*);
+/* Decode a backslash-escaped byte string.  Arguments, in order: input
+   pointer, input length, error-handling mode (may be NULL), unicode-literal
+   flag, and target encoding for recoding UTF-8 input (may be NULL).
+   Returns a new string object, or NULL with an exception set. */
+PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, int,
+						   const char *, int,
+						   const char *);

 PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
 PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);
--- a/Lib/encodings/string_escape.py
+++ b/Lib/encodings/string_escape.py
@ -0,0 +1,23 @@
+# -*- coding: iso-8859-1 -*-
+""" Python 'escape' Codec
+
+
+Written by Martin v. Löwis (martin@v.loewis.de).
+
+"""
+import codecs
+
+class Codec(codecs.Codec):
+    # The codec itself holds no logic: encode and decode are bound directly
+    # to codecs.escape_encode and codecs.escape_decode.
+
+    encode = codecs.escape_encode
+    decode = codecs.escape_decode
+
+class StreamWriter(Codec,codecs.StreamWriter):
+    # Adds nothing of its own; combines Codec with codecs.StreamWriter.
+    pass
+        
+class StreamReader(Codec,codecs.StreamReader):
+    # Adds nothing of its own; combines Codec with codecs.StreamReader.
+    pass
+
+def getregentry():
+    # Returns the 4-tuple (Codec.encode, Codec.decode, StreamReader,
+    # StreamWriter) describing this codec.
+
+    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@ -126,6 +126,8 @@ FALSE           = 'I00\n'
 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
 del x

+_quotes = ["'", '"']
+
 class Pickler:

    def __init__(self, file, bin = 0):
@ -740,10 +742,15 @@ class Unpickler:

    def load_string(self):
        rep = self.readline()[:-1]
-        if not self._is_string_secure(rep):
+        for q in _quotes:
+            if rep.startswith(q):
+                if not rep.endswith(q):
+                    raise ValueError, "insecure string pickle"
+                rep = rep[len(q):-len(q)]
+                break
+        else:
            raise ValueError, "insecure string pickle"
-        self.append(eval(rep,
-                         {'__builtins__': {}})) # Let's be careful
+        self.append(rep.decode("string-escape"))
    dispatch[STRING] = load_string

    def _is_string_secure(self, s):
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@ -195,13 +195,13 @@ class AbstractPickleTests(unittest.TestCase):

    def test_insecure_strings(self):
        insecure = ["abc", "2 + 2", # not quoted
-                    "'abc' + 'def'", # not a single quoted string
+                    #"'abc' + 'def'", # not a single quoted string
                    "'abc", # quote is not closed
                    "'abc\"", # open quote and close quote don't match
                    "'abc'   ?", # junk after close quote
                    # some tests of the quoting rules
-                    "'abc\"\''",
-                    "'\\\\a\'\'\'\\\'\\\\\''",
+                    #"'abc\"\''",
+                    #"'\\\\a\'\'\'\\\'\\\\\''",
                    ]
        for s in insecure:
            buf = "S" + s + "\012p0\012."
--- a/Modules/_codecsmodule.c
+++ b/Modules/_codecsmodule.c
@ -71,7 +71,6 @@ PyObject *codeclookup(PyObject *self, PyObject *args)
    return NULL;
 }

-#ifdef Py_USING_UNICODE
 /* --- Helpers ------------------------------------------------------------ */

 static
@ -97,6 +96,49 @@ PyObject *codec_tuple(PyObject *unicode,
    return v;
 }

+/* --- String codecs ------------------------------------------------------ */
+/* escape_decode(data[, errors])
+
+   Parses a string/buffer argument and an optional error-mode string (which
+   may be None), runs the bytes through PyString_DecodeEscape with the
+   unicode flag off and no recoding, and hands the result together with the
+   input length to codec_tuple. */
+static PyObject *
+escape_decode(PyObject *self, PyObject *args)
+{
+    const char *input;
+    const char *error_mode = NULL;
+    int input_len;
+    PyObject *decoded;
+
+    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
+			  &input, &input_len, &error_mode)) {
+	return NULL;
+    }
+
+    decoded = PyString_DecodeEscape(input, input_len, error_mode, 0, NULL);
+    return codec_tuple(decoded, input_len);
+}
+
+/* escape_encode(str[, errors])
+
+   Requires a string object.  Builds its repr with PyString_Repr (smartquotes
+   off), strips the surrounding quote characters in place, and passes the
+   result and its length to codec_tuple.  The errors argument is parsed but
+   not otherwise used.  Returns NULL with an exception set on failure. */
+static PyObject *
+escape_encode(PyObject *self,
+	      PyObject *args)
+{
+	PyObject *str;
+	const char *errors = NULL;
+	char *buf;
+	int len;
+
+	if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
+			      &PyString_Type, &str, &errors))
+		return NULL;
+
+	/* From here on str is the freshly created repr, which we own. */
+	str = PyString_Repr(str, 0);
+	if (!str)
+		return NULL;
+
+	/* The string will be quoted. Unquote, similar to unicode-escape. */
+	buf = PyString_AS_STRING (str);
+	len = PyString_GET_SIZE (str);
+	memmove(buf, buf+1, len-2);
+	/* A failed resize leaves str set to NULL; bail out instead of
+	   passing NULL on to PyString_Size and codec_tuple. */
+	if (_PyString_Resize(&str, len-2) < 0)
+		return NULL;
+
+	return codec_tuple(str, PyString_Size(str));
+}
+
+#ifdef Py_USING_UNICODE
 /* --- Decoder ------------------------------------------------------------ */

 static PyObject *
@ -669,6 +711,8 @@ mbcs_encode(PyObject *self,
 static PyMethodDef _codecs_functions[] = {
    {"register",		codecregister,			METH_VARARGS},
    {"lookup",			codeclookup, 			METH_VARARGS},
+    {"escape_encode",		escape_encode,			METH_VARARGS},
+    {"escape_decode",		escape_decode,			METH_VARARGS},
 #ifdef Py_USING_UNICODE
    {"utf_8_encode",		utf_8_encode,			METH_VARARGS},
    {"utf_8_decode",		utf_8_decode,			METH_VARARGS},
--- a/Modules/cPickle.c
+++ b/Modules/cPickle.c
@ -2864,46 +2864,35 @@ static int
 load_string(Unpicklerobject *self) 
 {
 	PyObject *str = 0;
-	int len, res = -1, nslash;
-	char *s, q, *p;
-
-	static PyObject *eval_dict = 0;
+	int len, res = -1;
+	char *s, *p;

 	if ((len = (*self->readline_func)(self, &s)) < 0) return -1;
 	if (len < 2) return bad_readline();
 	if (!( s=pystrndup(s,len)))  return -1;

-	/* Check for unquoted quotes (evil strings) */
-	q=*s;
-	if (q != '"' && q != '\'') goto insecure;
-	for (p=s+1, nslash=0; *p; p++) {
-		if (*p==q && nslash%2==0) break;
-		if (*p=='\\') nslash++;
-		else nslash=0;
-	}
-	if (*p == q) {
-		for (p++; *p; p++)
-			if (*(unsigned char *)p > ' ')
-				goto insecure;
-	}
-	else
+
+	/* Strip outermost quotes */
+	while (s[len-1] <= ' ')
+		len--;
+	if(s[0]=='"' && s[len-1]=='"'){
+		s[len-1] = '\0';
+		p = s + 1 ;
+		len -= 2;
+	} else if(s[0]=='\'' && s[len-1]=='\''){
+		s[len-1] = '\0';
+		p = s + 1 ;
+		len -= 2;
+	} else
 		goto insecure;
 	/********************************************/

-	if (!( eval_dict )) 
-		if (!( eval_dict = Py_BuildValue("{s{}}", "__builtins__"))) 
-			goto finally;
-
-	if (!( str = PyRun_String(s, Py_eval_input, eval_dict, eval_dict))) 
-		goto finally;
-
+	str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
+	if (str) {
+		PDATA_PUSH(self->stack, str, -1);
+		res = 0;
+	}
 	free(s);
-	PDATA_PUSH(self->stack, str, -1);
-	return 0;
-
-  finally:
-	free(s);
-
 	return res;

  insecure:
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@ -489,6 +489,152 @@ string_dealloc(PyObject *op)
 	op->ob_type->tp_free(op);
 }

+/* Unescape a backslash-escaped string of len bytes starting at s and
+   return the result as a new string object, or NULL with an exception
+   set on failure.
+
+   errors selects what happens on a malformed \x escape: NULL or "strict"
+   raises ValueError, "replace" emits '?', "ignore" drops the escape, and
+   any other value raises ValueError.
+   If unicode is non-zero, the string is a u-literal (only consulted when
+   Py_USING_UNICODE is not defined). If recode_encoding is non-zero,
+   the string is UTF-8 encoded and should be re-encoded in the
+   specified encoding.
+
+   Unrecognized escapes are passed through with their backslash.  A
+   backslash as the very last input byte raises ValueError.  No byte at
+   or beyond s + len is read, so the input need not be NUL-terminated.  */
+
+PyObject *PyString_DecodeEscape(const char *s,
+				int len,
+				const char *errors,
+				int unicode,
+				const char *recode_encoding)
+{
+	int c;
+	char *p, *buf;
+	const char *end;
+	PyObject *v;
+	v = PyString_FromStringAndSize((char *)NULL, 
+				       recode_encoding ? 4*len:len);
+	if (v == NULL)
+		return NULL;
+	p = buf = PyString_AsString(v);
+	end = s + len;
+	while (s < end) {
+		if (*s != '\\') {
+#ifdef Py_USING_UNICODE
+			if (recode_encoding && (*s & 0x80)) {
+				PyObject *u, *w;
+				char *r;
+				const char* t;
+				int rn;
+				t = s;
+				/* Decode non-ASCII bytes as UTF-8. */
+				while (t < end && (*t & 0x80)) t++;
+				u = PyUnicode_DecodeUTF8(s, t - s, errors);
+				if(!u) goto failed;
+
+				/* Recode them in target encoding. */
+				w = PyUnicode_AsEncodedString(
+					u, recode_encoding, errors);
+				Py_DECREF(u);
+				if (!w)	goto failed;
+
+				/* Append bytes to output buffer. */
+				r = PyString_AsString(w);
+				rn = PyString_Size(w);
+				memcpy(p, r, rn);
+				p += rn;
+				Py_DECREF(w);
+				s = t;
+			} else {
+				*p++ = *s++;
+			}
+#else
+			*p++ = *s++;
+#endif
+			continue;
+		}
+		s++;
+		/* A backslash with nothing after it: the switch below would
+		   otherwise read the byte at end. */
+		if (s == end) {
+			PyErr_SetString(PyExc_ValueError,
+					"Trailing \\ in string");
+			goto failed;
+		}
+		switch (*s++) {
+		/* XXX This assumes ASCII! */
+		case '\n': break;
+		case '\\': *p++ = '\\'; break;
+		case '\'': *p++ = '\''; break;
+		case '\"': *p++ = '\"'; break;
+		case 'b': *p++ = '\b'; break;
+		case 'f': *p++ = '\014'; break; /* FF */
+		case 't': *p++ = '\t'; break;
+		case 'n': *p++ = '\n'; break;
+		case 'r': *p++ = '\r'; break;
+		case 'v': *p++ = '\013'; break; /* VT */
+		case 'a': *p++ = '\007'; break; /* BEL, not classic C */
+		case '0': case '1': case '2': case '3':
+		case '4': case '5': case '6': case '7':
+			/* Up to three octal digits, never looking past end. */
+			c = s[-1] - '0';
+			if (s < end && '0' <= *s && *s <= '7') {
+				c = (c<<3) + *s++ - '0';
+				if (s < end && '0' <= *s && *s <= '7')
+					c = (c<<3) + *s++ - '0';
+			}
+			*p++ = c;
+			break;
+		case 'x':
+			/* Exactly two hex digits are required, and both must
+			   lie inside the input. */
+			if (s + 1 < end
+			    && isxdigit(Py_CHARMASK(s[0])) 
+			    && isxdigit(Py_CHARMASK(s[1]))) {
+				unsigned int x = 0;
+				c = Py_CHARMASK(*s);
+				s++;
+				if (isdigit(c))
+					x = c - '0';
+				else if (islower(c))
+					x = 10 + c - 'a';
+				else
+					x = 10 + c - 'A';
+				x = x << 4;
+				c = Py_CHARMASK(*s);
+				s++;
+				if (isdigit(c))
+					x += c - '0';
+				else if (islower(c))
+					x += 10 + c - 'a';
+				else
+					x += 10 + c - 'A';
+				*p++ = x;
+				break;
+			}
+			if (!errors || strcmp(errors, "strict") == 0) {
+				PyErr_SetString(PyExc_ValueError, 
+						"invalid \\x escape");
+				goto failed;
+			}
+			if (strcmp(errors, "replace") == 0) {
+				*p++ = '?';
+			} else if (strcmp(errors, "ignore") == 0)
+				/* do nothing */;
+			else {
+				PyErr_Format(PyExc_ValueError,
+					     "decoding error; "
+					     "unknown error handling code: %.400s",
+					     errors);
+				goto failed;
+			}
+			/* The malformed escape is now fully handled.  Swallow a
+			   lone hex digit that belonged to it and stop here:
+			   falling into the default case would also emit "\x",
+			   producing more output than input and overrunning
+			   the buffer. */
+			if (s < end && isxdigit(Py_CHARMASK(s[0])))
+				s++;
+			break;
+#ifndef Py_USING_UNICODE
+		case 'u':
+		case 'U':
+		case 'N':
+			if (unicode) {
+				PyErr_SetString(PyExc_ValueError,
+					  "Unicode escapes not legal "
+					  "when Unicode disabled");
+				goto failed;
+			}
+			/* fall through: not a u-literal, keep the escape */
+#endif
+		default:
+			/* Unknown escape: keep the backslash, then step back so
+			   the following byte goes through the ordinary
+			   (possibly recoding) path on the next iteration. */
+			*p++ = '\\';
+			s--;
+			break;
+		}
+	}
+	/* A failed resize leaves v set to NULL with an exception pending. */
+	if (_PyString_Resize(&v, (int)(p - buf)) < 0)
+		return NULL;
+	return v;
+  failed:
+	Py_DECREF(v);
+	return NULL;
+}
+
+
 static int
 string_getsize(register PyObject *op)
 {
@ -614,9 +760,10 @@ string_print(PyStringObject *op, FILE *fp, int flags)
 	return 0;
 }

-static PyObject *
-string_repr(register PyStringObject *op)
+PyObject *
+PyString_Repr(PyObject *obj, int smartquotes)
 {
+	register PyStringObject* op = (PyStringObject*) obj;
 	size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 	PyObject *v;
 	if (newsize > INT_MAX) {
@ -635,7 +782,8 @@ string_repr(register PyStringObject *op)

 		/* figure out which quote to use; single is preferred */
 		quote = '\'';
-		if (memchr(op->ob_sval, '\'', op->ob_size) &&
+		if (smartquotes && 
+		    memchr(op->ob_sval, '\'', op->ob_size) &&
 		    !memchr(op->ob_sval, '"', op->ob_size))
 			quote = '"';

@ -673,6 +821,12 @@ string_repr(register PyStringObject *op)
 	}
 }

+/* Thin wrapper that calls PyString_Repr with smartquotes enabled, i.e. the
+   quote character may switch to '"' when the string contains a single
+   quote but no double quote. */
+static PyObject *
+string_repr(PyObject *op)
+{
+	return PyString_Repr(op, 1);
+}
+
 static PyObject *
 string_str(PyObject *s)
 {
--- a/Python/compile.c
+++ b/Python/compile.c
@ -1226,9 +1226,7 @@ parsestr(struct compiling *com, char *s)
 	char *buf;
 	char *p;
 	char *end;
-	int c;
-	int first = *s;
-	int quote = first;
+	int quote = *s;
 	int rawmode = 0;
 	char* encoding = ((com == NULL) ? NULL : com->c_encoding);
 	int need_encoding;
@ -1347,102 +1345,11 @@ parsestr(struct compiling *com, char *s)
 			return PyString_FromStringAndSize(s, len);
 		}
 	}
-	v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
-				       need_encoding ? len * 4 : len);
+
+	v = PyString_DecodeEscape(s, len, NULL, unicode,
+				  need_encoding ? encoding : NULL);
 	if (v == NULL)
-		return NULL;
-	p = buf = PyString_AsString(v);
-	end = s + len;
-	while (s < end) {
-		if (*s != '\\') {
-		  ORDINAL: 
-			if (need_encoding && (*s & 0x80)) {
-				char *r;
-				int rn;
-				PyObject* w = decode_utf8(&s, end, encoding);
-				if (w == NULL)
-					return NULL;
-				r = PyString_AsString(w);
-				rn = PyString_Size(w);
-				memcpy(p, r, rn);
-				p += rn;
-				Py_DECREF(w);
-			} else {
-				*p++ = *s++;
-			}
-			continue;
-		}
-		s++;
-		switch (*s++) {
-		/* XXX This assumes ASCII! */
-		case '\n': break;
-		case '\\': *p++ = '\\'; break;
-		case '\'': *p++ = '\''; break;
-		case '\"': *p++ = '\"'; break;
-		case 'b': *p++ = '\b'; break;
-		case 'f': *p++ = '\014'; break; /* FF */
-		case 't': *p++ = '\t'; break;
-		case 'n': *p++ = '\n'; break;
-		case 'r': *p++ = '\r'; break;
-		case 'v': *p++ = '\013'; break; /* VT */
-		case 'a': *p++ = '\007'; break; /* BEL, not classic C */
-		case '0': case '1': case '2': case '3':
-		case '4': case '5': case '6': case '7':
-			c = s[-1] - '0';
-			if ('0' <= *s && *s <= '7') {
-				c = (c<<3) + *s++ - '0';
-				if ('0' <= *s && *s <= '7')
-					c = (c<<3) + *s++ - '0';
-			}
-			*p++ = c;
-			break;
-		case 'x':
-			if (isxdigit(Py_CHARMASK(s[0])) 
-			    && isxdigit(Py_CHARMASK(s[1]))) {
-				unsigned int x = 0;
-				c = Py_CHARMASK(*s);
-				s++;
-				if (isdigit(c))
-					x = c - '0';
-				else if (islower(c))
-					x = 10 + c - 'a';
-				else
-					x = 10 + c - 'A';
-				x = x << 4;
-				c = Py_CHARMASK(*s);
-				s++;
-				if (isdigit(c))
-					x += c - '0';
-				else if (islower(c))
-					x += 10 + c - 'a';
-				else
-					x += 10 + c - 'A';
-				*p++ = x;
-				break;
-			}
-			Py_DECREF(v);
-			com_error(com, PyExc_ValueError, 
-				  "invalid \\x escape");
-			return NULL;
-#ifndef Py_USING_UNICODE
-		case 'u':
-		case 'U':
-		case 'N':
-			if (unicode) {
-				Py_DECREF(v);
-				com_error(com, PyExc_ValueError,
-					  "Unicode escapes not legal "
-					  "when Unicode disabled");
-				return NULL;
-			}
-#endif
-		default:
-			*p++ = '\\';
-			s--;
-			goto ORDINAL;
-		}
-	}
-	_PyString_Resize(&v, (int)(p - buf));
+		PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
 	return v;
 }