Patch #505705: Remove eval in pickle and cPickle.

This commit is contained in:
Martin v. Löwis 2002-08-14 07:46:28 +00:00
parent cffac66393
commit 8a8da798a5
8 changed files with 267 additions and 139 deletions

View File

@ -53,6 +53,7 @@ PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...)
__attribute__((format(printf, 1, 2)));
PyAPI_FUNC(int) PyString_Size(PyObject *);
PyAPI_FUNC(char *) PyString_AsString(PyObject *);
PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int);
PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *);
PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *);
PyAPI_FUNC(int) _PyString_Resize(PyObject **, int);
@ -60,6 +61,9 @@ PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*);
PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int,
int, char**, int*);
/* Decode backslash escapes in a byte string.
   Arguments, in order: s, len, errors, unicode, recode_encoding.
   Declared with PyAPI_FUNC like the other string functions in this header. */
PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, int,
                                             const char *, int,
                                             const char *);
PyAPI_FUNC(void) PyString_InternInPlace(PyObject **);
PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *);

View File

@ -0,0 +1,23 @@
# -*- coding: iso-8859-1 -*-
""" Python 'escape' Codec
Written by Martin v. Löwis (martin@v.loewis.de).
"""
import codecs
# Codec for the 'escape' encoding: encode and decode are bound directly to
# codecs.escape_encode and codecs.escape_decode; no Python-level logic is added.
class Codec(codecs.Codec):
encode = codecs.escape_encode
decode = codecs.escape_decode
# Stream writer combining Codec with codecs.StreamWriter; it defines nothing
# of its own and relies entirely on its two base classes.
class StreamWriter(Codec,codecs.StreamWriter):
pass
# Stream reader combining Codec with codecs.StreamReader; it defines nothing
# of its own and relies entirely on its two base classes.
class StreamReader(Codec,codecs.StreamReader):
pass
# Return the registry entry for this codec as a 4-tuple:
# (encode function, decode function, StreamReader class, StreamWriter class).
def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter)

View File

@ -126,6 +126,8 @@ FALSE = 'I00\n'
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
_quotes = ["'", '"']
class Pickler:
def __init__(self, file, bin = 0):
@ -740,10 +742,15 @@ class Unpickler:
def load_string(self):
rep = self.readline()[:-1]
if not self._is_string_secure(rep):
for q in _quotes:
if rep.startswith(q):
if not rep.endswith(q):
raise ValueError, "insecure string pickle"
rep = rep[len(q):-len(q)]
break
else:
raise ValueError, "insecure string pickle"
self.append(eval(rep,
{'__builtins__': {}})) # Let's be careful
self.append(rep.decode("string-escape"))
dispatch[STRING] = load_string
def _is_string_secure(self, s):

View File

@ -195,13 +195,13 @@ class AbstractPickleTests(unittest.TestCase):
def test_insecure_strings(self):
insecure = ["abc", "2 + 2", # not quoted
"'abc' + 'def'", # not a single quoted string
#"'abc' + 'def'", # not a single quoted string
"'abc", # quote is not closed
"'abc\"", # open quote and close quote don't match
"'abc' ?", # junk after close quote
# some tests of the quoting rules
"'abc\"\''",
"'\\\\a\'\'\'\\\'\\\\\''",
#"'abc\"\''",
#"'\\\\a\'\'\'\\\'\\\\\''",
]
for s in insecure:
buf = "S" + s + "\012p0\012."

View File

@ -71,7 +71,6 @@ PyObject *codeclookup(PyObject *self, PyObject *args)
return NULL;
}
#ifdef Py_USING_UNICODE
/* --- Helpers ------------------------------------------------------------ */
static
@ -97,6 +96,49 @@ PyObject *codec_tuple(PyObject *unicode,
return v;
}
/* --- String codecs ------------------------------------------------------ */
/* escape_decode(data[, errors])
 *
 * Parses a byte buffer and an optional error-handler name, unescapes the
 * buffer with PyString_DecodeEscape (not a u-literal, no recoding), and
 * hands the decoded object together with the input length to codec_tuple.
 * The decode result is forwarded unchecked, exactly as a nested call would.
 */
static PyObject *
escape_decode(PyObject *self,
              PyObject *args)
{
    const char *input;
    int input_len;
    const char *errors = NULL;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
                          &input, &input_len, &errors))
        return NULL;

    decoded = PyString_DecodeEscape(input, input_len, errors, 0, NULL);
    return codec_tuple(decoded, input_len);
}
/* escape_encode(str[, errors])
 *
 * Takes a string object and an optional error-handler name (parsed but not
 * otherwise used here).  The escaped form is obtained from PyString_Repr
 * with smartquotes disabled, and the surrounding quote characters are then
 * stripped.  Returns whatever codec_tuple builds from the escaped string
 * and its size, or NULL on failure.
 */
static PyObject *
escape_encode(PyObject *self,
              PyObject *args)
{
    PyObject *str;
    const char *errors = NULL;
    char *buf;
    int len;

    if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
                          &PyString_Type, &str, &errors))
        return NULL;

    /* From here on str is a new object owned by this function. */
    str = PyString_Repr(str, 0);
    if (!str)
        return NULL;

    /* The string will be quoted. Unquote, similar to unicode-escape:
       shift the payload one byte left over the opening quote, then
       shrink by two to drop the closing quote as well. */
    buf = PyString_AS_STRING (str);
    len = PyString_GET_SIZE (str);
    memmove(buf, buf+1, len-2);
    /* A failed resize must not reach PyString_Size below.
       NOTE(review): per the C API docs _PyString_Resize releases the
       object and sets str to NULL on failure, so no DECREF is needed. */
    if (_PyString_Resize(&str, len-2) < 0)
        return NULL;

    return codec_tuple(str, PyString_Size(str));
}
#ifdef Py_USING_UNICODE
/* --- Decoder ------------------------------------------------------------ */
static PyObject *
@ -669,6 +711,8 @@ mbcs_encode(PyObject *self,
static PyMethodDef _codecs_functions[] = {
{"register", codecregister, METH_VARARGS},
{"lookup", codeclookup, METH_VARARGS},
{"escape_encode", escape_encode, METH_VARARGS},
{"escape_decode", escape_decode, METH_VARARGS},
#ifdef Py_USING_UNICODE
{"utf_8_encode", utf_8_encode, METH_VARARGS},
{"utf_8_decode", utf_8_decode, METH_VARARGS},

View File

@ -2864,46 +2864,35 @@ static int
load_string(Unpicklerobject *self)
{
PyObject *str = 0;
int len, res = -1, nslash;
char *s, q, *p;
static PyObject *eval_dict = 0;
int len, res = -1;
char *s, *p;
if ((len = (*self->readline_func)(self, &s)) < 0) return -1;
if (len < 2) return bad_readline();
if (!( s=pystrndup(s,len))) return -1;
/* Check for unquoted quotes (evil strings) */
q=*s;
if (q != '"' && q != '\'') goto insecure;
for (p=s+1, nslash=0; *p; p++) {
if (*p==q && nslash%2==0) break;
if (*p=='\\') nslash++;
else nslash=0;
}
if (*p == q) {
for (p++; *p; p++)
if (*(unsigned char *)p > ' ')
goto insecure;
}
else
/* Strip outermost quotes */
while (s[len-1] <= ' ')
len--;
if(s[0]=='"' && s[len-1]=='"'){
s[len-1] = '\0';
p = s + 1 ;
len -= 2;
} else if(s[0]=='\'' && s[len-1]=='\''){
s[len-1] = '\0';
p = s + 1 ;
len -= 2;
} else
goto insecure;
/********************************************/
if (!( eval_dict ))
if (!( eval_dict = Py_BuildValue("{s{}}", "__builtins__")))
goto finally;
if (!( str = PyRun_String(s, Py_eval_input, eval_dict, eval_dict)))
goto finally;
str = PyString_DecodeEscape(p, len, NULL, 0, NULL);
if (str) {
PDATA_PUSH(self->stack, str, -1);
res = 0;
}
free(s);
PDATA_PUSH(self->stack, str, -1);
return 0;
finally:
free(s);
return res;
insecure:

View File

@ -489,6 +489,152 @@ string_dealloc(PyObject *op)
op->ob_type->tp_free(op);
}
/* Unescape a backslash-escaped string.

   s, len           input bytes and their count; the input is only read
                    in the range [s, s+len).
   errors           error-handler name used for invalid \x escapes and
                    passed on to the UTF-8 decode / re-encode calls.
                    NULL and "strict" raise ValueError, "replace" emits
                    '?', "ignore" emits nothing; any other name raises
                    ValueError.
   unicode          if non-zero, the string is a u-literal.  It is only
                    consulted when Unicode support is compiled out, to
                    reject \u, \U and \N escapes.
   recode_encoding  if non-zero, the string is UTF-8 encoded and should
                    be re-encoded in the specified encoding.

   Returns a new string object, or NULL with an exception set.  */
PyObject *PyString_DecodeEscape(const char *s,
                                int len,
                                const char *errors,
                                int unicode,
                                const char *recode_encoding)
{
    int c;
    char *p, *buf;
    const char *end;
    PyObject *v;

    /* Unescaping never lengthens the data; recoding is given four
       output bytes per input byte.  XXX 4 is enough? */
    v = PyString_FromStringAndSize((char *)NULL,
                                   recode_encoding ? 4*len : len);
    if (v == NULL)
        return NULL;
    p = buf = PyString_AsString(v);
    end = s + len;
    while (s < end) {
        if (*s != '\\') {
          non_esc:
#ifdef Py_USING_UNICODE
            if (recode_encoding && (*s & 0x80)) {
                PyObject *u, *w;
                char *r;
                const char *t;
                int rn;
                t = s;
                /* Decode non-ASCII bytes as UTF-8. */
                while (t < end && (*t & 0x80)) t++;
                u = PyUnicode_DecodeUTF8(s, t - s, errors);
                if (!u) goto failed;
                /* Recode them in target encoding. */
                w = PyUnicode_AsEncodedString(
                    u, recode_encoding, errors);
                Py_DECREF(u);
                if (!w) goto failed;
                /* Append bytes to output buffer. */
                r = PyString_AsString(w);
                rn = PyString_Size(w);
                memcpy(p, r, rn);
                p += rn;
                Py_DECREF(w);
                s = t;
            } else {
                *p++ = *s++;
            }
#else
            *p++ = *s++;
#endif
            continue;
        }
        s++;
        /* A lone backslash at the very end has no escape character to
           examine; reading on would leave the input range and could
           emit more bytes than were allocated. */
        if (s == end) {
            PyErr_SetString(PyExc_ValueError,
                            "Trailing \\ in string");
            goto failed;
        }
        switch (*s++) {
        /* XXX This assumes ASCII! */
        case '\n': break;
        case '\\': *p++ = '\\'; break;
        case '\'': *p++ = '\''; break;
        case '\"': *p++ = '\"'; break;
        case 'b': *p++ = '\b'; break;
        case 'f': *p++ = '\014'; break; /* FF */
        case 't': *p++ = '\t'; break;
        case 'n': *p++ = '\n'; break;
        case 'r': *p++ = '\r'; break;
        case 'v': *p++ = '\013'; break; /* VT */
        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* Up to three octal digits, never looking beyond end. */
            c = s[-1] - '0';
            if (s < end && '0' <= *s && *s <= '7') {
                c = (c<<3) + *s++ - '0';
                if (s < end && '0' <= *s && *s <= '7')
                    c = (c<<3) + *s++ - '0';
            }
            *p++ = c;
            break;
        case 'x':
            /* Exactly two hex digits are required, both inside the
               input range. */
            if (s + 1 < end
                && isxdigit(Py_CHARMASK(s[0]))
                && isxdigit(Py_CHARMASK(s[1]))) {
                unsigned int x = 0;
                c = Py_CHARMASK(*s);
                s++;
                if (isdigit(c))
                    x = c - '0';
                else if (islower(c))
                    x = 10 + c - 'a';
                else
                    x = 10 + c - 'A';
                x = x << 4;
                c = Py_CHARMASK(*s);
                s++;
                if (isdigit(c))
                    x += c - '0';
                else if (islower(c))
                    x += 10 + c - 'a';
                else
                    x += 10 + c - 'A';
                *p++ = x;
                break;
            }
            if (!errors || strcmp(errors, "strict") == 0) {
                PyErr_SetString(PyExc_ValueError,
                                "invalid \\x escape");
                goto failed;
            }
            if (strcmp(errors, "replace") == 0) {
                *p++ = '?';
            } else if (strcmp(errors, "ignore") == 0)
                /* do nothing */;
            else {
                PyErr_Format(PyExc_ValueError,
                             "decoding error; "
                             "unknown error handling code: %.400s",
                             errors);
                /* release the partially built result as well */
                goto failed;
            }
            /* The bad escape has been dealt with; do not fall through
               into the cases below, which would also emit "\x". */
            break;
#ifndef Py_USING_UNICODE
        case 'u':
        case 'U':
        case 'N':
            if (unicode) {
                /* No compiler state is available in this function, so
                   the error is raised directly. */
                PyErr_SetString(PyExc_ValueError,
                                "Unicode escapes not legal "
                                "when Unicode disabled");
                goto failed;
            }
            /* fall through: not a u-literal, keep the escape as is */
#endif
        default:
            /* Unknown escape: keep the backslash, then treat the
               following character as ordinary input so that non-ASCII
               bytes are still recoded when recode_encoding is set. */
            *p++ = '\\';
            s--;
            goto non_esc;
        }
    }
    /* Trim the result to the bytes actually produced. */
    if (_PyString_Resize(&v, (int)(p - buf)) < 0)
        return NULL;
    return v;
  failed:
    Py_DECREF(v);
    return NULL;
}
static int
string_getsize(register PyObject *op)
{
@ -614,9 +760,10 @@ string_print(PyStringObject *op, FILE *fp, int flags)
return 0;
}
static PyObject *
string_repr(register PyStringObject *op)
PyObject *
PyString_Repr(PyObject *obj, int smartquotes)
{
register PyStringObject* op = (PyStringObject*) obj;
size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
PyObject *v;
if (newsize > INT_MAX) {
@ -635,7 +782,8 @@ string_repr(register PyStringObject *op)
/* figure out which quote to use; single is preferred */
quote = '\'';
if (memchr(op->ob_sval, '\'', op->ob_size) &&
if (smartquotes &&
memchr(op->ob_sval, '\'', op->ob_size) &&
!memchr(op->ob_sval, '"', op->ob_size))
quote = '"';
@ -673,6 +821,12 @@ string_repr(register PyStringObject *op)
}
}
/* Build the repr of a string object by delegating to PyString_Repr with
   smartquotes enabled (second argument 1). */
static PyObject *
string_repr(PyObject *op)
{
    PyObject *quoted = PyString_Repr(op, 1);

    return quoted;
}
static PyObject *
string_str(PyObject *s)
{

View File

@ -1226,9 +1226,7 @@ parsestr(struct compiling *com, char *s)
char *buf;
char *p;
char *end;
int c;
int first = *s;
int quote = first;
int quote = *s;
int rawmode = 0;
char* encoding = ((com == NULL) ? NULL : com->c_encoding);
int need_encoding;
@ -1347,102 +1345,11 @@ parsestr(struct compiling *com, char *s)
return PyString_FromStringAndSize(s, len);
}
}
v = PyString_FromStringAndSize((char *)NULL, /* XXX 4 is enough? */
need_encoding ? len * 4 : len);
v = PyString_DecodeEscape(s, len, NULL, unicode,
need_encoding ? encoding : NULL);
if (v == NULL)
return NULL;
p = buf = PyString_AsString(v);
end = s + len;
while (s < end) {
if (*s != '\\') {
ORDINAL:
if (need_encoding && (*s & 0x80)) {
char *r;
int rn;
PyObject* w = decode_utf8(&s, end, encoding);
if (w == NULL)
return NULL;
r = PyString_AsString(w);
rn = PyString_Size(w);
memcpy(p, r, rn);
p += rn;
Py_DECREF(w);
} else {
*p++ = *s++;
}
continue;
}
s++;
switch (*s++) {
/* XXX This assumes ASCII! */
case '\n': break;
case '\\': *p++ = '\\'; break;
case '\'': *p++ = '\''; break;
case '\"': *p++ = '\"'; break;
case 'b': *p++ = '\b'; break;
case 'f': *p++ = '\014'; break; /* FF */
case 't': *p++ = '\t'; break;
case 'n': *p++ = '\n'; break;
case 'r': *p++ = '\r'; break;
case 'v': *p++ = '\013'; break; /* VT */
case 'a': *p++ = '\007'; break; /* BEL, not classic C */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
c = s[-1] - '0';
if ('0' <= *s && *s <= '7') {
c = (c<<3) + *s++ - '0';
if ('0' <= *s && *s <= '7')
c = (c<<3) + *s++ - '0';
}
*p++ = c;
break;
case 'x':
if (isxdigit(Py_CHARMASK(s[0]))
&& isxdigit(Py_CHARMASK(s[1]))) {
unsigned int x = 0;
c = Py_CHARMASK(*s);
s++;
if (isdigit(c))
x = c - '0';
else if (islower(c))
x = 10 + c - 'a';
else
x = 10 + c - 'A';
x = x << 4;
c = Py_CHARMASK(*s);
s++;
if (isdigit(c))
x += c - '0';
else if (islower(c))
x += 10 + c - 'a';
else
x += 10 + c - 'A';
*p++ = x;
break;
}
Py_DECREF(v);
com_error(com, PyExc_ValueError,
"invalid \\x escape");
return NULL;
#ifndef Py_USING_UNICODE
case 'u':
case 'U':
case 'N':
if (unicode) {
Py_DECREF(v);
com_error(com, PyExc_ValueError,
"Unicode escapes not legal "
"when Unicode disabled");
return NULL;
}
#endif
default:
*p++ = '\\';
s--;
goto ORDINAL;
}
}
_PyString_Resize(&v, (int)(p - buf));
PyErr_SyntaxLocation(com->c_filename, com->c_lineno);
return v;
}