From e239d23e8cc66605f548585ad4489a8f12fc070d Mon Sep 17 00:00:00 2001 From: Alexander Belopolsky Date: Wed, 8 Dec 2010 23:31:48 +0000 Subject: [PATCH] Issue #6697: Fixed instances of _PyUnicode_AsString() result not checked for NULL --- Lib/test/datetimetester.py | 8 ++++- Lib/test/test_pyexpat.py | 2 ++ Lib/test/test_socket.py | 2 ++ Lib/test/test_syslog.py | 2 ++ Lib/test/test_xml_etree_c.py | 18 +++++++++- Modules/_datetimemodule.c | 3 +- Modules/_elementtree.c | 47 ++++++++++++------------- Modules/_lsprof.c | 9 +++++ Modules/_testcapimodule.c | 17 ++++----- Modules/parsermodule.c | 13 +++++++ Modules/pyexpat.c | 68 +++++++++++++++++++----------------- Modules/socketmodule.c | 8 +++-- Modules/syslogmodule.c | 25 ++++++++----- 13 files changed, 144 insertions(+), 78 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 48e5095f3d6..dddf0f837d6 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2508,11 +2508,17 @@ class TestTimeTZ(TestTime, TZInfoBase, unittest.TestCase): # Check that an invalid tzname result raises an exception. class Badtzname(tzinfo): - def tzname(self, dt): return 42 + tz = 42 + def tzname(self, dt): return self.tz t = time(2, 3, 4, tzinfo=Badtzname()) self.assertEqual(t.strftime("%H:%M:%S"), "02:03:04") self.assertRaises(TypeError, t.strftime, "%Z") + # Issue #6697: + if '_Fast' in str(type(self)): + Badtzname.tz = '\ud800' + self.assertRaises(ValueError, t.strftime, "%Z") + def test_hash_edge_cases(self): # Offsets that overflow a basic time. t1 = self.theclass(0, 1, 2, 3, tzinfo=FixedOffset(1439, "")) diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 5a193bb32e7..5bb8c9718f0 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -203,6 +203,8 @@ class ParseTest(unittest.TestCase): operations = out.out self._verify_parse_output(operations) + # Issue #6697. + self.assertRaises(AttributeError, getattr, parser, '\uD800') def test_parse_file(self): # Try parsing a file diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 32084daf0e0..6bdb6c919e7 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -667,6 +667,8 @@ class GeneralModuleTests(unittest.TestCase): type=socket.SOCK_STREAM, proto=0, flags=socket.AI_PASSIVE) self.assertEqual(a, b) + # Issue #6697. + self.assertRaises(UnicodeEncodeError, socket.getaddrinfo, 'localhost', '\uD800') def test_getnameinfo(self): # only IP addresses are allowed diff --git a/Lib/test/test_syslog.py b/Lib/test/test_syslog.py index 028dcb49aaf..4e7621e5ec2 100644 --- a/Lib/test/test_syslog.py +++ b/Lib/test/test_syslog.py @@ -11,6 +11,8 @@ class Test(unittest.TestCase): def test_openlog(self): syslog.openlog('python') + # Issue #6697. + self.assertRaises(UnicodeEncodeError, syslog.openlog, '\uD800') def test_syslog(self): syslog.openlog('python') diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index ee270f900dd..5c0bf6cac8d 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -8,10 +8,26 @@ cET = support.import_module('xml.etree.cElementTree') # cElementTree specific tests def sanity(): - """ + r""" Import sanity. >>> from xml.etree import cElementTree + + Issue #6697. + + >>> e = cElementTree.Element('a') + >>> getattr(e, '\uD800') # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + UnicodeEncodeError: ... + + >>> p = cElementTree.XMLParser() + >>> p.version.split()[0] + 'Expat' + >>> getattr(p, '\uD800') + Traceback (most recent call last): + ... + AttributeError: 'XMLParser' object has no attribute '\ud800' """ diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 32fe835a026..16964d8dbe3 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1257,7 +1257,8 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, assert(PyUnicode_Check(Zreplacement)); ptoappend = _PyUnicode_AsStringAndSize(Zreplacement, &ntoappend); - ntoappend = Py_SIZE(Zreplacement); + if (ptoappend == NULL) + goto Done; } else if (ch == 'f') { /* format microseconds */ diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 4fed46eb6b0..3061d8eaf3f 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1483,6 +1483,9 @@ element_getattro(ElementObject* self, PyObject* nameobj) if (PyUnicode_Check(nameobj)) name = _PyUnicode_AsString(nameobj); + + if (name == NULL) + return NULL; /* handle common attributes first */ if (strcmp(name, "tag") == 0) { @@ -2139,7 +2142,7 @@ expat_set_error(const char* message, int line, int column) PyObject *position; char buffer[256]; - sprintf(buffer, "%s: line %d, column %d", message, line, column); + sprintf(buffer, "%.100s: line %d, column %d", message, line, column); error = PyObject_CallFunction(elementtree_parseerror_obj, "s", buffer); if (!error) @@ -2194,8 +2197,8 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in, Py_XDECREF(res); } else if (!PyErr_Occurred()) { /* Report the first error, not the last */ - char message[128]; - sprintf(message, "undefined entity &%.100s;", _PyUnicode_AsString(key)); + char message[128] = "undefined entity "; + strncat(message, data_in, data_len < 100?data_len:100); expat_set_error( message, EXPAT(GetErrorLineNumber)(self->parser), @@ -2796,29 +2799,25 @@ static PyMethodDef xmlparser_methods[] = { static PyObject* xmlparser_getattro(XMLParserObject* self, PyObject* nameobj) { - PyObject* res; - char *name = ""; - - if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); - - PyErr_Clear(); - - if (strcmp(name, "entity") == 0) - res = self->entity; - else if (strcmp(name, "target") == 0) - res = self->target; - else if (strcmp(name, "version") == 0) { - char buffer[100]; - sprintf(buffer, "Expat %d.%d.%d", XML_MAJOR_VERSION, + if (PyUnicode_Check(nameobj)) { + PyObject* res; + if (PyUnicode_CompareWithASCIIString(nameobj, "entity") == 0) + res = self->entity; + else if (PyUnicode_CompareWithASCIIString(nameobj, "target") == 0) + res = self->target; + else if (PyUnicode_CompareWithASCIIString(nameobj, "version") == 0) { + return PyUnicode_FromFormat( + "Expat %d.%d.%d", XML_MAJOR_VERSION, XML_MINOR_VERSION, XML_MICRO_VERSION); - return PyUnicode_DecodeUTF8(buffer, strlen(buffer), "strict"); - } else { - return PyObject_GenericGetAttr((PyObject*) self, nameobj); - } + } + else + goto generic; - Py_INCREF(res); - return res; + Py_INCREF(res); + return res; + } + generic: + return PyObject_GenericGetAttr((PyObject*) self, nameobj); } static PyTypeObject XMLParser_Type = { diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index 048cfc8b74b..cc412bfc232 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -178,7 +178,16 @@ normalizeUserObj(PyObject *obj) PyObject *mod = fn->m_module; const char *modname; if (mod && PyUnicode_Check(mod)) { + /* XXX: The following will truncate module names with embedded + * null-characters. It is unlikely that this can happen in + * practice and the concequences are not serious enough to + * introduce extra checks here. + */ modname = _PyUnicode_AsString(mod); + if (modname == NULL) { + modname = ""; + PyErr_Clear(); + } } else if (mod && PyModule_Check(mod)) { modname = PyModule_GetName(mod); diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 42d0bcd4647..f0c07aec455 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -1741,15 +1741,16 @@ test_string_from_format(PyObject *self, PyObject *args) { PyObject *result; char *msg; + static const Py_UNICODE one[] = {'1', 0}; -#define CHECK_1_FORMAT(FORMAT, TYPE) \ - result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \ - if (result == NULL) \ - return NULL; \ - if (strcmp(_PyUnicode_AsString(result), "1")) { \ - msg = FORMAT " failed at 1"; \ - goto Fail; \ - } \ +#define CHECK_1_FORMAT(FORMAT, TYPE) \ + result = PyUnicode_FromFormat(FORMAT, (TYPE)1); \ + if (result == NULL) \ + return NULL; \ + if (Py_UNICODE_strcmp(PyUnicode_AS_UNICODE(result), one)) { \ + msg = FORMAT " failed at 1"; \ + goto Fail; \ + } \ Py_DECREF(result) CHECK_1_FORMAT("%d", int); diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index 27b680fd1a8..e9593213815 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -792,6 +792,11 @@ build_node_children(PyObject *tuple, node *root, int *line_num) } } temp_str = _PyUnicode_AsStringAndSize(temp, &len); + if (temp_str == NULL) { + Py_DECREF(temp); + Py_XDECREF(elem); + return 0; + } strn = (char *)PyObject_MALLOC(len + 1); if (strn != NULL) (void) memcpy(strn, temp_str, len + 1); @@ -870,6 +875,8 @@ build_node_tree(PyObject *tuple) encoding = PySequence_GetItem(tuple, 2); /* tuple isn't borrowed anymore here, need to DECREF */ tuple = PySequence_GetSlice(tuple, 0, 2); + if (tuple == NULL) + return NULL; } res = PyNode_New(num); if (res != NULL) { @@ -881,6 +888,12 @@ build_node_tree(PyObject *tuple) Py_ssize_t len; const char *temp; temp = _PyUnicode_AsStringAndSize(encoding, &len); + if (temp == NULL) { + Py_DECREF(res); + Py_DECREF(encoding); + Py_DECREF(tuple); + return NULL; + } res->n_str = (char *)PyObject_MALLOC(len + 1); if (res->n_str != NULL && temp != NULL) (void) memcpy(res->n_str, temp, len + 1); diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 5aadcc01e15..1a00347ed7e 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1215,11 +1215,12 @@ xmlparse_dealloc(xmlparseobject *self) } static int -handlername2int(const char *name) +handlername2int(PyObject *name) { int i; for (i = 0; handler_info[i].name != NULL; i++) { - if (strcmp(name, handler_info[i].name) == 0) { + if (PyUnicode_CompareWithASCIIString( + name, handler_info[i].name) == 0) { return i; } } @@ -1237,13 +1238,13 @@ get_pybool(int istrue) static PyObject * xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) { - char *name = ""; + Py_UNICODE *name; int handlernum = -1; - if (PyUnicode_Check(nameobj)) - name = _PyUnicode_AsString(nameobj); + if (!PyUnicode_Check(nameobj)) + goto generic; - handlernum = handlername2int(name); + handlernum = handlername2int(nameobj); if (handlernum != -1) { PyObject *result = self->handlers[handlernum]; @@ -1252,46 +1253,48 @@ xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) Py_INCREF(result); return result; } + + name = PyUnicode_AS_UNICODE(nameobj); if (name[0] == 'E') { - if (strcmp(name, "ErrorCode") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0) return PyLong_FromLong((long) XML_GetErrorCode(self->itself)); - if (strcmp(name, "ErrorLineNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0) return PyLong_FromLong((long) XML_GetErrorLineNumber(self->itself)); - if (strcmp(name, "ErrorColumnNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0) return PyLong_FromLong((long) XML_GetErrorColumnNumber(self->itself)); - if (strcmp(name, "ErrorByteIndex") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0) return PyLong_FromLong((long) XML_GetErrorByteIndex(self->itself)); } if (name[0] == 'C') { - if (strcmp(name, "CurrentLineNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0) return PyLong_FromLong((long) XML_GetCurrentLineNumber(self->itself)); - if (strcmp(name, "CurrentColumnNumber") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0) return PyLong_FromLong((long) XML_GetCurrentColumnNumber(self->itself)); - if (strcmp(name, "CurrentByteIndex") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0) return PyLong_FromLong((long) XML_GetCurrentByteIndex(self->itself)); } if (name[0] == 'b') { - if (strcmp(name, "buffer_size") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0) return PyLong_FromLong((long) self->buffer_size); - if (strcmp(name, "buffer_text") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0) return get_pybool(self->buffer != NULL); - if (strcmp(name, "buffer_used") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0) return PyLong_FromLong((long) self->buffer_used); } - if (strcmp(name, "namespace_prefixes") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0) return get_pybool(self->ns_prefixes); - if (strcmp(name, "ordered_attributes") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0) return get_pybool(self->ordered_attributes); - if (strcmp(name, "specified_attributes") == 0) + if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0) return get_pybool((long) self->specified_attributes); - if (strcmp(name, "intern") == 0) { + if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) { if (self->intern == NULL) { Py_INCREF(Py_None); return Py_None; @@ -1301,7 +1304,7 @@ xmlparse_getattro(xmlparseobject *self, PyObject *nameobj) return self->intern; } } - + generic: return PyObject_GenericGetAttr((PyObject*)self, nameobj); } @@ -1352,7 +1355,7 @@ xmlparse_dir(PyObject *self, PyObject* noargs) } static int -sethandler(xmlparseobject *self, const char *name, PyObject* v) +sethandler(xmlparseobject *self, PyObject *name, PyObject* v) { int handlernum = handlername2int(name); if (handlernum >= 0) { @@ -1388,14 +1391,15 @@ sethandler(xmlparseobject *self, const char *name, PyObject* v) } static int -xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) +xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v) { /* Set attribute 'name' to value 'v'. v==NULL means delete */ if (v == NULL) { PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute"); return -1; } - if (strcmp(name, "buffer_text") == 0) { + assert(PyUnicode_Check(name)); + if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) { if (PyObject_IsTrue(v)) { if (self->buffer == NULL) { self->buffer = malloc(self->buffer_size); @@ -1414,7 +1418,7 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) } return 0; } - if (strcmp(name, "namespace_prefixes") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) { if (PyObject_IsTrue(v)) self->ns_prefixes = 1; else @@ -1422,14 +1426,14 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) XML_SetReturnNSTriplet(self->itself, self->ns_prefixes); return 0; } - if (strcmp(name, "ordered_attributes") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) { if (PyObject_IsTrue(v)) self->ordered_attributes = 1; else self->ordered_attributes = 0; return 0; } - if (strcmp(name, "specified_attributes") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) { if (PyObject_IsTrue(v)) self->specified_attributes = 1; else @@ -1437,7 +1441,7 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) return 0; } - if (strcmp(name, "buffer_size") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) { long new_buffer_size; if (!PyLong_Check(v)) { PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer"); @@ -1480,7 +1484,7 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) return 0; } - if (strcmp(name, "CharacterDataHandler") == 0) { + if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) { /* If we're changing the character data handler, flush all * cached data with the old handler. Not sure there's a * "right" thing to do, though, but this probably won't @@ -1492,7 +1496,7 @@ xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v) if (sethandler(self, name, v)) { return 0; } - PyErr_SetString(PyExc_AttributeError, name); + PyErr_SetObject(PyExc_AttributeError, name); return -1; } @@ -1524,7 +1528,7 @@ static PyTypeObject Xmlparsetype = { (destructor)xmlparse_dealloc, /*tp_dealloc*/ (printfunc)0, /*tp_print*/ 0, /*tp_getattr*/ - (setattrfunc)xmlparse_setattr, /*tp_setattr*/ + 0, /*tp_setattr*/ 0, /*tp_reserved*/ (reprfunc)0, /*tp_repr*/ 0, /*tp_as_number*/ @@ -1534,7 +1538,7 @@ static PyTypeObject Xmlparsetype = { (ternaryfunc)0, /*tp_call*/ (reprfunc)0, /*tp_str*/ (getattrofunc)xmlparse_getattro, /* tp_getattro */ - 0, /* tp_setattro */ + (setattrofunc)xmlparse_setattro, /* tp_setattro */ 0, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/ Xmlparsetype__doc__, /* tp_doc - Documentation string */ diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index abdd1b2b47a..327e4709246 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -1406,9 +1406,9 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args, { struct sockaddr_hci *addr = (struct sockaddr_hci *)addr_ret; #if defined(__NetBSD__) || defined(__DragonFly__) - char *straddr = PyBytes_AS_STRING(args); + char *straddr = PyBytes_AS_STRING(args); - _BT_HCI_MEMB(addr, family) = AF_BLUETOOTH; + _BT_HCI_MEMB(addr, family) = AF_BLUETOOTH; if (straddr == NULL) { PyErr_SetString(socket_error, "getsockaddrarg: " "wrong format"); @@ -4022,8 +4022,10 @@ socket_getaddrinfo(PyObject *self, PyObject *args, PyObject* kwargs) pptr = pbuf; } else if (PyUnicode_Check(pobj)) { pptr = _PyUnicode_AsString(pobj); + if (pptr == NULL) + goto err; } else if (PyBytes_Check(pobj)) { - pptr = PyBytes_AsString(pobj); + pptr = PyBytes_AS_STRING(pobj); } else if (pobj == Py_None) { pptr = (char *)NULL; } else { diff --git a/Modules/syslogmodule.c b/Modules/syslogmodule.c index 1842dc95e8a..5b86963fc8f 100644 --- a/Modules/syslogmodule.c +++ b/Modules/syslogmodule.c @@ -68,9 +68,9 @@ syslog_get_argv(void) * is optional. */ - Py_ssize_t argv_len; + Py_ssize_t argv_len, scriptlen; PyObject *scriptobj; - char *atslash; + Py_UNICODE *atslash, *atstart; PyObject *argv = PySys_GetObject("argv"); if (argv == NULL) { @@ -90,13 +90,16 @@ syslog_get_argv(void) if (!PyUnicode_Check(scriptobj)) { return(NULL); } - if (PyUnicode_GET_SIZE(scriptobj) == 0) { + scriptlen = PyUnicode_GET_SIZE(scriptobj); + if (scriptlen == 0) { return(NULL); } - atslash = strrchr(_PyUnicode_AsString(scriptobj), SEP); + atstart = PyUnicode_AS_UNICODE(scriptobj); + atslash = Py_UNICODE_strrchr(atstart, SEP); if (atslash) { - return(PyUnicode_FromString(atslash + 1)); + return(PyUnicode_FromUnicode(atslash + 1, + scriptlen - (atslash - atstart) - 1)); } else { Py_INCREF(scriptobj); return(scriptobj); @@ -113,6 +116,7 @@ syslog_openlog(PyObject * self, PyObject * args, PyObject *kwds) long facility = LOG_USER; PyObject *new_S_ident_o = NULL; static char *keywords[] = {"ident", "logoption", "facility", 0}; + char *ident = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Ull:openlog", keywords, &new_S_ident_o, &logopt, &facility)) @@ -120,12 +124,12 @@ syslog_openlog(PyObject * self, PyObject * args, PyObject *kwds) if (new_S_ident_o) { Py_INCREF(new_S_ident_o); - } + } /* get sys.argv[0] or NULL if we can't for some reason */ if (!new_S_ident_o) { new_S_ident_o = syslog_get_argv(); - } + } Py_XDECREF(S_ident_o); S_ident_o = new_S_ident_o; @@ -134,8 +138,13 @@ syslog_openlog(PyObject * self, PyObject * args, PyObject *kwds) * make a copy, and syslog(3) later uses it. We can't garbagecollect it * If NULL, just let openlog figure it out (probably using C argv[0]). */ + if (S_ident_o) { + ident = _PyUnicode_AsString(S_ident_o); + if (ident == NULL) + return NULL; + } - openlog(S_ident_o ? _PyUnicode_AsString(S_ident_o) : NULL, logopt, facility); + openlog(ident, logopt, facility); S_log_open = 1; Py_INCREF(Py_None);