diff --git a/Lib/test/test_parser.py b/Lib/test/test_parser.py index d6e6f715778..70cabb28598 100644 --- a/Lib/test/test_parser.py +++ b/Lib/test/test_parser.py @@ -1,4 +1,6 @@ +import copy import parser +import pickle import unittest import operator import struct @@ -424,6 +426,52 @@ class IllegalSyntaxTestCase(unittest.TestCase): # not even remotely valid: self.check_bad_tree((1, 2, 3), "") + def test_illegal_terminal(self): + tree = \ + (257, + (269, + (270, + (271, + (277, + (1,))), + (4, ''))), + (4, ''), + (0, '')) + self.check_bad_tree(tree, "too small items in terminal node") + tree = \ + (257, + (269, + (270, + (271, + (277, + (1, b'pass'))), + (4, ''))), + (4, ''), + (0, '')) + self.check_bad_tree(tree, "non-string second item in terminal node") + tree = \ + (257, + (269, + (270, + (271, + (277, + (1, 'pass', '0', 0))), + (4, ''))), + (4, ''), + (0, '')) + self.check_bad_tree(tree, "non-integer third item in terminal node") + tree = \ + (257, + (269, + (270, + (271, + (277, + (1, 'pass', 0, 0))), + (4, ''))), + (4, ''), + (0, '')) + self.check_bad_tree(tree, "too many items in terminal node") + def test_illegal_yield_1(self): # Illegal yield statement: def f(): return 1; yield 1 tree = \ @@ -628,6 +676,24 @@ class IllegalSyntaxTestCase(unittest.TestCase): (4, ''), (0, '')) self.check_bad_tree(tree, "from import fred") + def test_illegal_encoding(self): + # Illegal encoding declaration + tree = \ + (339, + (257, (0, ''))) + self.check_bad_tree(tree, "missed encoding") + tree = \ + (339, + (257, (0, '')), + b'iso-8859-1') + self.check_bad_tree(tree, "non-string encoding") + tree = \ + (339, + (257, (0, '')), + '\udcff') + with self.assertRaises(UnicodeEncodeError): + parser.sequence2st(tree) + class CompileTestCase(unittest.TestCase): @@ -772,6 +838,21 @@ class STObjectTestCase(unittest.TestCase): self.assertRaises(TypeError, operator.lt, st1, 1815) self.assertRaises(TypeError, operator.gt, b'waterloo', st2) + def test_copy_pickle(self): + sts = [ + parser.expr('2 + 3'), + parser.suite('x = 2; y = x + 3'), + parser.expr('list(x**3 for x in range(20))') + ] + for st in sts: + st_copy = copy.copy(st) + self.assertEqual(st_copy.totuple(), st.totuple()) + st_copy = copy.deepcopy(st) + self.assertEqual(st_copy.totuple(), st.totuple()) + for proto in range(pickle.HIGHEST_PROTOCOL+1): + st_copy = pickle.loads(pickle.dumps(st, proto)) + self.assertEqual(st_copy.totuple(), st.totuple()) + check_sizeof = support.check_sizeof @support.cpython_only diff --git a/Misc/NEWS b/Misc/NEWS index e57faacf615..3ec02e0e3e8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -313,6 +313,8 @@ Extension Modules Library ------- +- bpo-30070: Fixed leaks and crashes in errors handling in the parser module. + - bpo-22352: Column widths in the output of dis.dis() are now adjusted for large line numbers and instruction offsets. diff --git a/Modules/parsermodule.c b/Modules/parsermodule.c index e5c4db3048c..929f2deb16c 100644 --- a/Modules/parsermodule.c +++ b/Modules/parsermodule.c @@ -697,7 +697,7 @@ validate_node(node *tree) short a_label = dfa_state->s_arc[arc].a_lbl; assert(a_label < _PyParser_Grammar.g_ll.ll_nlabels); if (_PyParser_Grammar.g_ll.ll_label[a_label].lb_type == ch_type) { - /* The child is acceptable; if non-terminal, validate it recursively. */ + /* The child is acceptable; if non-terminal, validate it recursively. */ if (ISNONTERMINAL(ch_type) && !validate_node(ch)) return 0; @@ -775,32 +775,35 @@ parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw) */ tree = build_node_tree(tuple); if (tree != 0) { - node *validation_root = tree; + node *validation_root = NULL; int tree_type = 0; switch (TYPE(tree)) { case eval_input: /* Might be an eval form. */ tree_type = PyST_EXPR; + validation_root = tree; break; case encoding_decl: /* This looks like an encoding_decl so far. */ - if (NCH(tree) != 1) + if (NCH(tree) == 1) { + tree_type = PyST_SUITE; + validation_root = CHILD(tree, 0); + } + else { err_string("Error Parsing encoding_decl"); - validation_root = CHILD(tree, 0); - /* Fall through */ + } + break; case file_input: /* This looks like an exec form so far. */ - tree_type = PyST_SUITE; + validation_root = tree; break; default: /* This is a fragment, at best. */ - PyNode_Free(tree); err_string("parse tree does not use a valid start symbol"); - return (0); } - if (validate_node(validation_root)) + if (validation_root != NULL && validate_node(validation_root)) st = parser_newstobject(tree, tree_type); else PyNode_Free(tree); @@ -830,6 +833,9 @@ build_node_children(PyObject *tuple, node *root, int *line_num) Py_ssize_t i; int err; + if (len < 0) { + return NULL; + } for (i = 1; i < len; ++i) { /* elem must always be a sequence, however simple */ PyObject* elem = PySequence_GetItem(tuple, i); @@ -850,7 +856,7 @@ build_node_children(PyObject *tuple, node *root, int *line_num) if (type == -1 && PyErr_Occurred()) { Py_DECREF(temp); Py_DECREF(elem); - return 0; + return NULL; } } Py_DECREF(temp); @@ -862,7 +868,7 @@ build_node_children(PyObject *tuple, node *root, int *line_num) PyErr_SetObject(parser_error, err); Py_XDECREF(err); Py_XDECREF(elem); - return (0); + return NULL; } if (ISTERMINAL(type)) { Py_ssize_t len = PyObject_Size(elem); @@ -871,11 +877,14 @@ build_node_children(PyObject *tuple, node *root, int *line_num) if ((len != 2) && (len != 3)) { err_string("terminal nodes must have 2 or 3 entries"); - return 0; + Py_DECREF(elem); + return NULL; } temp = PySequence_GetItem(elem, 1); - if (temp == NULL) - return 0; + if (temp == NULL) { + Py_DECREF(elem); + return NULL; + } if (!PyUnicode_Check(temp)) { PyErr_Format(parser_error, "second item in terminal node must be a string," @@ -883,46 +892,49 @@ build_node_children(PyObject *tuple, node *root, int *line_num) Py_TYPE(temp)->tp_name); Py_DECREF(temp); Py_DECREF(elem); - return 0; + return NULL; } if (len == 3) { PyObject *o = PySequence_GetItem(elem, 2); - if (o != NULL) { - if (PyLong_Check(o)) { - int num = _PyLong_AsInt(o); - if (num == -1 && PyErr_Occurred()) { - Py_DECREF(o); - Py_DECREF(temp); - Py_DECREF(elem); - return 0; - } - *line_num = num; - } - else { - PyErr_Format(parser_error, - "third item in terminal node must be an" - " integer, found %s", - Py_TYPE(temp)->tp_name); + if (o == NULL) { + Py_DECREF(temp); + Py_DECREF(elem); + return NULL; + } + if (PyLong_Check(o)) { + int num = _PyLong_AsInt(o); + if (num == -1 && PyErr_Occurred()) { Py_DECREF(o); Py_DECREF(temp); Py_DECREF(elem); - return 0; + return NULL; } - Py_DECREF(o); + *line_num = num; } + else { + PyErr_Format(parser_error, + "third item in terminal node must be an" + " integer, found %s", + Py_TYPE(temp)->tp_name); + Py_DECREF(o); + Py_DECREF(temp); + Py_DECREF(elem); + return NULL; + } + Py_DECREF(o); } temp_str = PyUnicode_AsUTF8AndSize(temp, &len); if (temp_str == NULL) { Py_DECREF(temp); - Py_XDECREF(elem); - return 0; + Py_DECREF(elem); + return NULL; } strn = (char *)PyObject_MALLOC(len + 1); if (strn == NULL) { Py_DECREF(temp); - Py_XDECREF(elem); + Py_DECREF(elem); PyErr_NoMemory(); - return 0; + return NULL; } (void) memcpy(strn, temp_str, len + 1); Py_DECREF(temp); @@ -932,20 +944,21 @@ build_node_children(PyObject *tuple, node *root, int *line_num) * It has to be one or the other; this is an error. * Raise an exception. */ - PyObject *err = Py_BuildValue("os", elem, "unknown node type."); + PyObject *err = Py_BuildValue("Os", elem, "unknown node type."); PyErr_SetObject(parser_error, err); Py_XDECREF(err); - Py_XDECREF(elem); - return (0); + Py_DECREF(elem); + return NULL; } err = PyNode_AddChild(root, type, strn, *line_num, 0); if (err == E_NOMEM) { - Py_XDECREF(elem); + Py_DECREF(elem); PyObject_FREE(strn); - return (node *) PyErr_NoMemory(); + PyErr_NoMemory(); + return NULL; } if (err == E_OVERFLOW) { - Py_XDECREF(elem); + Py_DECREF(elem); PyObject_FREE(strn); PyErr_SetString(PyExc_ValueError, "unsupported number of child nodes"); @@ -956,14 +969,14 @@ build_node_children(PyObject *tuple, node *root, int *line_num) node* new_child = CHILD(root, i - 1); if (new_child != build_node_children(elem, new_child, line_num)) { - Py_XDECREF(elem); - return (0); + Py_DECREF(elem); + return NULL; } } else if (type == NEWLINE) { /* It's true: we increment the */ ++(*line_num); /* line number *after* the newline! */ } - Py_XDECREF(elem); + Py_DECREF(elem); } return root; } @@ -998,10 +1011,23 @@ build_node_tree(PyObject *tuple) if (num == encoding_decl) { encoding = PySequence_GetItem(tuple, 2); + if (encoding == NULL) { + PyErr_SetString(parser_error, "missed encoding"); + return NULL; + } + if (!PyUnicode_Check(encoding)) { + PyErr_Format(parser_error, + "encoding must be a string, found %.200s", + Py_TYPE(encoding)->tp_name); + Py_DECREF(encoding); + return NULL; + } /* tuple isn't borrowed anymore here, need to DECREF */ tuple = PySequence_GetSlice(tuple, 0, 2); - if (tuple == NULL) + if (tuple == NULL) { + Py_DECREF(encoding); return NULL; + } } res = PyNode_New(num); if (res != NULL) { @@ -1014,31 +1040,33 @@ build_node_tree(PyObject *tuple) const char *temp; temp = PyUnicode_AsUTF8AndSize(encoding, &len); if (temp == NULL) { - Py_DECREF(res); + PyNode_Free(res); Py_DECREF(encoding); Py_DECREF(tuple); return NULL; } res->n_str = (char *)PyObject_MALLOC(len + 1); if (res->n_str == NULL) { - Py_DECREF(res); + PyNode_Free(res); Py_DECREF(encoding); Py_DECREF(tuple); PyErr_NoMemory(); return NULL; } (void) memcpy(res->n_str, temp, len + 1); - Py_DECREF(encoding); - Py_DECREF(tuple); } } + if (encoding != NULL) { + Py_DECREF(encoding); + Py_DECREF(tuple); + } } else { /* The tuple is illegal -- if the number is neither TERMINAL nor * NONTERMINAL, we can't use it. Not sure the implementation * allows this condition, but the API doesn't preclude it. */ - PyObject *err = Py_BuildValue("os", tuple, + PyObject *err = Py_BuildValue("Os", tuple, "Illegal component tuple."); PyErr_SetObject(parser_error, err); Py_XDECREF(err); @@ -1073,7 +1101,6 @@ parser__pickler(PyObject *self, PyObject *args) result = Py_BuildValue("O(O)", pickle_constructor, tuple); Py_DECREF(tuple); } - Py_DECREF(empty_dict); Py_DECREF(newargs); } finally: