Issue #28350: String constants with null character no longer interned.

This commit is contained in:
Serhiy Storchaka 2016-10-04 18:21:25 +03:00
commit e81b0d335b
3 changed files with 27 additions and 11 deletions

View File

@ -135,19 +135,27 @@ class CodeTest(unittest.TestCase):
self.assertEqual(co.co_name, "funcname")
self.assertEqual(co.co_firstlineno, 15)
# Return True when s is the very object that sys.intern() yields for its value.
# The value is rebuilt by padding with '_' and slicing the padding off again,
# presumably so that an equal but separate string is handed to sys.intern()
# and the check itself does not intern s -- confirm against CPython behaviour.
def isinterned(s):
return s is sys.intern(('_' + s + '_')[1:-1])
class CodeConstsTest(unittest.TestCase):
def find_const(self, consts, value):
for v in consts:
if v == value:
return v
self.assertIn(value, consts) # rises an exception
self.fail('Should be never reached')
self.assertIn(value, consts) # raises an exception
self.fail('Should never be reached')
def assertIsInterned(self, s):
if s is not sys.intern(s):
if not isinterned(s):
self.fail('String %r is not interned' % (s,))
def assertIsNotInterned(self, s):
# Fail the current test, quoting the string, when isinterned(s) is true.
if isinterned(s):
self.fail('String %r is interned' % (s,))
@cpython_only
def test_interned_string(self):
co = compile('res = "str_value"', '?', 'exec')
@ -172,6 +180,12 @@ class CodeConstsTest(unittest.TestCase):
return a
self.assertIsInterned(f())
@cpython_only
def test_interned_string_with_null(self):
# Compile an assignment whose string constant contains a null character,
# locate that constant in co_consts, and check that it was not interned.
co = compile(r'res = "str\0value!"', '?', 'exec')
v = self.find_const(co.co_consts, 'str\0value!')
self.assertIsNotInterned(v)
class CodeWeakRefTest(unittest.TestCase):

View File

@ -10,6 +10,8 @@ What's New in Python 3.6.0 beta 2
Core and Builtins
-----------------
- Issue #28350: String constants with null character no longer interned.
- Issue #26617: Fix crash when GC runs during weakref callbacks.
- Issue #27942: String constants now interned recursively in tuples and frozensets.

View File

@ -19,21 +19,21 @@ static int
all_name_chars(PyObject *o)
{
static char ok_name_char[256];
static unsigned char *name_chars = (unsigned char *)NAME_CHARS;
PyUnicodeObject *u = (PyUnicodeObject *)o;
const unsigned char *s;
static const unsigned char *name_chars = (unsigned char *)NAME_CHARS;
const unsigned char *s, *e;
if (!PyUnicode_Check(o) || PyUnicode_READY(u) == -1 ||
PyUnicode_MAX_CHAR_VALUE(u) >= 128)
if (!PyUnicode_Check(o) || PyUnicode_READY(o) == -1 ||
!PyUnicode_IS_ASCII(o))
return 0;
if (ok_name_char[*name_chars] == 0) {
unsigned char *p;
const unsigned char *p;
for (p = name_chars; *p; p++)
ok_name_char[*p] = 1;
}
s = PyUnicode_1BYTE_DATA(u);
while (*s) {
s = PyUnicode_1BYTE_DATA(o);
e = s + PyUnicode_GET_LENGTH(o);
while (s != e) {
if (ok_name_char[*s++] == 0)
return 0;
}