bpo-47084: Clear Unicode cached representations on finalization (GH-32032)

This commit is contained in:
Jeremy Kloth 2022-03-22 06:53:51 -06:00 committed by GitHub
parent 7d810b6a4e
commit 88872a29f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 78 additions and 18 deletions

View File

@ -18,6 +18,7 @@ extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *);
extern void _PyUnicode_Fini(PyInterpreterState *);
extern void _PyUnicode_FiniTypes(PyInterpreterState *);
extern void _PyStaticUnicode_Dealloc(PyObject *);
/* other API */

View File

@ -1,5 +1,14 @@
initialized = True
# The docstring is the single character U+00B6, spelled as a \u escape; its
# code point is below U+0100.
# NOTE(review): presumably a fixture for a frozen string stored with 1-byte
# characters -- confirm against the test that imports this module.
class TestFrozenUtf8_1:
"""\u00b6"""
# The docstring is the single character U+03C0, spelled as a \u escape; its
# code point is above U+00FF and below U+10000.
# NOTE(review): presumably a fixture for a frozen string stored with 2-byte
# characters -- confirm against the test that imports this module.
class TestFrozenUtf8_2:
"""\u03c0"""
# The docstring is the single character U+1F600, spelled as a \U escape; its
# code point is above U+FFFF.
# NOTE(review): presumably a fixture for a frozen string stored with 4-byte
# characters -- confirm against the test that imports this module.
class TestFrozenUtf8_4:
"""\U0001f600"""
# Print a fixed greeting to standard output; takes no arguments and returns
# None.
def main():
print("Hello world!")

View File

@ -1645,24 +1645,29 @@ class MiscTests(EmbeddingTestsMixin, unittest.TestCase):
'-X showrefcount requires a Python debug build')
def test_no_memleak(self):
# Launch the interpreter in a subprocess with "-X showrefcount" and check
# the "[<refs> refs, <blocks> blocks]" summary found at the start of its
# combined stdout/stderr output.
# bpo-1635741: Python must release all memory at exit
# NOTE(review): this page is a diff view without +/- markers; the statements
# from here down to the "tests = (" line look like the pre-change body and
# the loop after it looks like its replacement -- confirm against the real
# file before treating both as live code.
cmd = [sys.executable, "-I", "-X", "showrefcount", "-c", "pass"]
proc = subprocess.run(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True)
self.assertEqual(proc.returncode, 0)
out = proc.stdout.rstrip()
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
if not match:
self.fail(f"unexpected output: {out!a}")
refs = int(match.group(1))
blocks = int(match.group(2))
self.assertEqual(refs, 0, out)
if not MS_WINDOWS:
self.assertEqual(blocks, 0, out)
else:
# bpo-46857: on Windows, Python still leaks 1 memory block at exit
self.assertIn(blocks, (0, 1), out)
# Each case is (value for "-X frozen_modules=...", statement passed to -c).
tests = (
('off', 'pass'),
('on', 'pass'),
('off', 'import __hello__'),
('on', 'import __hello__'),
)
for flag, stmt in tests:
xopt = f"frozen_modules={flag}"
cmd = [sys.executable, "-I", "-X", "showrefcount", "-X", xopt, "-c", stmt]
# stderr is merged into stdout so the summary is captured in one stream.
proc = subprocess.run(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True)
self.assertEqual(proc.returncode, 0)
out = proc.stdout.rstrip()
# The pattern accepts negative counts (optional leading "-").
match = re.match(r'^\[(-?\d+) refs, (-?\d+) blocks\]', out)
if not match:
self.fail(f"unexpected output: {out!a}")
refs = int(match.group(1))
blocks = int(match.group(2))
# Report every (flag, stmt) combination as its own subtest; both counts
# are required to be exactly zero.
with self.subTest(frozen_modules=flag, stmt=stmt):
self.assertEqual(refs, 0, out)
self.assertEqual(blocks, 0, out)
class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase):

View File

@ -16057,6 +16057,35 @@ _PyUnicode_FiniTypes(PyInterpreterState *interp)
}
/* Release the cached representations (wstr and, for non-ASCII objects, utf8)
   attached to a compact unicode object and reset the matching fields.
   Only the cache buffers are passed to PyObject_Free(); `op` itself is never
   freed here. */
static void
unicode_static_dealloc(PyObject *op)
{
    PyASCIIObject *hdr = (PyASCIIObject *)op;

    assert(hdr->state.compact);

    if (hdr->state.ascii) {
        /* ASCII layout: wstr is the only cache handled on this path. */
        if (hdr->wstr != NULL) {
            PyObject_Free(hdr->wstr);
            hdr->wstr = NULL;
        }
        return;
    }

    PyCompactUnicodeObject *full = (PyCompactUnicodeObject *)op;
    /* Address of the memory that immediately follows the compact header. */
    void *payload = (void *)(full + 1);

    /* A wstr equal to `payload` is left alone -- presumably it then aliases
       the object's own character storage rather than a separate allocation
       (TODO confirm against the unicode object layout). */
    if (hdr->wstr != NULL && hdr->wstr != payload) {
        PyObject_Free(hdr->wstr);
        hdr->wstr = NULL;
        full->wstr_length = 0;
    }

    if (full->utf8 != NULL) {
        PyObject_Free(full->utf8);
        full->utf8 = NULL;
        full->utf8_length = 0;
    }
}
void
_PyUnicode_Fini(PyInterpreterState *interp)
{
@ -16070,6 +16099,21 @@ _PyUnicode_Fini(PyInterpreterState *interp)
_PyUnicode_FiniEncodings(&state->fs_codec);
unicode_clear_identifiers(state);
// Clear the single character singletons
for (int i = 0; i < 128; i++) {
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).ascii[i]);
}
for (int i = 0; i < 128; i++) {
unicode_static_dealloc((PyObject*)&_Py_SINGLETON(strings).latin1[i]);
}
}
/* Externally visible entry point that forwards `op` unchanged to the
   file-local unicode_static_dealloc(); it adds no behaviour of its own. */
void
_PyStaticUnicode_Dealloc(PyObject *op)
{
unicode_static_dealloc(op);
}

View File

@ -185,6 +185,7 @@ class Printer:
else:
self.write("PyCompactUnicodeObject _compact;")
self.write(f"{datatype} _data[{len(s)+1}];")
self.deallocs.append(f"_PyStaticUnicode_Dealloc((PyObject *)&{name});")
with self.block(f"{name} =", ";"):
if ascii:
with self.block("._ascii =", ","):