bpo-38187: Fix a refleak in Tools/c-analyzer. (gh-16304)

The "Slot" helper (descriptor) is leaking references due to its caching mechanism. The change includes a partial fix to Slot, but also adds Variable.storage to replace the problematic use of Slot.

https://bugs.python.org/issue38187
This commit is contained in:
Eric Snow 2019-09-27 15:53:34 +01:00 committed by GitHub
parent 9055815809
commit 6693f730e0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 208 additions and 89 deletions

View File

@ -0,0 +1,6 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)

View File

@ -15,9 +15,6 @@ class FromFileTests(unittest.TestCase):
_return_read_tsv = ()
def tearDown(self):
Variable._isglobal.instances.clear()
@property
def calls(self):
try:

View File

@ -0,0 +1,6 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)

View File

@ -64,7 +64,9 @@ class StaticsFromBinaryTests(_Base):
**self.kwargs))
self.assertEqual(found, [
info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
])
@ -299,7 +301,7 @@ class StaticsTest(_Base):
info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'),
info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'),
info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'),
info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'),
info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'), # skipped
info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'),
info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'),
info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
@ -318,6 +320,7 @@ class StaticsTest(_Base):
self.assertEqual(found, [
info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'),
info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'),
info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'),
info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'),
info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'),

View File

@ -0,0 +1,6 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)

View File

@ -4,7 +4,7 @@ import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_analyzer_common.info import ID, UNKNOWN
from c_parser.info import (
normalize_vartype, Variable,
)
@ -31,38 +31,47 @@ class VariableTests(unittest.TestCase):
VALID_ARGS = (
('x/y/z/spam.c', 'func', 'eggs'),
'static',
'int',
)
VALID_KWARGS = dict(zip(Variable._fields, VALID_ARGS))
VALID_EXPECTED = VALID_ARGS
def test_init_typical_global(self):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname=None,
name='eggs',
),
vartype='int',
)
for storage in ('static', 'extern', 'implicit'):
with self.subTest(storage):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname=None,
name='eggs',
),
storage=storage,
vartype='int',
)
self.assertEqual(static, (
('x/y/z/spam.c', None, 'eggs'),
'int',
))
self.assertEqual(static, (
('x/y/z/spam.c', None, 'eggs'),
storage,
'int',
))
def test_init_typical_local(self):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
),
vartype='int',
)
for storage in ('static', 'local'):
with self.subTest(storage):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
),
storage=storage,
vartype='int',
)
self.assertEqual(static, (
('x/y/z/spam.c', 'func', 'eggs'),
storage,
'int',
))
@ -71,10 +80,12 @@ class VariableTests(unittest.TestCase):
with self.subTest(repr(value)):
static = Variable(
id=value,
storage=value,
vartype=value,
)
self.assertEqual(static, (
None,
None,
None,
))
@ -89,34 +100,42 @@ class VariableTests(unittest.TestCase):
PseudoStr('func'),
PseudoStr('spam'),
),
storage=PseudoStr('static'),
vartype=PseudoStr('int'),
),
(id,
'static',
'int',
)),
('non-str 1',
dict(
id=id,
storage=Object(),
vartype=Object(),
),
(id,
'<object>',
'<object>',
)),
('non-str 2',
dict(
id=id,
storage=StrProxy('static'),
vartype=StrProxy('variable'),
),
(id,
'static',
'variable',
)),
('non-str',
dict(
id=id,
vartype=('a', 'b', 'c'),
storage=('a', 'b', 'c'),
vartype=('x', 'y', 'z'),
),
(id,
"('a', 'b', 'c')",
"('x', 'y', 'z')",
)),
]
for summary, kwargs, expected in tests:
@ -134,36 +153,43 @@ class VariableTests(unittest.TestCase):
def test_iterable(self):
static = Variable(**self.VALID_KWARGS)
id, vartype = static
id, storage, vartype = static
values = (id, vartype)
values = (id, storage, vartype)
for value, expected in zip(values, self.VALID_EXPECTED):
self.assertEqual(value, expected)
def test_fields(self):
static = Variable(('a', 'b', 'z'), 'x')
static = Variable(('a', 'b', 'z'), 'x', 'y')
self.assertEqual(static.id, ('a', 'b', 'z'))
self.assertEqual(static.vartype, 'x')
self.assertEqual(static.storage, 'x')
self.assertEqual(static.vartype, 'y')
def test___getattr__(self):
static = Variable(('a', 'b', 'z'), 'x')
static = Variable(('a', 'b', 'z'), 'x', 'y')
self.assertEqual(static.filename, 'a')
self.assertEqual(static.funcname, 'b')
self.assertEqual(static.name, 'z')
def test_validate_typical(self):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
),
vartype='int',
)
validstorage = ('static', 'extern', 'implicit', 'local')
self.assertEqual(set(validstorage), set(Variable.STORAGE))
static.validate() # This does not fail.
for storage in validstorage:
with self.subTest(storage):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
),
storage=storage,
vartype='int',
)
static.validate() # This does not fail.
def test_validate_missing_field(self):
for field in Variable._fields:
@ -171,6 +197,13 @@ class VariableTests(unittest.TestCase):
static = Variable(**self.VALID_KWARGS)
static = static._replace(**{field: None})
with self.assertRaises(TypeError):
static.validate()
for field in ('storage', 'vartype'):
with self.subTest(field):
static = Variable(**self.VALID_KWARGS)
static = static._replace(**{field: UNKNOWN})
with self.assertRaises(TypeError):
static.validate()
@ -185,6 +218,7 @@ class VariableTests(unittest.TestCase):
) + badch
tests = [
('id', ()), # Any non-empty str is okay.
('storage', ('external', 'global') + notnames),
('vartype', ()), # Any non-empty str is okay.
]
seen = set()
@ -199,6 +233,8 @@ class VariableTests(unittest.TestCase):
static.validate()
for field, invalid in tests:
if field == 'id':
continue
valid = seen - set(invalid)
for value in valid:
with self.subTest(f'{field}={value!r}'):

View File

@ -0,0 +1,6 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)

View File

@ -262,7 +262,7 @@ def _known(symbol):
raise
if symbol.name not in decl:
decl = decl + symbol.name
return Variable(varid, decl)
return Variable(varid, 'static', decl)
def known_row(varid, decl):
@ -291,7 +291,7 @@ def known_rows(symbols, *,
except KeyError:
found = _find_match(symbol, cache, filenames)
if found is None:
found = Variable(symbol.id, UNKNOWN)
found = Variable(symbol.id, UNKNOWN, UNKNOWN)
yield _as_known(found.id, found.vartype)
else:
raise NotImplementedError # XXX incorporate KNOWN

View File

@ -34,34 +34,41 @@ def from_file(infile, *,
id = ID(filename, funcname, name)
if kind == 'variable':
values = known['variables']
value = Variable(id, declaration)
value._isglobal = _is_global(declaration) or id.funcname is None
if funcname:
storage = _get_storage(declaration) or 'local'
else:
storage = _get_storage(declaration) or 'implicit'
value = Variable(id, storage, declaration)
else:
raise ValueError(f'unsupported kind in row {row}')
if value.name == 'id' and declaration == UNKNOWN:
# None of these are variables.
declaration = 'int id';
else:
value.validate()
value.validate()
# if value.name == 'id' and declaration == UNKNOWN:
# # None of these are variables.
# declaration = 'int id';
# else:
# value.validate()
values[id] = value
return known
def _is_global(vartype):
def _get_storage(decl):
# statics
if vartype.startswith('static '):
return True
if vartype.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
return True
if vartype.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
return True
if vartype.startswith('PyDoc_VAR('):
return True
if vartype.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
return True
if vartype.startswith('WRAP_METHOD('):
return True
if decl.startswith('static '):
return 'static'
if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
return 'static'
if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
return 'static'
if decl.startswith('PyDoc_VAR('):
return 'static'
if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
return 'static'
if decl.startswith('WRAP_METHOD('):
return 'static'
# public extern
if vartype.startswith('PyAPI_DATA('):
return True
return False
if decl.startswith('extern '):
return 'extern'
if decl.startswith('PyAPI_DATA('):
return 'extern'
# implicit or local
return None

View File

@ -82,6 +82,13 @@ class Slot:
self.default = default
self.readonly = readonly
# The instance cache is not inherently tied to the normal
# lifetime of the instances. So must do something in order to
# avoid keeping the instances alive by holding a reference here.
# Ideally we would use weakref.WeakValueDictionary to do this.
# However, most builtin types do not support weakrefs. So
# instead we monkey-patch __del__ on the attached class to clear
# the instance.
self.instances = {}
self.name = None
@ -89,6 +96,12 @@ class Slot:
if self.name is not None:
raise TypeError('already used')
self.name = name
try:
slotnames = cls.__slot_names__
except AttributeError:
slotnames = cls.__slot_names__ = []
slotnames.append(name)
self._ensure___del__(cls, slotnames)
def __get__(self, obj, cls):
if obj is None: # called on the class
@ -115,7 +128,23 @@ class Slot:
def __delete__(self, obj):
if self.readonly:
raise AttributeError(f'{self.name} is readonly')
self.instances[id(obj)] = self.default
self.instances[id(obj)] = self.default # XXX refleak?
def _ensure___del__(self, cls, slotnames): # See the comment in __init__().
try:
old___del__ = cls.__del__
except AttributeError:
old___del__ = (lambda s: None)
else:
if getattr(old___del__, '_slotted', False):
return
def __del__(_self):
for name in slotnames:
delattr(_self, name)
old___del__(_self)
__del__._slotted = True
cls.__del__ = __del__
def set(self, obj, value):
"""Update the cached value for an object.

View File

@ -1,4 +1,5 @@
from collections import namedtuple
import re
from c_analyzer_common import info, util
from c_analyzer_common.util import classonly, _NTBase
@ -15,28 +16,53 @@ def normalize_vartype(vartype):
return str(vartype)
def extract_storage(decl, *, isfunc=False):
"""Return (storage, vartype) based on the given declaration.
The default storage is "implicit" or "local".
"""
if decl == info.UNKNOWN:
return decl, decl
if decl.startswith('static '):
return 'static', decl
#return 'static', decl.partition(' ')[2].strip()
elif decl.startswith('extern '):
return 'extern', decl
#return 'extern', decl.partition(' ')[2].strip()
elif re.match('.*\b(static|extern)\b', decl):
raise NotImplementedError
elif isfunc:
return 'local', decl
else:
return 'implicit', decl
class Variable(_NTBase,
namedtuple('Variable', 'id vartype')):
namedtuple('Variable', 'id storage vartype')):
"""Information about a single variable declaration."""
__slots__ = ()
_isglobal = util.Slot()
def __del__(self):
del self._isglobal
STORAGE = (
'static',
'extern',
'implicit',
'local',
)
@classonly
def from_parts(cls, filename, funcname, name, vartype, isglobal=False):
def from_parts(cls, filename, funcname, name, decl, storage=None):
if storage is None:
storage, decl = extract_storage(decl, isfunc=funcname)
id = info.ID(filename, funcname, name)
self = cls(id, vartype)
if isglobal:
self._isglobal = True
self = cls(id, storage, decl)
return self
def __new__(cls, id, vartype):
def __new__(cls, id, storage, vartype):
self = super().__new__(
cls,
id=info.ID.from_raw(id),
storage=str(storage) if storage else None,
vartype=normalize_vartype(vartype) if vartype else None,
)
return self
@ -63,18 +89,17 @@ class Variable(_NTBase,
"""Fail if the object is invalid (i.e. init with bad data)."""
self._validate_id()
if self.storage is None or self.storage == info.UNKNOWN:
raise TypeError('missing storage')
elif self.storage not in self.STORAGE:
raise ValueError(f'unsupported storage {self.storage:r}')
if self.vartype is None or self.vartype == info.UNKNOWN:
raise TypeError('missing vartype')
@property
def isglobal(self):
try:
return self._isglobal
except AttributeError:
# XXX Include extern variables.
# XXX Ignore functions.
self._isglobal = ('static' in self.vartype.split())
return self._isglobal
return self.storage != 'local'
@property
def isconst(self):

View File

@ -163,7 +163,7 @@ def find_variables(varids, filenames=None, *,
srcfiles = [varid.filename]
else:
if not filenames:
yield Variable(varid, UNKNOWN)
yield Variable(varid, UNKNOWN, UNKNOWN)
continue
srcfiles = filenames
for filename in srcfiles:
@ -177,4 +177,4 @@ def find_variables(varids, filenames=None, *,
used.add(found)
break
else:
yield Variable(varid, UNKNOWN)
yield Variable(varid, UNKNOWN, UNKNOWN)

View File

@ -68,14 +68,11 @@ def find_in_source(symbol, dirnames, *,
if symbol.funcname and symbol.funcname != UNKNOWN:
raise NotImplementedError
(filename, funcname, vartype
(filename, funcname, decl
) = _find_symbol(symbol.name, filenames, _perfilecache)
if filename == UNKNOWN:
return None
return info.Variable(
id=(filename, funcname, symbol.name),
vartype=vartype,
)
return info.Variable.from_parts(filename, funcname, symbol.name, decl)
def get_resolver(knownvars=None, dirnames=None, *,
@ -144,6 +141,7 @@ def symbols_to_variables(symbols, *,
#raise NotImplementedError(symbol)
resolved = info.Variable(
id=symbol.id,
storage=UNKNOWN,
vartype=UNKNOWN,
)
yield resolved