bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)

This is partly a cleanup of the code. It is also preparation for getting the variables from the source (cross-platform) rather than from the symbols.

The change only touches the tool (and its tests).
This commit is contained in:
Eric Snow 2019-10-18 19:00:04 -07:00 committed by GitHub
parent ea55c51bd9
commit e4c431ecf5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
56 changed files with 1376 additions and 1179 deletions

View File

@ -3,7 +3,7 @@ import test.test_tools
test.test_tools.skip_if_missing('c-analyzer') test.test_tools.skip_if_missing('c-analyzer')
with test.test_tools.imports_under_tool('c-analyzer'): with test.test_tools.imports_under_tool('c-analyzer'):
from c_globals.__main__ import main from cpython.__main__ import main
class ActualChecks(unittest.TestCase): class ActualChecks(unittest.TestCase):

View File

@ -1,68 +0,0 @@
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser.info import Variable
from c_analyzer_common.info import ID
from c_analyzer_common.known import from_file
class FromFileTests(unittest.TestCase):
    """Tests for from_file(), driven through a stubbed TSV reader."""

    maxDiff = None

    # Rows the stubbed _read_tsv() hands back to the code under test.
    _return_read_tsv = ()

    @property
    def calls(self):
        """The (name, args) records made by the stubs; created on first use."""
        if not hasattr(self, '_calls'):
            self._calls = []
        return self._calls

    def _read_tsv(self, *args):
        """Stand-in TSV reader: record the call, then return the canned rows."""
        record = ('_read_tsv', args)
        self.calls.append(record)
        return self._return_read_tsv

    def test_typical(self):
        table = textwrap.dedent('''
filename funcname name kind declaration
file1.c - var1 variable static int
file1.c func1 local1 variable static int
file1.c - var2 variable int
file1.c func2 local2 variable char *
file2.c - var1 variable char *
''').strip().splitlines()
        # Only the first four whitespace runs become tabs, so spaces inside
        # the trailing declaration column are left alone.
        tabbed = [re.sub(r'\s+', '\t', row, 4) for row in table]
        rows = []
        for row in tabbed[1:]:  # the first row is the header
            rows.append(tuple(cell.strip() for cell in row.split('\t')))
        self._return_read_tsv = rows

        known = from_file('spam.c', _read_tsv=self._read_tsv)

        expected_vars = [
            Variable.from_parts('file1.c', '', 'var1', 'static int'),
            Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
            Variable.from_parts('file1.c', '', 'var2', 'int'),
            Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
            Variable.from_parts('file2.c', '', 'var1', 'char *'),
        ]
        self.assertEqual(known, {
            'variables': {var.id: var for var in expected_vars},
        })
        expected_calls = [
            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
        ]
        self.assertEqual(self.calls, expected_calls)

    def test_empty(self):
        self._return_read_tsv = []

        known = from_file('spam.c', _read_tsv=self._read_tsv)

        self.assertEqual(known, {
            'variables': {},
        })
        expected_calls = [
            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
        ]
        self.assertEqual(self.calls, expected_calls)

View File

@ -1,335 +0,0 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser import info
from c_globals.find import globals_from_binary, globals
class _Base(unittest.TestCase):
    """Shared base for the test cases here: provides the stub call log."""

    maxDiff = None

    @property
    def calls(self):
        """The list that stubs append (name, args) records to.

        The list is created lazily on first access and stored on the
        instance as ``_calls``.
        """
        try:
            recorded = self._calls
        except AttributeError:
            recorded = self._calls = []
        return recorded
class StaticsFromBinaryTests(_Base):
    """Tests for globals_from_binary() with its collaborators stubbed out."""

    # Canned results returned by the stubs below.
    _return_iter_symbols = ()
    _return_resolve_symbols = ()
    _return_get_symbol_resolver = None

    def setUp(self):
        super().setUp()
        # Keyword arguments that swap the stubs in for the real helpers.
        self.kwargs = {
            '_iter_symbols': self._iter_symbols,
            '_resolve': self._resolve_symbols,
            '_get_symbol_resolver': self._get_symbol_resolver,
        }

    def _iter_symbols(self, binfile, find_local_symbol):
        """Record the call and return the canned symbols."""
        record = ('_iter_symbols', (binfile, find_local_symbol))
        self.calls.append(record)
        return self._return_iter_symbols

    def _resolve_symbols(self, symbols, resolve):
        """Record the call and return the canned resolved variables."""
        record = ('_resolve_symbols', (symbols, resolve,))
        self.calls.append(record)
        return self._return_resolve_symbols

    def _get_symbol_resolver(self, knownvars, dirnames=None):
        """Record the call and return the canned resolver."""
        record = ('_get_symbol_resolver', (knownvars, dirnames))
        self.calls.append(record)
        return self._return_get_symbol_resolver

    def test_typical(self):
        make = info.Variable.from_parts
        symbols = self._return_iter_symbols = ()
        resolver = self._return_get_symbol_resolver = object()
        self._return_resolve_symbols = [
            make('dir1/spam.c', None, 'var1', 'int'),
            make('dir1/spam.c', None, 'var2', 'static int'),
            make('dir1/spam.c', None, 'var3', 'char *'),
            make('dir1/spam.c', 'func2', 'var4', 'const char *'),
            make('dir1/eggs.c', None, 'var1', 'static int'),
            make('dir1/eggs.c', 'func1', 'var2', 'static char *'),
        ]
        knownvars = object()

        found = list(globals_from_binary('python',
                                         knownvars=knownvars,
                                         **self.kwargs))

        # 'var4' (in func2, declared without "static") is the one resolved
        # variable that is not expected back.
        expected = [
            make('dir1/spam.c', None, 'var1', 'int'),
            make('dir1/spam.c', None, 'var2', 'static int'),
            make('dir1/spam.c', None, 'var3', 'char *'),
            make('dir1/eggs.c', None, 'var1', 'static int'),
            make('dir1/eggs.c', 'func1', 'var2', 'static char *'),
        ]
        self.assertEqual(found, expected)
        self.assertEqual(self.calls, [
            ('_iter_symbols', ('python', None)),
            ('_get_symbol_resolver', (knownvars, None)),
            ('_resolve_symbols', (symbols, resolver)),
        ])
# self._return_iter_symbols = [
# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
# s_info.Symbol(('???', None, 'var_x'), 'variable', False),
# s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
# s_info.Symbol((None, None, '???'), 'other', False),
# ]
# known = object()
#
# globals_from_binary('python', knownvars=known, **this.kwargs)
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
#
# def test_no_symbols(self):
# self._return_iter_symbols = []
#
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
# XXX need functional test
#class StaticFromDeclarationsTests(_Base):
#
# _return_iter_declarations = ()
#
# def iter_declarations(self, dirnames):
# self.calls.append(('iter_declarations', (dirnames,)))
# return iter(self._return_iter_declarations)
#
# def test_typical(self):
# self._return_iter_declarations = [
# None,
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# object(),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# object(),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# object(),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# object(),
# ]
#
# found = list(globals_from_declarations(['dir1'], self.iter_declarations))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_declarations', (['dir1'],)),
# ])
#
# def test_no_declarations(self):
# self._return_iter_declarations = []
#
# found = list(globals_from_declarations(['dir1'], self.iter_declarations))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_declarations', (['dir1'],)),
# ])
#class IterVariablesTests(_Base):
#
# _return_from_symbols = ()
# _return_from_declarations = ()
#
# def _from_symbols(self, dirnames, iter_symbols):
# self.calls.append(('_from_symbols', (dirnames, iter_symbols)))
# return iter(self._return_from_symbols)
#
# def _from_declarations(self, dirnames, iter_declarations):
# self.calls.append(('_from_declarations', (dirnames, iter_declarations)))
# return iter(self._return_from_declarations)
#
# def test_typical(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_symbols = expected
#
# found = list(iter_variables(['dir1'],
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
# ])
#
# def test_no_symbols(self):
# self._return_from_symbols = []
#
# found = list(iter_variables(['dir1'],
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
# ])
#
# def test_from_binary(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_symbols = expected
#
# found = list(iter_variables(['dir1'], 'platform',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)),
# ])
#
# def test_from_symbols(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_symbols = expected
#
# found = list(iter_variables(['dir1'], 'symbols',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_symbols', (['dir1'], s_symbols.iter_symbols)),
# ])
#
# def test_from_declarations(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_declarations = expected
#
# found = list(iter_variables(['dir1'], 'declarations',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_declarations', (['dir1'], declarations.iter_all)),
# ])
#
# def test_from_preprocessed(self):
# expected = [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ]
# self._return_from_declarations = expected
#
# found = list(iter_variables(['dir1'], 'preprocessed',
# _from_symbols=self._from_symbols,
# _from_declarations=self._from_declarations))
#
# self.assertEqual(found, expected)
# self.assertEqual(self.calls, [
# ('_from_declarations', (['dir1'], declarations.iter_preprocessed)),
# ])
class StaticsTest(_Base):
    """Tests for globals() with the variable iterator stubbed out."""

    # Variables the stubbed _iter_variables() yields (None means "none").
    _return_iter_variables = None

    def _iter_variables(self, kind, *, known, dirnames):
        """Record the call and return an iterator over the canned variables."""
        record = ('_iter_variables', (kind, known, dirnames))
        self.calls.append(record)
        canned = self._return_iter_variables or ()
        return iter(canned)

    def test_typical(self):
        make = info.Variable.from_parts
        self._return_iter_variables = [
            make('src1/spam.c', None, 'var1', 'static const char *'),
            make('src1/spam.c', None, 'var1b', 'const char *'),
            make('src1/spam.c', 'ham', 'initialized', 'static int'),
            make('src1/spam.c', 'ham', 'result', 'int'),  # skipped
            make('src1/spam.c', None, 'var2', 'static PyObject *'),
            make('src1/eggs.c', 'tofu', 'ready', 'static int'),
            make('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
            make('src1/sub/ham.c', None, 'var1', 'static const char const *'),
            make('src2/jam.c', None, 'var1', 'static int'),
            make('src2/jam.c', None, 'var2', 'static MyObject *'),
            make('Include/spam.h', None, 'data', 'static const int'),
        ]
        dirnames = object()
        known = object()

        found = list(globals(dirnames, known,
                             kind='platform',
                             _iter_variables=self._iter_variables,
                             ))

        # Everything the stub yielded except the entry marked "skipped".
        expected = [
            make('src1/spam.c', None, 'var1', 'static const char *'),
            make('src1/spam.c', None, 'var1b', 'const char *'),
            make('src1/spam.c', 'ham', 'initialized', 'static int'),
            make('src1/spam.c', None, 'var2', 'static PyObject *'),
            make('src1/eggs.c', 'tofu', 'ready', 'static int'),
            make('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
            make('src1/sub/ham.c', None, 'var1', 'static const char const *'),
            make('src2/jam.c', None, 'var1', 'static int'),
            make('src2/jam.c', None, 'var2', 'static MyObject *'),
            make('Include/spam.h', None, 'data', 'static const int'),
        ]
        self.assertEqual(found, expected)
        self.assertEqual(self.calls, [
            ('_iter_variables', ('platform', known, dirnames)),
        ])

View File

@ -3,7 +3,7 @@ import unittest
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_analyzer_common.files import ( from c_analyzer.common.files import (
iter_files, _walk_tree, glob_tree, iter_files, _walk_tree, glob_tree,
) )

View File

@ -4,7 +4,10 @@ import unittest
from ..util import PseudoStr, StrProxy, Object from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_analyzer_common.info import ID from c_analyzer.common.info import (
UNKNOWN,
ID,
)
class IDTests(unittest.TestCase): class IDTests(unittest.TestCase):

View File

@ -2,8 +2,10 @@ import unittest
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_parser import info from c_analyzer.variables import info
from c_globals.show import basic from c_analyzer.common.show import (
basic,
)
TYPICAL = [ TYPICAL = [

View File

@ -3,12 +3,13 @@ import unittest
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_analyzer_common import SOURCE_DIRS from c_analyzer.variables import info
from c_analyzer_common.known import DATA_FILE as KNOWN_FILE from cpython import SOURCE_DIRS
from c_parser import info from cpython.supported import IGNORED_FILE
import c_globals as cg from cpython.known import DATA_FILE as KNOWN_FILE
from c_globals.supported import IGNORED_FILE from cpython.__main__ import (
from c_globals.__main__ import cmd_check, cmd_show, parse_args, main cmd_check, cmd_show, parse_args, main,
)
TYPICAL = [ TYPICAL = [
@ -46,6 +47,8 @@ class CMDBase(unittest.TestCase):
maxDiff = None maxDiff = None
# _return_known_from_file = None
# _return_ignored_from_file = None
_return_find = () _return_find = ()
@property @property
@ -56,8 +59,16 @@ class CMDBase(unittest.TestCase):
self._calls = [] self._calls = []
return self._calls return self._calls
def _find(self, *args): # def _known_from_file(self, *args):
self.calls.append(('_find', args)) # self.calls.append(('_known_from_file', args))
# return self._return_known_from_file or {}
#
# def _ignored_from_file(self, *args):
# self.calls.append(('_ignored_from_file', args))
# return self._return_ignored_from_file or {}
def _find(self, known, ignored, skip_objects=False):
self.calls.append(('_find', (known, ignored, skip_objects)))
return self._return_find return self._return_find
def _show(self, *args): def _show(self, *args):
@ -78,41 +89,35 @@ class CheckTests(CMDBase):
_print=self._print, _print=self._print,
) )
self.assertEqual(self.calls[0], ( self.assertEqual(
'_find', ( self.calls[0],
SOURCE_DIRS, ('_find', (KNOWN_FILE, IGNORED_FILE, False)),
KNOWN_FILE, )
IGNORED_FILE,
),
))
def test_all_supported(self): def test_all_supported(self):
self._return_find = [(v, s) for v, s in TYPICAL if s] self._return_find = [(v, s) for v, s in TYPICAL if s]
dirs = ['src1', 'src2', 'Include'] dirs = ['src1', 'src2', 'Include']
cmd_check('check', cmd_check('check',
dirs, known='known.tsv',
ignored='ignored.tsv', ignored='ignored.tsv',
known='known.tsv', _find=self._find,
_find=self._find, _show=self._show,
_show=self._show, _print=self._print,
_print=self._print, )
)
self.assertEqual(self.calls, [ self.assertEqual(self.calls, [
('_find', (dirs, 'known.tsv', 'ignored.tsv')), ('_find', ('known.tsv', 'ignored.tsv', False)),
#('_print', ('okay',)), #('_print', ('okay',)),
]) ])
def test_some_unsupported(self): def test_some_unsupported(self):
self._return_find = TYPICAL self._return_find = TYPICAL
dirs = ['src1', 'src2', 'Include']
with self.assertRaises(SystemExit) as cm: with self.assertRaises(SystemExit) as cm:
cmd_check('check', cmd_check('check',
dirs,
ignored='ignored.tsv',
known='known.tsv', known='known.tsv',
ignored='ignored.tsv',
_find=self._find, _find=self._find,
_show=self._show, _show=self._show,
_print=self._print, _print=self._print,
@ -120,7 +125,7 @@ class CheckTests(CMDBase):
unsupported = [v for v, s in TYPICAL if not s] unsupported = [v for v, s in TYPICAL if not s]
self.assertEqual(self.calls, [ self.assertEqual(self.calls, [
('_find', (dirs, 'known.tsv', 'ignored.tsv')), ('_find', ('known.tsv', 'ignored.tsv', False)),
('_print', ('ERROR: found unsupported global variables',)), ('_print', ('ERROR: found unsupported global variables',)),
('_print', ()), ('_print', ()),
('_show', (sorted(unsupported),)), ('_show', (sorted(unsupported),)),
@ -140,20 +145,15 @@ class ShowTests(CMDBase):
_print=self._print, _print=self._print,
) )
self.assertEqual(self.calls[0], ( self.assertEqual(
'_find', ( self.calls[0],
SOURCE_DIRS, ('_find', (KNOWN_FILE, IGNORED_FILE, False)),
KNOWN_FILE, )
IGNORED_FILE,
),
))
def test_typical(self): def test_typical(self):
self._return_find = TYPICAL self._return_find = TYPICAL
dirs = ['src1', 'src2', 'Include']
cmd_show('show', cmd_show('show',
dirs,
known='known.tsv', known='known.tsv',
ignored='ignored.tsv', ignored='ignored.tsv',
_find=self._find, _find=self._find,
@ -164,7 +164,7 @@ class ShowTests(CMDBase):
supported = [v for v, s in TYPICAL if s] supported = [v for v, s in TYPICAL if s]
unsupported = [v for v, s in TYPICAL if not s] unsupported = [v for v, s in TYPICAL if not s]
self.assertEqual(self.calls, [ self.assertEqual(self.calls, [
('_find', (dirs, 'known.tsv', 'ignored.tsv')), ('_find', ('known.tsv', 'ignored.tsv', False)),
('_print', ('supported:',)), ('_print', ('supported:',)),
('_print', ('----------',)), ('_print', ('----------',)),
('_show', (sorted(supported),)), ('_show', (sorted(supported),)),
@ -201,7 +201,7 @@ class ParseArgsTests(unittest.TestCase):
self.assertEqual(cmdkwargs, { self.assertEqual(cmdkwargs, {
'ignored': IGNORED_FILE, 'ignored': IGNORED_FILE,
'known': KNOWN_FILE, 'known': KNOWN_FILE,
'dirs': SOURCE_DIRS, #'dirs': SOURCE_DIRS,
}) })
def test_check_full_args(self): def test_check_full_args(self):
@ -209,16 +209,16 @@ class ParseArgsTests(unittest.TestCase):
'check', 'check',
'--ignored', 'spam.tsv', '--ignored', 'spam.tsv',
'--known', 'eggs.tsv', '--known', 'eggs.tsv',
'dir1', #'dir1',
'dir2', #'dir2',
'dir3', #'dir3',
]) ])
self.assertEqual(cmd, 'check') self.assertEqual(cmd, 'check')
self.assertEqual(cmdkwargs, { self.assertEqual(cmdkwargs, {
'ignored': 'spam.tsv', 'ignored': 'spam.tsv',
'known': 'eggs.tsv', 'known': 'eggs.tsv',
'dirs': ['dir1', 'dir2', 'dir3'] #'dirs': ['dir1', 'dir2', 'dir3']
}) })
def test_show_no_args(self): def test_show_no_args(self):
@ -230,7 +230,7 @@ class ParseArgsTests(unittest.TestCase):
self.assertEqual(cmdkwargs, { self.assertEqual(cmdkwargs, {
'ignored': IGNORED_FILE, 'ignored': IGNORED_FILE,
'known': KNOWN_FILE, 'known': KNOWN_FILE,
'dirs': SOURCE_DIRS, #'dirs': SOURCE_DIRS,
'skip_objects': False, 'skip_objects': False,
}) })
@ -239,16 +239,16 @@ class ParseArgsTests(unittest.TestCase):
'show', 'show',
'--ignored', 'spam.tsv', '--ignored', 'spam.tsv',
'--known', 'eggs.tsv', '--known', 'eggs.tsv',
'dir1', #'dir1',
'dir2', #'dir2',
'dir3', #'dir3',
]) ])
self.assertEqual(cmd, 'show') self.assertEqual(cmd, 'show')
self.assertEqual(cmdkwargs, { self.assertEqual(cmdkwargs, {
'ignored': 'spam.tsv', 'ignored': 'spam.tsv',
'known': 'eggs.tsv', 'known': 'eggs.tsv',
'dirs': ['dir1', 'dir2', 'dir3'], #'dirs': ['dir1', 'dir2', 'dir3'],
'skip_objects': False, 'skip_objects': False,
}) })

View File

@ -4,9 +4,11 @@ import unittest
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_analyzer_common.info import ID from c_analyzer.common.info import ID
from c_parser import info from c_analyzer.variables.info import Variable
from c_globals.supported import is_supported, ignored_from_file from cpython.supported import (
is_supported, ignored_from_file,
)
class IsSupportedTests(unittest.TestCase): class IsSupportedTests(unittest.TestCase):
@ -14,8 +16,8 @@ class IsSupportedTests(unittest.TestCase):
@unittest.expectedFailure @unittest.expectedFailure
def test_supported(self): def test_supported(self):
statics = [ statics = [
info.StaticVar('src1/spam.c', None, 'var1', 'const char *'), Variable('src1/spam.c', None, 'var1', 'const char *'),
info.StaticVar('src1/spam.c', None, 'var1', 'int'), Variable('src1/spam.c', None, 'var1', 'int'),
] ]
for static in statics: for static in statics:
with self.subTest(static): with self.subTest(static):
@ -26,8 +28,8 @@ class IsSupportedTests(unittest.TestCase):
@unittest.expectedFailure @unittest.expectedFailure
def test_not_supported(self): def test_not_supported(self):
statics = [ statics = [
info.StaticVar('src1/spam.c', None, 'var1', 'PyObject *'), Variable('src1/spam.c', None, 'var1', 'PyObject *'),
info.StaticVar('src1/spam.c', None, 'var1', 'PyObject[10]'), Variable('src1/spam.c', None, 'var1', 'PyObject[10]'),
] ]
for static in statics: for static in statics:
with self.subTest(static): with self.subTest(static):

View File

@ -3,9 +3,9 @@ import unittest
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_parser.declarations import ( from c_analyzer.parser.declarations import (
iter_global_declarations, iter_local_statements, iter_global_declarations, iter_local_statements,
parse_func, parse_var, parse_compound, parse_func, _parse_var, parse_compound,
iter_variables, iter_variables,
) )
@ -515,7 +515,7 @@ class ParseVarTests(TestCaseBase):
]) ])
for stmt, expected in tests: for stmt, expected in tests:
with self.subTest(stmt): with self.subTest(stmt):
name, vartype = parse_var(stmt) name, vartype = _parse_var(stmt)
self.assertEqual((name, vartype), expected) self.assertEqual((name, vartype), expected)

View File

@ -6,7 +6,7 @@ import sys
from ..util import wrapped_arg_combos, StrProxy from ..util import wrapped_arg_combos, StrProxy
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_parser.preprocessor import ( from c_analyzer.parser.preprocessor import (
iter_lines, iter_lines,
# directives # directives
parse_directive, PreprocessorDirective, parse_directive, PreprocessorDirective,

View File

@ -4,8 +4,8 @@ import unittest
from ..util import PseudoStr, StrProxy, Object from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_analyzer_common.info import ID from c_analyzer.common.info import ID
from c_symbols.info import Symbol from c_analyzer.symbols.info import Symbol
class SymbolTests(unittest.TestCase): class SymbolTests(unittest.TestCase):

View File

@ -0,0 +1,6 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
    """Load the tests found in the directory containing this file.

    All arguments are forwarded unchanged to load_package_tests().
    """
    package_dir = os.path.dirname(__file__)
    return load_package_tests(package_dir, *args)

View File

@ -0,0 +1,124 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.variables import info
from c_analyzer.variables.find import (
vars_from_binary,
)
class _Base(unittest.TestCase):
    """Common base class that gives each test a log of stub calls."""

    maxDiff = None

    @property
    def calls(self):
        """Return the per-instance call log, creating it if it is missing."""
        if not hasattr(self, '_calls'):
            # First access on this instance: start with an empty log.
            self._calls = []
        return self._calls
class VarsFromBinaryTests(_Base):
    """Tests for vars_from_binary() with its collaborators stubbed out."""

    # Canned results returned by the stubs below.
    _return_iter_vars = ()
    _return_get_symbol_resolver = None

    def setUp(self):
        super().setUp()
        # Keyword arguments that swap the stubs in for the real helpers.
        self.kwargs = {
            '_iter_vars': self._iter_vars,
            '_get_symbol_resolver': self._get_symbol_resolver,
        }

    def _iter_vars(self, binfile, resolve, handle_id):
        """Record the call and pair each canned variable with its ID."""
        self.calls.append(('_iter_vars', (binfile, resolve, handle_id)))
        pairs = []
        for var in self._return_iter_vars:
            pairs.append((var, var.id))
        return pairs

    def _get_symbol_resolver(self, known=None, dirnames=(), *,
                             handle_var,
                             filenames=None,
                             check_filename=None,
                             perfilecache=None,
                             ):
        """Record the call and return the canned resolver."""
        recorded_args = (known, dirnames, handle_var, filenames,
                         check_filename, perfilecache)
        self.calls.append(('_get_symbol_resolver', recorded_args))
        return self._return_get_symbol_resolver

    def test_typical(self):
        make = info.Variable.from_parts
        resolver = self._return_get_symbol_resolver = object()
        self._return_iter_vars = [
            make('dir1/spam.c', None, 'var1', 'int'),
            make('dir1/spam.c', None, 'var2', 'static int'),
            make('dir1/spam.c', None, 'var3', 'char *'),
            make('dir1/spam.c', 'func2', 'var4', 'const char *'),
            make('dir1/eggs.c', None, 'var1', 'static int'),
            make('dir1/eggs.c', 'func1', 'var2', 'static char *'),
        ]
        known = object()
        filenames = object()

        found = list(vars_from_binary('python',
                                      known=known,
                                      filenames=filenames,
                                      **self.kwargs))

        expected = [
            make('dir1/spam.c', None, 'var1', 'int'),
            make('dir1/spam.c', None, 'var2', 'static int'),
            make('dir1/spam.c', None, 'var3', 'char *'),
            make('dir1/spam.c', 'func2', 'var4', 'const char *'),
            make('dir1/eggs.c', None, 'var1', 'static int'),
            make('dir1/eggs.c', 'func1', 'var2', 'static char *'),
        ]
        self.assertEqual(found, expected)
        # NOTE(review): the first two recorded slots are the stub's `known`
        # and `dirnames` parameters, yet the expected values are `filenames`
        # and `known`.  Presumably vars_from_binary() passes
        # (filenames, known) positionally -- confirm against the real
        # resolver's signature.
        self.assertEqual(self.calls, [
            ('_get_symbol_resolver', (filenames, known, info.Variable.from_id, None, None, {})),
            ('_iter_vars', ('python', resolver, None)),
        ])
# self._return_iter_symbols = [
# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
# s_info.Symbol(('???', None, 'var_x'), 'variable', False),
# s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
# s_info.Symbol((None, None, '???'), 'other', False),
# ]
# known = object()
#
# vars_from_binary('python', knownvars=known, **this.kwargs)
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
#
# def test_no_symbols(self):
# self._return_iter_symbols = []
#
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
# XXX need functional test

View File

@ -4,10 +4,10 @@ import unittest
from ..util import PseudoStr, StrProxy, Object from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests from .. import tool_imports_for_tests
with tool_imports_for_tests(): with tool_imports_for_tests():
from c_analyzer_common.info import ID, UNKNOWN from c_analyzer.common.info import UNKNOWN, ID
from c_parser.info import ( from c_analyzer.variables.info import (
normalize_vartype, Variable, normalize_vartype, Variable
) )
class NormalizeVartypeTests(unittest.TestCase): class NormalizeVartypeTests(unittest.TestCase):

View File

@ -0,0 +1,139 @@
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import ID
from c_analyzer.variables.info import Variable
from c_analyzer.variables.known import (
read_file,
from_file,
)
class _BaseTests(unittest.TestCase):
    """Base class for the tests here; keeps a log of calls made to stubs."""

    maxDiff = None

    @property
    def calls(self):
        """The stub call log, stored on the instance as ``_calls``.

        It is created as an empty list the first time it is requested.
        """
        try:
            log = self._calls
        except AttributeError:
            log = self._calls = []
        return log
class ReadFileTests(_BaseTests):
    """Tests for read_file(), driven through a stubbed TSV reader."""

    # Rows the stubbed _read_tsv() hands back to the code under test.
    _return_read_tsv = ()

    def _read_tsv(self, *args):
        """Stand-in TSV reader: record the call, then return the canned rows."""
        record = ('_read_tsv', args)
        self.calls.append(record)
        return self._return_read_tsv

    def test_typical(self):
        table = textwrap.dedent('''
filename funcname name kind declaration
file1.c - var1 variable static int
file1.c func1 local1 variable static int
file1.c - var2 variable int
file1.c func2 local2 variable char *
file2.c - var1 variable char *
''').strip().splitlines()
        # Only the first four whitespace runs become tabs, so spaces inside
        # the trailing declaration column are left alone.
        tabbed = [re.sub(r'\s+', '\t', row, 4) for row in table]
        rows = []
        for row in tabbed[1:]:  # the first row is the header
            rows.append(tuple(cell.strip() for cell in row.split('\t')))
        self._return_read_tsv = rows

        known = list(read_file('known.tsv', _read_tsv=self._read_tsv))

        expected = [
            ('variable', ID('file1.c', '', 'var1'), 'static int'),
            ('variable', ID('file1.c', 'func1', 'local1'), 'static int'),
            ('variable', ID('file1.c', '', 'var2'), 'int'),
            ('variable', ID('file1.c', 'func2', 'local2'), 'char *'),
            ('variable', ID('file2.c', '', 'var1'), 'char *'),
        ]
        self.assertEqual(known, expected)
        self.assertEqual(self.calls, [
            ('_read_tsv',
             ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
        ])

    def test_empty(self):
        self._return_read_tsv = []

        known = list(read_file('known.tsv', _read_tsv=self._read_tsv))

        self.assertEqual(known, [])
        expected_calls = [
            ('_read_tsv', ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
        ]
        self.assertEqual(self.calls, expected_calls)
class FromFileTests(_BaseTests):
    """Exercise from_file() with the row reader and handle_var both faked."""

    # Canned results for the fakes below; each test sets its own.
    _return_read_file = ()
    _return_handle_var = ()

    def _read_file(self, infile):
        """Fake read_file(): record the call, return an iterator of rows."""
        self.calls.append(('_read_file', (infile,)))
        return iter(self._return_read_file)

    def _handle_var(self, varid, decl):
        """Fake handle_var(): record the call, return the next canned var."""
        self.calls.append(('_handle_var', (varid, decl)))
        return self._return_handle_var.pop(0)

    def test_typical(self):
        parts = [
            ('file1.c', '', 'var1', 'static int'),
            ('file1.c', 'func1', 'local1', 'static int'),
            ('file1.c', '', 'var2', 'int'),
            ('file1.c', 'func2', 'local2', 'char *'),
            ('file2.c', '', 'var1', 'char *'),
        ]
        expected = [Variable.from_parts(*each) for each in parts]
        self._return_read_file = [
            ('variable', var.id, var.vartype) for var in expected
        ]
        self._return_handle_var = list(expected)  # a copy

        known = from_file(
            'known.tsv',
            handle_var=self._handle_var,
            _read_file=self._read_file,
        )

        self.assertEqual(known, {
            'variables': {var.id: var for var in expected},
        })
        handled = [('_handle_var', (var.id, var.vartype))
                   for var in expected]
        self.assertEqual(
            self.calls,
            [('_read_file', ('known.tsv',))] + handled,
        )

    def test_empty(self):
        self._return_read_file = []

        known = from_file(
            'known.tsv',
            handle_var=self._handle_var,
            _read_file=self._read_file,
        )

        self.assertEqual(known, {'variables': {}})
        self.assertEqual(self.calls, [('_read_file', ('known.tsv',))])

View File

@ -1,6 +1,6 @@
# This is a script equivalent of running "python -m test.test_c_globals.cg". # This is a script equivalent of running "python -m test.test_c_globals.cg".
from c_globals.__main__ import parse_args, main from cpython.__main__ import parse_args, main
# This is effectively copied from cg/__main__.py: # This is effectively copied from cg/__main__.py:

View File

@ -2,7 +2,10 @@ import glob
import os import os
import os.path import os.path
from . import SOURCE_DIRS, REPO_ROOT # XXX need tests:
# * walk_tree()
# * glob_tree()
# * iter_files_by_suffix()
C_SOURCE_SUFFIXES = ('.c', '.h') C_SOURCE_SUFFIXES = ('.c', '.h')
@ -115,24 +118,3 @@ def iter_files_by_suffix(root, suffixes, relparent=None, *,
# XXX Ignore repeated suffixes? # XXX Ignore repeated suffixes?
for suffix in suffixes: for suffix in suffixes:
yield from _iter_files(root, suffix, relparent) yield from _iter_files(root, suffix, relparent)
def iter_cpython_files(*,
walk=walk_tree,
_files=iter_files_by_suffix,
):
"""Yield each file in the tree for each of the given directory names."""
excludedtrees = [
os.path.join('Include', 'cpython', ''),
]
def is_excluded(filename):
for root in excludedtrees:
if filename.startswith(root):
return True
return False
for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
walk=walk,
):
if is_excluded(filename):
continue
yield filename

View File

@ -0,0 +1,138 @@
from collections import namedtuple
import re
from .util import classonly, _NTBase
# XXX need tests:
# * ID.match()
UNKNOWN = '???'
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration.

    Every field is stored either as a non-empty str or as None; falsy
    values passed to the constructor are normalized to None.
    """

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Return an ID for the given raw value (or None if it is falsy).

        A str is treated as a bare name, a 1-item sequence as (name,)
        and a 2-item sequence as (filename, name).  Anything else is
        handed off to the base class.
        """
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        self = super().__new__(
            cls,
            filename=str(filename) if filename else None,
            funcname=str(funcname) if funcname else None,
            name=str(name) if name else None,
        )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data).

        TypeError is raised for a missing name (or for a funcname
        without a filename).  ValueError is raised if the name or the
        funcname is not an identifier; UNKNOWN is accepted as a funcname.
        """
        if not self.name:
            raise TypeError('missing name')
        if not NAME_RE.match(self.name):
            raise ValueError(
                f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                # Name the offending field so the error is not mistaken
                # for a problem with "name".
                raise ValueError(
                    f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        """True if the ID has a funcname (including UNKNOWN)."""
        return self.funcname is not None

    def match(self, other, *,
              match_files=(lambda f1, f2: f1 == f2),
              ):
        """Return True if the two match.

        At least one of the two must be completely valid (no UNKNOWN
        anywhere).  Otherwise False is returned.  The remaining one
        *may* have UNKNOWN for both funcname and filename.  It must
        have a valid name though.

        The caller is responsible for knowing which of the two is valid
        (and which to use if both are valid).

        "match_files" is called with (self.filename, other.filename)
        when both filenames are known.  NotImplementedError is raised
        when one side has an UNKNOWN filename but a known funcname.
        """
        # First check the name.
        if self.name is None:
            return False
        if other.name != self.name:
            return False

        # Then check the filename.
        if self.filename is None:
            return False
        if other.filename is None:
            return False
        if self.filename == UNKNOWN:
            # "other" must be the valid one.
            if other.funcname == UNKNOWN:
                return False
            elif self.funcname != UNKNOWN:
                # XXX Try matching funcname even though we don't
                # know the filename?
                raise NotImplementedError
            else:
                return True
        elif other.filename == UNKNOWN:
            # "self" must be the valid one.
            if self.funcname == UNKNOWN:
                return False
            elif other.funcname != UNKNOWN:
                # XXX Try matching funcname even though we don't
                # know the filename?
                raise NotImplementedError
            else:
                return True
        elif not match_files(self.filename, other.filename):
            return False

        # Finally, check the funcname.
        if self.funcname == UNKNOWN:
            # "other" must be the valid one.
            if other.funcname == UNKNOWN:
                return False
            return other.funcname is not None
        elif other.funcname == UNKNOWN:
            # "self" must be the valid one; it cannot be UNKNOWN here
            # since that case was handled by the branch above.
            return self.funcname is not None
        elif self.funcname == other.funcname:
            # Both are valid.
            return True

        return False

View File

@ -0,0 +1,11 @@
def basic(variables, *,
          _print=print):
    """Print one plain line per variable.

    Each line is "<filename>:<name>" (or "<filename>:<funcname>():<name>"
    when the variable has a funcname), left-justified to 64 columns,
    followed by a space and the variable's vartype.
    """
    for var in variables:
        scope = f'{var.funcname}():' if var.funcname else ''
        label = f'{var.filename}:{scope}{var.name}'
        _print(f'{label:<64} {var.vartype}')

View File

@ -2,6 +2,8 @@ import re
import shlex import shlex
import subprocess import subprocess
from ..common.info import UNKNOWN
from . import source from . import source
@ -194,7 +196,28 @@ def parse_func(stmt, body):
return name, signature return name, signature
def parse_var(stmt): #TYPE_SPEC = rf'''(?:
# )'''
#VAR_DECLARATOR = rf'''(?:
# )'''
#VAR_DECL = rf'''(?:
# {TYPE_SPEC}+
# {VAR_DECLARATOR}
# \s*
# )'''
#VAR_DECLARATION = rf'''(?:
# {VAR_DECL}
# (?: = [^=] [^;]* )?
# ;
# )'''
#
#
#def parse_variable(decl, *, inFunc=False):
# """Return [(name, storage, vartype)] for the given variable declaration."""
# ...
def _parse_var(stmt):
"""Return (name, vartype) for the given variable declaration.""" """Return (name, vartype) for the given variable declaration."""
stmt = stmt.rstrip(';') stmt = stmt.rstrip(';')
m = LOCAL_STMT_START_RE.match(stmt) m = LOCAL_STMT_START_RE.match(stmt)
@ -220,6 +243,27 @@ def parse_var(stmt):
return name, vartype return name, vartype
def extract_storage(decl, *, infunc=None):
    """Return the storage implied by the given declaration.

    Only the storage (a single str) is returned, not a
    (storage, vartype) pair.  UNKNOWN is passed through unchanged.
    A declaration starting with "static " or "extern " yields that
    keyword.  Otherwise the default storage is "implicit" (or "local"
    if infunc is true).

    NotImplementedError is raised if "static" or "extern" shows up
    as a word anywhere else in the declaration.
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        return 'static'
    elif decl.startswith('extern '):
        return 'extern'
    # The pattern must be a raw string: in a plain string "\b" is a
    # backspace character rather than a word boundary.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'
def parse_compound(stmt, blocks): def parse_compound(stmt, blocks):
"""Return (headers, bodies) for the given compound statement.""" """Return (headers, bodies) for the given compound statement."""
# XXX Identify declarations inside compound statements # XXX Identify declarations inside compound statements
@ -228,14 +272,17 @@ def parse_compound(stmt, blocks):
def iter_variables(filename, *, def iter_variables(filename, *,
preprocessed=False,
_iter_source_lines=source.iter_lines, _iter_source_lines=source.iter_lines,
_iter_global=iter_global_declarations, _iter_global=iter_global_declarations,
_iter_local=iter_local_statements, _iter_local=iter_local_statements,
_parse_func=parse_func, _parse_func=parse_func,
_parse_var=parse_var, _parse_var=_parse_var,
_parse_compound=parse_compound, _parse_compound=parse_compound,
): ):
"""Yield (funcname, name, vartype) for every variable in the given file.""" """Yield (funcname, name, vartype) for every variable in the given file."""
if preprocessed:
raise NotImplementedError
lines = _iter_source_lines(filename) lines = _iter_source_lines(filename)
for stmt, body in _iter_global(lines): for stmt, body in _iter_global(lines):
# At the file top-level we only have to worry about vars & funcs. # At the file top-level we only have to worry about vars & funcs.
@ -256,7 +303,7 @@ def iter_variables(filename, *,
def _iter_locals(lines, *, def _iter_locals(lines, *,
_iter_statements=iter_local_statements, _iter_statements=iter_local_statements,
_parse_var=parse_var, _parse_var=_parse_var,
_parse_compound=parse_compound, _parse_compound=parse_compound,
): ):
compound = [lines] compound = [lines]
@ -278,18 +325,15 @@ def _iter_locals(lines, *,
compound.extend(bodies) compound.extend(bodies)
def iter_all(dirnames): def iter_all(filename, *,
preprocessed=False,
):
"""Yield a Declaration for each one found. """Yield a Declaration for each one found.
If there are duplicates, due to preprocessor conditionals, then If there are duplicates, due to preprocessor conditionals, then
they are checked to make sure they are the same. they are checked to make sure they are the same.
""" """
raise NotImplementedError # XXX For the moment we cheat.
for funcname, name, decl in iter_variables(filename,
preprocessed=preprocessed):
def iter_preprocessed(dirnames): yield 'variable', funcname, name, decl
"""Yield a Declaration for each one found.
All source files are run through the preprocessor first.
"""
raise NotImplementedError

View File

@ -0,0 +1,107 @@
from ..common.info import UNKNOWN, ID
from . import declarations
# XXX need tests:
# * variables
# * variable
# * variable_from_id
def _iter_vars(filenames, preprocessed, *,
               handle_id=None,
               _iter_decls=declarations.iter_all,
               ):
    """Yield (varid, decl) for each variable declared in the given files.

    "handle_id" is called with (filename, funcname, name) to build each
    varid; it defaults to ID.  Declarations of any kind other than
    "variable" are skipped.
    """
    make_id = ID if handle_id is None else handle_id
    for filename in filenames or ():
        found = _iter_decls(filename, preprocessed=preprocessed)
        for kind, funcname, name, decl in found:
            if kind == 'variable':
                yield make_id(filename, funcname, name), decl
# XXX Add a "handle_var" arg like we did for get_resolver()?
def variables(*filenames,
              perfilecache=None,
              preprocessed=False,
              known=None,  # for types
              handle_id=None,
              _iter_vars=_iter_vars,
              ):
    """Yield (varid, decl) for each variable found in the given files.

    The filenames may be passed either as separate positional arguments
    or as a single iterable of filenames.

    If "preprocessed" is provided (and not False/None) then it is used
    to decide which tool to use to parse the source code after it runs
    through the C preprocessor.  Otherwise the raw source is parsed.

    "handle_id" is forwarded to the underlying iterator to build each
    varid.  "known" is accepted but not used here.  Passing a
    "perfilecache" is not supported yet (NotImplementedError).
    """
    # Accept a single iterable of filenames in place of varargs.  A lone
    # str is one filename, not an iterable of them.
    if len(filenames) == 1 and not isinstance(filenames[0], str):
        filenames, = filenames

    if perfilecache is None:
        yield from _iter_vars(filenames, preprocessed, handle_id=handle_id)
    else:
        # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
        raise NotImplementedError
def variable(name, filenames, *,
             local=False,
             perfilecache=None,
             preprocessed=False,
             handle_id=None,
             _iter_vars=variables,
             ):
    """Return (varid, decl) for the first found variable that matches.

    If "local" is True then the first matching local variable in the
    file will always be returned.  To avoid that, pass perfilecache and
    pop each variable from the cache after using it.

    (None, None) is returned if no matching variable is found.
    NotImplementedError is raised for a matching local variable whose
    funcname is UNKNOWN.
    """
    for varid, decl in _iter_vars(filenames,
                                  perfilecache=perfilecache,
                                  preprocessed=preprocessed,
                                  # Forward the factory instead of
                                  # silently dropping it.
                                  handle_id=handle_id,
                                  ):
        if varid.name != name:
            continue
        if local:
            if varid.funcname:
                if varid.funcname == UNKNOWN:
                    raise NotImplementedError
                return varid, decl
        elif not varid.funcname:
            return varid, decl
    return None, None  # No matching variable was found.
def variable_from_id(id, filenames, *,
                     perfilecache=None,
                     preprocessed=False,
                     handle_id=None,
                     _get_var=variable,
                     ):
    """Return (varid, decl) for the first found variable that matches.

    "id" may be a plain name (str) or an object with filename, funcname
    and name attributes.  A funcname of UNKNOWN requests a local
    variable; any other non-empty funcname is not supported yet.  A
    known filename on the id replaces the given filenames.
    """
    if isinstance(id, str):
        name, local = id, False
    else:
        funcname = id.funcname
        if funcname == UNKNOWN:
            local = True
        elif funcname:
            raise NotImplementedError
        else:
            local = False
        name = id.name
        if id.filename and id.filename != UNKNOWN:
            # The id pins down the file, so only search that one.
            filenames = [id.filename]
    return _get_var(
        name,
        filenames,
        local=local,
        perfilecache=perfilecache,
        preprocessed=preprocessed,
        handle_id=handle_id,
    )

View File

@ -1,8 +1,7 @@
import re import re
from c_analyzer_common.info import UNKNOWN from ..common.info import UNKNOWN, ID
from .info import Variable
from .preprocessor import _iter_clean_lines from .preprocessor import _iter_clean_lines
@ -55,7 +54,7 @@ def parse_variable_declaration(srcline):
def parse_variable(srcline, funcname=None): def parse_variable(srcline, funcname=None):
"""Return a Variable for the variable declared on the line (or None).""" """Return (varid, decl) for the variable declared on the line (or None)."""
line = srcline.strip() line = srcline.strip()
# XXX Handle more than just static variables. # XXX Handle more than just static variables.
@ -74,7 +73,7 @@ def iter_variables(filename, *,
_get_srclines=get_srclines, _get_srclines=get_srclines,
_default_parse_variable=parse_variable, _default_parse_variable=parse_variable,
): ):
"""Yield a Variable for each in the given source file.""" """Yield (varid, decl) for each variable in the given source file."""
if parse_variable is None: if parse_variable is None:
parse_variable = _default_parse_variable parse_variable = _default_parse_variable
@ -99,13 +98,13 @@ def iter_variables(filename, *,
info = parse_variable(line, funcname) info = parse_variable(line, funcname)
if isinstance(info, list): if isinstance(info, list):
for name, _funcname, decl in info: for name, _funcname, decl in info:
yield Variable.from_parts(filename, _funcname, name, decl) yield ID(filename, _funcname, name), decl
continue continue
name, decl = info name, decl = info
if name is None: if name is None:
continue continue
yield Variable.from_parts(filename, funcname, name, decl) yield ID(filename, funcname, name), decl
def _match_varid(variable, name, funcname, ignored=None): def _match_varid(variable, name, funcname, ignored=None):
@ -134,12 +133,12 @@ def find_variable(filename, funcname, name, *,
Return None if the variable is not found. Return None if the variable is not found.
""" """
for variable in _iter_variables(filename, for varid, decl in _iter_variables(filename,
srccache=srccache, srccache=srccache,
parse_variable=parse_variable, parse_variable=parse_variable,
): ):
if _match_varid(variable, name, funcname, ignored): if _match_varid(varid, name, funcname, ignored):
return variable return varid, decl
else: else:
return None return None
@ -149,10 +148,10 @@ def find_variables(varids, filenames=None, *,
parse_variable=None, parse_variable=None,
_find_symbol=find_variable, _find_symbol=find_variable,
): ):
"""Yield a Variable for each ID. """Yield (varid, decl) for each ID.
If the variable is not found then its decl will be UNKNOWN. That If the variable is not found then its decl will be UNKNOWN. That
way there will be one resulting Variable per given ID. way there will be one resulting variable per given ID.
""" """
if srccache is _NOT_SET: if srccache is _NOT_SET:
srccache = {} srccache = {}
@ -163,18 +162,18 @@ def find_variables(varids, filenames=None, *,
srcfiles = [varid.filename] srcfiles = [varid.filename]
else: else:
if not filenames: if not filenames:
yield Variable(varid, UNKNOWN, UNKNOWN) yield varid, UNKNOWN
continue continue
srcfiles = filenames srcfiles = filenames
for filename in srcfiles: for filename in srcfiles:
found = _find_varid(filename, varid.funcname, varid.name, varid, decl = _find_varid(filename, varid.funcname, varid.name,
ignored=used, ignored=used,
srccache=srccache, srccache=srccache,
parse_variable=parse_variable, parse_variable=parse_variable,
) )
if found: if varid:
yield found yield varid, decl
used.add(found) used.add(varid)
break break
else: else:
yield Variable(varid, UNKNOWN, UNKNOWN) yield varid, UNKNOWN

View File

@ -3,8 +3,7 @@ import shlex
import os import os
import re import re
from c_analyzer_common import util from ..common import util, info
from . import info
CONTINUATION = '\\' + os.linesep CONTINUATION = '\\' + os.linesep

View File

@ -1,46 +1,24 @@
import os
import os.path import os.path
import shutil import shutil
import sys
from c_analyzer_common import util, info from c_analyzer.common import util, info
from . import source
from .info import Symbol from .info import Symbol
#PYTHON = os.path.join(REPO_ROOT, 'python') # XXX need tests:
PYTHON = sys.executable # * iter_symbols
NM_KINDS = {
def iter_symbols(binary=PYTHON, dirnames=None, *, 'b': Symbol.KIND.VARIABLE, # uninitialized
# Alternately, use look_up_known_symbol() 'd': Symbol.KIND.VARIABLE, # initialized
# from c_globals.supported. #'g': Symbol.KIND.VARIABLE, # uninitialized
find_local_symbol=source.find_symbol, #'s': Symbol.KIND.VARIABLE, # initialized
_file_exists=os.path.exists, 't': Symbol.KIND.FUNCTION,
_iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)), }
):
"""Yield a Symbol for each symbol found in the binary."""
if not _file_exists(binary):
raise Exception('executable missing (need to build it first?)')
if find_local_symbol:
cache = {}
def find_local_symbol(name, *, _find=find_local_symbol):
return _find(name, dirnames, _perfilecache=cache)
else:
find_local_symbol = None
if os.name == 'nt':
# XXX Support this.
raise NotImplementedError
else:
yield from _iter_symbols_nm(binary, find_local_symbol)
#############################
# binary format (e.g. ELF)
SPECIAL_SYMBOLS = { SPECIAL_SYMBOLS = {
# binary format (e.g. ELF)
'__bss_start', '__bss_start',
'__data_start', '__data_start',
'__dso_handle', '__dso_handle',
@ -63,29 +41,23 @@ def _is_special_symbol(name):
return False return False
############################# def iter_symbols(binfile, *,
# "nm" nm=None,
handle_id=None,
_which=shutil.which,
_run=util.run_cmd,
):
"""Yield a Symbol for each relevant entry reported by the "nm" command."""
if nm is None:
nm = _which('nm')
if not nm:
raise NotImplementedError
if handle_id is None:
handle_id = info.ID
NM_KINDS = {
'b': Symbol.KIND.VARIABLE, # uninitialized
'd': Symbol.KIND.VARIABLE, # initialized
#'g': Symbol.KIND.VARIABLE, # uninitialized
#'s': Symbol.KIND.VARIABLE, # initialized
't': Symbol.KIND.FUNCTION,
}
def _iter_symbols_nm(binary, find_local_symbol=None,
*,
_which=shutil.which,
_run=util.run_cmd,
):
nm = _which('nm')
if not nm:
raise NotImplementedError
argv = [nm, argv = [nm,
'--line-numbers', '--line-numbers',
binary, binfile,
] ]
try: try:
output = _run(argv) output = _run(argv)
@ -95,23 +67,20 @@ def _iter_symbols_nm(binary, find_local_symbol=None,
raise NotImplementedError raise NotImplementedError
raise raise
for line in output.splitlines(): for line in output.splitlines():
(name, kind, external, filename, funcname, vartype, (name, kind, external, filename, funcname,
) = _parse_nm_line(line, ) = _parse_nm_line(line)
_find_local_symbol=find_local_symbol,
)
if kind != Symbol.KIND.VARIABLE: if kind != Symbol.KIND.VARIABLE:
continue continue
elif _is_special_symbol(name): elif _is_special_symbol(name):
continue continue
assert vartype is None
yield Symbol( yield Symbol(
id=(filename, funcname, name), id=handle_id(filename, funcname, name),
kind=kind, kind=kind,
external=external, external=external,
) )
def _parse_nm_line(line, *, _find_local_symbol=None): def _parse_nm_line(line):
_origline = line _origline = line
_, _, line = line.partition(' ') # strip off the address _, _, line = line.partition(' ') # strip off the address
line = line.strip() line = line.strip()
@ -128,18 +97,9 @@ def _parse_nm_line(line, *, _find_local_symbol=None):
else: else:
filename = info.UNKNOWN filename = info.UNKNOWN
vartype = None
name, islocal = _parse_nm_name(name, kind) name, islocal = _parse_nm_name(name, kind)
if islocal: funcname = info.UNKNOWN if islocal else None
funcname = info.UNKNOWN return name, kind, external, filename, funcname
if _find_local_symbol is not None:
filename, funcname, vartype = _find_local_symbol(name)
filename = filename or info.UNKNOWN
funcname = funcname or info.UNKNOWN
else:
funcname = None
# XXX fine filename and vartype?
return name, kind, external, filename, funcname, vartype
def _parse_nm_name(name, kind): def _parse_nm_name(name, kind):

View File

@ -0,0 +1,175 @@
import os
import os.path
import shutil
from ..common import files
from ..common.info import UNKNOWN, ID
from ..parser import find as p_find
from . import _nm
from .info import Symbol
# XXX need tests:
# * get_resolver()
# * get_resolver_from_dirs()
# * symbol()
# * symbols()
# * variables()
def _resolve_known(symbol, knownvars):
    """Pop and return the first known variable the symbol matches.

    "knownvars" maps varid to variable.  The matched entry is removed
    from the mapping.  None is returned if nothing matches.
    """
    for varid in knownvars:
        if symbol.match(varid):
            # Returning right away means the dict is not iterated
            # again after being modified.
            return knownvars.pop(varid)
    return None
def get_resolver(filenames=None, known=None, *,
                 handle_var,
                 check_filename=None,
                 perfilecache=None,
                 preprocessed=False,
                 _from_source=p_find.variable_from_id,
                 ):
    """Return a "resolver" func for the given known vars/types and filenames.

    "handle_var" is a callable that takes (ID, decl) and returns a
    Variable.  Variable.from_id is a suitable callable.

    The returned func takes a single Symbol and returns a corresponding
    Variable.  If the symbol was located then the variable will be
    valid, populated with the corresponding information.  Otherwise None
    is returned.

    Known variables are consulted first (each one is used at most once,
    from a private copy).  The source files are searched only if the
    known variables had no match.  When both are given, symbols whose
    filename fails "check_filename" are not resolved at all; by default
    that check is membership in "filenames".
    """
    knownvars = (known or {}).get('variables')
    if knownvars:
        knownvars = dict(knownvars)  # a copy

    # Decide on the original object's truthiness, then materialize it:
    # the resolver is called once per symbol, so a one-shot iterator
    # would otherwise be exhausted by the first lookup.
    use_source = bool(filenames)
    if use_source:
        filenames = list(filenames)

    def resolve_from_source(symbol):
        # Look the symbol up in the source files.
        varid, decl = _from_source(symbol, filenames,
                                   perfilecache=perfilecache,
                                   preprocessed=preprocessed,
                                   )
        if varid is None:
            # Nothing was found, so there is nothing to hand to
            # handle_var().
            return None
        return handle_var(varid, decl)

    if knownvars and use_source:
        if check_filename is None:
            def check_filename(filename):
                return filename in filenames

        def resolve(symbol):
            # XXX Check "found" instead?
            if not check_filename(symbol.filename):
                return None
            found = _resolve_known(symbol, knownvars)
            if found is None:
                found = resolve_from_source(symbol)
            return found
    elif knownvars:
        def resolve(symbol):
            return _resolve_known(symbol, knownvars)
    elif use_source:
        resolve = resolve_from_source
    else:
        def resolve(symbol):
            return None
    return resolve
def get_resolver_from_dirs(dirnames, known=None, *,
                           handle_var,
                           suffixes=('.c',),
                           perfilecache=None,
                           preprocessed=False,
                           _iter_files=files.iter_files_by_suffix,
                           _get_resolver=get_resolver,
                           ):
    """Return a "resolver" func for the given known vars/types and dirs.

    "dirnames" should be absolute paths.  They are used as given (with
    a trailing path separator added where missing), both to list the
    files with the given suffixes and to decide, by prefix, whether a
    symbol's filename is under one of the directories.
    NOTE(review): nothing here resolves relative names against CWD;
    confirm whether the file iterator does.

    See get_resolver().
    """
    sep = os.path.sep
    dirnames = [d if d.endswith(sep) else d + sep for d in dirnames]
    filenames = _iter_files(dirnames, suffixes)

    def check_filename(filename):
        # Only files located under one of the directories qualify.
        return any(filename.startswith(dirname) for dirname in dirnames)

    return _get_resolver(
        filenames,
        known,
        handle_var=handle_var,
        check_filename=check_filename,
        perfilecache=perfilecache,
        preprocessed=preprocessed,
    )
def symbol(symbol, filenames, known=None, *,
           perfilecache=None,
           preprocessed=False,
           handle_id=None,
           handle_var=None,
           _get_resolver=get_resolver,
           ):
    """Return a Variable for the one matching the given symbol.

    "symbol" can be one of several objects:

    * Symbol - use the contained info
    * name (str) - look for a global variable with that name
    * (filename, name) - look for named global in file
    * (filename, funcname, name) - look for named local in file

    A name is always required.  If the filename is None, "", or
    "UNKNOWN" then all files will be searched.  If the funcname is
    "" or "UNKNOWN" then only local variables will be searched for.

    "handle_var" takes (ID, decl) and returns the Variable; it defaults
    to Variable.from_id.  "handle_id" is accepted for compatibility but
    is not used, since the resolver has no such option.
    NOTE(review): the resolver built here calls symbol.match() when
    known variables are given, so the non-Symbol forms listed above
    presumably still need converting first - confirm.
    """
    if handle_var is None:
        # Imported here rather than at module level, since the variables
        # package imports this module.
        from ..variables.info import Variable
        handle_var = Variable.from_id
    # The resolver takes (filenames, known) in that order and requires
    # handle_var.
    resolve = _get_resolver(filenames, known,
                            handle_var=handle_var,
                            perfilecache=perfilecache,
                            preprocessed=preprocessed,
                            )
    return resolve(symbol)
def _get_platform_tool():
    """Return a func(binfile, handle_id) that iterates a binary's symbols.

    The "nm" command found on PATH is used.  NotImplementedError is
    raised on Windows and when "nm" cannot be found.
    """
    if os.name == 'nt':
        # XXX Support this.
        raise NotImplementedError
    nm = shutil.which('nm')
    if not nm:
        raise NotImplementedError

    def iter_symbols(binfile, handle_id):
        return _nm.iter_symbols(binfile, nm=nm, handle_id=handle_id)
    return iter_symbols
def symbols(binfile, *,
            handle_id=None,
            _file_exists=os.path.exists,
            _get_platform_tool=_get_platform_tool,
            ):
    """Yield a Symbol for each one found in the binary.

    An Exception is raised (once iteration starts) if the binary file
    does not exist.
    """
    if _file_exists(binfile):
        iter_found = _get_platform_tool()
        yield from iter_found(binfile, handle_id)
    else:
        raise Exception('executable missing (need to build it first?)')
def variables(binfile, *,
              resolve,
              handle_id=None,
              _iter_symbols=symbols,
              ):
    """Yield (Variable, Symbol) for each found variable symbol.

    "resolve" is called with each symbol; any falsy result is reported
    as None.  Symbols of any other kind are skipped.
    """
    for sym in _iter_symbols(binfile, handle_id=handle_id):
        if sym.kind == Symbol.KIND.VARIABLE:
            yield (resolve(sym) or None), sym

View File

@ -1,7 +1,7 @@
from collections import namedtuple from collections import namedtuple
from c_analyzer_common.info import ID from c_analyzer.common.info import ID
from c_analyzer_common.util import classonly, _NTBase from c_analyzer.common.util import classonly, _NTBase
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')): class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):

View File

@ -0,0 +1,75 @@
from ..common import files
from ..common.info import UNKNOWN
from ..parser import (
find as p_find,
)
from ..symbols import (
info as s_info,
find as s_find,
)
from .info import Variable
# XXX need tests:
# * vars_from_source
def _remove_cached(cache, var):
    """Drop the variable from its file's entry in the cache, if present.

    "cache" maps filename to a collection of variables.  Nothing happens
    if the cache is empty/None, if the file has no entry, or if the
    variable is not in that entry.
    """
    if not cache:
        return
    try:
        cached = cache[var.filename]
        cached.remove(var)
    except (KeyError, ValueError):
        # KeyError: no entry for the file (or a set without the var).
        # ValueError: a list without the var.
        pass
def vars_from_binary(binfile, *,
                     known=None,
                     filenames=None,
                     handle_id=None,
                     check_filename=None,
                     handle_var=Variable.from_id,
                     _iter_vars=s_find.variables,
                     _get_symbol_resolver=s_find.get_resolver,
                     ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    A symbol that could not be resolved still gets a Variable, with
    UNKNOWN for both its storage and its vartype.
    """
    # NOTE(review): the parser's variables() raises NotImplementedError
    # for any non-None perfilecache; confirm that passing a dict here is
    # intended for lookups that fall back to the source.
    cache = {}
    resolve = _get_symbol_resolver(
        filenames,
        known,
        handle_var=handle_var,
        check_filename=check_filename,
        perfilecache=cache,
    )
    found = _iter_vars(binfile, resolve=resolve, handle_id=handle_id)
    for var, symbol in found:
        if var is None:
            var = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield var
        # Once the consumer is done with it, drop it from the cache.
        _remove_cached(cache, var)
def vars_from_source(filenames, *,
                     preprocessed=None,
                     known=None,
                     handle_id=None,
                     handle_var=Variable.from_id,
                     iter_vars=p_find.variables,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    Details are filled in from the given "known" variables and types.
    "handle_var" turns each (varid, decl) pair into the Variable.
    """
    # NOTE(review): the parser's variables() raises NotImplementedError
    # for any non-None perfilecache, so with the default "iter_vars"
    # this dict looks like it triggers that - confirm.
    cache = {}
    found = iter_vars(
        filenames or (),
        perfilecache=cache,
        preprocessed=preprocessed,
        known=known,
        handle_id=handle_id,
    )
    for varid, decl in found:
        var = handle_var(varid, decl)
        yield var
        # Once the consumer is done with it, drop it from the cache.
        _remove_cached(cache, var)

View File

@ -1,8 +1,7 @@
from collections import namedtuple from collections import namedtuple
import re
from c_analyzer_common import info, util from ..common.info import ID, UNKNOWN
from c_analyzer_common.util import classonly, _NTBase from ..common.util import classonly, _NTBase
def normalize_vartype(vartype): def normalize_vartype(vartype):
@ -16,26 +15,7 @@ def normalize_vartype(vartype):
return str(vartype) return str(vartype)
def extract_storage(decl, *, isfunc=False): # XXX Variable.vartype -> decl (Declaration).
"""Return (storage, vartype) based on the given declaration.
The default storage is "implicit" or "local".
"""
if decl == info.UNKNOWN:
return decl, decl
if decl.startswith('static '):
return 'static', decl
#return 'static', decl.partition(' ')[2].strip()
elif decl.startswith('extern '):
return 'extern', decl
#return 'extern', decl.partition(' ')[2].strip()
elif re.match('.*\b(static|extern)\b', decl):
raise NotImplementedError
elif isfunc:
return 'local', decl
else:
return 'implicit', decl
class Variable(_NTBase, class Variable(_NTBase,
namedtuple('Variable', 'id storage vartype')): namedtuple('Variable', 'id storage vartype')):
@ -52,16 +32,23 @@ class Variable(_NTBase,
@classonly @classonly
def from_parts(cls, filename, funcname, name, decl, storage=None): def from_parts(cls, filename, funcname, name, decl, storage=None):
varid = ID(filename, funcname, name)
if storage is None: if storage is None:
storage, decl = extract_storage(decl, isfunc=funcname) self = cls.from_id(varid, decl)
id = info.ID(filename, funcname, name) else:
self = cls(id, storage, decl) self = cls(varid, storage, decl)
return self return self
@classonly
def from_id(cls, varid, decl):
from ..parser.declarations import extract_storage
storage = extract_storage(decl, infunc=varid.funcname)
return cls(varid, storage, decl)
def __new__(cls, id, storage, vartype): def __new__(cls, id, storage, vartype):
self = super().__new__( self = super().__new__(
cls, cls,
id=info.ID.from_raw(id), id=ID.from_raw(id),
storage=str(storage) if storage else None, storage=str(storage) if storage else None,
vartype=normalize_vartype(vartype) if vartype else None, vartype=normalize_vartype(vartype) if vartype else None,
) )
@ -77,10 +64,10 @@ class Variable(_NTBase,
if not self.id: if not self.id:
raise TypeError('missing id') raise TypeError('missing id')
if not self.filename or self.filename == info.UNKNOWN: if not self.filename or self.filename == UNKNOWN:
raise TypeError(f'id missing filename ({self.id})') raise TypeError(f'id missing filename ({self.id})')
if self.funcname and self.funcname == info.UNKNOWN: if self.funcname and self.funcname == UNKNOWN:
raise TypeError(f'id missing funcname ({self.id})') raise TypeError(f'id missing funcname ({self.id})')
self.id.validate() self.id.validate()
@ -89,12 +76,12 @@ class Variable(_NTBase,
"""Fail if the object is invalid (i.e. init with bad data).""" """Fail if the object is invalid (i.e. init with bad data)."""
self._validate_id() self._validate_id()
if self.storage is None or self.storage == info.UNKNOWN: if self.storage is None or self.storage == UNKNOWN:
raise TypeError('missing storage') raise TypeError('missing storage')
elif self.storage not in self.STORAGE: elif self.storage not in self.STORAGE:
raise ValueError(f'unsupported storage {self.storage:r}') raise ValueError(f'unsupported storage {self.storage:r}')
if self.vartype is None or self.vartype == info.UNKNOWN: if self.vartype is None or self.vartype == UNKNOWN:
raise TypeError('missing vartype') raise TypeError('missing vartype')
@property @property

View File

@ -0,0 +1,91 @@
import csv
from ..common.info import ID, UNKNOWN
from ..common.util import read_tsv
from .info import Variable
# XXX need tests:
# * read_file()
# * look_up_variable()
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
def read_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Yield (kind, id, decl) for each row in the data file.

    A funcname that is empty or "-" means the row has no function.

    The caller is responsible for validating each row.
    """
    for filename, funcname, name, kind, declaration in _read_tsv(infile,
                                                                 HEADER):
        scope = None if (not funcname or funcname == '-') else funcname
        yield kind, ID(filename, scope, name), declaration
def from_file(infile, *,
              handle_var=Variable.from_id,
              _read_file=read_file,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict with a "variables" entry that maps each ID to
    the value "handle_var" returned for its (id, decl).  Each value is
    validated before it is stored.

    ValueError is raised for a row of any kind other than "variable".
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for kind, varid, decl in _read_file(infile):
        if kind == 'variable':
            values = known['variables']
            value = handle_var(varid, decl)
        else:
            # Report the row as it was read.
            raise ValueError(
                f'unsupported kind in row {(kind, varid, decl)}')
        value.validate()
        values[varid] = value
    return known
def look_up_variable(varid, knownvars, *,
                     match_files=(lambda f1, f2: f1 == f2),
                     ):
    """Return the known Variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    When the ID's funcname is UNKNOWN, only known local variables
    (those with a funcname) are considered and the first one with the
    same name (and a matching file, if the ID has one) is returned.
    Otherwise the ID is looked up directly.  An ID with a known
    funcname but no usable filename is not supported
    (NotImplementedError).

    If no match is found then None is returned.
    """
    if not knownvars:
        return None

    if varid.funcname == UNKNOWN:
        anyfile = not varid.filename or varid.filename == UNKNOWN
        # Use a distinct loop name so the requested ID is not shadowed.
        for knownid in knownvars:
            if not knownid.funcname:
                continue
            if not anyfile and not match_files(knownid.filename,
                                               varid.filename):
                continue
            if knownid.name == varid.name:
                return knownvars[knownid]
        return None
    elif not varid.filename or varid.filename == UNKNOWN:
        raise NotImplementedError
    else:
        # Accept either a bare ID or an object exposing one as ".id".
        return knownvars.get(getattr(varid, 'id', varid))

View File

@ -1,19 +0,0 @@
import os.path
# The directory containing this package's __init__.py.
PKG_ROOT = os.path.dirname(__file__)
# The parent directory of the package.
DATA_DIR = os.path.dirname(PKG_ROOT)
# Two directory levels above DATA_DIR.
REPO_ROOT = os.path.dirname(
os.path.dirname(DATA_DIR))
# Each listed directory name joined onto REPO_ROOT.
SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
'Include',
'Python',
'Parser',
'Objects',
'Modules',
]]
# Clean up the namespace ("os" is only needed for the constants above).
del os

View File

@ -1,69 +0,0 @@
from collections import namedtuple
import re
from .util import classonly, _NTBase
UNKNOWN = '???'
NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$')
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
    """A unique ID for a single symbol or declaration.

    Each field is stored as a str, or as None if a false value was given.
    """

    __slots__ = ()
    # XXX Add optional conditions (tuple of strings) field.
    #conditions = Slot()

    @classonly
    def from_raw(cls, raw):
        """Return an ID for the given raw value.

        A false value gives None, a str is used as the name, a 1-item
        iterable as (name,) and a 2-item iterable as (filename, name).
        Anything else is delegated to the base class.
        """
        if not raw:
            return None
        if isinstance(raw, str):
            return cls(None, None, raw)
        try:
            name, = raw
            filename = None
        except ValueError:
            try:
                filename, name = raw
            except ValueError:
                return super().from_raw(raw)
        return cls(filename, None, name)

    def __new__(cls, filename, funcname, name):
        self = super().__new__(
                cls,
                filename=str(filename) if filename else None,
                funcname=str(funcname) if funcname else None,
                name=str(name) if name else None,
                )
        #cls.conditions.set(self, tuple(str(s) if s else None
        #                               for s in conditions or ()))
        return self

    def validate(self):
        """Fail if the object is invalid (i.e. init with bad data)."""
        if not self.name:
            raise TypeError('missing name')
        else:
            if not NAME_RE.match(self.name):
                raise ValueError(
                        f'name must be an identifier, got {self.name!r}')

        # Symbols from a binary might not have filename/funcname info.

        if self.funcname:
            if not self.filename:
                raise TypeError('missing filename')
            if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN:
                # Report the field that actually failed (funcname).
                raise ValueError(
                        f'funcname must be an identifier, got {self.funcname!r}')

        # XXX Require the filename (at least UNKNOWN)?
        # XXX Check the filename?

    @property
    def islocal(self):
        """True if the ID has a funcname."""
        return self.funcname is not None

View File

@ -1,74 +0,0 @@
import csv
import os.path
from c_parser.info import Variable
from . import DATA_DIR
from .info import ID, UNKNOWN
from .util import read_tsv
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
# XXX need tests:
# * from_file()
def from_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict whose "variables" entry maps each row's ID to
    a validated Variable.  When the declaration has no recognized
    storage prefix, "local" is used for rows with a funcname and
    "implicit" otherwise.  Rows of any other kind raise ValueError.
    """
    variables = {}
    known = {
        'variables': variables,
        #'types': {},
        #'constants': {},
        #'macros': {},
        }
    for row in _read_tsv(infile, HEADER):
        filename, funcname, name, kind, declaration = row
        if not funcname or funcname == '-':
            funcname = None
        varid = ID(filename, funcname, name)
        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')
        fallback = 'local' if funcname else 'implicit'
        storage = _get_storage(declaration) or fallback
        var = Variable(varid, storage, declaration)
        var.validate()
        variables[varid] = var
    return known
def _get_storage(decl):
# statics
if decl.startswith('static '):
return 'static'
if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
return 'static'
if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
return 'static'
if decl.startswith('PyDoc_VAR('):
return 'static'
if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
return 'static'
if decl.startswith('WRAP_METHOD('):
return 'static'
# public extern
if decl.startswith('extern '):
return 'extern'
if decl.startswith('PyAPI_DATA('):
return 'extern'
# implicit or local
return None

View File

@ -1,95 +0,0 @@
from c_analyzer_common import SOURCE_DIRS
from c_analyzer_common.info import UNKNOWN
from c_symbols import (
info as s_info,
binary as b_symbols,
source as s_symbols,
resolve,
)
from c_parser import info, declarations
# XXX needs tests:
# * iter_variables
def globals_from_binary(binfile=b_symbols.PYTHON, *,
                        knownvars=None,
                        dirnames=None,
                        _iter_symbols=b_symbols.iter_symbols,
                        _resolve=resolve.symbols_to_variables,
                        _get_symbol_resolver=resolve.get_resolver,
                        ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    Non-global variables are dropped unless their vartype is UNKNOWN.
    """
    symbols = _iter_symbols(binfile, find_local_symbol=None)
    resolver = _get_symbol_resolver(knownvars, dirnames)
    for variable in _resolve(symbols, resolve=resolver):
        # Keep globals, plus anything we couldn't find.
        # XXX Drop the "UNKNOWN" condition?
        if variable.isglobal or variable.vartype == UNKNOWN:
            yield variable
def globals_from_declarations(dirnames=SOURCE_DIRS, *,
                              known=None,
                              ):
    """Yield a Variable for each found declaration.

    Details are filled in from the given "known" variables and types.

    This is a placeholder: it currently always raises
    NotImplementedError.
    """
    raise NotImplementedError
def iter_variables(kind='platform', *,
                   known=None,
                   dirnames=None,
                   _resolve_symbols=resolve.symbols_to_variables,
                   _get_symbol_resolver=resolve.get_resolver,
                   _symbols_from_binary=b_symbols.iter_symbols,
                   _symbols_from_source=s_symbols.iter_symbols,
                   _iter_raw=declarations.iter_all,
                   _iter_preprocessed=declarations.iter_preprocessed,
                   ):
    """Yield a Variable for each one found (e.g. in files).

    "kind" selects where the variables come from: "symbols",
    "platform" (the default, also used for a false value),
    "declarations" or "preprocessed".  Any other value raises
    ValueError.
    """
    kind = kind or 'platform'

    if kind in ('symbols', 'platform'):
        knownvars = (known or {}).get('variables')
        if kind == 'symbols':
            symbols = _symbols_from_source(dirnames, known)
        else:
            symbols = _symbols_from_binary(find_local_symbol=None)
        resolver = _get_symbol_resolver(knownvars, dirnames)
        yield from _resolve_symbols(symbols, resolve=resolver)
    elif kind in ('declarations', 'preprocessed'):
        iter_decls = _iter_raw if kind == 'declarations' else _iter_preprocessed
        for decl in iter_decls(dirnames):
            if isinstance(decl, info.Variable):
                yield decl
    else:
        raise ValueError(f'unsupported kind {kind!r}')
def globals(dirnames, known, *,
            kind=None,  # Use the default.
            _iter_variables=iter_variables,
            ):
    """Yield each found variable whose "isglobal" is true."""
    candidates = _iter_variables(kind, known=known, dirnames=dirnames)
    for candidate in candidates:
        if candidate.isglobal:
            yield candidate

View File

@ -1,16 +0,0 @@
def basic(globals, *,
          _print=print):
    """Print one line per variable: its location, then its vartype.

    The location is "filename:funcname():name" for a variable with a
    funcname and "filename:name" otherwise, left-justified to 64 columns.
    """
    for var in globals:
        if var.funcname:
            where = f'{var.filename}:{var.funcname}():{var.name}'
        else:
            where = f'{var.filename}:{var.name}'
        _print(f'{where:<64} {var.vartype}')

View File

@ -1,147 +0,0 @@
import os.path
from c_analyzer_common import files
from c_analyzer_common.info import UNKNOWN
from c_parser import declarations, info
from .info import Symbol
from .source import _find_symbol
# XXX need tests:
# * look_up_known_symbol()
# * symbol_from_source()
# * get_resolver()
# * symbols_to_variables()
def look_up_known_symbol(symbol, knownvars, *,
                         match_files=(lambda f1, f2: f1 == f2),
                         ):
    """Return the known variable matching the given symbol.

    "knownvars" is a mapping of common.ID to parser.Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    None is returned when there is no match (or no known variables).
    """
    if not knownvars:
        return None

    if symbol.funcname != UNKNOWN:
        if not symbol.filename or symbol.filename == UNKNOWN:
            raise NotImplementedError
        return knownvars.get(symbol.id)

    # The function is unknown, so scan the known locals by name
    # (restricted to the symbol's file, if it has one).
    anyfile = not symbol.filename or symbol.filename == UNKNOWN
    for varid in knownvars:
        if not varid.funcname:
            continue
        if not anyfile and not match_files(varid.filename, symbol.filename):
            continue
        if varid.name == symbol.name:
            return knownvars[varid]
    return None
def find_in_source(symbol, dirnames, *,
                   _perfilecache={},
                   _find_symbol=_find_symbol,
                   _iter_files=files.iter_files_by_suffix,
                   ):
    """Return the Variable matching the given Symbol.

    The symbol's own file is searched if it has one; otherwise the
    ".c" and ".h" files under "dirnames" are.  Symbols with a known
    funcname are not supported (NotImplementedError).

    If there is no match then return None.
    """
    # NOTE: the "_perfilecache" default is a single dict shared by
    # every call that does not pass its own.
    has_file = symbol.filename and symbol.filename != UNKNOWN
    filenames = ([symbol.filename] if has_file
                 else _iter_files(dirnames, ('.c', '.h')))

    if symbol.funcname and symbol.funcname != UNKNOWN:
        raise NotImplementedError

    found = _find_symbol(symbol.name, filenames, _perfilecache)
    filename, funcname, decl = found
    if filename == UNKNOWN:
        return None
    return info.Variable.from_parts(filename, funcname, symbol.name, decl)
def get_resolver(knownvars=None, dirnames=None, *,
                 _look_up_known=look_up_known_symbol,
                 _from_source=find_in_source,
                 ):
    """Return a "resolver" func for the given known vars and dirnames.

    The func takes a single Symbol and returns a corresponding Variable.
    If the symbol was located then the variable will be valid, populated
    with the corresponding information.  Otherwise None is returned.
    """
    if not knownvars:
        if dirnames:
            def resolve(symbol):
                return _from_source(symbol, dirnames)
        else:
            def resolve(symbol):
                return None
        return resolve

    remaining = dict(knownvars)  # a copy

    def resolve_known(symbol):
        found = _look_up_known(symbol, remaining)
        if found is None:
            return None
        if symbol.funcname == UNKNOWN:
            remaining.pop(found.id)
        elif not symbol.filename or symbol.filename == UNKNOWN:
            remaining.pop(found.id)
        return found

    if not dirnames:
        return resolve_known

    def resolve(symbol):
        found = resolve_known(symbol)
        if found is None:
            return None
            #return _from_source(symbol, dirnames)
        # Only accept a match whose file is under one of the dirnames.
        for dirname in dirnames:
            if not dirname.endswith(os.path.sep):
                dirname += os.path.sep
            if found.filename.startswith(dirname):
                return found
        return None

    return resolve
def symbols_to_variables(symbols, *,
                         resolve=(lambda s: look_up_known_symbol(s, None)),
                         ):
    """Yield the variable that matches each given symbol.

    Items that are already Variables are passed through and symbols of
    any kind other than "variable" are skipped.  A symbol that cannot be
    resolved yields a Variable with UNKNOWN storage and vartype.

    Use get_resolver() for a "resolve" func to use.
    """
    for symbol in symbols:
        if isinstance(symbol, info.Variable):
            # XXX validate?
            yield symbol
        elif symbol.kind == Symbol.KIND.VARIABLE:
            resolved = resolve(symbol)
            if resolved is None:
                #raise NotImplementedError(symbol)
                resolved = info.Variable(
                        id=symbol.id,
                        storage=UNKNOWN,
                        vartype=UNKNOWN,
                        )
            yield resolved

View File

@ -1,58 +0,0 @@
from c_analyzer_common import files
from c_analyzer_common.info import UNKNOWN
from c_parser import declarations
# XXX need tests:
# * find_symbol()
def find_symbol(name, dirnames, *,
                _perfilecache,
                _iter_files=files.iter_files_by_suffix,
                **kwargs
                ):
    """Return (filename, funcname, vartype) for the matching Symbol.

    The ".c" and ".h" files under "dirnames" are searched.
    """
    candidates = _iter_files(dirnames, ('.c', '.h'))
    return _find_symbol(name, candidates, _perfilecache, **kwargs)
def _get_symbols(filename, *,
                 _iter_variables=declarations.iter_variables,
                 ):
    """Return the local variables found in the given file.

    The result maps each name to a list of (funcname, vartype) pairs,
    in the order found.  Variables without a funcname are left out.
    """
    byname = {}
    for funcname, name, vartype in _iter_variables(filename):
        if funcname:
            byname.setdefault(name, []).append((funcname, vartype))
    return byname
def _find_symbol(name, filenames, _perfilecache, *,
                 _get_local_symbols=_get_symbols,
                 ):
    """Return (filename, funcname, vartype) for the first match.

    Each file's symbols are loaded once into "_perfilecache".  A matched
    instance is removed from the cache, so a later call for the same
    name gets the next one.  (UNKNOWN, UNKNOWN, UNKNOWN) is returned
    when no file has the name.
    """
    for filename in filenames:
        if filename not in _perfilecache:
            _perfilecache[filename] = _get_local_symbols(filename)
        symbols = _perfilecache[filename]

        if name not in symbols:
            continue
        instances = symbols[name]

        funcname, vartype = instances.pop(0)
        if not instances:
            symbols.pop(name)
        return filename, funcname, vartype
    return UNKNOWN, UNKNOWN, UNKNOWN
def iter_symbols():
    """Placeholder; always raises NotImplementedError."""
    raise NotImplementedError

View File

@ -0,0 +1,29 @@
import os.path
import sys
# Absolute path of the directory two levels up from this file
# (the parent of the directory containing this file).
TOOL_ROOT = os.path.abspath(
os.path.dirname( # c-analyzer/
os.path.dirname(__file__))) # cpython/
# The data directory is the tool's root directory itself.
DATA_DIR = TOOL_ROOT
# Two directory levels above TOOL_ROOT.
REPO_ROOT = (
os.path.dirname( # ..
os.path.dirname(TOOL_ROOT))) # Tools/
# Each listed directory name joined onto REPO_ROOT.
INCLUDE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
'Include',
]]
# Each listed directory name joined onto REPO_ROOT.
SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
'Python',
'Parser',
'Objects',
'Modules',
]]
# The currently running interpreter's executable is used, rather than
# a "python" file under REPO_ROOT (the commented-out alternative).
#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable
# Clean up the namespace.
del sys
del os

View File

@ -1,42 +1,42 @@
import argparse import argparse
import os.path
import re import re
import sys import sys
from c_analyzer_common import SOURCE_DIRS, REPO_ROOT from c_analyzer.common import show
from c_analyzer_common.info import UNKNOWN from c_analyzer.common.info import UNKNOWN
from c_analyzer_common.known import (
from . import SOURCE_DIRS
from .find import supported_vars
from .known import (
from_file as known_from_file, from_file as known_from_file,
DATA_FILE as KNOWN_FILE, DATA_FILE as KNOWN_FILE,
) )
from . import find, show from .supported import IGNORED_FILE
from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
def _match_unused_global(variable, knownvars, used):
found = []
for varid in knownvars:
if varid in used:
continue
if varid.funcname is not None:
continue
if varid.name != variable.name:
continue
if variable.filename and variable.filename != UNKNOWN:
if variable.filename == varid.filename:
found.append(varid)
else:
found.append(varid)
return found
def _check_results(unknown, knownvars, used): def _check_results(unknown, knownvars, used):
def _match_unused_global(variable):
found = []
for varid in knownvars:
if varid in used:
continue
if varid.funcname is not None:
continue
if varid.name != variable.name:
continue
if variable.filename and variable.filename != UNKNOWN:
if variable.filename == varid.filename:
found.append(varid)
else:
found.append(varid)
return found
badknown = set() badknown = set()
for variable in sorted(unknown): for variable in sorted(unknown):
msg = None msg = None
if variable.funcname != UNKNOWN: if variable.funcname != UNKNOWN:
msg = f'could not find global symbol {variable.id}' msg = f'could not find global symbol {variable.id}'
elif m := _match_unused_global(variable, knownvars, used): elif m := _match_unused_global(variable):
assert isinstance(m, list) assert isinstance(m, list)
badknown.update(m) badknown.update(m)
elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are. elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are.
@ -65,32 +65,29 @@ def _check_results(unknown, knownvars, used):
raise Exception('could not find all symbols') raise Exception('could not find all symbols')
def _find_globals(dirnames, known, ignored): # XXX Move this check to its own command.
if dirnames == SOURCE_DIRS: def cmd_check_cache(cmd, *,
dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames] known=KNOWN_FILE,
ignored=IGNORED_FILE,
ignored = ignored_from_file(ignored) _known_from_file=known_from_file,
known = known_from_file(known) _find=supported_vars,
):
known = _known_from_file(known)
used = set() used = set()
unknown = set() unknown = set()
knownvars = (known or {}).get('variables') for var, supported in _find(known=known, ignored=ignored):
for variable in find.globals_from_binary(knownvars=knownvars, if supported is None:
dirnames=dirnames): unknown.add(var)
#for variable in find.globals(dirnames, known, kind='platform'):
if variable.vartype == UNKNOWN:
unknown.add(variable)
continue continue
yield variable, is_supported(variable, ignored, known) used.add(var.id)
used.add(variable.id) _check_results(unknown, known['variables'], used)
#_check_results(unknown, knownvars, used)
def cmd_check(cmd, dirs=SOURCE_DIRS, *, def cmd_check(cmd, *,
ignored=IGNORED_FILE,
known=KNOWN_FILE, known=KNOWN_FILE,
_find=_find_globals, ignored=IGNORED_FILE,
_find=supported_vars,
_show=show.basic, _show=show.basic,
_print=print, _print=print,
): ):
@ -100,7 +97,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
In the failure case, the list of unsupported variables In the failure case, the list of unsupported variables
will be printed out. will be printed out.
""" """
unsupported = [v for v, s in _find(dirs, known, ignored) if not s] unsupported = []
for var, supported in _find(known=known, ignored=ignored):
if not supported:
unsupported.append(var)
if not unsupported: if not unsupported:
#_print('okay') #_print('okay')
return return
@ -112,11 +113,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
sys.exit(1) sys.exit(1)
def cmd_show(cmd, dirs=SOURCE_DIRS, *, def cmd_show(cmd, *,
ignored=IGNORED_FILE,
known=KNOWN_FILE, known=KNOWN_FILE,
ignored=IGNORED_FILE,
skip_objects=False, skip_objects=False,
_find=_find_globals, _find=supported_vars,
_show=show.basic, _show=show.basic,
_print=print, _print=print,
): ):
@ -127,10 +128,12 @@ def cmd_show(cmd, dirs=SOURCE_DIRS, *,
""" """
allsupported = [] allsupported = []
allunsupported = [] allunsupported = []
for found, supported in _find(dirs, known, ignored): for found, supported in _find(known=known,
if skip_objects: # XXX Support proper filters instead. ignored=ignored,
if _is_object(found.vartype): skip_objects=skip_objects,
continue ):
if supported is None:
continue
(allsupported if supported else allunsupported (allsupported if supported else allunsupported
).append(found) ).append(found)
@ -165,9 +168,9 @@ def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
common.add_argument('--known', metavar='FILE', common.add_argument('--known', metavar='FILE',
default=KNOWN_FILE, default=KNOWN_FILE,
help='path to file that lists known types') help='path to file that lists known types')
common.add_argument('dirs', metavar='DIR', nargs='*', #common.add_argument('dirs', metavar='DIR', nargs='*',
default=SOURCE_DIRS, # default=SOURCE_DIRS,
help='a directory to check') # help='a directory to check')
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog=prog, prog=prog,

View File

@ -1,15 +1,16 @@
# The code here consists of hacks for pre-populating the known.tsv file. # The code here consists of hacks for pre-populating the known.tsv file.
from c_parser.preprocessor import _iter_clean_lines from c_analyzer.parser.preprocessor import _iter_clean_lines
from c_parser.naive import ( from c_analyzer.parser.naive import (
iter_variables, parse_variable_declaration, find_variables, iter_variables, parse_variable_declaration, find_variables,
) )
from c_parser.info import Variable from c_analyzer.common.known import HEADER as KNOWN_HEADER
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import Variable
from c_analyzer.util import write_tsv
from . import SOURCE_DIRS, REPO_ROOT from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER from .known import DATA_FILE as KNOWN_FILE
from .info import UNKNOWN, ID
from .util import write_tsv
from .files import iter_cpython_files from .files import iter_cpython_files

View File

@ -0,0 +1,29 @@
from c_analyzer.common.files import (
C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix,
)
from . import SOURCE_DIRS, REPO_ROOT
# XXX need tests:
# * iter_files()
def iter_files(*,
               walk=walk_tree,
               _files=iter_files_by_suffix,
               ):
    """Yield each C source file found under SOURCE_DIRS.

    The filenames come from "_files" (called with SOURCE_DIRS,
    C_SOURCE_SUFFIXES and REPO_ROOT).  Any filename that starts with
    one of the excluded trees ("Include/cpython/") is skipped.
    """
    # This module has no top-level "os" import, so import it here.
    import os.path

    excludedtrees = (
        os.path.join('Include', 'cpython', ''),
    )

    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                           walk=walk,
                           ):
        # str.startswith() accepts a tuple of prefixes.
        if filename.startswith(excludedtrees):
            continue
        yield filename

View File

@ -0,0 +1,101 @@
import os.path
from c_analyzer.common import files
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import find as _common
from . import SOURCE_DIRS, PYTHON, REPO_ROOT
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from .supported import (
ignored_from_file, IGNORED_FILE, is_supported, _is_object,
)
# XXX need tests:
# * vars_from_binary()
# * vars_from_source()
# * supported_vars()
def _handle_id(filename, funcname, name, *,
               _relpath=os.path.relpath,
               ):
    """Return an ID whose filename is made relative to REPO_ROOT."""
    relfile = _relpath(filename, REPO_ROOT)
    return ID(relfile, funcname, name)
def vars_from_binary(*,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_binary,
                     ):
    """Yield a Variable for each found Symbol.

    Details are filled in from the given "known" variables and types.
    If "known" is a str then it is loaded as a filename first.
    """
    known = _known_from_file(known) if isinstance(known, str) else known

    # The ".c" files under SOURCE_DIRS are still requested here, but
    # the result is not passed on (see below).
    _iter_files(SOURCE_DIRS, ('.c',))
    # XXX For now we only use known variables (no source lookup).
    yield from _iter_vars(
            PYTHON,
            known=known,
            filenames=None,
            handle_id=_handle_id,
            check_filename=(lambda n: True),
            )
def vars_from_source(*,
                     preprocessed=None,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_source,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    Details are filled in from the given "known" variables and types.
    If "known" is a str then it is loaded as a filename first.  The
    ".c" files under SOURCE_DIRS are the ones searched.
    """
    known = _known_from_file(known) if isinstance(known, str) else known

    sources = _iter_files(SOURCE_DIRS, ('.c',))
    yield from _iter_vars(
            sources,
            preprocessed=preprocessed,
            known=known,
            handle_id=_handle_id,
            )
def supported_vars(*,
                   known=KNOWN_FILE,
                   ignored=IGNORED_FILE,
                   skip_objects=False,
                   _known_from_file=known_from_file,
                   _ignored_from_file=ignored_from_file,
                   _iter_vars=vars_from_binary,
                   _is_supported=is_supported,
                   ):
    """Yield (var, is supported) for each found global variable.

    "known" and "ignored" may each be given as a filename (str), in
    which case they are loaded first.

    Non-global variables are skipped.  A variable whose vartype is
    UNKNOWN is yielded with None in place of the "supported" flag.
    If "skip_objects" is true then variables whose vartype is an
    object type (per _is_object()) are skipped as well.
    """
    if isinstance(known, str):
        known = _known_from_file(known)
    if isinstance(ignored, str):
        ignored = _ignored_from_file(ignored)

    for var in _iter_vars(known=known):
        if not var.isglobal:
            continue
        elif var.vartype == UNKNOWN:
            yield var, None
        # XXX Support proper filters instead.
        elif skip_objects and _is_object(var.vartype):
            continue
        else:
            yield var, _is_supported(var, ignored, known)

View File

@ -0,0 +1,66 @@
import csv
import os.path
from c_analyzer.parser.declarations import extract_storage
from c_analyzer.variables import known as _common
from c_analyzer.variables.info import Variable
from . import DATA_DIR
# XXX need tests:
# * from_file()
# * look_up_variable()
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
def _get_storage(decl, infunc):
# statics
if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
return 'static'
if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
return 'static'
if decl.startswith('PyDoc_VAR('):
return 'static'
if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
return 'static'
if decl.startswith('WRAP_METHOD('):
return 'static'
# public extern
if decl.startswith('PyAPI_DATA('):
return 'extern'
# Fall back to the normal handler.
return extract_storage(decl, infunc=infunc)
def _handle_var(varid, decl):
    """Return a Variable for the ID, with storage derived from "decl"."""
    #    if varid.name == 'id' and decl == UNKNOWN:
    #        # None of these are variables.
    #        decl = 'int id';
    return Variable(varid, _get_storage(decl, varid.funcname), decl)
def from_file(infile=DATA_FILE, *,
              _from_file=_common.from_file,
              _handle_var=_handle_var,
              ):
    """Return the info for known declarations in the given file.

    This delegates to "_from_file", passing "_handle_var" as the
    variable handler.
    """
    known = _from_file(infile, handle_var=_handle_var)
    return known
def look_up_variable(varid, knownvars, *,
                     _lookup=_common.look_up_variable,
                     ):
    """Return the known variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    The lookup itself is delegated to "_lookup", which is called with
    just the ID and the mapping.

    If no match is found then None is returned.
    """
    match = _lookup(varid, knownvars)
    return match

View File

@ -1,9 +1,13 @@
import os.path import os.path
import re import re
from c_analyzer_common import DATA_DIR from c_analyzer.common.info import ID
from c_analyzer_common.info import ID from c_analyzer.common.util import read_tsv, write_tsv
from c_analyzer_common.util import read_tsv, write_tsv
from . import DATA_DIR
# XXX need tests:
# * generate / script
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv') IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
@ -379,11 +383,12 @@ def _generate_ignored_file(variables, filename=None, *,
if __name__ == '__main__': if __name__ == '__main__':
from c_analyzer_common import SOURCE_DIRS from cpython import SOURCE_DIRS
from c_analyzer_common.known import ( from cpython.known import (
from_file as known_from_file, from_file as known_from_file,
DATA_FILE as KNOWN_FILE, DATA_FILE as KNOWN_FILE,
) )
# XXX This is wrong!
from . import find from . import find
known = known_from_file(KNOWN_FILE) known = known_from_file(KNOWN_FILE)
knownvars = (known or {}).get('variables') knownvars = (known or {}).get('variables')