From e4c431ecf50def40eb93c3969c1e4eeaf7bf32f1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 18 Oct 2019 19:00:04 -0700 Subject: [PATCH] bpo-36876: Re-organize the c-analyzer tool code. (gh-16841) This is partly a cleanup of the code. It also is preparation for getting the variables from the source (cross-platform) rather than from the symbols. The change only touches the tool (and its tests). --- Lib/test/test_check_c_globals.py | 2 +- .../test_c_analyzer_common/test_known.py | 68 ---- .../test_c_globals/test_find.py | 335 ------------------ .../__init__.py | 0 .../test_files.py | 2 +- .../test_info.py | 5 +- .../test_show.py | 6 +- .../__init__.py | 0 .../test___main__.py | 94 ++--- .../test_functional.py | 0 .../test_supported.py | 16 +- .../__init__.py | 0 .../test_declarations.py | 6 +- .../test_preprocessor.py | 2 +- .../__init__.py | 0 .../test_info.py | 4 +- .../test_variables/__init__.py | 6 + .../test_variables/test_find.py | 124 +++++++ .../test_info.py | 8 +- .../test_variables/test_known.py | 139 ++++++++ Tools/c-analyzer/c-globals.py | 2 +- .../{c_globals => c_analyzer}/__init__.py | 0 .../common}/__init__.py | 0 .../common}/files.py | 26 +- Tools/c-analyzer/c_analyzer/common/info.py | 138 ++++++++ Tools/c-analyzer/c_analyzer/common/show.py | 11 + .../common}/util.py | 0 .../parser}/__init__.py | 0 .../parser}/declarations.py | 70 +++- Tools/c-analyzer/c_analyzer/parser/find.py | 107 ++++++ .../{c_parser => c_analyzer/parser}/naive.py | 41 ++- .../parser}/preprocessor.py | 3 +- .../{c_parser => c_analyzer/parser}/source.py | 0 .../c-analyzer/c_analyzer/symbols/__init__.py | 0 .../binary.py => c_analyzer/symbols/_nm.py} | 104 ++---- Tools/c-analyzer/c_analyzer/symbols/find.py | 175 +++++++++ .../{c_symbols => c_analyzer/symbols}/info.py | 4 +- .../c_analyzer/variables/__init__.py | 0 Tools/c-analyzer/c_analyzer/variables/find.py | 75 ++++ .../variables}/info.py | 49 +-- .../c-analyzer/c_analyzer/variables/known.py | 91 +++++ 
.../c-analyzer/c_analyzer_common/__init__.py | 19 - Tools/c-analyzer/c_analyzer_common/info.py | 69 ---- Tools/c-analyzer/c_analyzer_common/known.py | 74 ---- Tools/c-analyzer/c_globals/find.py | 95 ----- Tools/c-analyzer/c_globals/show.py | 16 - Tools/c-analyzer/c_symbols/resolve.py | 147 -------- Tools/c-analyzer/c_symbols/source.py | 58 --- .../c-analyzer/{c_globals => cpython}/README | 0 Tools/c-analyzer/cpython/__init__.py | 29 ++ .../{c_globals => cpython}/__main__.py | 111 +++--- .../_generate.py | 13 +- Tools/c-analyzer/cpython/files.py | 29 ++ Tools/c-analyzer/cpython/find.py | 101 ++++++ Tools/c-analyzer/cpython/known.py | 66 ++++ .../{c_globals => cpython}/supported.py | 15 +- 56 files changed, 1376 insertions(+), 1179 deletions(-) delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py delete mode 100644 Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py rename Lib/test/test_tools/test_c_analyzer/{test_c_analyzer_common => test_common}/__init__.py (100%) rename Lib/test/test_tools/test_c_analyzer/{test_c_analyzer_common => test_common}/test_files.py (99%) rename Lib/test/test_tools/test_c_analyzer/{test_c_analyzer_common => test_common}/test_info.py (98%) rename Lib/test/test_tools/test_c_analyzer/{test_c_globals => test_common}/test_show.py (94%) rename Lib/test/test_tools/test_c_analyzer/{test_c_globals => test_cpython}/__init__.py (100%) rename Lib/test/test_tools/test_c_analyzer/{test_c_globals => test_cpython}/test___main__.py (79%) rename Lib/test/test_tools/test_c_analyzer/{test_c_globals => test_cpython}/test_functional.py (100%) rename Lib/test/test_tools/test_c_analyzer/{test_c_globals => test_cpython}/test_supported.py (85%) rename Lib/test/test_tools/test_c_analyzer/{test_c_parser => test_parser}/__init__.py (100%) rename Lib/test/test_tools/test_c_analyzer/{test_c_parser => test_parser}/test_declarations.py (99%) rename Lib/test/test_tools/test_c_analyzer/{test_c_parser => 
test_parser}/test_preprocessor.py (99%) rename Lib/test/test_tools/test_c_analyzer/{test_c_symbols => test_symbols}/__init__.py (100%) rename Lib/test/test_tools/test_c_analyzer/{test_c_symbols => test_symbols}/test_info.py (98%) create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py rename Lib/test/test_tools/test_c_analyzer/{test_c_parser => test_variables}/test_info.py (98%) create mode 100644 Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py rename Tools/c-analyzer/{c_globals => c_analyzer}/__init__.py (100%) rename Tools/c-analyzer/{c_parser => c_analyzer/common}/__init__.py (100%) rename Tools/c-analyzer/{c_analyzer_common => c_analyzer/common}/files.py (82%) create mode 100644 Tools/c-analyzer/c_analyzer/common/info.py create mode 100644 Tools/c-analyzer/c_analyzer/common/show.py rename Tools/c-analyzer/{c_analyzer_common => c_analyzer/common}/util.py (100%) rename Tools/c-analyzer/{c_symbols => c_analyzer/parser}/__init__.py (100%) rename Tools/c-analyzer/{c_parser => c_analyzer/parser}/declarations.py (83%) create mode 100644 Tools/c-analyzer/c_analyzer/parser/find.py rename Tools/c-analyzer/{c_parser => c_analyzer/parser}/naive.py (80%) rename Tools/c-analyzer/{c_parser => c_analyzer/parser}/preprocessor.py (99%) rename Tools/c-analyzer/{c_parser => c_analyzer/parser}/source.py (100%) create mode 100644 Tools/c-analyzer/c_analyzer/symbols/__init__.py rename Tools/c-analyzer/{c_symbols/binary.py => c_analyzer/symbols/_nm.py} (50%) create mode 100644 Tools/c-analyzer/c_analyzer/symbols/find.py rename Tools/c-analyzer/{c_symbols => c_analyzer/symbols}/info.py (93%) create mode 100644 Tools/c-analyzer/c_analyzer/variables/__init__.py create mode 100644 Tools/c-analyzer/c_analyzer/variables/find.py rename Tools/c-analyzer/{c_parser => c_analyzer/variables}/info.py (61%) create mode 100644 Tools/c-analyzer/c_analyzer/variables/known.py 
delete mode 100644 Tools/c-analyzer/c_analyzer_common/__init__.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/info.py delete mode 100644 Tools/c-analyzer/c_analyzer_common/known.py delete mode 100644 Tools/c-analyzer/c_globals/find.py delete mode 100644 Tools/c-analyzer/c_globals/show.py delete mode 100644 Tools/c-analyzer/c_symbols/resolve.py delete mode 100644 Tools/c-analyzer/c_symbols/source.py rename Tools/c-analyzer/{c_globals => cpython}/README (100%) create mode 100644 Tools/c-analyzer/cpython/__init__.py rename Tools/c-analyzer/{c_globals => cpython}/__main__.py (68%) rename Tools/c-analyzer/{c_analyzer_common => cpython}/_generate.py (97%) create mode 100644 Tools/c-analyzer/cpython/files.py create mode 100644 Tools/c-analyzer/cpython/find.py create mode 100644 Tools/c-analyzer/cpython/known.py rename Tools/c-analyzer/{c_globals => cpython}/supported.py (97%) diff --git a/Lib/test/test_check_c_globals.py b/Lib/test/test_check_c_globals.py index a3925f0ca88..030debc452e 100644 --- a/Lib/test/test_check_c_globals.py +++ b/Lib/test/test_check_c_globals.py @@ -3,7 +3,7 @@ import test.test_tools test.test_tools.skip_if_missing('c-analyzer') with test.test_tools.imports_under_tool('c-analyzer'): - from c_globals.__main__ import main + from cpython.__main__ import main class ActualChecks(unittest.TestCase): diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py b/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py deleted file mode 100644 index 215023da577..00000000000 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_known.py +++ /dev/null @@ -1,68 +0,0 @@ -import re -import textwrap -import unittest - -from .. 
import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser.info import Variable - from c_analyzer_common.info import ID - from c_analyzer_common.known import from_file - - -class FromFileTests(unittest.TestCase): - - maxDiff = None - - _return_read_tsv = () - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - def _read_tsv(self, *args): - self.calls.append(('_read_tsv', args)) - return self._return_read_tsv - - def test_typical(self): - lines = textwrap.dedent(''' - filename funcname name kind declaration - file1.c - var1 variable static int - file1.c func1 local1 variable static int - file1.c - var2 variable int - file1.c func2 local2 variable char * - file2.c - var1 variable char * - ''').strip().splitlines() - lines = [re.sub(r'\s+', '\t', line, 4) for line in lines] - self._return_read_tsv = [tuple(v.strip() for v in line.split('\t')) - for line in lines[1:]] - - known = from_file('spam.c', _read_tsv=self._read_tsv) - - self.assertEqual(known, { - 'variables': {v.id: v for v in [ - Variable.from_parts('file1.c', '', 'var1', 'static int'), - Variable.from_parts('file1.c', 'func1', 'local1', 'static int'), - Variable.from_parts('file1.c', '', 'var2', 'int'), - Variable.from_parts('file1.c', 'func2', 'local2', 'char *'), - Variable.from_parts('file2.c', '', 'var1', 'char *'), - ]}, - }) - self.assertEqual(self.calls, [ - ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')), - ]) - - def test_empty(self): - self._return_read_tsv = [] - - known = from_file('spam.c', _read_tsv=self._read_tsv) - - self.assertEqual(known, { - 'variables': {}, - }) - self.assertEqual(self.calls, [ - ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py b/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py deleted file mode 100644 index 828899201b7..00000000000 
--- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_find.py +++ /dev/null @@ -1,335 +0,0 @@ -import unittest - -from .. import tool_imports_for_tests -with tool_imports_for_tests(): - from c_parser import info - from c_globals.find import globals_from_binary, globals - - -class _Base(unittest.TestCase): - - maxDiff = None - - @property - def calls(self): - try: - return self._calls - except AttributeError: - self._calls = [] - return self._calls - - -class StaticsFromBinaryTests(_Base): - - _return_iter_symbols = () - _return_resolve_symbols = () - _return_get_symbol_resolver = None - - def setUp(self): - super().setUp() - - self.kwargs = dict( - _iter_symbols=self._iter_symbols, - _resolve=self._resolve_symbols, - _get_symbol_resolver=self._get_symbol_resolver, - ) - - def _iter_symbols(self, binfile, find_local_symbol): - self.calls.append(('_iter_symbols', (binfile, find_local_symbol))) - return self._return_iter_symbols - - def _resolve_symbols(self, symbols, resolve): - self.calls.append(('_resolve_symbols', (symbols, resolve,))) - return self._return_resolve_symbols - - def _get_symbol_resolver(self, knownvars, dirnames=None): - self.calls.append(('_get_symbol_resolver', (knownvars, dirnames))) - return self._return_get_symbol_resolver - - def test_typical(self): - symbols = self._return_iter_symbols = () - resolver = self._return_get_symbol_resolver = object() - variables = self._return_resolve_symbols = [ - info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), - info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'), - info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'), - info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'), - info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), - info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), - ] - knownvars = object() - - found = list(globals_from_binary('python', - knownvars=knownvars, - **self.kwargs)) - - 
self.assertEqual(found, [ - info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), - info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'), - info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'), - info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), - info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), - ]) - self.assertEqual(self.calls, [ - ('_iter_symbols', ('python', None)), - ('_get_symbol_resolver', (knownvars, None)), - ('_resolve_symbols', (symbols, resolver)), - ]) - -# self._return_iter_symbols = [ -# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False), -# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False), -# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False), -# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True), -# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False), -# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False), -# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True), -# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False), -# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False), -# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False), -# s_info.Symbol(('???', None, 'var_x'), 'variable', False), -# s_info.Symbol(('???', '???', 'var_y'), 'variable', False), -# s_info.Symbol((None, None, '???'), 'other', False), -# ] -# known = object() -# -# globals_from_binary('python', knownvars=known, **this.kwargs) -# found = list(globals_from_symbols(['dir1'], self.iter_symbols)) -# -# self.assertEqual(found, [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ]) -# self.assertEqual(self.calls, 
[ -# ('iter_symbols', (['dir1'],)), -# ]) -# -# def test_no_symbols(self): -# self._return_iter_symbols = [] -# -# found = list(globals_from_symbols(['dir1'], self.iter_symbols)) -# -# self.assertEqual(found, []) -# self.assertEqual(self.calls, [ -# ('iter_symbols', (['dir1'],)), -# ]) - - # XXX need functional test - - -#class StaticFromDeclarationsTests(_Base): -# -# _return_iter_declarations = () -# -# def iter_declarations(self, dirnames): -# self.calls.append(('iter_declarations', (dirnames,))) -# return iter(self._return_iter_declarations) -# -# def test_typical(self): -# self._return_iter_declarations = [ -# None, -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# object(), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# object(), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# object(), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# object(), -# ] -# -# found = list(globals_from_declarations(['dir1'], self.iter_declarations)) -# -# self.assertEqual(found, [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ]) -# self.assertEqual(self.calls, [ -# ('iter_declarations', (['dir1'],)), -# ]) -# -# def test_no_declarations(self): -# self._return_iter_declarations = [] -# -# found = list(globals_from_declarations(['dir1'], self.iter_declarations)) -# -# self.assertEqual(found, []) -# self.assertEqual(self.calls, [ -# ('iter_declarations', (['dir1'],)), -# ]) - - -#class IterVariablesTests(_Base): -# -# _return_from_symbols = () -# _return_from_declarations = () -# -# def _from_symbols(self, dirnames, iter_symbols): -# 
self.calls.append(('_from_symbols', (dirnames, iter_symbols))) -# return iter(self._return_from_symbols) -# -# def _from_declarations(self, dirnames, iter_declarations): -# self.calls.append(('_from_declarations', (dirnames, iter_declarations))) -# return iter(self._return_from_declarations) -# -# def test_typical(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_symbols = expected -# -# found = list(iter_variables(['dir1'], -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)), -# ]) -# -# def test_no_symbols(self): -# self._return_from_symbols = [] -# -# found = list(iter_variables(['dir1'], -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, []) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], b_symbols.iter_symbols)), -# ]) -# -# def test_from_binary(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_symbols = expected -# -# found = list(iter_variables(['dir1'], 'platform', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], 
b_symbols.iter_symbols)), -# ]) -# -# def test_from_symbols(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_symbols = expected -# -# found = list(iter_variables(['dir1'], 'symbols', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_symbols', (['dir1'], s_symbols.iter_symbols)), -# ]) -# -# def test_from_declarations(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_declarations = expected -# -# found = list(iter_variables(['dir1'], 'declarations', -# _from_symbols=self._from_symbols, -# _from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_declarations', (['dir1'], declarations.iter_all)), -# ]) -# -# def test_from_preprocessed(self): -# expected = [ -# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), -# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), -# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), -# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), -# ] -# self._return_from_declarations = expected -# -# found = list(iter_variables(['dir1'], 'preprocessed', -# _from_symbols=self._from_symbols, -# 
_from_declarations=self._from_declarations)) -# -# self.assertEqual(found, expected) -# self.assertEqual(self.calls, [ -# ('_from_declarations', (['dir1'], declarations.iter_preprocessed)), -# ]) - - -class StaticsTest(_Base): - - _return_iter_variables = None - - def _iter_variables(self, kind, *, known, dirnames): - self.calls.append( - ('_iter_variables', (kind, known, dirnames))) - return iter(self._return_iter_variables or ()) - - def test_typical(self): - self._return_iter_variables = [ - info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), - info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'), - info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), - info.Variable.from_parts('src1/spam.c', 'ham', 'result', 'int'), # skipped - info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), - info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'), - info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'), - info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'), - info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'), - info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'), - ] - dirnames = object() - known = object() - - found = list(globals(dirnames, known, - kind='platform', - _iter_variables=self._iter_variables, - )) - - self.assertEqual(found, [ - info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'), - info.Variable.from_parts('src1/spam.c', None, 'var1b', 'const char *'), - info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'), - info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'), - info.Variable.from_parts('src1/spam.c', None, 'freelist', 
'static (PyTupleObject *)[10]'), - info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'), - info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'), - info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'), - info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'), - ]) - self.assertEqual(self.calls, [ - ('_iter_variables', ('platform', known, dirnames)), - ]) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_common/__init__.py similarity index 100% rename from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/__init__.py rename to Lib/test/test_tools/test_c_analyzer/test_common/__init__.py diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py b/Lib/test/test_tools/test_c_analyzer/test_common/test_files.py similarity index 99% rename from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py rename to Lib/test/test_tools/test_c_analyzer/test_common/test_files.py index 6d14aea78a4..0c97d2a0bbf 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_files.py +++ b/Lib/test/test_tools/test_c_analyzer/test_common/test_files.py @@ -3,7 +3,7 @@ import unittest from .. 
import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common.files import ( + from c_analyzer.common.files import ( iter_files, _walk_tree, glob_tree, ) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_common/test_info.py similarity index 98% rename from Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py rename to Lib/test/test_tools/test_c_analyzer/test_common/test_info.py index 2d386713b99..69dbb582c6b 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_analyzer_common/test_info.py +++ b/Lib/test/test_tools/test_c_analyzer/test_common/test_info.py @@ -4,7 +4,10 @@ import unittest from ..util import PseudoStr, StrProxy, Object from .. import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common.info import ID + from c_analyzer.common.info import ( + UNKNOWN, + ID, + ) class IDTests(unittest.TestCase): diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py b/Lib/test/test_tools/test_c_analyzer/test_common/test_show.py similarity index 94% rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py rename to Lib/test/test_tools/test_c_analyzer/test_common/test_show.py index ce1dad85db1..91ca2f3b344 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_show.py +++ b/Lib/test/test_tools/test_c_analyzer/test_common/test_show.py @@ -2,8 +2,10 @@ import unittest from .. 
import tool_imports_for_tests with tool_imports_for_tests(): - from c_parser import info - from c_globals.show import basic + from c_analyzer.variables import info + from c_analyzer.common.show import ( + basic, + ) TYPICAL = [ diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py similarity index 100% rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/__init__.py rename to Lib/test/test_tools/test_c_analyzer/test_cpython/__init__.py diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py similarity index 79% rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py rename to Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py index 5f52c588d7c..6d69ed7525b 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test___main__.py +++ b/Lib/test/test_tools/test_c_analyzer/test_cpython/test___main__.py @@ -3,12 +3,13 @@ import unittest from .. 
import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common import SOURCE_DIRS - from c_analyzer_common.known import DATA_FILE as KNOWN_FILE - from c_parser import info - import c_globals as cg - from c_globals.supported import IGNORED_FILE - from c_globals.__main__ import cmd_check, cmd_show, parse_args, main + from c_analyzer.variables import info + from cpython import SOURCE_DIRS + from cpython.supported import IGNORED_FILE + from cpython.known import DATA_FILE as KNOWN_FILE + from cpython.__main__ import ( + cmd_check, cmd_show, parse_args, main, + ) TYPICAL = [ @@ -46,6 +47,8 @@ class CMDBase(unittest.TestCase): maxDiff = None +# _return_known_from_file = None +# _return_ignored_from_file = None _return_find = () @property @@ -56,8 +59,16 @@ class CMDBase(unittest.TestCase): self._calls = [] return self._calls - def _find(self, *args): - self.calls.append(('_find', args)) +# def _known_from_file(self, *args): +# self.calls.append(('_known_from_file', args)) +# return self._return_known_from_file or {} +# +# def _ignored_from_file(self, *args): +# self.calls.append(('_ignored_from_file', args)) +# return self._return_ignored_from_file or {} + + def _find(self, known, ignored, skip_objects=False): + self.calls.append(('_find', (known, ignored, skip_objects))) return self._return_find def _show(self, *args): @@ -78,41 +89,35 @@ class CheckTests(CMDBase): _print=self._print, ) - self.assertEqual(self.calls[0], ( - '_find', ( - SOURCE_DIRS, - KNOWN_FILE, - IGNORED_FILE, - ), - )) + self.assertEqual( + self.calls[0], + ('_find', (KNOWN_FILE, IGNORED_FILE, False)), + ) def test_all_supported(self): self._return_find = [(v, s) for v, s in TYPICAL if s] dirs = ['src1', 'src2', 'Include'] cmd_check('check', - dirs, - ignored='ignored.tsv', - known='known.tsv', - _find=self._find, - _show=self._show, - _print=self._print, - ) + known='known.tsv', + ignored='ignored.tsv', + _find=self._find, + _show=self._show, + _print=self._print, + ) 
self.assertEqual(self.calls, [ - ('_find', (dirs, 'known.tsv', 'ignored.tsv')), + ('_find', ('known.tsv', 'ignored.tsv', False)), #('_print', ('okay',)), ]) def test_some_unsupported(self): self._return_find = TYPICAL - dirs = ['src1', 'src2', 'Include'] with self.assertRaises(SystemExit) as cm: cmd_check('check', - dirs, - ignored='ignored.tsv', known='known.tsv', + ignored='ignored.tsv', _find=self._find, _show=self._show, _print=self._print, @@ -120,7 +125,7 @@ class CheckTests(CMDBase): unsupported = [v for v, s in TYPICAL if not s] self.assertEqual(self.calls, [ - ('_find', (dirs, 'known.tsv', 'ignored.tsv')), + ('_find', ('known.tsv', 'ignored.tsv', False)), ('_print', ('ERROR: found unsupported global variables',)), ('_print', ()), ('_show', (sorted(unsupported),)), @@ -140,20 +145,15 @@ class ShowTests(CMDBase): _print=self._print, ) - self.assertEqual(self.calls[0], ( - '_find', ( - SOURCE_DIRS, - KNOWN_FILE, - IGNORED_FILE, - ), - )) + self.assertEqual( + self.calls[0], + ('_find', (KNOWN_FILE, IGNORED_FILE, False)), + ) def test_typical(self): self._return_find = TYPICAL - dirs = ['src1', 'src2', 'Include'] cmd_show('show', - dirs, known='known.tsv', ignored='ignored.tsv', _find=self._find, @@ -164,7 +164,7 @@ class ShowTests(CMDBase): supported = [v for v, s in TYPICAL if s] unsupported = [v for v, s in TYPICAL if not s] self.assertEqual(self.calls, [ - ('_find', (dirs, 'known.tsv', 'ignored.tsv')), + ('_find', ('known.tsv', 'ignored.tsv', False)), ('_print', ('supported:',)), ('_print', ('----------',)), ('_show', (sorted(supported),)), @@ -201,7 +201,7 @@ class ParseArgsTests(unittest.TestCase): self.assertEqual(cmdkwargs, { 'ignored': IGNORED_FILE, 'known': KNOWN_FILE, - 'dirs': SOURCE_DIRS, + #'dirs': SOURCE_DIRS, }) def test_check_full_args(self): @@ -209,16 +209,16 @@ class ParseArgsTests(unittest.TestCase): 'check', '--ignored', 'spam.tsv', '--known', 'eggs.tsv', - 'dir1', - 'dir2', - 'dir3', + #'dir1', + #'dir2', + #'dir3', ]) 
self.assertEqual(cmd, 'check') self.assertEqual(cmdkwargs, { 'ignored': 'spam.tsv', 'known': 'eggs.tsv', - 'dirs': ['dir1', 'dir2', 'dir3'] + #'dirs': ['dir1', 'dir2', 'dir3'] }) def test_show_no_args(self): @@ -230,7 +230,7 @@ class ParseArgsTests(unittest.TestCase): self.assertEqual(cmdkwargs, { 'ignored': IGNORED_FILE, 'known': KNOWN_FILE, - 'dirs': SOURCE_DIRS, + #'dirs': SOURCE_DIRS, 'skip_objects': False, }) @@ -239,16 +239,16 @@ class ParseArgsTests(unittest.TestCase): 'show', '--ignored', 'spam.tsv', '--known', 'eggs.tsv', - 'dir1', - 'dir2', - 'dir3', + #'dir1', + #'dir2', + #'dir3', ]) self.assertEqual(cmd, 'show') self.assertEqual(cmdkwargs, { 'ignored': 'spam.tsv', 'known': 'eggs.tsv', - 'dirs': ['dir1', 'dir2', 'dir3'], + #'dirs': ['dir1', 'dir2', 'dir3'], 'skip_objects': False, }) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py similarity index 100% rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_functional.py rename to Lib/test/test_tools/test_c_analyzer/test_cpython/test_functional.py diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py similarity index 85% rename from Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py rename to Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py index 1e7d40e2afc..a244b97e1fc 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_globals/test_supported.py +++ b/Lib/test/test_tools/test_c_analyzer/test_cpython/test_supported.py @@ -4,9 +4,11 @@ import unittest from .. 
import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common.info import ID - from c_parser import info - from c_globals.supported import is_supported, ignored_from_file + from c_analyzer.common.info import ID + from c_analyzer.variables.info import Variable + from cpython.supported import ( + is_supported, ignored_from_file, + ) class IsSupportedTests(unittest.TestCase): @@ -14,8 +16,8 @@ class IsSupportedTests(unittest.TestCase): @unittest.expectedFailure def test_supported(self): statics = [ - info.StaticVar('src1/spam.c', None, 'var1', 'const char *'), - info.StaticVar('src1/spam.c', None, 'var1', 'int'), + Variable('src1/spam.c', None, 'var1', 'const char *'), + Variable('src1/spam.c', None, 'var1', 'int'), ] for static in statics: with self.subTest(static): @@ -26,8 +28,8 @@ class IsSupportedTests(unittest.TestCase): @unittest.expectedFailure def test_not_supported(self): statics = [ - info.StaticVar('src1/spam.c', None, 'var1', 'PyObject *'), - info.StaticVar('src1/spam.c', None, 'var1', 'PyObject[10]'), + Variable('src1/spam.c', None, 'var1', 'PyObject *'), + Variable('src1/spam.c', None, 'var1', 'PyObject[10]'), ] for static in statics: with self.subTest(static): diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py similarity index 100% rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/__init__.py rename to Lib/test/test_tools/test_c_analyzer/test_parser/__init__.py diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py b/Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py similarity index 99% rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py rename to Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py index b68744ef0ab..674fcb1af1c 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_declarations.py +++ 
b/Lib/test/test_tools/test_c_analyzer/test_parser/test_declarations.py @@ -3,9 +3,9 @@ import unittest from .. import tool_imports_for_tests with tool_imports_for_tests(): - from c_parser.declarations import ( + from c_analyzer.parser.declarations import ( iter_global_declarations, iter_local_statements, - parse_func, parse_var, parse_compound, + parse_func, _parse_var, parse_compound, iter_variables, ) @@ -515,7 +515,7 @@ class ParseVarTests(TestCaseBase): ]) for stmt, expected in tests: with self.subTest(stmt): - name, vartype = parse_var(stmt) + name, vartype = _parse_var(stmt) self.assertEqual((name, vartype), expected) diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py b/Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py similarity index 99% rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py rename to Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py index 89e15570d65..56a1c9c612f 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_preprocessor.py +++ b/Lib/test/test_tools/test_c_analyzer/test_parser/test_preprocessor.py @@ -6,7 +6,7 @@ import sys from ..util import wrapped_arg_combos, StrProxy from .. 
import tool_imports_for_tests with tool_imports_for_tests(): - from c_parser.preprocessor import ( + from c_analyzer.parser.preprocessor import ( iter_lines, # directives parse_directive, PreprocessorDirective, diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py similarity index 100% rename from Lib/test/test_tools/test_c_analyzer/test_c_symbols/__init__.py rename to Lib/test/test_tools/test_c_analyzer/test_symbols/__init__.py diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py similarity index 98% rename from Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py rename to Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py index e029dcf6612..1282a89718c 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_symbols/test_info.py +++ b/Lib/test/test_tools/test_c_analyzer/test_symbols/test_info.py @@ -4,8 +4,8 @@ import unittest from ..util import PseudoStr, StrProxy, Object from .. 
import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common.info import ID - from c_symbols.info import Symbol + from c_analyzer.common.info import ID + from c_analyzer.symbols.info import Symbol class SymbolTests(unittest.TestCase): diff --git a/Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py b/Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py new file mode 100644 index 00000000000..bc502ef32d2 --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_variables/__init__.py @@ -0,0 +1,6 @@ +import os.path +from test.support import load_package_tests + + +def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py b/Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py new file mode 100644 index 00000000000..7a13cf3f5bf --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_variables/test_find.py @@ -0,0 +1,124 @@ +import unittest + +from .. 
import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.variables import info + from c_analyzer.variables.find import ( + vars_from_binary, + ) + + +class _Base(unittest.TestCase): + + maxDiff = None + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + + +class VarsFromBinaryTests(_Base): + + _return_iter_vars = () + _return_get_symbol_resolver = None + + def setUp(self): + super().setUp() + + self.kwargs = dict( + _iter_vars=self._iter_vars, + _get_symbol_resolver=self._get_symbol_resolver, + ) + + def _iter_vars(self, binfile, resolve, handle_id): + self.calls.append(('_iter_vars', (binfile, resolve, handle_id))) + return [(v, v.id) for v in self._return_iter_vars] + + def _get_symbol_resolver(self, known=None, dirnames=(), *, + handle_var, + filenames=None, + check_filename=None, + perfilecache=None, + ): + self.calls.append(('_get_symbol_resolver', + (known, dirnames, handle_var, filenames, + check_filename, perfilecache))) + return self._return_get_symbol_resolver + + def test_typical(self): + resolver = self._return_get_symbol_resolver = object() + variables = self._return_iter_vars = [ + info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), + info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'), + info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'), + info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'), + info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), + info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), + ] + known = object() + filenames = object() + + found = list(vars_from_binary('python', + known=known, + filenames=filenames, + **self.kwargs)) + + self.assertEqual(found, [ + info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'), + info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'), + info.Variable.from_parts('dir1/spam.c', None, 
'var3', 'char *'), + info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'), + info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'), + info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'), + ]) + self.assertEqual(self.calls, [ + ('_get_symbol_resolver', (filenames, known, info.Variable.from_id, None, None, {})), + ('_iter_vars', ('python', resolver, None)), + ]) + +# self._return_iter_symbols = [ +# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False), +# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False), +# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False), +# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True), +# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False), +# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False), +# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True), +# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False), +# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False), +# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False), +# s_info.Symbol(('???', None, 'var_x'), 'variable', False), +# s_info.Symbol(('???', '???', 'var_y'), 'variable', False), +# s_info.Symbol((None, None, '???'), 'other', False), +# ] +# known = object() +# +# vars_from_binary('python', knownvars=known, **this.kwargs) +# found = list(globals_from_symbols(['dir1'], self.iter_symbols)) +# +# self.assertEqual(found, [ +# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'), +# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'), +# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'), +# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'), +# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'), +# ]) +# self.assertEqual(self.calls, [ +# ('iter_symbols', (['dir1'],)), +# ]) +# +# def test_no_symbols(self): +# self._return_iter_symbols = [] +# +# found = 
list(globals_from_symbols(['dir1'], self.iter_symbols)) +# +# self.assertEqual(found, []) +# self.assertEqual(self.calls, [ +# ('iter_symbols', (['dir1'],)), +# ]) + + # XXX need functional test diff --git a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py b/Lib/test/test_tools/test_c_analyzer/test_variables/test_info.py similarity index 98% rename from Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py rename to Lib/test/test_tools/test_c_analyzer/test_variables/test_info.py index d1a966c5890..d424d8eebb8 100644 --- a/Lib/test/test_tools/test_c_analyzer/test_c_parser/test_info.py +++ b/Lib/test/test_tools/test_c_analyzer/test_variables/test_info.py @@ -4,10 +4,10 @@ import unittest from ..util import PseudoStr, StrProxy, Object from .. import tool_imports_for_tests with tool_imports_for_tests(): - from c_analyzer_common.info import ID, UNKNOWN - from c_parser.info import ( - normalize_vartype, Variable, - ) + from c_analyzer.common.info import UNKNOWN, ID + from c_analyzer.variables.info import ( + normalize_vartype, Variable + ) class NormalizeVartypeTests(unittest.TestCase): diff --git a/Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py b/Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py new file mode 100644 index 00000000000..49ff45c6d1b --- /dev/null +++ b/Lib/test/test_tools/test_c_analyzer/test_variables/test_known.py @@ -0,0 +1,139 @@ +import re +import textwrap +import unittest + +from .. 
import tool_imports_for_tests +with tool_imports_for_tests(): + from c_analyzer.common.info import ID + from c_analyzer.variables.info import Variable + from c_analyzer.variables.known import ( + read_file, + from_file, + ) + +class _BaseTests(unittest.TestCase): + + maxDiff = None + + @property + def calls(self): + try: + return self._calls + except AttributeError: + self._calls = [] + return self._calls + + +class ReadFileTests(_BaseTests): + + _return_read_tsv = () + + def _read_tsv(self, *args): + self.calls.append(('_read_tsv', args)) + return self._return_read_tsv + + def test_typical(self): + lines = textwrap.dedent(''' + filename funcname name kind declaration + file1.c - var1 variable static int + file1.c func1 local1 variable static int + file1.c - var2 variable int + file1.c func2 local2 variable char * + file2.c - var1 variable char * + ''').strip().splitlines() + lines = [re.sub(r'\s+', '\t', line, 4) for line in lines] + self._return_read_tsv = [tuple(v.strip() for v in line.split('\t')) + for line in lines[1:]] + + known = list(read_file('known.tsv', _read_tsv=self._read_tsv)) + + self.assertEqual(known, [ + ('variable', ID('file1.c', '', 'var1'), 'static int'), + ('variable', ID('file1.c', 'func1', 'local1'), 'static int'), + ('variable', ID('file1.c', '', 'var2'), 'int'), + ('variable', ID('file1.c', 'func2', 'local2'), 'char *'), + ('variable', ID('file2.c', '', 'var1'), 'char *'), + ]) + self.assertEqual(self.calls, [ + ('_read_tsv', + ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')), + ]) + + def test_empty(self): + self._return_read_tsv = [] + + known = list(read_file('known.tsv', _read_tsv=self._read_tsv)) + + self.assertEqual(known, []) + self.assertEqual(self.calls, [ + ('_read_tsv', ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')), + ]) + + +class FromFileTests(_BaseTests): + + _return_read_file = () + _return_handle_var = () + + def _read_file(self, infile): + self.calls.append(('_read_file', (infile,))) + return 
iter(self._return_read_file) + + def _handle_var(self, varid, decl): + self.calls.append(('_handle_var', (varid, decl))) + var = self._return_handle_var.pop(0) + return var + + def test_typical(self): + expected = [ + Variable.from_parts('file1.c', '', 'var1', 'static int'), + Variable.from_parts('file1.c', 'func1', 'local1', 'static int'), + Variable.from_parts('file1.c', '', 'var2', 'int'), + Variable.from_parts('file1.c', 'func2', 'local2', 'char *'), + Variable.from_parts('file2.c', '', 'var1', 'char *'), + ] + self._return_read_file = [('variable', v.id, v.vartype) + for v in expected] +# ('variable', ID('file1.c', '', 'var1'), 'static int'), +# ('variable', ID('file1.c', 'func1', 'local1'), 'static int'), +# ('variable', ID('file1.c', '', 'var2'), 'int'), +# ('variable', ID('file1.c', 'func2', 'local2'), 'char *'), +# ('variable', ID('file2.c', '', 'var1'), 'char *'), +# ] + self._return_handle_var = list(expected) # a copy + + known = from_file('known.tsv', + handle_var=self._handle_var, + _read_file=self._read_file, + ) + + self.assertEqual(known, { + 'variables': {v.id: v for v in expected}, + }) +# Variable.from_parts('file1.c', '', 'var1', 'static int'), +# Variable.from_parts('file1.c', 'func1', 'local1', 'static int'), +# Variable.from_parts('file1.c', '', 'var2', 'int'), +# Variable.from_parts('file1.c', 'func2', 'local2', 'char *'), +# Variable.from_parts('file2.c', '', 'var1', 'char *'), +# ]}, +# }) + self.assertEqual(self.calls, [ + ('_read_file', ('known.tsv',)), + *[('_handle_var', (v.id, v.vartype)) + for v in expected], + ]) + + def test_empty(self): + self._return_read_file = [] + + known = from_file('known.tsv', + handle_var=self._handle_var, + _read_file=self._read_file, + ) + + self.assertEqual(known, { + 'variables': {}, + }) + self.assertEqual(self.calls, [ + ('_read_file', ('known.tsv',)), + ]) diff --git a/Tools/c-analyzer/c-globals.py b/Tools/c-analyzer/c-globals.py index 9afe059b28c..b36b791241d 100644 --- 
a/Tools/c-analyzer/c-globals.py +++ b/Tools/c-analyzer/c-globals.py @@ -1,6 +1,6 @@ # This is a script equivalent of running "python -m test.test_c_globals.cg". -from c_globals.__main__ import parse_args, main +from cpython.__main__ import parse_args, main # This is effectively copied from cg/__main__.py: diff --git a/Tools/c-analyzer/c_globals/__init__.py b/Tools/c-analyzer/c_analyzer/__init__.py similarity index 100% rename from Tools/c-analyzer/c_globals/__init__.py rename to Tools/c-analyzer/c_analyzer/__init__.py diff --git a/Tools/c-analyzer/c_parser/__init__.py b/Tools/c-analyzer/c_analyzer/common/__init__.py similarity index 100% rename from Tools/c-analyzer/c_parser/__init__.py rename to Tools/c-analyzer/c_analyzer/common/__init__.py diff --git a/Tools/c-analyzer/c_analyzer_common/files.py b/Tools/c-analyzer/c_analyzer/common/files.py similarity index 82% rename from Tools/c-analyzer/c_analyzer_common/files.py rename to Tools/c-analyzer/c_analyzer/common/files.py index b3cd16c8dc0..ab551a84bad 100644 --- a/Tools/c-analyzer/c_analyzer_common/files.py +++ b/Tools/c-analyzer/c_analyzer/common/files.py @@ -2,7 +2,10 @@ import glob import os import os.path -from . import SOURCE_DIRS, REPO_ROOT +# XXX need tests: +# * walk_tree() +# * glob_tree() +# * iter_files_by_suffix() C_SOURCE_SUFFIXES = ('.c', '.h') @@ -115,24 +118,3 @@ def iter_files_by_suffix(root, suffixes, relparent=None, *, # XXX Ignore repeated suffixes? 
for suffix in suffixes: yield from _iter_files(root, suffix, relparent) - - -def iter_cpython_files(*, - walk=walk_tree, - _files=iter_files_by_suffix, - ): - """Yield each file in the tree for each of the given directory names.""" - excludedtrees = [ - os.path.join('Include', 'cpython', ''), - ] - def is_excluded(filename): - for root in excludedtrees: - if filename.startswith(root): - return True - return False - for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT, - walk=walk, - ): - if is_excluded(filename): - continue - yield filename diff --git a/Tools/c-analyzer/c_analyzer/common/info.py b/Tools/c-analyzer/c_analyzer/common/info.py new file mode 100644 index 00000000000..3f3f8c5b05d --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/common/info.py @@ -0,0 +1,138 @@ +from collections import namedtuple +import re + +from .util import classonly, _NTBase + +# XXX need tests: +# * ID.match() + + +UNKNOWN = '???' + +NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$') + + +class ID(_NTBase, namedtuple('ID', 'filename funcname name')): + """A unique ID for a single symbol or declaration.""" + + __slots__ = () + # XXX Add optional conditions (tuple of strings) field. + #conditions = Slot() + + @classonly + def from_raw(cls, raw): + if not raw: + return None + if isinstance(raw, str): + return cls(None, None, raw) + try: + name, = raw + filename = None + except ValueError: + try: + filename, name = raw + except ValueError: + return super().from_raw(raw) + return cls(filename, None, name) + + def __new__(cls, filename, funcname, name): + self = super().__new__( + cls, + filename=str(filename) if filename else None, + funcname=str(funcname) if funcname else None, + name=str(name) if name else None, + ) + #cls.conditions.set(self, tuple(str(s) if s else None + # for s in conditions or ())) + return self + + def validate(self): + """Fail if the object is invalid (i.e. 
init with bad data).""" + if not self.name: + raise TypeError('missing name') + else: + if not NAME_RE.match(self.name): + raise ValueError( + f'name must be an identifier, got {self.name!r}') + + # Symbols from a binary might not have filename/funcname info. + + if self.funcname: + if not self.filename: + raise TypeError('missing filename') + if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN: + raise ValueError( + f'name must be an identifier, got {self.funcname!r}') + + # XXX Require the filename (at least UNKONWN)? + # XXX Check the filename? + + @property + def islocal(self): + return self.funcname is not None + + def match(self, other, *, + match_files=(lambda f1, f2: f1 == f2), + ): + """Return True if the two match. + + At least one of the two must be completely valid (no UNKNOWN + anywhere). Otherwise False is returned. The remaining one + *may* have UNKNOWN for both funcname and filename. It must + have a valid name though. + + The caller is responsible for knowing which of the two is valid + (and which to use if both are valid). + """ + # First check the name. + if self.name is None: + return False + if other.name != self.name: + return False + + # Then check the filename. + if self.filename is None: + return False + if other.filename is None: + return False + if self.filename == UNKNOWN: + # "other" must be the valid one. + if other.funcname == UNKNOWN: + return False + elif self.funcname != UNKNOWN: + # XXX Try matching funcname even though we don't + # know the filename? + raise NotImplementedError + else: + return True + elif other.filename == UNKNOWN: + # "self" must be the valid one. + if self.funcname == UNKNOWN: + return False + elif other.funcname != UNKNOWN: + # XXX Try matching funcname even though we don't + # know the filename? + raise NotImplementedError + else: + return True + elif not match_files(self.filename, other.filename): + return False + + # Finally, check the funcname. 
+ if self.funcname == UNKNOWN: + # "other" must be the valid one. + if other.funcname == UNKNOWN: + return False + else: + return other.funcname is not None + elif other.funcname == UNKNOWN: + # "self" must be the valid one. + if self.funcname == UNKNOWN: + return False + else: + return self.funcname is not None + elif self.funcname == other.funcname: + # Both are valid. + return True + + return False diff --git a/Tools/c-analyzer/c_analyzer/common/show.py b/Tools/c-analyzer/c_analyzer/common/show.py new file mode 100644 index 00000000000..5f3cb1c2fb0 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/common/show.py @@ -0,0 +1,11 @@ + +def basic(variables, *, + _print=print): + """Print each row simply.""" + for var in variables: + if var.funcname: + line = f'{var.filename}:{var.funcname}():{var.name}' + else: + line = f'{var.filename}:{var.name}' + line = f'{line:<64} {var.vartype}' + _print(line) diff --git a/Tools/c-analyzer/c_analyzer_common/util.py b/Tools/c-analyzer/c_analyzer/common/util.py similarity index 100% rename from Tools/c-analyzer/c_analyzer_common/util.py rename to Tools/c-analyzer/c_analyzer/common/util.py diff --git a/Tools/c-analyzer/c_symbols/__init__.py b/Tools/c-analyzer/c_analyzer/parser/__init__.py similarity index 100% rename from Tools/c-analyzer/c_symbols/__init__.py rename to Tools/c-analyzer/c_analyzer/parser/__init__.py diff --git a/Tools/c-analyzer/c_parser/declarations.py b/Tools/c-analyzer/c_analyzer/parser/declarations.py similarity index 83% rename from Tools/c-analyzer/c_parser/declarations.py rename to Tools/c-analyzer/c_analyzer/parser/declarations.py index 19fa3ff4e66..f37072cccad 100644 --- a/Tools/c-analyzer/c_parser/declarations.py +++ b/Tools/c-analyzer/c_analyzer/parser/declarations.py @@ -2,6 +2,8 @@ import re import shlex import subprocess +from ..common.info import UNKNOWN + from . 
import source @@ -194,7 +196,28 @@ def parse_func(stmt, body): return name, signature -def parse_var(stmt): +#TYPE_SPEC = rf'''(?: +# )''' +#VAR_DECLARATOR = rf'''(?: +# )''' +#VAR_DECL = rf'''(?: +# {TYPE_SPEC}+ +# {VAR_DECLARATOR} +# \s* +# )''' +#VAR_DECLARATION = rf'''(?: +# {VAR_DECL} +# (?: = [^=] [^;]* )? +# ; +# )''' +# +# +#def parse_variable(decl, *, inFunc=False): +# """Return [(name, storage, vartype)] for the given variable declaration.""" +# ... + + +def _parse_var(stmt): """Return (name, vartype) for the given variable declaration.""" stmt = stmt.rstrip(';') m = LOCAL_STMT_START_RE.match(stmt) @@ -220,6 +243,27 @@ def parse_var(stmt): return name, vartype +def extract_storage(decl, *, infunc=None): + """Return (storage, vartype) based on the given declaration. + + The default storage is "implicit" (or "local" if infunc is True). + """ + if decl == UNKNOWN: + return decl + if decl.startswith('static '): + return 'static' + #return 'static', decl.partition(' ')[2].strip() + elif decl.startswith('extern '): + return 'extern' + #return 'extern', decl.partition(' ')[2].strip() + elif re.match('.*\b(static|extern)\b', decl): + raise NotImplementedError + elif infunc: + return 'local' + else: + return 'implicit' + + def parse_compound(stmt, blocks): """Return (headers, bodies) for the given compound statement.""" # XXX Identify declarations inside compound statements @@ -228,14 +272,17 @@ def parse_compound(stmt, blocks): def iter_variables(filename, *, + preprocessed=False, _iter_source_lines=source.iter_lines, _iter_global=iter_global_declarations, _iter_local=iter_local_statements, _parse_func=parse_func, - _parse_var=parse_var, + _parse_var=_parse_var, _parse_compound=parse_compound, ): """Yield (funcname, name, vartype) for every variable in the given file.""" + if preprocessed: + raise NotImplementedError lines = _iter_source_lines(filename) for stmt, body in _iter_global(lines): # At the file top-level we only have to worry about vars & funcs. 
@@ -256,7 +303,7 @@ def iter_variables(filename, *, def _iter_locals(lines, *, _iter_statements=iter_local_statements, - _parse_var=parse_var, + _parse_var=_parse_var, _parse_compound=parse_compound, ): compound = [lines] @@ -278,18 +325,15 @@ def _iter_locals(lines, *, compound.extend(bodies) -def iter_all(dirnames): +def iter_all(filename, *, + preprocessed=False, + ): """Yield a Declaration for each one found. If there are duplicates, due to preprocessor conditionals, then they are checked to make sure they are the same. """ - raise NotImplementedError - - -def iter_preprocessed(dirnames): - """Yield a Declaration for each one found. - - All source files are run through the preprocessor first. - """ - raise NotImplementedError + # XXX For the moment we cheat. + for funcname, name, decl in iter_variables(filename, + preprocessed=preprocessed): + yield 'variable', funcname, name, decl diff --git a/Tools/c-analyzer/c_analyzer/parser/find.py b/Tools/c-analyzer/c_analyzer/parser/find.py new file mode 100644 index 00000000000..3860d3d459b --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/parser/find.py @@ -0,0 +1,107 @@ +from ..common.info import UNKNOWN, ID + +from . import declarations + +# XXX need tests: +# * variables +# * variable +# * variable_from_id + + +def _iter_vars(filenames, preprocessed, *, + handle_id=None, + _iter_decls=declarations.iter_all, + ): + if handle_id is None: + handle_id = ID + + for filename in filenames or (): + for kind, funcname, name, decl in _iter_decls(filename, + preprocessed=preprocessed, + ): + if kind != 'variable': + continue + varid = handle_id(filename, funcname, name) + yield varid, decl + + +# XXX Add a "handle_var" arg like we did for get_resolver()? + +def variables(*filenames, + perfilecache=None, + preprocessed=False, + known=None, # for types + handle_id=None, + _iter_vars=_iter_vars, + ): + """Yield (varid, decl) for each variable found in the given files. 
+ + If "preprocessed" is provided (and not False/None) then it is used + to decide which tool to use to parse the source code after it runs + through the C preprocessor. Otherwise the raw + """ + if len(filenames) == 1 and not (filenames[0], str): + filenames, = filenames + + if perfilecache is None: + yield from _iter_vars(filenames, preprocessed) + else: + # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`). + raise NotImplementedError + + +def variable(name, filenames, *, + local=False, + perfilecache=None, + preprocessed=False, + handle_id=None, + _iter_vars=variables, + ): + """Return (varid, decl) for the first found variable that matches. + + If "local" is True then the first matching local variable in the + file will always be returned. To avoid that, pass perfilecache and + pop each variable from the cache after using it. + """ + for varid, decl in _iter_vars(filenames, + perfilecache=perfilecache, + preprocessed=preprocessed, + ): + if varid.name != name: + continue + if local: + if varid.funcname: + if varid.funcname == UNKNOWN: + raise NotImplementedError + return varid, decl + elif not varid.funcname: + return varid, decl + else: + return None, None # No matching variable was found. 
+ + +def variable_from_id(id, filenames, *, + perfilecache=None, + preprocessed=False, + handle_id=None, + _get_var=variable, + ): + """Return (varid, decl) for the first found variable that matches.""" + local = False + if isinstance(id, str): + name = id + else: + if id.funcname == UNKNOWN: + local = True + elif id.funcname: + raise NotImplementedError + + name = id.name + if id.filename and id.filename != UNKNOWN: + filenames = [id.filename] + return _get_var(name, filenames, + local=local, + perfilecache=perfilecache, + preprocessed=preprocessed, + handle_id=handle_id, + ) diff --git a/Tools/c-analyzer/c_parser/naive.py b/Tools/c-analyzer/c_analyzer/parser/naive.py similarity index 80% rename from Tools/c-analyzer/c_parser/naive.py rename to Tools/c-analyzer/c_analyzer/parser/naive.py index 160f96c279e..4a4822d84ff 100644 --- a/Tools/c-analyzer/c_parser/naive.py +++ b/Tools/c-analyzer/c_analyzer/parser/naive.py @@ -1,8 +1,7 @@ import re -from c_analyzer_common.info import UNKNOWN +from ..common.info import UNKNOWN, ID -from .info import Variable from .preprocessor import _iter_clean_lines @@ -55,7 +54,7 @@ def parse_variable_declaration(srcline): def parse_variable(srcline, funcname=None): - """Return a Variable for the variable declared on the line (or None).""" + """Return (varid, decl) for the variable declared on the line (or None).""" line = srcline.strip() # XXX Handle more than just static variables. 
@@ -74,7 +73,7 @@ def iter_variables(filename, *, _get_srclines=get_srclines, _default_parse_variable=parse_variable, ): - """Yield a Variable for each in the given source file.""" + """Yield (varid, decl) for each variable in the given source file.""" if parse_variable is None: parse_variable = _default_parse_variable @@ -99,13 +98,13 @@ def iter_variables(filename, *, info = parse_variable(line, funcname) if isinstance(info, list): for name, _funcname, decl in info: - yield Variable.from_parts(filename, _funcname, name, decl) + yield ID(filename, _funcname, name), decl continue name, decl = info if name is None: continue - yield Variable.from_parts(filename, funcname, name, decl) + yield ID(filename, funcname, name), decl def _match_varid(variable, name, funcname, ignored=None): @@ -134,12 +133,12 @@ def find_variable(filename, funcname, name, *, Return None if the variable is not found. """ - for variable in _iter_variables(filename, + for varid, decl in _iter_variables(filename, srccache=srccache, parse_variable=parse_variable, ): - if _match_varid(variable, name, funcname, ignored): - return variable + if _match_varid(varid, name, funcname, ignored): + return varid, decl else: return None @@ -149,10 +148,10 @@ def find_variables(varids, filenames=None, *, parse_variable=None, _find_symbol=find_variable, ): - """Yield a Variable for each ID. + """Yield (varid, decl) for each ID. If the variable is not found then its decl will be UNKNOWN. That - way there will be one resulting Variable per given ID. + way there will be one resulting variable per given ID. 
""" if srccache is _NOT_SET: srccache = {} @@ -163,18 +162,18 @@ def find_variables(varids, filenames=None, *, srcfiles = [varid.filename] else: if not filenames: - yield Variable(varid, UNKNOWN, UNKNOWN) + yield varid, UNKNOWN continue srcfiles = filenames for filename in srcfiles: - found = _find_varid(filename, varid.funcname, varid.name, - ignored=used, - srccache=srccache, - parse_variable=parse_variable, - ) - if found: - yield found - used.add(found) + varid, decl = _find_varid(filename, varid.funcname, varid.name, + ignored=used, + srccache=srccache, + parse_variable=parse_variable, + ) + if varid: + yield varid, decl + used.add(varid) break else: - yield Variable(varid, UNKNOWN, UNKNOWN) + yield varid, UNKNOWN diff --git a/Tools/c-analyzer/c_parser/preprocessor.py b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py similarity index 99% rename from Tools/c-analyzer/c_parser/preprocessor.py rename to Tools/c-analyzer/c_analyzer/parser/preprocessor.py index 0e2866e4873..41f306e5f80 100644 --- a/Tools/c-analyzer/c_parser/preprocessor.py +++ b/Tools/c-analyzer/c_analyzer/parser/preprocessor.py @@ -3,8 +3,7 @@ import shlex import os import re -from c_analyzer_common import util -from . 
import info +from ..common import util, info CONTINUATION = '\\' + os.linesep diff --git a/Tools/c-analyzer/c_parser/source.py b/Tools/c-analyzer/c_analyzer/parser/source.py similarity index 100% rename from Tools/c-analyzer/c_parser/source.py rename to Tools/c-analyzer/c_analyzer/parser/source.py diff --git a/Tools/c-analyzer/c_analyzer/symbols/__init__.py b/Tools/c-analyzer/c_analyzer/symbols/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Tools/c-analyzer/c_symbols/binary.py b/Tools/c-analyzer/c_analyzer/symbols/_nm.py similarity index 50% rename from Tools/c-analyzer/c_symbols/binary.py rename to Tools/c-analyzer/c_analyzer/symbols/_nm.py index e125dbd5b5e..f3a75a6d4ba 100644 --- a/Tools/c-analyzer/c_symbols/binary.py +++ b/Tools/c-analyzer/c_analyzer/symbols/_nm.py @@ -1,46 +1,24 @@ -import os import os.path import shutil -import sys -from c_analyzer_common import util, info -from . import source +from c_analyzer.common import util, info + from .info import Symbol -#PYTHON = os.path.join(REPO_ROOT, 'python') -PYTHON = sys.executable +# XXX need tests: +# * iter_symbols - -def iter_symbols(binary=PYTHON, dirnames=None, *, - # Alternately, use look_up_known_symbol() - # from c_globals.supported. - find_local_symbol=source.find_symbol, - _file_exists=os.path.exists, - _iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)), - ): - """Yield a Symbol for each symbol found in the binary.""" - if not _file_exists(binary): - raise Exception('executable missing (need to build it first?)') - - if find_local_symbol: - cache = {} - def find_local_symbol(name, *, _find=find_local_symbol): - return _find(name, dirnames, _perfilecache=cache) - else: - find_local_symbol = None - - if os.name == 'nt': - # XXX Support this. - raise NotImplementedError - else: - yield from _iter_symbols_nm(binary, find_local_symbol) - - -############################# -# binary format (e.g. 
ELF) +NM_KINDS = { + 'b': Symbol.KIND.VARIABLE, # uninitialized + 'd': Symbol.KIND.VARIABLE, # initialized + #'g': Symbol.KIND.VARIABLE, # uninitialized + #'s': Symbol.KIND.VARIABLE, # initialized + 't': Symbol.KIND.FUNCTION, + } SPECIAL_SYMBOLS = { + # binary format (e.g. ELF) '__bss_start', '__data_start', '__dso_handle', @@ -63,29 +41,23 @@ def _is_special_symbol(name): return False -############################# -# "nm" +def iter_symbols(binfile, *, + nm=None, + handle_id=None, + _which=shutil.which, + _run=util.run_cmd, + ): + """Yield a Symbol for each relevant entry reported by the "nm" command.""" + if nm is None: + nm = _which('nm') + if not nm: + raise NotImplementedError + if handle_id is None: + handle_id = info.ID -NM_KINDS = { - 'b': Symbol.KIND.VARIABLE, # uninitialized - 'd': Symbol.KIND.VARIABLE, # initialized - #'g': Symbol.KIND.VARIABLE, # uninitialized - #'s': Symbol.KIND.VARIABLE, # initialized - 't': Symbol.KIND.FUNCTION, - } - - -def _iter_symbols_nm(binary, find_local_symbol=None, - *, - _which=shutil.which, - _run=util.run_cmd, - ): - nm = _which('nm') - if not nm: - raise NotImplementedError argv = [nm, '--line-numbers', - binary, + binfile, ] try: output = _run(argv) @@ -95,23 +67,20 @@ def _iter_symbols_nm(binary, find_local_symbol=None, raise NotImplementedError raise for line in output.splitlines(): - (name, kind, external, filename, funcname, vartype, - ) = _parse_nm_line(line, - _find_local_symbol=find_local_symbol, - ) + (name, kind, external, filename, funcname, + ) = _parse_nm_line(line) if kind != Symbol.KIND.VARIABLE: continue elif _is_special_symbol(name): continue - assert vartype is None yield Symbol( - id=(filename, funcname, name), + id=handle_id(filename, funcname, name), kind=kind, external=external, ) -def _parse_nm_line(line, *, _find_local_symbol=None): +def _parse_nm_line(line): _origline = line _, _, line = line.partition(' ') # strip off the address line = line.strip() @@ -128,18 +97,9 @@ def _parse_nm_line(line, *, 
_find_local_symbol=None): else: filename = info.UNKNOWN - vartype = None name, islocal = _parse_nm_name(name, kind) - if islocal: - funcname = info.UNKNOWN - if _find_local_symbol is not None: - filename, funcname, vartype = _find_local_symbol(name) - filename = filename or info.UNKNOWN - funcname = funcname or info.UNKNOWN - else: - funcname = None - # XXX fine filename and vartype? - return name, kind, external, filename, funcname, vartype + funcname = info.UNKNOWN if islocal else None + return name, kind, external, filename, funcname def _parse_nm_name(name, kind): diff --git a/Tools/c-analyzer/c_analyzer/symbols/find.py b/Tools/c-analyzer/c_analyzer/symbols/find.py new file mode 100644 index 00000000000..85646523f7a --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/symbols/find.py @@ -0,0 +1,175 @@ +import os +import os.path +import shutil + +from ..common import files +from ..common.info import UNKNOWN, ID +from ..parser import find as p_find + +from . import _nm +from .info import Symbol + +# XXX need tests: +# * get_resolver() +# * get_resolver_from_dirs() +# * symbol() +# * symbols() +# * variables() + + +def _resolve_known(symbol, knownvars): + for varid in knownvars: + if symbol.match(varid): + break + else: + return None + return knownvars.pop(varid) + + +def get_resolver(filenames=None, known=None, *, + handle_var, + check_filename=None, + perfilecache=None, + preprocessed=False, + _from_source=p_find.variable_from_id, + ): + """Return a "resolver" func for the given known vars/types and filenames. + + "handle_var" is a callable that takes (ID, decl) and returns a + Variable. Variable.from_id is a suitable callable. + + The returned func takes a single Symbol and returns a corresponding + Variable. If the symbol was located then the variable will be + valid, populated with the corresponding information. Otherwise None + is returned. 
+ """ + knownvars = (known or {}).get('variables') + if knownvars: + knownvars = dict(knownvars) # a copy + if filenames: + if check_filename is None: + filenames = list(filenames) + def check_filename(filename): + return filename in filenames + def resolve(symbol): + # XXX Check "found" instead? + if not check_filename(symbol.filename): + return None + found = _resolve_known(symbol, knownvars) + if found is None: + #return None + varid, decl = _from_source(symbol, filenames, + perfilecache=perfilecache, + preprocessed=preprocessed, + ) + found = handle_var(varid, decl) + return found + else: + def resolve(symbol): + return _resolve_known(symbol, knownvars) + elif filenames: + def resolve(symbol): + varid, decl = _from_source(symbol, filenames, + perfilecache=perfilecache, + preprocessed=preprocessed, + ) + return handle_var(varid, decl) + else: + def resolve(symbol): + return None + return resolve + + +def get_resolver_from_dirs(dirnames, known=None, *, + handle_var, + suffixes=('.c',), + perfilecache=None, + preprocessed=False, + _iter_files=files.iter_files_by_suffix, + _get_resolver=get_resolver, + ): + """Return a "resolver" func for the given known vars/types and filenames. + + "dirnames" should be absolute paths. If not then they will be + resolved relative to CWD. + + See get_resolver(). + """ + dirnames = [d if d.endswith(os.path.sep) else d + os.path.sep + for d in dirnames] + filenames = _iter_files(dirnames, suffixes) + def check_filename(filename): + for dirname in dirnames: + if filename.startswith(dirname): + return True + else: + return False + return _get_resolver(filenames, known, + handle_var=handle_var, + check_filename=check_filename, + perfilecache=perfilecache, + preprocessed=preprocessed, + ) + + +def symbol(symbol, filenames, known=None, *, + perfilecache=None, + preprocessed=False, + handle_id=None, + _get_resolver=get_resolver, + ): + """Return a Variable for the one matching the given symbol. 
+ + "symbol" can be one of several objects: + + * Symbol - use the contained info + * name (str) - look for a global variable with that name + * (filename, name) - look for named global in file + * (filename, funcname, name) - look for named local in file + + A name is always required. If the filename is None, "", or + "UNKNOWN" then all files will be searched. If the funcname is + "" or "UNKNOWN" then only local variables will be searched for. + """ + resolve = _get_resolver(known, filenames, + handle_id=handle_id, + perfilecache=perfilecache, + preprocessed=preprocessed, + ) + return resolve(symbol) + + +def _get_platform_tool(): + if os.name == 'nt': + # XXX Support this. + raise NotImplementedError + elif nm := shutil.which('nm'): + return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi) + else: + raise NotImplementedError + + +def symbols(binfile, *, + handle_id=None, + _file_exists=os.path.exists, + _get_platform_tool=_get_platform_tool, + ): + """Yield a Symbol for each one found in the binary.""" + if not _file_exists(binfile): + raise Exception('executable missing (need to build it first?)') + + _iter_symbols = _get_platform_tool() + yield from _iter_symbols(binfile, handle_id) + + +def variables(binfile, *, + resolve, + handle_id=None, + _iter_symbols=symbols, + ): + """Yield (Variable, Symbol) for each found symbol.""" + for symbol in _iter_symbols(binfile, handle_id=handle_id): + if symbol.kind != Symbol.KIND.VARIABLE: + continue + var = resolve(symbol) or None + yield var, symbol diff --git a/Tools/c-analyzer/c_symbols/info.py b/Tools/c-analyzer/c_analyzer/symbols/info.py similarity index 93% rename from Tools/c-analyzer/c_symbols/info.py rename to Tools/c-analyzer/c_analyzer/symbols/info.py index f6ed52c8f07..96a251abb7c 100644 --- a/Tools/c-analyzer/c_symbols/info.py +++ b/Tools/c-analyzer/c_analyzer/symbols/info.py @@ -1,7 +1,7 @@ from collections import namedtuple -from c_analyzer_common.info import ID -from c_analyzer_common.util import 
classonly, _NTBase +from c_analyzer.common.info import ID +from c_analyzer.common.util import classonly, _NTBase class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')): diff --git a/Tools/c-analyzer/c_analyzer/variables/__init__.py b/Tools/c-analyzer/c_analyzer/variables/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Tools/c-analyzer/c_analyzer/variables/find.py b/Tools/c-analyzer/c_analyzer/variables/find.py new file mode 100644 index 00000000000..3fe7284fc00 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/find.py @@ -0,0 +1,75 @@ +from ..common import files +from ..common.info import UNKNOWN +from ..parser import ( + find as p_find, + ) +from ..symbols import ( + info as s_info, + find as s_find, + ) +from .info import Variable + +# XXX need tests: +# * vars_from_source + + +def _remove_cached(cache, var): + if not cache: + return + try: + cached = cache[var.filename] + cached.remove(var) + except (KeyError, IndexError): + pass + + +def vars_from_binary(binfile, *, + known=None, + filenames=None, + handle_id=None, + check_filename=None, + handle_var=Variable.from_id, + _iter_vars=s_find.variables, + _get_symbol_resolver=s_find.get_resolver, + ): + """Yield a Variable for each found Symbol. + + Details are filled in from the given "known" variables and types. + """ + cache = {} + resolve = _get_symbol_resolver(filenames, known, + handle_var=handle_var, + check_filename=check_filename, + perfilecache=cache, + ) + for var, symbol in _iter_vars(binfile, + resolve=resolve, + handle_id=handle_id, + ): + if var is None: + var = Variable(symbol.id, UNKNOWN, UNKNOWN) + yield var + _remove_cached(cache, var) + + +def vars_from_source(filenames, *, + preprocessed=None, + known=None, + handle_id=None, + handle_var=Variable.from_id, + iter_vars=p_find.variables, + ): + """Yield a Variable for each declaration in the raw source code. + + Details are filled in from the given "known" variables and types. 
+ """ + cache = {} + for varid, decl in iter_vars(filenames or (), + perfilecache=cache, + preprocessed=preprocessed, + known=known, + handle_id=handle_id, + ): + var = handle_var(varid, decl) + yield var + _remove_cached(cache, var) diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_analyzer/variables/info.py similarity index 61% rename from Tools/c-analyzer/c_parser/info.py rename to Tools/c-analyzer/c_analyzer/variables/info.py index a4e32d75eed..336a523c7a2 100644 --- a/Tools/c-analyzer/c_parser/info.py +++ b/Tools/c-analyzer/c_analyzer/variables/info.py @@ -1,8 +1,7 @@ from collections import namedtuple -import re -from c_analyzer_common import info, util -from c_analyzer_common.util import classonly, _NTBase +from ..common.info import ID, UNKNOWN +from ..common.util import classonly, _NTBase def normalize_vartype(vartype): @@ -16,26 +15,7 @@ def normalize_vartype(vartype): return str(vartype) -def extract_storage(decl, *, isfunc=False): - """Return (storage, vartype) based on the given declaration. - - The default storage is "implicit" or "local". - """ - if decl == info.UNKNOWN: - return decl, decl - if decl.startswith('static '): - return 'static', decl - #return 'static', decl.partition(' ')[2].strip() - elif decl.startswith('extern '): - return 'extern', decl - #return 'extern', decl.partition(' ')[2].strip() - elif re.match('.*\b(static|extern)\b', decl): - raise NotImplementedError - elif isfunc: - return 'local', decl - else: - return 'implicit', decl - +# XXX Variable.vartype -> decl (Declaration). 
class Variable(_NTBase, namedtuple('Variable', 'id storage vartype')): @@ -52,16 +32,23 @@ class Variable(_NTBase, @classonly def from_parts(cls, filename, funcname, name, decl, storage=None): + varid = ID(filename, funcname, name) if storage is None: - storage, decl = extract_storage(decl, isfunc=funcname) - id = info.ID(filename, funcname, name) - self = cls(id, storage, decl) + self = cls.from_id(varid, decl) + else: + self = cls(varid, storage, decl) return self + @classonly + def from_id(cls, varid, decl): + from ..parser.declarations import extract_storage + storage = extract_storage(decl, infunc=varid.funcname) + return cls(varid, storage, decl) + def __new__(cls, id, storage, vartype): self = super().__new__( cls, - id=info.ID.from_raw(id), + id=ID.from_raw(id), storage=str(storage) if storage else None, vartype=normalize_vartype(vartype) if vartype else None, ) @@ -77,10 +64,10 @@ class Variable(_NTBase, if not self.id: raise TypeError('missing id') - if not self.filename or self.filename == info.UNKNOWN: + if not self.filename or self.filename == UNKNOWN: raise TypeError(f'id missing filename ({self.id})') - if self.funcname and self.funcname == info.UNKNOWN: + if self.funcname and self.funcname == UNKNOWN: raise TypeError(f'id missing funcname ({self.id})') self.id.validate() @@ -89,12 +76,12 @@ class Variable(_NTBase, """Fail if the object is invalid (i.e. 
init with bad data).""" self._validate_id() - if self.storage is None or self.storage == info.UNKNOWN: + if self.storage is None or self.storage == UNKNOWN: raise TypeError('missing storage') elif self.storage not in self.STORAGE: raise ValueError(f'unsupported storage {self.storage:r}') - if self.vartype is None or self.vartype == info.UNKNOWN: + if self.vartype is None or self.vartype == UNKNOWN: raise TypeError('missing vartype') @property diff --git a/Tools/c-analyzer/c_analyzer/variables/known.py b/Tools/c-analyzer/c_analyzer/variables/known.py new file mode 100644 index 00000000000..aa2934a069e --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/variables/known.py @@ -0,0 +1,91 @@ +import csv + +from ..common.info import ID, UNKNOWN +from ..common.util import read_tsv +from .info import Variable + + +# XXX need tests: +# * read_file() +# * look_up_variable() + + +COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration') +HEADER = '\t'.join(COLUMNS) + + +def read_file(infile, *, + _read_tsv=read_tsv, + ): + """Yield (kind, id, decl) for each row in the data file. + + The caller is responsible for validating each row. 
+ """ + for row in _read_tsv(infile, HEADER): + filename, funcname, name, kind, declaration = row + if not funcname or funcname == '-': + funcname = None + id = ID(filename, funcname, name) + yield kind, id, declaration + + +def from_file(infile, *, + handle_var=Variable.from_id, + _read_file=read_file, + ): + """Return the info for known declarations in the given file.""" + known = { + 'variables': {}, + #'types': {}, + #'constants': {}, + #'macros': {}, + } + for kind, id, decl in _read_file(infile): + if kind == 'variable': + values = known['variables'] + value = handle_var(id, decl) + else: + raise ValueError(f'unsupported kind in row {row}') + value.validate() + values[id] = value + return known + + +def look_up_variable(varid, knownvars, *, + match_files=(lambda f1, f2: f1 == f2), + ): + """Return the known Variable matching the given ID. + + "knownvars" is a mapping of ID to Variable. + + "match_files" is used to verify if two filenames point to + the same file. + + If no match is found then None is returned. 
+ """ + if not knownvars: + return None + + if varid.funcname == UNKNOWN: + if not varid.filename or varid.filename == UNKNOWN: + for varid in knownvars: + if not varid.funcname: + continue + if varid.name == varid.name: + return knownvars[varid] + else: + return None + else: + for varid in knownvars: + if not varid.funcname: + continue + if not match_files(varid.filename, varid.filename): + continue + if varid.name == varid.name: + return knownvars[varid] + else: + return None + elif not varid.filename or varid.filename == UNKNOWN: + raise NotImplementedError + else: + return knownvars.get(varid.id) diff --git a/Tools/c-analyzer/c_analyzer_common/__init__.py b/Tools/c-analyzer/c_analyzer_common/__init__.py deleted file mode 100644 index 888b16ff41d..00000000000 --- a/Tools/c-analyzer/c_analyzer_common/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -import os.path - - -PKG_ROOT = os.path.dirname(__file__) -DATA_DIR = os.path.dirname(PKG_ROOT) -REPO_ROOT = os.path.dirname( - os.path.dirname(DATA_DIR)) - -SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [ - 'Include', - 'Python', - 'Parser', - 'Objects', - 'Modules', - ]] - - -# Clean up the namespace. -del os diff --git a/Tools/c-analyzer/c_analyzer_common/info.py b/Tools/c-analyzer/c_analyzer_common/info.py deleted file mode 100644 index e2173804064..00000000000 --- a/Tools/c-analyzer/c_analyzer_common/info.py +++ /dev/null @@ -1,69 +0,0 @@ -from collections import namedtuple -import re - -from .util import classonly, _NTBase - - -UNKNOWN = '???' - -NAME_RE = re.compile(r'^([a-zA-Z]|_\w*[a-zA-Z]\w*|[a-zA-Z]\w*)$') - - -class ID(_NTBase, namedtuple('ID', 'filename funcname name')): - """A unique ID for a single symbol or declaration.""" - - __slots__ = () - # XXX Add optional conditions (tuple of strings) field. 
- #conditions = Slot() - - @classonly - def from_raw(cls, raw): - if not raw: - return None - if isinstance(raw, str): - return cls(None, None, raw) - try: - name, = raw - filename = None - except ValueError: - try: - filename, name = raw - except ValueError: - return super().from_raw(raw) - return cls(filename, None, name) - - def __new__(cls, filename, funcname, name): - self = super().__new__( - cls, - filename=str(filename) if filename else None, - funcname=str(funcname) if funcname else None, - name=str(name) if name else None, - ) - #cls.conditions.set(self, tuple(str(s) if s else None - # for s in conditions or ())) - return self - - def validate(self): - """Fail if the object is invalid (i.e. init with bad data).""" - if not self.name: - raise TypeError('missing name') - else: - if not NAME_RE.match(self.name): - raise ValueError( - f'name must be an identifier, got {self.name!r}') - - # Symbols from a binary might not have filename/funcname info. - - if self.funcname: - if not self.filename: - raise TypeError('missing filename') - if not NAME_RE.match(self.funcname) and self.funcname != UNKNOWN: - raise ValueError( - f'name must be an identifier, got {self.funcname!r}') - - # XXX Require the filename (at least UNKONWN)? - # XXX Check the filename? - - @property - def islocal(self): - return self.funcname is not None diff --git a/Tools/c-analyzer/c_analyzer_common/known.py b/Tools/c-analyzer/c_analyzer_common/known.py deleted file mode 100644 index dec1e1d2e09..00000000000 --- a/Tools/c-analyzer/c_analyzer_common/known.py +++ /dev/null @@ -1,74 +0,0 @@ -import csv -import os.path - -from c_parser.info import Variable - -from . 
import DATA_DIR -from .info import ID, UNKNOWN -from .util import read_tsv - - -DATA_FILE = os.path.join(DATA_DIR, 'known.tsv') - -COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration') -HEADER = '\t'.join(COLUMNS) - - -# XXX need tests: -# * from_file() - -def from_file(infile, *, - _read_tsv=read_tsv, - ): - """Return the info for known declarations in the given file.""" - known = { - 'variables': {}, - #'types': {}, - #'constants': {}, - #'macros': {}, - } - for row in _read_tsv(infile, HEADER): - filename, funcname, name, kind, declaration = row - if not funcname or funcname == '-': - funcname = None - id = ID(filename, funcname, name) - if kind == 'variable': - values = known['variables'] - if funcname: - storage = _get_storage(declaration) or 'local' - else: - storage = _get_storage(declaration) or 'implicit' - value = Variable(id, storage, declaration) - else: - raise ValueError(f'unsupported kind in row {row}') - value.validate() -# if value.name == 'id' and declaration == UNKNOWN: -# # None of these are variables. 
-# declaration = 'int id'; -# else: -# value.validate() - values[id] = value - return known - - -def _get_storage(decl): - # statics - if decl.startswith('static '): - return 'static' - if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): - return 'static' - if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')): - return 'static' - if decl.startswith('PyDoc_VAR('): - return 'static' - if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): - return 'static' - if decl.startswith('WRAP_METHOD('): - return 'static' - # public extern - if decl.startswith('extern '): - return 'extern' - if decl.startswith('PyAPI_DATA('): - return 'extern' - # implicit or local - return None diff --git a/Tools/c-analyzer/c_globals/find.py b/Tools/c-analyzer/c_globals/find.py deleted file mode 100644 index a51b947cbdf..00000000000 --- a/Tools/c-analyzer/c_globals/find.py +++ /dev/null @@ -1,95 +0,0 @@ -from c_analyzer_common import SOURCE_DIRS -from c_analyzer_common.info import UNKNOWN -from c_symbols import ( - info as s_info, - binary as b_symbols, - source as s_symbols, - resolve, - ) -from c_parser import info, declarations - - -# XXX needs tests: -# * iter_variables - -def globals_from_binary(binfile=b_symbols.PYTHON, *, - knownvars=None, - dirnames=None, - _iter_symbols=b_symbols.iter_symbols, - _resolve=resolve.symbols_to_variables, - _get_symbol_resolver=resolve.get_resolver, - ): - """Yield a Variable for each found Symbol. - - Details are filled in from the given "known" variables and types. - """ - symbols = _iter_symbols(binfile, find_local_symbol=None) - #symbols = list(symbols) - for variable in _resolve(symbols, - resolve=_get_symbol_resolver(knownvars, dirnames), - ): - # Skip each non-global variable (unless we couldn't find it). - # XXX Drop the "UNKNOWN" condition? 
- if not variable.isglobal and variable.vartype != UNKNOWN: - continue - yield variable - - -def globals_from_declarations(dirnames=SOURCE_DIRS, *, - known=None, - ): - """Yield a Variable for each found declaration. - - Details are filled in from the given "known" variables and types. - """ - raise NotImplementedError - - -def iter_variables(kind='platform', *, - known=None, - dirnames=None, - _resolve_symbols=resolve.symbols_to_variables, - _get_symbol_resolver=resolve.get_resolver, - _symbols_from_binary=b_symbols.iter_symbols, - _symbols_from_source=s_symbols.iter_symbols, - _iter_raw=declarations.iter_all, - _iter_preprocessed=declarations.iter_preprocessed, - ): - """Yield a Variable for each one found (e.g. in files).""" - kind = kind or 'platform' - - if kind == 'symbols': - knownvars = (known or {}).get('variables') - yield from _resolve_symbols( - _symbols_from_source(dirnames, known), - resolve=_get_symbol_resolver(knownvars, dirnames), - ) - elif kind == 'platform': - knownvars = (known or {}).get('variables') - yield from _resolve_symbols( - _symbols_from_binary(find_local_symbol=None), - resolve=_get_symbol_resolver(knownvars, dirnames), - ) - elif kind == 'declarations': - for decl in _iter_raw(dirnames): - if not isinstance(decl, info.Variable): - continue - yield decl - elif kind == 'preprocessed': - for decl in _iter_preprocessed(dirnames): - if not isinstance(decl, info.Variable): - continue - yield decl - else: - raise ValueError(f'unsupported kind {kind!r}') - - -def globals(dirnames, known, *, - kind=None, # Use the default. 
- _iter_variables=iter_variables, - ): - """Return a list of (StaticVar, ) for each found global var.""" - for found in _iter_variables(kind, known=known, dirnames=dirnames): - if not found.isglobal: - continue - yield found diff --git a/Tools/c-analyzer/c_globals/show.py b/Tools/c-analyzer/c_globals/show.py deleted file mode 100644 index f4298b17b67..00000000000 --- a/Tools/c-analyzer/c_globals/show.py +++ /dev/null @@ -1,16 +0,0 @@ - -def basic(globals, *, - _print=print): - """Print each row simply.""" - for variable in globals: - if variable.funcname: - line = f'{variable.filename}:{variable.funcname}():{variable.name}' - else: - line = f'{variable.filename}:{variable.name}' - vartype = variable.vartype - #if vartype.startswith('static '): - # vartype = vartype.partition(' ')[2] - #else: - # vartype = '=' + vartype - line = f'{line:<64} {vartype}' - _print(line) diff --git a/Tools/c-analyzer/c_symbols/resolve.py b/Tools/c-analyzer/c_symbols/resolve.py deleted file mode 100644 index 56210cefeb8..00000000000 --- a/Tools/c-analyzer/c_symbols/resolve.py +++ /dev/null @@ -1,147 +0,0 @@ -import os.path - -from c_analyzer_common import files -from c_analyzer_common.info import UNKNOWN -from c_parser import declarations, info -from .info import Symbol -from .source import _find_symbol - - -# XXX need tests: -# * look_up_known_symbol() -# * symbol_from_source() -# * get_resolver() -# * symbols_to_variables() - -def look_up_known_symbol(symbol, knownvars, *, - match_files=(lambda f1, f2: f1 == f2), - ): - """Return the known variable matching the given symbol. - - "knownvars" is a mapping of common.ID to parser.Variable. - - "match_files" is used to verify if two filenames point to - the same file. 
- """ - if not knownvars: - return None - - if symbol.funcname == UNKNOWN: - if not symbol.filename or symbol.filename == UNKNOWN: - for varid in knownvars: - if not varid.funcname: - continue - if varid.name == symbol.name: - return knownvars[varid] - else: - return None - else: - for varid in knownvars: - if not varid.funcname: - continue - if not match_files(varid.filename, symbol.filename): - continue - if varid.name == symbol.name: - return knownvars[varid] - else: - return None - elif not symbol.filename or symbol.filename == UNKNOWN: - raise NotImplementedError - else: - return knownvars.get(symbol.id) - - -def find_in_source(symbol, dirnames, *, - _perfilecache={}, - _find_symbol=_find_symbol, - _iter_files=files.iter_files_by_suffix, - ): - """Return the Variable matching the given Symbol. - - If there is no match then return None. - """ - if symbol.filename and symbol.filename != UNKNOWN: - filenames = [symbol.filename] - else: - filenames = _iter_files(dirnames, ('.c', '.h')) - - if symbol.funcname and symbol.funcname != UNKNOWN: - raise NotImplementedError - - (filename, funcname, decl - ) = _find_symbol(symbol.name, filenames, _perfilecache) - if filename == UNKNOWN: - return None - return info.Variable.from_parts(filename, funcname, symbol.name, decl) - - -def get_resolver(knownvars=None, dirnames=None, *, - _look_up_known=look_up_known_symbol, - _from_source=find_in_source, - ): - """Return a "resolver" func for the given known vars and dirnames. - - The func takes a single Symbol and returns a corresponding Variable. - If the symbol was located then the variable will be valid, populated - with the corresponding information. Otherwise None is returned. 
- """ - if knownvars: - knownvars = dict(knownvars) # a copy - def resolve_known(symbol): - found = _look_up_known(symbol, knownvars) - if found is None: - return None - elif symbol.funcname == UNKNOWN: - knownvars.pop(found.id) - elif not symbol.filename or symbol.filename == UNKNOWN: - knownvars.pop(found.id) - return found - if dirnames: - def resolve(symbol): - found = resolve_known(symbol) - if found is None: - return None - #return _from_source(symbol, dirnames) - else: - for dirname in dirnames: - if not dirname.endswith(os.path.sep): - dirname += os.path.sep - if found.filename.startswith(dirname): - break - else: - return None - return found - else: - resolve = resolve_known - elif dirnames: - def resolve(symbol): - return _from_source(symbol, dirnames) - else: - def resolve(symbol): - return None - return resolve - - -def symbols_to_variables(symbols, *, - resolve=(lambda s: look_up_known_symbol(s, None)), - ): - """Yield the variable the matches each given symbol. - - Use get_resolver() for a "resolve" func to use. - """ - for symbol in symbols: - if isinstance(symbol, info.Variable): - # XXX validate? 
- yield symbol - continue - if symbol.kind != Symbol.KIND.VARIABLE: - continue - resolved = resolve(symbol) - if resolved is None: - #raise NotImplementedError(symbol) - resolved = info.Variable( - id=symbol.id, - storage=UNKNOWN, - vartype=UNKNOWN, - ) - yield resolved diff --git a/Tools/c-analyzer/c_symbols/source.py b/Tools/c-analyzer/c_symbols/source.py deleted file mode 100644 index a7248104c94..00000000000 --- a/Tools/c-analyzer/c_symbols/source.py +++ /dev/null @@ -1,58 +0,0 @@ -from c_analyzer_common import files -from c_analyzer_common.info import UNKNOWN -from c_parser import declarations - - -# XXX need tests: -# * find_symbol() - -def find_symbol(name, dirnames, *, - _perfilecache, - _iter_files=files.iter_files_by_suffix, - **kwargs - ): - """Return (filename, funcname, vartype) for the matching Symbol.""" - filenames = _iter_files(dirnames, ('.c', '.h')) - return _find_symbol(name, filenames, _perfilecache, **kwargs) - - -def _get_symbols(filename, *, - _iter_variables=declarations.iter_variables, - ): - """Return the list of Symbols found in the given file.""" - symbols = {} - for funcname, name, vartype in _iter_variables(filename): - if not funcname: - continue - try: - instances = symbols[name] - except KeyError: - instances = symbols[name] = [] - instances.append((funcname, vartype)) - return symbols - - -def _find_symbol(name, filenames, _perfilecache, *, - _get_local_symbols=_get_symbols, - ): - for filename in filenames: - try: - symbols = _perfilecache[filename] - except KeyError: - symbols = _perfilecache[filename] = _get_local_symbols(filename) - - try: - instances = symbols[name] - except KeyError: - continue - - funcname, vartype = instances.pop(0) - if not instances: - symbols.pop(name) - return filename, funcname, vartype - else: - return UNKNOWN, UNKNOWN, UNKNOWN - - -def iter_symbols(): - raise NotImplementedError diff --git a/Tools/c-analyzer/c_globals/README b/Tools/c-analyzer/cpython/README similarity index 100% rename from 
Tools/c-analyzer/c_globals/README rename to Tools/c-analyzer/cpython/README diff --git a/Tools/c-analyzer/cpython/__init__.py b/Tools/c-analyzer/cpython/__init__.py new file mode 100644 index 00000000000..ae45b424e3c --- /dev/null +++ b/Tools/c-analyzer/cpython/__init__.py @@ -0,0 +1,29 @@ +import os.path +import sys + + +TOOL_ROOT = os.path.abspath( + os.path.dirname( # c-analyzer/ + os.path.dirname(__file__))) # cpython/ +DATA_DIR = TOOL_ROOT +REPO_ROOT = ( + os.path.dirname( # .. + os.path.dirname(TOOL_ROOT))) # Tools/ + +INCLUDE_DIRS = [os.path.join(REPO_ROOT, name) for name in [ + 'Include', + ]] +SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [ + 'Python', + 'Parser', + 'Objects', + 'Modules', + ]] + +#PYTHON = os.path.join(REPO_ROOT, 'python') +PYTHON = sys.executable + + +# Clean up the namespace. +del sys +del os diff --git a/Tools/c-analyzer/c_globals/__main__.py b/Tools/c-analyzer/cpython/__main__.py similarity index 68% rename from Tools/c-analyzer/c_globals/__main__.py rename to Tools/c-analyzer/cpython/__main__.py index 9570fb6a14c..6b0f9bcb968 100644 --- a/Tools/c-analyzer/c_globals/__main__.py +++ b/Tools/c-analyzer/cpython/__main__.py @@ -1,42 +1,42 @@ import argparse -import os.path import re import sys -from c_analyzer_common import SOURCE_DIRS, REPO_ROOT -from c_analyzer_common.info import UNKNOWN -from c_analyzer_common.known import ( +from c_analyzer.common import show +from c_analyzer.common.info import UNKNOWN + +from . import SOURCE_DIRS +from .find import supported_vars +from .known import ( from_file as known_from_file, DATA_FILE as KNOWN_FILE, ) -from . 
import find, show -from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object - - -def _match_unused_global(variable, knownvars, used): - found = [] - for varid in knownvars: - if varid in used: - continue - if varid.funcname is not None: - continue - if varid.name != variable.name: - continue - if variable.filename and variable.filename != UNKNOWN: - if variable.filename == varid.filename: - found.append(varid) - else: - found.append(varid) - return found +from .supported import IGNORED_FILE def _check_results(unknown, knownvars, used): + def _match_unused_global(variable): + found = [] + for varid in knownvars: + if varid in used: + continue + if varid.funcname is not None: + continue + if varid.name != variable.name: + continue + if variable.filename and variable.filename != UNKNOWN: + if variable.filename == varid.filename: + found.append(varid) + else: + found.append(varid) + return found + badknown = set() for variable in sorted(unknown): msg = None if variable.funcname != UNKNOWN: msg = f'could not find global symbol {variable.id}' - elif m := _match_unused_global(variable, knownvars, used): + elif m := _match_unused_global(variable): assert isinstance(m, list) badknown.update(m) elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are. @@ -65,32 +65,29 @@ def _check_results(unknown, knownvars, used): raise Exception('could not find all symbols') -def _find_globals(dirnames, known, ignored): - if dirnames == SOURCE_DIRS: - dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames] - - ignored = ignored_from_file(ignored) - known = known_from_file(known) +# XXX Move this check to its own command. 
+def cmd_check_cache(cmd, *, + known=KNOWN_FILE, + ignored=IGNORED_FILE, + _known_from_file=known_from_file, + _find=supported_vars, + ): + known = _known_from_file(known) used = set() unknown = set() - knownvars = (known or {}).get('variables') - for variable in find.globals_from_binary(knownvars=knownvars, - dirnames=dirnames): - #for variable in find.globals(dirnames, known, kind='platform'): - if variable.vartype == UNKNOWN: - unknown.add(variable) + for var, supported in _find(known=known, ignored=ignored): + if supported is None: + unknown.add(var) continue - yield variable, is_supported(variable, ignored, known) - used.add(variable.id) - - #_check_results(unknown, knownvars, used) + used.add(var.id) + _check_results(unknown, known['variables'], used) -def cmd_check(cmd, dirs=SOURCE_DIRS, *, - ignored=IGNORED_FILE, +def cmd_check(cmd, *, known=KNOWN_FILE, - _find=_find_globals, + ignored=IGNORED_FILE, + _find=supported_vars, _show=show.basic, _print=print, ): @@ -100,7 +97,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *, In the failure case, the list of unsupported variables will be printed out. """ - unsupported = [v for v, s in _find(dirs, known, ignored) if not s] + unsupported = [] + for var, supported in _find(known=known, ignored=ignored): + if not supported: + unsupported.append(var) + if not unsupported: #_print('okay') return @@ -112,11 +113,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *, sys.exit(1) -def cmd_show(cmd, dirs=SOURCE_DIRS, *, - ignored=IGNORED_FILE, +def cmd_show(cmd, *, known=KNOWN_FILE, + ignored=IGNORED_FILE, skip_objects=False, - _find=_find_globals, + _find=supported_vars, _show=show.basic, _print=print, ): @@ -127,10 +128,12 @@ def cmd_show(cmd, dirs=SOURCE_DIRS, *, """ allsupported = [] allunsupported = [] - for found, supported in _find(dirs, known, ignored): - if skip_objects: # XXX Support proper filters instead. 
- if _is_object(found.vartype): - continue + for found, supported in _find(known=known, + ignored=ignored, + skip_objects=skip_objects, + ): + if supported is None: + continue (allsupported if supported else allunsupported ).append(found) @@ -165,9 +168,9 @@ def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None): common.add_argument('--known', metavar='FILE', default=KNOWN_FILE, help='path to file that lists known types') - common.add_argument('dirs', metavar='DIR', nargs='*', - default=SOURCE_DIRS, - help='a directory to check') + #common.add_argument('dirs', metavar='DIR', nargs='*', + # default=SOURCE_DIRS, + # help='a directory to check') parser = argparse.ArgumentParser( prog=prog, diff --git a/Tools/c-analyzer/c_analyzer_common/_generate.py b/Tools/c-analyzer/cpython/_generate.py similarity index 97% rename from Tools/c-analyzer/c_analyzer_common/_generate.py rename to Tools/c-analyzer/cpython/_generate.py index 9b2fc9edb5c..4c340acf99e 100644 --- a/Tools/c-analyzer/c_analyzer_common/_generate.py +++ b/Tools/c-analyzer/cpython/_generate.py @@ -1,15 +1,16 @@ # The code here consists of hacks for pre-populating the known.tsv file. -from c_parser.preprocessor import _iter_clean_lines -from c_parser.naive import ( +from c_analyzer.parser.preprocessor import _iter_clean_lines +from c_analyzer.parser.naive import ( iter_variables, parse_variable_declaration, find_variables, ) -from c_parser.info import Variable +from c_analyzer.common.known import HEADER as KNOWN_HEADER +from c_analyzer.common.info import UNKNOWN, ID +from c_analyzer.variables import Variable +from c_analyzer.util import write_tsv from . 
import SOURCE_DIRS, REPO_ROOT -from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER -from .info import UNKNOWN, ID -from .util import write_tsv +from .known import DATA_FILE as KNOWN_FILE from .files import iter_cpython_files diff --git a/Tools/c-analyzer/cpython/files.py b/Tools/c-analyzer/cpython/files.py new file mode 100644 index 00000000000..543097af7bc --- /dev/null +++ b/Tools/c-analyzer/cpython/files.py @@ -0,0 +1,31 @@ +import os.path + +from c_analyzer.common.files import ( + C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix, + ) + +from . import SOURCE_DIRS, REPO_ROOT + +# XXX need tests: +# * iter_files() + + +def iter_files(*, + walk=walk_tree, + _files=iter_files_by_suffix, + ): + """Yield each source file under SOURCE_DIRS (excluded trees skipped).""" + excludedtrees = [ + os.path.join('Include', 'cpython', ''), + ] + def is_excluded(filename): + for root in excludedtrees: + if filename.startswith(root): + return True + return False + for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT, + walk=walk, + ): + if is_excluded(filename): + continue + yield filename diff --git a/Tools/c-analyzer/cpython/find.py b/Tools/c-analyzer/cpython/find.py new file mode 100644 index 00000000000..a7bc0b477b8 --- /dev/null +++ b/Tools/c-analyzer/cpython/find.py @@ -0,0 +1,101 @@ +import os.path + +from c_analyzer.common import files +from c_analyzer.common.info import UNKNOWN, ID +from c_analyzer.variables import find as _common + +from .
import SOURCE_DIRS, PYTHON, REPO_ROOT +from .known import ( + from_file as known_from_file, + DATA_FILE as KNOWN_FILE, + ) +from .supported import ( + ignored_from_file, IGNORED_FILE, is_supported, _is_object, + ) + +# XXX need tests: +# * vars_from_binary() +# * vars_from_source() +# * supported_vars() + + +def _handle_id(filename, funcname, name, *, + _relpath=os.path.relpath, + ): + filename = _relpath(filename, REPO_ROOT) + return ID(filename, funcname, name) + + +def vars_from_binary(*, + known=KNOWN_FILE, + _known_from_file=known_from_file, + _iter_files=files.iter_files_by_suffix, + _iter_vars=_common.vars_from_binary, + ): + """Yield a Variable for each found Symbol. + + Details are filled in from the given "known" variables and types. + """ + if isinstance(known, str): + known = _known_from_file(known) + dirnames = SOURCE_DIRS + suffixes = ('.c',) + filenames = _iter_files(dirnames, suffixes) + # XXX For now we only use known variables (no source lookup). + filenames = None + yield from _iter_vars(PYTHON, + known=known, + filenames=filenames, + handle_id=_handle_id, + check_filename=(lambda n: True), + ) + + +def vars_from_source(*, + preprocessed=None, + known=KNOWN_FILE, + _known_from_file=known_from_file, + _iter_files=files.iter_files_by_suffix, + _iter_vars=_common.vars_from_source, + ): + """Yield a Variable for each declaration in the raw source code. + + Details are filled in from the given "known" variables and types. 
+ """ + if isinstance(known, str): + known = _known_from_file(known) + dirnames = SOURCE_DIRS + suffixes = ('.c',) + filenames = _iter_files(dirnames, suffixes) + yield from _iter_vars(filenames, + preprocessed=preprocessed, + known=known, + handle_id=_handle_id, + ) + + +def supported_vars(*, + known=KNOWN_FILE, + ignored=IGNORED_FILE, + skip_objects=False, + _known_from_file=known_from_file, + _ignored_from_file=ignored_from_file, + _iter_vars=vars_from_binary, + _is_supported=is_supported, + ): + """Yield (var, supported) for each global var (None if type unknown).""" + if isinstance(known, str): + known = _known_from_file(known) + if isinstance(ignored, str): + ignored = _ignored_from_file(ignored) + + for var in _iter_vars(known=known): + if not var.isglobal: + continue + elif var.vartype == UNKNOWN: + yield var, None + # XXX Support proper filters instead. + elif skip_objects and _is_object(var.vartype): + continue + else: + yield var, _is_supported(var, ignored, known) diff --git a/Tools/c-analyzer/cpython/known.py b/Tools/c-analyzer/cpython/known.py new file mode 100644 index 00000000000..c3cc2c06026 --- /dev/null +++ b/Tools/c-analyzer/cpython/known.py @@ -0,0 +1,66 @@ +import csv +import os.path + +from c_analyzer.parser.declarations import extract_storage +from c_analyzer.variables import known as _common +from c_analyzer.variables.info import Variable + +from .
import DATA_DIR + + +# XXX need tests: +# * from_file() +# * look_up_variable() + + +DATA_FILE = os.path.join(DATA_DIR, 'known.tsv') + + +def _get_storage(decl, infunc): + # statics + if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')): + return 'static' + if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')): + return 'static' + if decl.startswith('PyDoc_VAR('): + return 'static' + if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')): + return 'static' + if decl.startswith('WRAP_METHOD('): + return 'static' + # public extern + if decl.startswith('PyAPI_DATA('): + return 'extern' + # Fall back to the normal handler. + return extract_storage(decl, infunc=infunc) + + +def _handle_var(varid, decl): +# if varid.name == 'id' and decl == UNKNOWN: +# # None of these are variables. +# decl = 'int id'; + storage = _get_storage(decl, varid.funcname) + return Variable(varid, storage, decl) + + +def from_file(infile=DATA_FILE, *, + _from_file=_common.from_file, + _handle_var=_handle_var, + ): + """Return the info for known declarations in the given file.""" + return _from_file(infile, handle_var=_handle_var) + + +def look_up_variable(varid, knownvars, *, + _lookup=_common.look_up_variable, + ): + """Return the known variable matching the given ID. + + "knownvars" is a mapping of ID to Variable. + + The matching itself is delegated to "_lookup" (by default + look_up_variable() from c_analyzer.variables.known). + + If no match is found then None is returned.
+ """ + return _lookup(varid, knownvars) diff --git a/Tools/c-analyzer/c_globals/supported.py b/Tools/c-analyzer/cpython/supported.py similarity index 97% rename from Tools/c-analyzer/c_globals/supported.py rename to Tools/c-analyzer/cpython/supported.py index d185daa2463..18786eefd8d 100644 --- a/Tools/c-analyzer/c_globals/supported.py +++ b/Tools/c-analyzer/cpython/supported.py @@ -1,9 +1,13 @@ import os.path import re -from c_analyzer_common import DATA_DIR -from c_analyzer_common.info import ID -from c_analyzer_common.util import read_tsv, write_tsv +from c_analyzer.common.info import ID +from c_analyzer.common.util import read_tsv, write_tsv + +from . import DATA_DIR + +# XXX need tests: +# * generate / script IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv') @@ -379,11 +383,12 @@ def _generate_ignored_file(variables, filename=None, *, if __name__ == '__main__': - from c_analyzer_common import SOURCE_DIRS - from c_analyzer_common.known import ( + from cpython import SOURCE_DIRS + from cpython.known import ( from_file as known_from_file, DATA_FILE as KNOWN_FILE, ) + # XXX This is wrong! from . import find known = known_from_file(KNOWN_FILE) knownvars = (known or {}).get('variables')