bpo-36876: Fix the C analyzer tool. (GH-22841)

The original tool wasn't working right, and it was simpler to create a new one, partially reusing some of the old code. At this point the tool runs properly on the master branch. (Try: ./python Tools/c-analyzer/c-analyzer.py analyze.)  It takes ~40 seconds on my machine to analyze the full CPython code base.

Note that we'll need to iron out some OS-specific stuff (e.g. the preprocessor). That's okay for now though, since this tool isn't used in our workflow yet. We will also need to verify the analysis results in detail before activating the check in CI, though I'm pretty sure it's close.

https://bugs.python.org/issue36876
Eric Snow 2020-10-22 18:42:51 -06:00 committed by GitHub
parent ec388cfb4e
commit 345cd37abe
92 changed files with 8868 additions and 10539 deletions


@@ -1,15 +0,0 @@
import contextlib
import os.path
import test.test_tools
from test.support import load_package_tests
@contextlib.contextmanager
def tool_imports_for_tests():
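"""Temporarily make the c-analyzer tool's modules importable, skipping the test if the tool is missing."""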
test.test_tools.skip_if_missing('c-analyzer')
with test.test_tools.imports_under_tool('c-analyzer'):
yield
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)


@@ -1,5 +0,0 @@
from . import load_tests
import unittest
unittest.main()


@@ -1,6 +0,0 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)


@@ -1,470 +0,0 @@
import os.path
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.files import (
iter_files, _walk_tree, glob_tree,
)
def fixpath(filename):
return filename.replace('/', os.path.sep)
class IterFilesTests(unittest.TestCase):
maxDiff = None
_return_walk = None
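# Lazily-created list recording each call made to the stub helpers below.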
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
def set_files(self, *filesperroot):
roots = []
result = []
for root, files in filesperroot:
root = fixpath(root)
roots.append(root)
result.append([os.path.join(root, fixpath(f))
for f in files])
self._return_walk = result
return roots
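# Stubs for the file-walking helpers: each records its call and replays the canned results.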
def _walk(self, root, *, suffix=None, walk=None):
self.calls.append(('_walk', (root, suffix, walk)))
return iter(self._return_walk.pop(0))
def _glob(self, root, *, suffix=None):
self.calls.append(('_glob', (root, suffix)))
return iter(self._return_walk.pop(0))
def test_typical(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.h']),
)
suffixes = ('.c', '.h')
files = list(iter_files(dirnames, suffixes,
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.h'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', None, _walk_tree)),
('_walk', ('eggs', None, _walk_tree)),
])
def test_single_root(self):
self._return_walk = [
[fixpath('spam/file1.c'), fixpath('spam/file2.c')],
]
files = list(iter_files('spam', '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
])
def test_one_root(self):
self._return_walk = [
[fixpath('spam/file1.c'), fixpath('spam/file2.c')],
]
files = list(iter_files(['spam'], '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
])
def test_multiple_roots(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.c']),
)
files = list(iter_files(dirnames, '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
('_walk', ('eggs', '.c', _walk_tree)),
])
def test_no_roots(self):
files = list(iter_files([], '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [])
self.assertEqual(self.calls, [])
def test_single_suffix(self):
self._return_walk = [
[fixpath('spam/file1.c'),
fixpath('spam/eggs/file3.c'),
],
]
files = list(iter_files('spam', '.c',
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/eggs/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', _walk_tree)),
])
def test_one_suffix(self):
self._return_walk = [
[fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/file1.o'),
fixpath('spam/eggs/file3.c'),
],
]
files = list(iter_files('spam', ['.c'],
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/eggs/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', None, _walk_tree)),
])
def test_multiple_suffixes(self):
self._return_walk = [
[fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/file1.o'),
fixpath('spam/eggs/file3.c'),
],
]
files = list(iter_files('spam', ('.c', '.h'),
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/eggs/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', None, _walk_tree)),
])
def test_no_suffix(self):
expected = [fixpath('spam/file1.c'),
fixpath('spam/file1.h'),
fixpath('spam/file1.o'),
fixpath('spam/eggs/file3.c'),
]
for suffix in (None, '', ()):
with self.subTest(suffix):
self.calls.clear()
self._return_walk = [list(expected)]
files = list(iter_files('spam', suffix,
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, expected)
self.assertEqual(self.calls, [
('_walk', ('spam', suffix, _walk_tree)),
])
def test_relparent(self):
dirnames = self.set_files(
('/x/y/z/spam', ['file1.c', 'file2.c']),
('/x/y/z/eggs', ['ham/file3.c']),
)
files = list(iter_files(dirnames, '.c', fixpath('/x/y'),
_glob=self._glob,
_walk=self._walk))
self.assertEqual(files, [
fixpath('z/spam/file1.c'),
fixpath('z/spam/file2.c'),
fixpath('z/eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', (fixpath('/x/y/z/spam'), '.c', _walk_tree)),
('_walk', (fixpath('/x/y/z/eggs'), '.c', _walk_tree)),
])
def test_glob(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.c']),
)
files = list(iter_files(dirnames, '.c',
get_files=glob_tree,
_walk=self._walk,
_glob=self._glob))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_glob', ('spam', '.c')),
('_glob', ('eggs', '.c')),
])
def test_alt_walk_func(self):
dirnames = self.set_files(
('spam', ['file1.c', 'file2.c']),
('eggs', ['ham/file3.c']),
)
def get_files(root):
return None
files = list(iter_files(dirnames, '.c',
get_files=get_files,
_walk=self._walk,
_glob=self._glob))
self.assertEqual(files, [
fixpath('spam/file1.c'),
fixpath('spam/file2.c'),
fixpath('eggs/ham/file3.c'),
])
self.assertEqual(self.calls, [
('_walk', ('spam', '.c', get_files)),
('_walk', ('eggs', '.c', get_files)),
])
# def test_no_dirnames(self):
# dirnames = []
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [])
# self.assertEqual(self.calls, [])
#
# def test_no_filter(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file1'),
# fixpath('spam/file2.c'),
# fixpath('spam/file3.h'),
# fixpath('spam/file4.o'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ])
#
# def test_no_files(self):
# self._return_walk = [
# [('spam', (), ()),
# ],
# [(fixpath('eggs/ham'), (), ()),
# ],
# ]
# dirnames = [
# 'spam',
# fixpath('eggs/ham'),
# ]
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('_walk', (fixpath('eggs/ham'),)),
# ])
#
# def test_tree(self):
# self._return_walk = [
# [('spam', ('sub1', 'sub2', 'sub3'), ('file1',)),
# (fixpath('spam/sub1'), ('sub1sub1',), ('file2', 'file3')),
# (fixpath('spam/sub1/sub1sub1'), (), ('file4',)),
# (fixpath('spam/sub2'), (), ()),
# (fixpath('spam/sub3'), (), ('file5',)),
# ],
# [(fixpath('eggs/ham'), (), ('file6',)),
# ],
# ]
# dirnames = [
# 'spam',
# fixpath('eggs/ham'),
# ]
# filter_by_name = None
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file1'),
# fixpath('spam/sub1/file2'),
# fixpath('spam/sub1/file3'),
# fixpath('spam/sub1/sub1sub1/file4'),
# fixpath('spam/sub3/file5'),
# fixpath('eggs/ham/file6'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('_walk', (fixpath('eggs/ham'),)),
# ])
#
# def test_filter_suffixes(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2.c', 'file3.h', 'file4.o')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# filter_by_name = ('.c', '.h')
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file2.c'),
# fixpath('spam/file3.h'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ])
#
# def test_some_filtered(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2', 'file3', 'file4')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# def filter_by_name(filename, results=[False, True, False, True]):
# self.calls.append(('filter_by_name', (filename,)))
# return results.pop(0)
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file2'),
# fixpath('spam/file4'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('filter_by_name', ('file1',)),
# ('filter_by_name', ('file2',)),
# ('filter_by_name', ('file3',)),
# ('filter_by_name', ('file4',)),
# ])
#
# def test_none_filtered(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2', 'file3', 'file4')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# def filter_by_name(filename, results=[True, True, True, True]):
# self.calls.append(('filter_by_name', (filename,)))
# return results.pop(0)
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [
# fixpath('spam/file1'),
# fixpath('spam/file2'),
# fixpath('spam/file3'),
# fixpath('spam/file4'),
# ])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('filter_by_name', ('file1',)),
# ('filter_by_name', ('file2',)),
# ('filter_by_name', ('file3',)),
# ('filter_by_name', ('file4',)),
# ])
#
# def test_all_filtered(self):
# self._return_walk = [
# [('spam', (), ('file1', 'file2', 'file3', 'file4')),
# ],
# ]
# dirnames = [
# 'spam',
# ]
# def filter_by_name(filename, results=[False, False, False, False]):
# self.calls.append(('filter_by_name', (filename,)))
# return results.pop(0)
#
# files = list(iter_files(dirnames, filter_by_name,
# _walk=self._walk))
#
# self.assertEqual(files, [])
# self.assertEqual(self.calls, [
# ('_walk', ('spam',)),
# ('filter_by_name', ('file1',)),
# ('filter_by_name', ('file2',)),
# ('filter_by_name', ('file3',)),
# ('filter_by_name', ('file4',)),
# ])


@@ -1,197 +0,0 @@
import string
import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import (
UNKNOWN,
ID,
)
class IDTests(unittest.TestCase):
VALID_ARGS = (
'x/y/z/spam.c',
'func',
'eggs',
)
VALID_KWARGS = dict(zip(ID._fields, VALID_ARGS))
VALID_EXPECTED = VALID_ARGS
def test_from_raw(self):
tests = [
('', None),
(None, None),
('spam', (None, None, 'spam')),
(('spam',), (None, None, 'spam')),
(('x/y/z/spam.c', 'spam'), ('x/y/z/spam.c', None, 'spam')),
(self.VALID_ARGS, self.VALID_EXPECTED),
(self.VALID_KWARGS, self.VALID_EXPECTED),
]
for raw, expected in tests:
with self.subTest(raw):
id = ID.from_raw(raw)
self.assertEqual(id, expected)
def test_minimal(self):
id = ID(
filename=None,
funcname=None,
name='eggs',
)
self.assertEqual(id, (
None,
None,
'eggs',
))
def test_init_typical_global(self):
id = ID(
filename='x/y/z/spam.c',
funcname=None,
name='eggs',
)
self.assertEqual(id, (
'x/y/z/spam.c',
None,
'eggs',
))
def test_init_typical_local(self):
id = ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
)
self.assertEqual(id, (
'x/y/z/spam.c',
'func',
'eggs',
))
def test_init_all_missing(self):
for value in ('', None):
with self.subTest(repr(value)):
id = ID(
filename=value,
funcname=value,
name=value,
)
self.assertEqual(id, (
None,
None,
None,
))
def test_init_all_coerced(self):
tests = [
('str subclass',
dict(
filename=PseudoStr('x/y/z/spam.c'),
funcname=PseudoStr('func'),
name=PseudoStr('eggs'),
),
('x/y/z/spam.c',
'func',
'eggs',
)),
('non-str',
dict(
filename=StrProxy('x/y/z/spam.c'),
funcname=Object(),
name=('a', 'b', 'c'),
),
('x/y/z/spam.c',
'<object>',
"('a', 'b', 'c')",
)),
]
for summary, kwargs, expected in tests:
with self.subTest(summary):
id = ID(**kwargs)
for field in ID._fields:
value = getattr(id, field)
self.assertIs(type(value), str)
self.assertEqual(tuple(id), expected)
def test_iterable(self):
id = ID(**self.VALID_KWARGS)
filename, funcname, name = id
values = (filename, funcname, name)
for value, expected in zip(values, self.VALID_EXPECTED):
self.assertEqual(value, expected)
def test_fields(self):
id = ID('a', 'b', 'z')
self.assertEqual(id.filename, 'a')
self.assertEqual(id.funcname, 'b')
self.assertEqual(id.name, 'z')
def test_validate_typical(self):
id = ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
)
id.validate() # This does not fail.
def test_validate_missing_field(self):
for field in ID._fields:
with self.subTest(field):
id = ID(**self.VALID_KWARGS)
id = id._replace(**{field: None})
if field == 'funcname':
id.validate() # The field can be missing (not set).
id = id._replace(filename=None)
id.validate() # Both fields can be missing (not set).
continue
with self.assertRaises(TypeError):
id.validate()
def test_validate_bad_field(self):
badch = tuple(c for c in string.punctuation + string.digits)
notnames = (
'1a',
'a.b',
'a-b',
'&a',
'a++',
) + badch
tests = [
('filename', ()), # Any non-empty str is okay.
('funcname', notnames),
('name', notnames),
]
seen = set()
for field, invalid in tests:
for value in invalid:
seen.add(value)
with self.subTest(f'{field}={value!r}'):
id = ID(**self.VALID_KWARGS)
id = id._replace(**{field: value})
with self.assertRaises(ValueError):
id.validate()
for field, invalid in tests:
valid = seen - set(invalid)
for value in valid:
with self.subTest(f'{field}={value!r}'):
id = ID(**self.VALID_KWARGS)
id = id._replace(**{field: value})
id.validate() # This does not fail.


@@ -1,54 +0,0 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.variables import info
from c_analyzer.common.show import (
basic,
)
TYPICAL = [
info.Variable.from_parts('src1/spam.c', None, 'var1', 'static const char *'),
info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'static int'),
info.Variable.from_parts('src1/spam.c', None, 'var2', 'static PyObject *'),
info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'static int'),
info.Variable.from_parts('src1/spam.c', None, 'freelist', 'static (PyTupleObject *)[10]'),
info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'static const char const *'),
info.Variable.from_parts('src2/jam.c', None, 'var1', 'static int'),
info.Variable.from_parts('src2/jam.c', None, 'var2', 'static MyObject *'),
info.Variable.from_parts('Include/spam.h', None, 'data', 'static const int'),
]
class BasicTests(unittest.TestCase):
maxDiff = None
def setUp(self):
self.lines = []
def print(self, line):
self.lines.append(line)
def test_typical(self):
basic(TYPICAL,
_print=self.print)
self.assertEqual(self.lines, [
'src1/spam.c:var1 static const char *',
'src1/spam.c:ham():initialized static int',
'src1/spam.c:var2 static PyObject *',
'src1/eggs.c:tofu():ready static int',
'src1/spam.c:freelist static (PyTupleObject *)[10]',
'src1/sub/ham.c:var1 static const char const *',
'src2/jam.c:var1 static int',
'src2/jam.c:var2 static MyObject *',
'Include/spam.h:data static const int',
])
def test_no_rows(self):
basic([],
_print=self.print)
self.assertEqual(self.lines, [])


@@ -1,6 +0,0 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)


@@ -1,296 +0,0 @@
import sys
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.variables import info
from cpython import SOURCE_DIRS
from cpython.supported import IGNORED_FILE
from cpython.known import DATA_FILE as KNOWN_FILE
from cpython.__main__ import (
cmd_check, cmd_show, parse_args, main,
)
TYPICAL = [
(info.Variable.from_parts('src1/spam.c', None, 'var1', 'const char *'),
True,
),
(info.Variable.from_parts('src1/spam.c', 'ham', 'initialized', 'int'),
True,
),
(info.Variable.from_parts('src1/spam.c', None, 'var2', 'PyObject *'),
False,
),
(info.Variable.from_parts('src1/eggs.c', 'tofu', 'ready', 'int'),
True,
),
(info.Variable.from_parts('src1/spam.c', None, 'freelist', '(PyTupleObject *)[10]'),
False,
),
(info.Variable.from_parts('src1/sub/ham.c', None, 'var1', 'const char const *'),
True,
),
(info.Variable.from_parts('src2/jam.c', None, 'var1', 'int'),
True,
),
(info.Variable.from_parts('src2/jam.c', None, 'var2', 'MyObject *'),
False,
),
(info.Variable.from_parts('Include/spam.h', None, 'data', 'const int'),
True,
),
]
class CMDBase(unittest.TestCase):
maxDiff = None
# _return_known_from_file = None
# _return_ignored_from_file = None
_return_find = ()
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
# def _known_from_file(self, *args):
# self.calls.append(('_known_from_file', args))
# return self._return_known_from_file or {}
#
# def _ignored_from_file(self, *args):
# self.calls.append(('_ignored_from_file', args))
# return self._return_ignored_from_file or {}
def _find(self, known, ignored, skip_objects=False):
self.calls.append(('_find', (known, ignored, skip_objects)))
return self._return_find
def _show(self, *args):
self.calls.append(('_show', args))
def _print(self, *args):
self.calls.append(('_print', args))
class CheckTests(CMDBase):
def test_defaults(self):
self._return_find = []
cmd_check('check',
_find=self._find,
_show=self._show,
_print=self._print,
)
self.assertEqual(
self.calls[0],
('_find', (KNOWN_FILE, IGNORED_FILE, False)),
)
def test_all_supported(self):
self._return_find = [(v, s) for v, s in TYPICAL if s]
dirs = ['src1', 'src2', 'Include']
cmd_check('check',
known='known.tsv',
ignored='ignored.tsv',
_find=self._find,
_show=self._show,
_print=self._print,
)
self.assertEqual(self.calls, [
('_find', ('known.tsv', 'ignored.tsv', False)),
#('_print', ('okay',)),
])
def test_some_unsupported(self):
self._return_find = TYPICAL
with self.assertRaises(SystemExit) as cm:
cmd_check('check',
known='known.tsv',
ignored='ignored.tsv',
_find=self._find,
_show=self._show,
_print=self._print,
)
unsupported = [v for v, s in TYPICAL if not s]
self.assertEqual(self.calls, [
('_find', ('known.tsv', 'ignored.tsv', False)),
('_print', ('ERROR: found unsupported global variables',)),
('_print', ()),
('_show', (sorted(unsupported),)),
('_print', (' (3 total)',)),
])
self.assertEqual(cm.exception.code, 1)
class ShowTests(CMDBase):
def test_defaults(self):
self._return_find = []
cmd_show('show',
_find=self._find,
_show=self._show,
_print=self._print,
)
self.assertEqual(
self.calls[0],
('_find', (KNOWN_FILE, IGNORED_FILE, False)),
)
def test_typical(self):
self._return_find = TYPICAL
cmd_show('show',
known='known.tsv',
ignored='ignored.tsv',
_find=self._find,
_show=self._show,
_print=self._print,
)
supported = [v for v, s in TYPICAL if s]
unsupported = [v for v, s in TYPICAL if not s]
self.assertEqual(self.calls, [
('_find', ('known.tsv', 'ignored.tsv', False)),
('_print', ('supported:',)),
('_print', ('----------',)),
('_show', (sorted(supported),)),
('_print', (' (6 total)',)),
('_print', ()),
('_print', ('unsupported:',)),
('_print', ('------------',)),
('_show', (sorted(unsupported),)),
('_print', (' (3 total)',)),
])
class ParseArgsTests(unittest.TestCase):
maxDiff = None
def test_no_args(self):
self.errmsg = None
def fail(msg):
self.errmsg = msg
sys.exit(msg)
with self.assertRaises(SystemExit):
parse_args('cg', [], _fail=fail)
self.assertEqual(self.errmsg, 'missing command')
def test_check_no_args(self):
cmd, cmdkwargs = parse_args('cg', [
'check',
])
self.assertEqual(cmd, 'check')
self.assertEqual(cmdkwargs, {
'ignored': IGNORED_FILE,
'known': KNOWN_FILE,
#'dirs': SOURCE_DIRS,
})
def test_check_full_args(self):
cmd, cmdkwargs = parse_args('cg', [
'check',
'--ignored', 'spam.tsv',
'--known', 'eggs.tsv',
#'dir1',
#'dir2',
#'dir3',
])
self.assertEqual(cmd, 'check')
self.assertEqual(cmdkwargs, {
'ignored': 'spam.tsv',
'known': 'eggs.tsv',
#'dirs': ['dir1', 'dir2', 'dir3']
})
def test_show_no_args(self):
cmd, cmdkwargs = parse_args('cg', [
'show',
])
self.assertEqual(cmd, 'show')
self.assertEqual(cmdkwargs, {
'ignored': IGNORED_FILE,
'known': KNOWN_FILE,
#'dirs': SOURCE_DIRS,
'skip_objects': False,
})
def test_show_full_args(self):
cmd, cmdkwargs = parse_args('cg', [
'show',
'--ignored', 'spam.tsv',
'--known', 'eggs.tsv',
#'dir1',
#'dir2',
#'dir3',
])
self.assertEqual(cmd, 'show')
self.assertEqual(cmdkwargs, {
'ignored': 'spam.tsv',
'known': 'eggs.tsv',
#'dirs': ['dir1', 'dir2', 'dir3'],
'skip_objects': False,
})
def new_stub_commands(*names):
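# Build a name -> stub-command mapping plus a shared list capturing each (cmd, kwargs) call.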
calls = []
def cmdfunc(cmd, **kwargs):
calls.append((cmd, kwargs))
commands = {name: cmdfunc for name in names}
return commands, calls
class MainTests(unittest.TestCase):
def test_no_command(self):
with self.assertRaises(ValueError):
main(None, {})
def test_check(self):
commands, calls = new_stub_commands('check', 'show')
cmdkwargs = {
'ignored': 'spam.tsv',
'known': 'eggs.tsv',
'dirs': ['dir1', 'dir2', 'dir3'],
}
main('check', cmdkwargs, _COMMANDS=commands)
self.assertEqual(calls, [
('check', cmdkwargs),
])
def test_show(self):
commands, calls = new_stub_commands('check', 'show')
cmdkwargs = {
'ignored': 'spam.tsv',
'known': 'eggs.tsv',
'dirs': ['dir1', 'dir2', 'dir3'],
}
main('show', cmdkwargs, _COMMANDS=commands)
self.assertEqual(calls, [
('show', cmdkwargs),
])


@@ -1,34 +0,0 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
pass
class SelfCheckTests(unittest.TestCase):
@unittest.expectedFailure
def test_known(self):
# Make sure known macros & vartypes aren't hiding unknown local types.
# XXX finish!
raise NotImplementedError
@unittest.expectedFailure
def test_compare_nm_results(self):
# Make sure the "show" results match the statics found by "nm" command.
# XXX Skip if "nm" is not available.
# XXX finish!
raise NotImplementedError
class DummySourceTests(unittest.TestCase):
@unittest.expectedFailure
def test_check(self):
# XXX finish!
raise NotImplementedError
@unittest.expectedFailure
def test_show(self):
# XXX finish!
raise NotImplementedError


@@ -1,98 +0,0 @@
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import ID
from c_analyzer.variables.info import Variable
from cpython.supported import (
is_supported, ignored_from_file,
)
class IsSupportedTests(unittest.TestCase):
@unittest.expectedFailure
def test_supported(self):
statics = [
Variable('src1/spam.c', None, 'var1', 'const char *'),
Variable('src1/spam.c', None, 'var1', 'int'),
]
for static in statics:
with self.subTest(static):
result = is_supported(static)
self.assertTrue(result)
@unittest.expectedFailure
def test_not_supported(self):
statics = [
Variable('src1/spam.c', None, 'var1', 'PyObject *'),
Variable('src1/spam.c', None, 'var1', 'PyObject[10]'),
]
for static in statics:
with self.subTest(static):
result = is_supported(static)
self.assertFalse(result)
class IgnoredFromFileTests(unittest.TestCase):
maxDiff = None
_return_read_tsv = ()
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
def _read_tsv(self, *args):
self.calls.append(('_read_tsv', args))
return self._return_read_tsv
def test_typical(self):
lines = textwrap.dedent('''
filename funcname name kind reason
file1.c - var1 variable ...
file1.c func1 local1 variable |
file1.c - var2 variable ???
file1.c func2 local2 variable |
file2.c - var1 variable reasons
''').strip().splitlines()
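# Turn the first four whitespace runs into tabs (five TSV columns); '|' marks an empty "reason".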
lines = [re.sub(r'\s{1,8}', '\t', line, 4).replace('|', '')
for line in lines]
self._return_read_tsv = [tuple(v.strip() for v in line.split('\t'))
for line in lines[1:]]
ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv)
self.assertEqual(ignored, {
'variables': {
ID('file1.c', '', 'var1'): '...',
ID('file1.c', 'func1', 'local1'): '',
ID('file1.c', '', 'var2'): '???',
ID('file1.c', 'func2', 'local2'): '',
ID('file2.c', '', 'var1'): 'reasons',
},
})
self.assertEqual(self.calls, [
('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')),
])
def test_empty(self):
self._return_read_tsv = []
ignored = ignored_from_file('spam.c', _read_tsv=self._read_tsv)
self.assertEqual(ignored, {
'variables': {},
})
self.assertEqual(self.calls, [
('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\treason')),
])


@@ -1,6 +0,0 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)


@@ -1,795 +0,0 @@
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.parser.declarations import (
iter_global_declarations, iter_local_statements,
parse_func, _parse_var, parse_compound,
iter_variables,
)
class TestCaseBase(unittest.TestCase):
maxDiff = None
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
class IterGlobalDeclarationsTests(TestCaseBase):
def test_functions(self):
tests = [
(textwrap.dedent('''
void func1() {
return;
}
'''),
textwrap.dedent('''
void func1() {
return;
}
''').strip(),
),
(textwrap.dedent('''
static unsigned int * _func1(
const char *arg1,
int *arg2
long long arg3
)
{
return _do_something(arg1, arg2, arg3);
}
'''),
textwrap.dedent('''
static unsigned int * _func1( const char *arg1, int *arg2 long long arg3 ) {
return _do_something(arg1, arg2, arg3);
}
''').strip(),
),
(textwrap.dedent('''
static PyObject *
_func1(const char *arg1, PyObject *arg2)
{
static int initialized = 0;
if (!initialized) {
initialized = 1;
_init(arg1);
}
PyObject *result = _do_something(arg1, arg2);
Py_INCREF(result);
return result;
}
'''),
textwrap.dedent('''
static PyObject * _func1(const char *arg1, PyObject *arg2) {
static int initialized = 0;
if (!initialized) {
initialized = 1;
_init(arg1);
}
PyObject *result = _do_something(arg1, arg2);
Py_INCREF(result);
return result;
}
''').strip(),
),
]
for lines, expected in tests:
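# The expected body is whatever sits between the outermost braces of the expected text.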
body = textwrap.dedent(
expected.partition('{')[2].rpartition('}')[0]
).strip()
expected = (expected, body)
with self.subTest(lines):
lines = lines.splitlines()
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, [expected])
@unittest.expectedFailure
def test_declarations(self):
tests = [
'int spam;',
'long long spam;',
'static const int const *spam;',
'int spam;',
'typedef int myint;',
'typedef PyObject * (*unaryfunc)(PyObject *);',
# typedef struct
# inline struct
# enum
# inline enum
]
for text in tests:
expected = (text,
' '.join(l.strip() for l in text.splitlines()))
with self.subTest(text):
lines = text.splitlines()
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, [expected])
@unittest.expectedFailure
def test_declaration_multiple_vars(self):
lines = ['static const int const *spam, *ham=NULL, eggs = 3;']
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, [
('static const int const *spam;', None),
('static const int *ham=NULL;', None),
('static const int eggs = 3;', None),
])
def test_mixed(self):
lines = textwrap.dedent('''
int spam;
static const char const *eggs;
PyObject * start(void) {
static int initialized = 0;
if (initialized) {
initialized = 1;
init();
}
return _start();
}
char* ham;
static int stop(char *reason) {
ham = reason;
return _stop();
}
''').splitlines()
expected = [
(textwrap.dedent('''
PyObject * start(void) {
static int initialized = 0;
if (initialized) {
initialized = 1;
init();
}
return _start();
}
''').strip(),
textwrap.dedent('''
static int initialized = 0;
if (initialized) {
initialized = 1;
init();
}
return _start();
''').strip(),
),
(textwrap.dedent('''
static int stop(char *reason) {
ham = reason;
return _stop();
}
''').strip(),
textwrap.dedent('''
ham = reason;
return _stop();
''').strip(),
),
]
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, expected)
#self.assertEqual([stmt for stmt, _ in stmts],
# [stmt for stmt, _ in expected])
#self.assertEqual([body for _, body in stmts],
# [body for _, body in expected])
def test_no_statements(self):
lines = []
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, [])
def test_bogus(self):
tests = [
(textwrap.dedent('''
int spam;
static const char const *eggs;
PyObject * start(void) {
static int initialized = 0;
if (initialized) {
initialized = 1;
init();
}
return _start();
}
char* ham;
static int _stop(void) {
// missing closing bracket
static int stop(char *reason) {
ham = reason;
return _stop();
}
'''),
[(textwrap.dedent('''
PyObject * start(void) {
static int initialized = 0;
if (initialized) {
initialized = 1;
init();
}
return _start();
}
''').strip(),
textwrap.dedent('''
static int initialized = 0;
if (initialized) {
initialized = 1;
init();
}
return _start();
''').strip(),
),
# Neither "stop()" nor "_stop()" are here.
],
),
]
for lines, expected in tests:
with self.subTest(lines):
lines = lines.splitlines()
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, expected)
#self.assertEqual([stmt for stmt, _ in stmts],
# [stmt for stmt, _ in expected])
#self.assertEqual([body for _, body in stmts],
# [body for _, body in expected])
def test_ignore_comments(self):
tests = [
('// msg', None),
('// int stmt;', None),
(' // ... ', None),
('// /*', None),
('/* int stmt; */', None),
("""
/**
* ...
* int stmt;
*/
""", None),
]
for lines, expected in tests:
with self.subTest(lines):
lines = lines.splitlines()
stmts = list(iter_global_declarations(lines))
self.assertEqual(stmts, [expected] if expected else [])
class IterLocalStatementsTests(TestCaseBase):
def test_vars(self):
tests = [
# POTS
'int spam;',
'unsigned int spam;',
'char spam;',
'float spam;',
# typedefs
'uint spam;',
'MyType spam;',
# complex
'struct myspam spam;',
'union choice spam;',
# inline struct
# inline union
# enum?
]
# pointers
tests.extend([
# POTS
'int * spam;',
'unsigned int * spam;',
'char *spam;',
'char const *spam = "spamspamspam...";',
# typedefs
'MyType *spam;',
# complex
'struct myspam *spam;',
'union choice *spam;',
# packed with details
'const char const *spam;',
# void pointer
'void *data = NULL;',
# function pointers
'int (* func)(char *arg1);',
'char * (* func)(void);',
])
# storage class
tests.extend([
'static int spam;',
'extern int spam;',
'static unsigned int spam;',
'static struct myspam spam;',
])
# type qualifier
tests.extend([
'const int spam;',
'const unsigned int spam;',
'const struct myspam spam;',
])
# combined
tests.extend([
'const char *spam = eggs;',
'static const char const *spam = "spamspamspam...";',
'extern const char const *spam;',
'static void *data = NULL;',
'static int (const * func)(char *arg1) = func1;',
'static char * (* func)(void);',
])
for line in tests:
expected = line
with self.subTest(line):
stmts = list(iter_local_statements([line]))
self.assertEqual(stmts, [(expected, None)])
@unittest.expectedFailure
def test_vars_multiline_var(self):
lines = textwrap.dedent('''
PyObject *
spam
= NULL;
''').splitlines()
expected = 'PyObject * spam = NULL;'
stmts = list(iter_local_statements(lines))
self.assertEqual(stmts, [(expected, None)])
@unittest.expectedFailure
def test_declaration_multiple_vars(self):
lines = ['static const int const *spam, *ham=NULL, ham2[]={1, 2, 3}, ham3[2]={1, 2}, eggs = 3;']
stmts = list(iter_local_statements(lines))
self.assertEqual(stmts, [
('static const int const *spam;', None),
('static const int *ham=NULL;', None),
('static const int ham2[]={1, 2, 3};', None),
('static const int ham3[2]={1, 2};', None),
('static const int eggs = 3;', None),
])
@unittest.expectedFailure
def test_other_simple(self):
raise NotImplementedError
@unittest.expectedFailure
def test_compound(self):
raise NotImplementedError
@unittest.expectedFailure
def test_mixed(self):
raise NotImplementedError
def test_no_statements(self):
lines = []
stmts = list(iter_local_statements(lines))
self.assertEqual(stmts, [])
@unittest.expectedFailure
def test_bogus(self):
raise NotImplementedError
def test_ignore_comments(self):
tests = [
('// msg', None),
('// int stmt;', None),
(' // ... ', None),
('// /*', None),
('/* int stmt; */', None),
("""
/**
* ...
* int stmt;
*/
""", None),
# mixed with statements
('int stmt; // ...', ('int stmt;', None)),
('int stmt; /* ... */', ('int stmt;', None)),
('/* ... */ int stmt;', ('int stmt;', None)),
]
for lines, expected in tests:
with self.subTest(lines):
lines = lines.splitlines()
stmts = list(iter_local_statements(lines))
self.assertEqual(stmts, [expected] if expected else [])
class ParseFuncTests(TestCaseBase):
def test_typical(self):
tests = [
('PyObject *\nspam(char *a)\n{\nreturn _spam(a);\n}',
'return _spam(a);',
('spam', 'PyObject * spam(char *a)'),
),
]
for stmt, body, expected in tests:
with self.subTest(stmt):
name, signature = parse_func(stmt, body)
self.assertEqual((name, signature), expected)
class ParseVarTests(TestCaseBase):
def test_typical(self):
tests = [
# POTS
('int spam;', ('spam', 'int')),
('unsigned int spam;', ('spam', 'unsigned int')),
('char spam;', ('spam', 'char')),
('float spam;', ('spam', 'float')),
# typedefs
('uint spam;', ('spam', 'uint')),
('MyType spam;', ('spam', 'MyType')),
# complex
('struct myspam spam;', ('spam', 'struct myspam')),
('union choice spam;', ('spam', 'union choice')),
# inline struct
# inline union
# enum?
]
# pointers
tests.extend([
# POTS
('int * spam;', ('spam', 'int *')),
('unsigned int * spam;', ('spam', 'unsigned int *')),
('char *spam;', ('spam', 'char *')),
('char const *spam = "spamspamspam...";', ('spam', 'char const *')),
# typedefs
('MyType *spam;', ('spam', 'MyType *')),
# complex
('struct myspam *spam;', ('spam', 'struct myspam *')),
('union choice *spam;', ('spam', 'union choice *')),
# packed with details
('const char const *spam;', ('spam', 'const char const *')),
# void pointer
('void *data = NULL;', ('data', 'void *')),
# function pointers
('int (* func)(char *);', ('func', 'int (*)(char *)')),
('char * (* func)(void);', ('func', 'char * (*)(void)')),
])
# storage class
tests.extend([
('static int spam;', ('spam', 'static int')),
('extern int spam;', ('spam', 'extern int')),
('static unsigned int spam;', ('spam', 'static unsigned int')),
('static struct myspam spam;', ('spam', 'static struct myspam')),
])
# type qualifier
tests.extend([
('const int spam;', ('spam', 'const int')),
('const unsigned int spam;', ('spam', 'const unsigned int')),
('const struct myspam spam;', ('spam', 'const struct myspam')),
])
# combined
tests.extend([
('const char *spam = eggs;', ('spam', 'const char *')),
('static const char const *spam = "spamspamspam...";',
('spam', 'static const char const *')),
('extern const char const *spam;',
('spam', 'extern const char const *')),
('static void *data = NULL;', ('data', 'static void *')),
('static int (const * func)(char *) = func1;',
('func', 'static int (const *)(char *)')),
('static char * (* func)(void);',
('func', 'static char * (*)(void)')),
])
for stmt, expected in tests:
with self.subTest(stmt):
name, vartype = _parse_var(stmt)
self.assertEqual((name, vartype), expected)
@unittest.skip('not finished')
class ParseCompoundTests(TestCaseBase):
def test_typical(self):
headers, bodies = parse_compound(stmt, blocks)
...
class IterVariablesTests(TestCaseBase):
_return_iter_source_lines = None
_return_iter_global = None
_return_iter_local = None
_return_parse_func = None
_return_parse_var = None
_return_parse_compound = None
def _iter_source_lines(self, filename):
self.calls.append(
('_iter_source_lines', (filename,)))
return self._return_iter_source_lines.splitlines()
def _iter_global(self, lines):
self.calls.append(
('_iter_global', (lines,)))
try:
return self._return_iter_global.pop(0)
except IndexError:
return ('???', None)
def _iter_local(self, lines):
self.calls.append(
('_iter_local', (lines,)))
try:
return self._return_iter_local.pop(0)
except IndexError:
return ('???', None)
def _parse_func(self, stmt, body):
self.calls.append(
('_parse_func', (stmt, body)))
try:
return self._return_parse_func.pop(0)
except IndexError:
return ('???', '???')
def _parse_var(self, lines):
self.calls.append(
('_parse_var', (lines,)))
try:
return self._return_parse_var.pop(0)
except IndexError:
return ('???', '???')
def _parse_compound(self, stmt, blocks):
self.calls.append(
('_parse_compound', (stmt, blocks)))
try:
return self._return_parse_compound.pop(0)
except IndexError:
return (['???'], ['???'])
def test_empty_file(self):
self._return_iter_source_lines = ''
self._return_iter_global = [
[],
]
self._return_parse_func = None
self._return_parse_var = None
self._return_parse_compound = None
srcvars = list(iter_variables('spam.c',
_iter_source_lines=self._iter_source_lines,
_iter_global=self._iter_global,
_iter_local=self._iter_local,
_parse_func=self._parse_func,
_parse_var=self._parse_var,
_parse_compound=self._parse_compound,
))
self.assertEqual(srcvars, [])
self.assertEqual(self.calls, [
('_iter_source_lines', ('spam.c',)),
('_iter_global', ([],)),
])
def test_no_statements(self):
content = textwrap.dedent('''
...
''')
self._return_iter_source_lines = content
self._return_iter_global = [
[],
]
self._return_parse_func = None
self._return_parse_var = None
self._return_parse_compound = None
srcvars = list(iter_variables('spam.c',
_iter_source_lines=self._iter_source_lines,
_iter_global=self._iter_global,
_iter_local=self._iter_local,
_parse_func=self._parse_func,
_parse_var=self._parse_var,
_parse_compound=self._parse_compound,
))
self.assertEqual(srcvars, [])
self.assertEqual(self.calls, [
('_iter_source_lines', ('spam.c',)),
('_iter_global', (content.splitlines(),)),
])
def test_typical(self):
content = textwrap.dedent('''
...
''')
self._return_iter_source_lines = content
self._return_iter_global = [
[('<lines 1>', None), # var1
('<lines 2>', None), # non-var
('<lines 3>', None), # var2
('<lines 4>', '<body 1>'), # func1
('<lines 9>', None), # var4
],
]
self._return_iter_local = [
# func1
[('<lines 5>', None), # var3
('<lines 6>', [('<header 1>', '<block 1>')]), # if
('<lines 8>', None), # non-var
],
# if
[('<lines 7>', None), # var2 ("collision" with global var)
],
]
self._return_parse_func = [
('func1', '<sig 1>'),
]
self._return_parse_var = [
('var1', '<vartype 1>'),
(None, None),
('var2', '<vartype 2>'),
('var3', '<vartype 3>'),
('var2', '<vartype 2b>'),
('var4', '<vartype 4>'),
(None, None),
(None, None),
(None, None),
('var5', '<vartype 5>'),
]
self._return_parse_compound = [
([[
'if (',
'<simple>',
')',
],
],
['<block 1>']),
]
srcvars = list(iter_variables('spam.c',
_iter_source_lines=self._iter_source_lines,
_iter_global=self._iter_global,
_iter_local=self._iter_local,
_parse_func=self._parse_func,
_parse_var=self._parse_var,
_parse_compound=self._parse_compound,
))
self.assertEqual(srcvars, [
(None, 'var1', '<vartype 1>'),
(None, 'var2', '<vartype 2>'),
('func1', 'var3', '<vartype 3>'),
('func1', 'var2', '<vartype 2b>'),
('func1', 'var4', '<vartype 4>'),
(None, 'var5', '<vartype 5>'),
])
self.assertEqual(self.calls, [
('_iter_source_lines', ('spam.c',)),
('_iter_global', (content.splitlines(),)),
('_parse_var', ('<lines 1>',)),
('_parse_var', ('<lines 2>',)),
('_parse_var', ('<lines 3>',)),
('_parse_func', ('<lines 4>', '<body 1>')),
('_iter_local', (['<body 1>'],)),
('_parse_var', ('<lines 5>',)),
('_parse_compound', ('<lines 6>', [('<header 1>', '<block 1>')])),
('_parse_var', ('if (',)),
('_parse_var', ('<simple>',)),
('_parse_var', (')',)),
('_parse_var', ('<lines 8>',)),
('_iter_local', (['<block 1>'],)),
('_parse_var', ('<lines 7>',)),
('_parse_var', ('<lines 9>',)),
])
def test_no_locals(self):
content = textwrap.dedent('''
...
''')
self._return_iter_source_lines = content
self._return_iter_global = [
[('<lines 1>', None), # var1
('<lines 2>', None), # non-var
('<lines 3>', None), # var2
('<lines 4>', '<body 1>'), # func1
],
]
self._return_iter_local = [
# func1
[('<lines 5>', None), # non-var
('<lines 6>', [('<header 1>', '<block 1>')]), # if
('<lines 8>', None), # non-var
],
# if
[('<lines 7>', None), # non-var
],
]
self._return_parse_func = [
('func1', '<sig 1>'),
]
self._return_parse_var = [
('var1', '<vartype 1>'),
(None, None),
('var2', '<vartype 2>'),
(None, None),
(None, None),
(None, None),
(None, None),
(None, None),
(None, None),
]
self._return_parse_compound = [
([[
'if (',
'<simple>',
')',
],
],
['<block 1>']),
]
srcvars = list(iter_variables('spam.c',
_iter_source_lines=self._iter_source_lines,
_iter_global=self._iter_global,
_iter_local=self._iter_local,
_parse_func=self._parse_func,
_parse_var=self._parse_var,
_parse_compound=self._parse_compound,
))
self.assertEqual(srcvars, [
(None, 'var1', '<vartype 1>'),
(None, 'var2', '<vartype 2>'),
])
self.assertEqual(self.calls, [
('_iter_source_lines', ('spam.c',)),
('_iter_global', (content.splitlines(),)),
('_parse_var', ('<lines 1>',)),
('_parse_var', ('<lines 2>',)),
('_parse_var', ('<lines 3>',)),
('_parse_func', ('<lines 4>', '<body 1>')),
('_iter_local', (['<body 1>'],)),
('_parse_var', ('<lines 5>',)),
('_parse_compound', ('<lines 6>', [('<header 1>', '<block 1>')])),
('_parse_var', ('if (',)),
('_parse_var', ('<simple>',)),
('_parse_var', (')',)),
('_parse_var', ('<lines 8>',)),
('_iter_local', (['<block 1>'],)),
('_parse_var', ('<lines 7>',)),
])


@@ -1,6 +0,0 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)


@@ -1,192 +0,0 @@
import string
import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import ID
from c_analyzer.symbols.info import Symbol
class SymbolTests(unittest.TestCase):
VALID_ARGS = (
ID('x/y/z/spam.c', 'func', 'eggs'),
Symbol.KIND.VARIABLE,
False,
)
VALID_KWARGS = dict(zip(Symbol._fields, VALID_ARGS))
VALID_EXPECTED = VALID_ARGS
def test_init_typical_binary_local(self):
id = ID(None, None, 'spam')
symbol = Symbol(
id=id,
kind=Symbol.KIND.VARIABLE,
external=False,
)
self.assertEqual(symbol, (
id,
Symbol.KIND.VARIABLE,
False,
))
def test_init_typical_binary_global(self):
id = ID('Python/ceval.c', None, 'spam')
symbol = Symbol(
id=id,
kind=Symbol.KIND.VARIABLE,
external=False,
)
self.assertEqual(symbol, (
id,
Symbol.KIND.VARIABLE,
False,
))
def test_init_coercion(self):
tests = [
('str subclass',
dict(
id=PseudoStr('eggs'),
kind=PseudoStr('variable'),
external=0,
),
(ID(None, None, 'eggs'),
Symbol.KIND.VARIABLE,
False,
)),
('with filename',
dict(
id=('x/y/z/spam.c', 'eggs'),
kind=PseudoStr('variable'),
external=0,
),
(ID('x/y/z/spam.c', None, 'eggs'),
Symbol.KIND.VARIABLE,
False,
)),
('non-str 1',
dict(
id=('a', 'b', 'c'),
kind=StrProxy('variable'),
external=0,
),
(ID('a', 'b', 'c'),
Symbol.KIND.VARIABLE,
False,
)),
('non-str 2',
dict(
id=('a', 'b', 'c'),
kind=Object(),
external=0,
),
(ID('a', 'b', 'c'),
'<object>',
False,
)),
]
for summary, kwargs, expected in tests:
with self.subTest(summary):
symbol = Symbol(**kwargs)
for field in Symbol._fields:
value = getattr(symbol, field)
if field == 'external':
self.assertIs(type(value), bool)
elif field == 'id':
self.assertIs(type(value), ID)
else:
self.assertIs(type(value), str)
self.assertEqual(tuple(symbol), expected)
def test_init_all_missing(self):
id = ID(None, None, 'spam')
symbol = Symbol(id)
self.assertEqual(symbol, (
id,
Symbol.KIND.VARIABLE,
None,
))
def test_fields(self):
id = ID('z', 'x', 'a')
symbol = Symbol(id, 'b', False)
self.assertEqual(symbol.id, id)
self.assertEqual(symbol.kind, 'b')
self.assertIs(symbol.external, False)
def test___getattr__(self):
id = ID('z', 'x', 'a')
symbol = Symbol(id, 'b', False)
filename = symbol.filename
funcname = symbol.funcname
name = symbol.name
self.assertEqual(filename, 'z')
self.assertEqual(funcname, 'x')
self.assertEqual(name, 'a')
def test_validate_typical(self):
id = ID('z', 'x', 'a')
symbol = Symbol(
id=id,
kind=Symbol.KIND.VARIABLE,
external=False,
)
symbol.validate() # This does not fail.
def test_validate_missing_field(self):
for field in Symbol._fields:
with self.subTest(field):
symbol = Symbol(**self.VALID_KWARGS)
symbol = symbol._replace(**{field: None})
with self.assertRaises(TypeError):
symbol.validate()
def test_validate_bad_field(self):
badch = tuple(c for c in string.punctuation + string.digits)
notnames = (
'1a',
'a.b',
'a-b',
'&a',
'a++',
) + badch
tests = [
('id', notnames),
('kind', ('bogus',)),
]
seen = set()
for field, invalid in tests:
for value in invalid:
if field != 'kind':
seen.add(value)
with self.subTest(f'{field}={value!r}'):
symbol = Symbol(**self.VALID_KWARGS)
symbol = symbol._replace(**{field: value})
with self.assertRaises(ValueError):
symbol.validate()
for field, invalid in tests:
if field == 'kind':
continue
valid = seen - set(invalid)
for value in valid:
with self.subTest(f'{field}={value!r}'):
symbol = Symbol(**self.VALID_KWARGS)
symbol = symbol._replace(**{field: value})
symbol.validate() # This does not fail.


@@ -1,6 +0,0 @@
import os.path
from test.support import load_package_tests
def load_tests(*args):
return load_package_tests(os.path.dirname(__file__), *args)


@@ -1,124 +0,0 @@
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.variables import info
from c_analyzer.variables.find import (
vars_from_binary,
)
class _Base(unittest.TestCase):
maxDiff = None
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
class VarsFromBinaryTests(_Base):
_return_iter_vars = ()
_return_get_symbol_resolver = None
def setUp(self):
super().setUp()
self.kwargs = dict(
_iter_vars=self._iter_vars,
_get_symbol_resolver=self._get_symbol_resolver,
)
def _iter_vars(self, binfile, resolve, handle_id):
self.calls.append(('_iter_vars', (binfile, resolve, handle_id)))
return [(v, v.id) for v in self._return_iter_vars]
def _get_symbol_resolver(self, known=None, dirnames=(), *,
handle_var,
filenames=None,
check_filename=None,
perfilecache=None,
):
self.calls.append(('_get_symbol_resolver',
(known, dirnames, handle_var, filenames,
check_filename, perfilecache)))
return self._return_get_symbol_resolver
def test_typical(self):
resolver = self._return_get_symbol_resolver = object()
variables = self._return_iter_vars = [
info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
]
known = object()
filenames = object()
found = list(vars_from_binary('python',
known=known,
filenames=filenames,
**self.kwargs))
self.assertEqual(found, [
info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
])
self.assertEqual(self.calls, [
('_get_symbol_resolver', (filenames, known, info.Variable.from_id, None, None, {})),
('_iter_vars', ('python', resolver, None)),
])
# self._return_iter_symbols = [
# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
# s_info.Symbol(('???', None, 'var_x'), 'variable', False),
# s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
# s_info.Symbol((None, None, '???'), 'other', False),
# ]
# known = object()
#
# vars_from_binary('python', knownvars=known, **this.kwargs)
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
#
# def test_no_symbols(self):
# self._return_iter_symbols = []
#
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
# XXX need functional test


@@ -1,244 +0,0 @@
import string
import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables.info import (
normalize_vartype, Variable
)
class NormalizeVartypeTests(unittest.TestCase):
def test_basic(self):
tests = [
(None, None),
('', ''),
('int', 'int'),
(PseudoStr('int'), 'int'),
(StrProxy('int'), 'int'),
]
for vartype, expected in tests:
with self.subTest(vartype):
normalized = normalize_vartype(vartype)
self.assertEqual(normalized, expected)
class VariableTests(unittest.TestCase):
VALID_ARGS = (
('x/y/z/spam.c', 'func', 'eggs'),
'static',
'int',
)
VALID_KWARGS = dict(zip(Variable._fields, VALID_ARGS))
VALID_EXPECTED = VALID_ARGS
def test_init_typical_global(self):
for storage in ('static', 'extern', 'implicit'):
with self.subTest(storage):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname=None,
name='eggs',
),
storage=storage,
vartype='int',
)
self.assertEqual(static, (
('x/y/z/spam.c', None, 'eggs'),
storage,
'int',
))
def test_init_typical_local(self):
for storage in ('static', 'local'):
with self.subTest(storage):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
),
storage=storage,
vartype='int',
)
self.assertEqual(static, (
('x/y/z/spam.c', 'func', 'eggs'),
storage,
'int',
))
def test_init_all_missing(self):
for value in ('', None):
with self.subTest(repr(value)):
static = Variable(
id=value,
storage=value,
vartype=value,
)
self.assertEqual(static, (
None,
None,
None,
))
def test_init_all_coerced(self):
id = ID('x/y/z/spam.c', 'func', 'spam')
tests = [
('str subclass',
dict(
id=(
PseudoStr('x/y/z/spam.c'),
PseudoStr('func'),
PseudoStr('spam'),
),
storage=PseudoStr('static'),
vartype=PseudoStr('int'),
),
(id,
'static',
'int',
)),
('non-str 1',
dict(
id=id,
storage=Object(),
vartype=Object(),
),
(id,
'<object>',
'<object>',
)),
('non-str 2',
dict(
id=id,
storage=StrProxy('static'),
vartype=StrProxy('variable'),
),
(id,
'static',
'variable',
)),
('non-str',
dict(
id=id,
storage=('a', 'b', 'c'),
vartype=('x', 'y', 'z'),
),
(id,
"('a', 'b', 'c')",
"('x', 'y', 'z')",
)),
]
for summary, kwargs, expected in tests:
with self.subTest(summary):
static = Variable(**kwargs)
for field in Variable._fields:
value = getattr(static, field)
if field == 'id':
self.assertIs(type(value), ID)
else:
self.assertIs(type(value), str)
self.assertEqual(tuple(static), expected)
def test_iterable(self):
static = Variable(**self.VALID_KWARGS)
id, storage, vartype = static
values = (id, storage, vartype)
for value, expected in zip(values, self.VALID_EXPECTED):
self.assertEqual(value, expected)
def test_fields(self):
static = Variable(('a', 'b', 'z'), 'x', 'y')
self.assertEqual(static.id, ('a', 'b', 'z'))
self.assertEqual(static.storage, 'x')
self.assertEqual(static.vartype, 'y')
def test___getattr__(self):
static = Variable(('a', 'b', 'z'), 'x', 'y')
self.assertEqual(static.filename, 'a')
self.assertEqual(static.funcname, 'b')
self.assertEqual(static.name, 'z')
def test_validate_typical(self):
validstorage = ('static', 'extern', 'implicit', 'local')
self.assertEqual(set(validstorage), set(Variable.STORAGE))
for storage in validstorage:
with self.subTest(storage):
static = Variable(
id=ID(
filename='x/y/z/spam.c',
funcname='func',
name='eggs',
),
storage=storage,
vartype='int',
)
static.validate() # This does not fail.
def test_validate_missing_field(self):
for field in Variable._fields:
with self.subTest(field):
static = Variable(**self.VALID_KWARGS)
static = static._replace(**{field: None})
with self.assertRaises(TypeError):
static.validate()
for field in ('storage', 'vartype'):
with self.subTest(field):
static = Variable(**self.VALID_KWARGS)
static = static._replace(**{field: UNKNOWN})
with self.assertRaises(TypeError):
static.validate()
def test_validate_bad_field(self):
badch = tuple(c for c in string.punctuation + string.digits)
notnames = (
'1a',
'a.b',
'a-b',
'&a',
'a++',
) + badch
tests = [
('id', ()), # Any non-empty str is okay.
('storage', ('external', 'global') + notnames),
('vartype', ()), # Any non-empty str is okay.
]
seen = set()
for field, invalid in tests:
for value in invalid:
seen.add(value)
with self.subTest(f'{field}={value!r}'):
static = Variable(**self.VALID_KWARGS)
static = static._replace(**{field: value})
with self.assertRaises(ValueError):
static.validate()
for field, invalid in tests:
if field == 'id':
continue
valid = seen - set(invalid)
for value in valid:
with self.subTest(f'{field}={value!r}'):
static = Variable(**self.VALID_KWARGS)
static = static._replace(**{field: value})
static.validate() # This does not fail.


@@ -1,139 +0,0 @@
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import ID
from c_analyzer.variables.info import Variable
from c_analyzer.variables.known import (
read_file,
from_file,
)
class _BaseTests(unittest.TestCase):
maxDiff = None
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
class ReadFileTests(_BaseTests):
_return_read_tsv = ()
def _read_tsv(self, *args):
self.calls.append(('_read_tsv', args))
return self._return_read_tsv
def test_typical(self):
lines = textwrap.dedent('''
filename funcname name kind declaration
file1.c - var1 variable static int
file1.c func1 local1 variable static int
file1.c - var2 variable int
file1.c func2 local2 variable char *
file2.c - var1 variable char *
''').strip().splitlines()
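# Turn the first four whitespace runs per line into tabs, producing the five TSV columns.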
lines = [re.sub(r'\s+', '\t', line, 4) for line in lines]
self._return_read_tsv = [tuple(v.strip() for v in line.split('\t'))
for line in lines[1:]]
known = list(read_file('known.tsv', _read_tsv=self._read_tsv))
self.assertEqual(known, [
('variable', ID('file1.c', '', 'var1'), 'static int'),
('variable', ID('file1.c', 'func1', 'local1'), 'static int'),
('variable', ID('file1.c', '', 'var2'), 'int'),
('variable', ID('file1.c', 'func2', 'local2'), 'char *'),
('variable', ID('file2.c', '', 'var1'), 'char *'),
])
self.assertEqual(self.calls, [
('_read_tsv',
('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
])
def test_empty(self):
self._return_read_tsv = []
known = list(read_file('known.tsv', _read_tsv=self._read_tsv))
self.assertEqual(known, [])
self.assertEqual(self.calls, [
('_read_tsv', ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
])
class FromFileTests(_BaseTests):
_return_read_file = ()
_return_handle_var = ()
def _read_file(self, infile):
self.calls.append(('_read_file', (infile,)))
return iter(self._return_read_file)
def _handle_var(self, varid, decl):
self.calls.append(('_handle_var', (varid, decl)))
var = self._return_handle_var.pop(0)
return var
def test_typical(self):
expected = [
Variable.from_parts('file1.c', '', 'var1', 'static int'),
Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
Variable.from_parts('file1.c', '', 'var2', 'int'),
Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
Variable.from_parts('file2.c', '', 'var1', 'char *'),
]
self._return_read_file = [('variable', v.id, v.vartype)
for v in expected]
self._return_handle_var = list(expected) # a copy
known = from_file('known.tsv',
handle_var=self._handle_var,
_read_file=self._read_file,
)
self.assertEqual(known, {
'variables': {v.id: v for v in expected},
})
self.assertEqual(self.calls, [
('_read_file', ('known.tsv',)),
*[('_handle_var', (v.id, v.vartype))
for v in expected],
])
def test_empty(self):
self._return_read_file = []
known = from_file('known.tsv',
handle_var=self._handle_var,
_read_file=self._read_file,
)
self.assertEqual(known, {
'variables': {},
})
self.assertEqual(self.calls, [
('_read_file', ('known.tsv',)),
])

View File

@ -1,60 +0,0 @@
import itertools
class PseudoStr(str):
pass
class StrProxy:
def __init__(self, value):
self.value = value
def __str__(self):
return self.value
def __bool__(self):
return bool(self.value)
class Object:
def __repr__(self):
return '<object>'
def wrapped_arg_combos(*args,
wrappers=(PseudoStr, StrProxy),
skip=(lambda w, i, v: not isinstance(v, str)),
):
"""Yield every possible combination of wrapped items for the given args.
Effectively, the wrappers are applied to the args according to the
powerset of the args indices. So the result includes the args
completely unwrapped.
If "skip" is supplied (default is to skip all non-str values) and
it returns True for a given arg index/value then that arg will
remain unwrapped.
Only unique results are returned. If an arg was skipped for one
of the combinations then it could end up matching one of the other
combinations. In that case only one of them will be yielded.
"""
if not args:
return
indices = list(range(len(args)))
# The powerset (from recipe in the itertools docs).
combos = itertools.chain.from_iterable(itertools.combinations(indices, r)
for r in range(len(indices)+1))
seen = set()
for combo in combos:
for wrap in wrappers:
indexes = []
applied = list(args)
for i in combo:
arg = args[i]
if skip and skip(wrap, i, arg):
continue
indexes.append(i)
applied[i] = wrap(arg)
key = (wrap, tuple(indexes)) if indexes else ()  # dedupe the unwrapped result
if key not in seen:
yield tuple(applied)
seen.add(key)
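# A minimal usage sketch (the _demo_* helper is illustrative, not part of
# the original file): with one string arg and the default wrappers we get
# the arg unwrapped plus one variant per wrapper.
def _demo_wrapped_arg_combos():
    combos = list(wrapped_arg_combos('spam'))
    assert ('spam',) in combos  # the completely unwrapped args
    assert any(type(c[0]) is PseudoStr for c in combos)
    assert any(type(c[0]) is StrProxy for c in combos)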

View File

@ -36,6 +36,10 @@ should be run to ensure that no new globals have been added:
python3 Tools/c-analyzer/check-c-globals.py
You can also use the more generic tool:
python3 Tools/c-analyzer/c-analyzer.py
If it reports any globals then they should be resolved. If the globals
are runtime state then they should be folded into _PyRuntimeState.
Otherwise they should be added to ignored-globals.txt.

View File

@ -0,0 +1,7 @@
from cpython.__main__ import parse_args, main, configure_logger
cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
configure_logger(verbosity)
with traceback_cm:
main(cmd, cmd_kwargs)

View File

@ -1,9 +0,0 @@
# This is a script equivalent of running "python -m test.test_c_globals.cg".
from cpython.__main__ import parse_args, main
# This is effectively copied from cg/__main__.py:
if __name__ == '__main__':
cmd, cmdkwargs = parse_args()
main(cmd, cmdkwargs)

View File

@ -0,0 +1,103 @@
from c_parser import (
parse_files as _parse_files,
)
from c_parser.info import (
KIND,
TypeDeclaration,
filter_by_kind,
collate_by_kind_group,
resolve_parsed,
)
from . import (
analyze as _analyze,
datafiles as _datafiles,
)
from .info import Analysis
def analyze(filenames, **kwargs):
results = iter_analysis_results(filenames, **kwargs)
return Analysis.from_results(results)
def iter_analysis_results(filenames, *,
known=None,
**kwargs
):
decls = iter_decls(filenames, **kwargs)
yield from analyze_decls(decls, known)
def iter_decls(filenames, *,
kinds=None,
parse_files=_parse_files,
**kwargs
):
kinds = KIND.DECLS if kinds is None else (KIND.DECLS & set(kinds))
parse_files = parse_files or _parse_files
parsed = parse_files(filenames, **kwargs)
parsed = filter_by_kind(parsed, kinds)
for item in parsed:
yield resolve_parsed(item)
def analyze_decls(decls, known, *,
analyze_resolved=None,
handle_unresolved=True,
relroot=None,
):
knowntypes, knowntypespecs = _datafiles.get_known(
known,
handle_unresolved=handle_unresolved,
analyze_resolved=analyze_resolved,
relroot=relroot,
)
decls = list(decls)
collated = collate_by_kind_group(decls)
types = {decl: None for decl in collated['type']}
typespecs = _analyze.get_typespecs(types)
def analyze_decl(decl):
return _analyze.analyze_decl(
decl,
typespecs,
knowntypespecs,
types,
knowntypes,
analyze_resolved=analyze_resolved,
)
_analyze.analyze_type_decls(types, analyze_decl, handle_unresolved)
for decl in decls:
if decl in types:
resolved = types[decl]
else:
resolved = analyze_decl(decl)
if resolved and handle_unresolved:
typedeps, _ = resolved
if not isinstance(typedeps, TypeDeclaration):
if not typedeps or None in typedeps:
raise NotImplementedError((decl, resolved))
yield decl, resolved
#######################################
# checks
def check_all(analysis, checks, *, failfast=False):
for check in checks or ():
for data, failure in check(analysis):
if failure is None:
continue
yield data, failure
if failfast:
yield None, None
break
else:
continue
# We failed fast.
break
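# A consumer sketch (illustrative only; "failing_check" is a hypothetical
# check): a check is any callable that yields (data, failure) pairs, and
# with failfast=True a (None, None) sentinel marks the early stop, which is
# how cmd_check() in __main__.py consumes this generator.
def _demo_check_all():
    def failing_check(analysis):
        for item in analysis:
            yield item, 'not supported'
    results = list(check_all(['spam', 'eggs'], [failing_check], failfast=True))
    assert results == [('spam', 'not supported'), (None, None)]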

View File

@ -0,0 +1,501 @@
import io
import logging
import os.path
import re
import sys
from c_common.logging import VERBOSITY, Printer
from c_common.scriptutil import (
add_verbosity_cli,
add_traceback_cli,
add_sepval_cli,
add_files_cli,
add_commands_cli,
process_args_by_key,
configure_logger,
get_prog,
filter_filenames,
iter_marks,
)
from c_parser.info import KIND, is_type_decl
from . import (
analyze as _analyze,
check_all as _check_all,
datafiles as _datafiles,
)
KINDS = [
KIND.TYPEDEF,
KIND.STRUCT,
KIND.UNION,
KIND.ENUM,
KIND.FUNCTION,
KIND.VARIABLE,
KIND.STATEMENT,
]
logger = logging.getLogger(__name__)
#######################################
# table helpers
TABLE_SECTIONS = {
'types': (
['kind', 'name', 'data', 'file'],
is_type_decl,
(lambda v: (v.kind.value, v.filename or '', v.name)),
),
'typedefs': 'types',
'structs': 'types',
'unions': 'types',
'enums': 'types',
'functions': (
['name', 'data', 'file'],
(lambda kind: kind is KIND.FUNCTION),
(lambda v: (v.filename or '', v.name)),
),
'variables': (
['name', 'parent', 'data', 'file'],
(lambda kind: kind is KIND.VARIABLE),
(lambda v: (v.filename or '', str(v.parent) if v.parent else '', v.name)),
),
'statements': (
['file', 'parent', 'data'],
(lambda kind: kind is KIND.STATEMENT),
(lambda v: (v.filename or '', str(v.parent) if v.parent else '', v.name)),
),
KIND.TYPEDEF: 'typedefs',
KIND.STRUCT: 'structs',
KIND.UNION: 'unions',
KIND.ENUM: 'enums',
KIND.FUNCTION: 'functions',
KIND.VARIABLE: 'variables',
KIND.STATEMENT: 'statements',
}
def _render_table(items, columns, relroot=None):
# XXX improve this
header = '\t'.join(columns)
div = '--------------------'
yield header
yield div
total = 0
for item in items:
rowdata = item.render_rowdata(columns)
row = [rowdata[c] for c in columns]
if relroot and 'file' in columns:
index = columns.index('file')
row[index] = os.path.relpath(row[index], relroot)
yield '\t'.join(row)
total += 1
yield div
yield f'total: {total}'
def build_section(name, groupitems, *, relroot=None):
info = TABLE_SECTIONS[name]
while type(info) is not tuple:
if name in KINDS:
name = info
info = TABLE_SECTIONS[info]
columns, match_kind, sortkey = info
items = (v for v in groupitems if match_kind(v.kind))
items = sorted(items, key=sortkey)
def render():
yield ''
yield f'{name}:'
yield ''
for line in _render_table(items, columns, relroot):
yield line
return items, render
#######################################
# the checks
CHECKS = {
#'globals': _check_globals,
}
def add_checks_cli(parser, checks=None, *, add_flags=None):
default = False
if not checks:
checks = list(CHECKS)
default = True
elif isinstance(checks, str):
checks = [checks]
if (add_flags is None and len(checks) > 1) or default:
add_flags = True
process_checks = add_sepval_cli(parser, '--check', 'checks', checks)
if add_flags:
for check in checks:
parser.add_argument(f'--{check}', dest='checks',
action='append_const', const=check)
return [
process_checks,
]
def _get_check_handlers(fmt, printer, verbosity=VERBOSITY):
div = None
def handle_after():
pass
if not fmt:
div = ''
def handle_failure(failure, data):
data = repr(data)
if verbosity >= 3:
logger.info(f'failure: {failure}')
logger.info(f'data: {data}')
else:
logger.warning(f'failure: {failure} (data: {data})')
elif fmt == 'raw':
def handle_failure(failure, data):
print(f'{failure!r} {data!r}')
elif fmt == 'brief':
def handle_failure(failure, data):
parent = data.parent or ''
funcname = parent if isinstance(parent, str) else parent.name
name = f'({funcname}).{data.name}' if funcname else data.name
failure = failure.split('\t')[0]
print(f'{data.filename}:{name} - {failure}')
elif fmt == 'summary':
def handle_failure(failure, data):
parent = data.parent or ''
funcname = parent if isinstance(parent, str) else parent.name
print(f'{data.filename:35}\t{funcname or "-":35}\t{data.name:40}\t{failure}')
elif fmt == 'full':
div = ''
def handle_failure(failure, data):
name = data.shortkey if data.kind is KIND.VARIABLE else data.name
parent = data.parent or ''
funcname = parent if isinstance(parent, str) else parent.name
known = 'yes' if data.is_known else '*** NO ***'
print(f'{data.kind.value} {name!r} failed ({failure})')
print(f' file: {data.filename}')
print(f' func: {funcname or "-"}')
print(f' name: {data.name}')
print(f' data: ...')
print(f' type unknown: {known}')
else:
if fmt in FORMATS:
raise NotImplementedError(fmt)
raise ValueError(f'unsupported fmt {fmt!r}')
return handle_failure, handle_after, div
#######################################
# the formats
def fmt_raw(analysis):
for item in analysis:
yield from item.render('raw')
def fmt_brief(analysis):
# XXX Support sorting.
items = sorted(analysis)
for kind in KINDS:
if kind is KIND.STATEMENT:
continue
for item in items:
if item.kind is not kind:
continue
yield from item.render('brief')
yield f' total: {len(items)}'
def fmt_summary(analysis):
# XXX Support sorting and grouping.
items = list(analysis)
total = len(items)
def section(name):
_, render = build_section(name, items)
yield from render()
yield from section('types')
yield from section('functions')
yield from section('variables')
yield from section('statements')
yield ''
# yield f'grand total: {len(supported) + len(unsupported)}'
yield f'grand total: {total}'
def fmt_full(analysis):
# XXX Support sorting.
items = sorted(analysis, key=lambda v: v.key)
yield ''
for item in items:
yield from item.render('full')
yield ''
yield f'total: {len(items)}'
FORMATS = {
'raw': fmt_raw,
'brief': fmt_brief,
'summary': fmt_summary,
'full': fmt_full,
}
def add_output_cli(parser, *, default='summary'):
parser.add_argument('--format', dest='fmt', default=default, choices=tuple(FORMATS))
def process_args(args):
pass
return process_args
#######################################
# the commands
def _cli_check(parser, checks=None, **kwargs):
if isinstance(checks, str):
checks = [checks]
if checks is False:
process_checks = None
elif checks is None:
process_checks = add_checks_cli(parser)
elif len(checks) == 1 and type(checks) is not dict and re.match(r'^<.*>$', checks[0]):
check = checks[0][1:-1]
def process_checks(args):
args.checks = [check]
else:
process_checks = add_checks_cli(parser, checks=checks)
process_output = add_output_cli(parser, default=None)
process_files = add_files_cli(parser, **kwargs)
return [
process_checks,
process_output,
process_files,
]
def cmd_check(filenames, *,
checks=None,
ignored=None,
fmt=None,
relroot=None,
failfast=False,
iter_filenames=None,
verbosity=VERBOSITY,
_analyze=_analyze,
_CHECKS=CHECKS,
**kwargs
):
if not checks:
checks = _CHECKS
elif isinstance(checks, str):
checks = [checks]
checks = [_CHECKS[c] if isinstance(c, str) else c
for c in checks]
printer = Printer(verbosity)
(handle_failure, handle_after, div
) = _get_check_handlers(fmt, printer, verbosity)
filenames = filter_filenames(filenames, iter_filenames)
logger.info('analyzing...')
analyzed = _analyze(filenames, **kwargs)
if relroot:
analyzed.fix_filenames(relroot)
logger.info('checking...')
numfailed = 0
for data, failure in _check_all(analyzed, checks, failfast=failfast):
if data is None:
printer.info('stopping after one failure')
break
if div is not None and numfailed > 0:
printer.info(div)
numfailed += 1
handle_failure(failure, data)
handle_after()
printer.info('-------------------------')
logger.info(f'total failures: {numfailed}')
logger.info('done checking')
if numfailed > 0:
sys.exit(numfailed)
def _cli_analyze(parser, **kwargs):
process_output = add_output_cli(parser)
process_files = add_files_cli(parser, **kwargs)
return [
process_output,
process_files,
]
# XXX Support filtering by kind.
def cmd_analyze(filenames, *,
fmt=None,
iter_filenames=None,
verbosity=None,
_analyze=_analyze,
formats=FORMATS,
**kwargs
):
verbosity = verbosity if verbosity is not None else 3
try:
do_fmt = formats[fmt]
except KeyError:
raise ValueError(f'unsupported fmt {fmt!r}')
filenames = filter_filenames(filenames, iter_filenames)
if verbosity == 2:
def iter_filenames(filenames=filenames):
marks = iter_marks()
for filename in filenames:
print(next(marks), end='')
yield filename
filenames = iter_filenames()
elif verbosity > 2:
def iter_filenames(filenames=filenames):
for filename in filenames:
print(f'<{filename}>')
yield filename
filenames = iter_filenames()
logger.info('analyzing...')
analyzed = _analyze(filenames, **kwargs)
for line in do_fmt(analyzed):
print(line)
def _cli_data(parser, filenames=None, known=None):
ArgumentParser = type(parser)
common = ArgumentParser(add_help=False)
if filenames is None:
common.add_argument('filenames', metavar='FILE', nargs='+')
subs = parser.add_subparsers(dest='datacmd')
sub = subs.add_parser('show', parents=[common])
if known is None:
sub.add_argument('--known', required=True)
sub = subs.add_parser('dump')
if known is None:
sub.add_argument('--known')
sub.add_argument('--show', action='store_true')
sub = subs.add_parser('check')
if known is None:
sub.add_argument('--known', required=True)
return None
def cmd_data(datacmd, filenames, known=None, *,
_analyze=_analyze,
formats=FORMATS,
extracolumns=None,
relroot=None,
**kwargs
):
kwargs.pop('verbosity', None)
usestdout = kwargs.pop('show', None)
if datacmd == 'show':
do_fmt = formats['summary']
if isinstance(known, str):
known, _ = _datafiles.get_known(known, extracolumns, relroot=relroot)
for line in do_fmt(known):
print(line)
elif datacmd == 'dump':
analyzed = _analyze(filenames, **kwargs)
if known is None or usestdout:
outfile = io.StringIO()
_datafiles.write_known(analyzed, outfile, extracolumns,
relroot=relroot)
print(outfile.getvalue())
else:
_datafiles.write_known(analyzed, known, extracolumns,
relroot=relroot)
elif datacmd == 'check':
raise NotImplementedError(datacmd)
else:
raise ValueError(f'unsupported data command {datacmd!r}')
COMMANDS = {
'check': (
'analyze and fail if the given C source/header files have any problems',
[_cli_check],
cmd_check,
),
'analyze': (
'report on the state of the given C source/header files',
[_cli_analyze],
cmd_analyze,
),
'data': (
'check/manage local data (e.g. known types, ignored vars, caches)',
[_cli_data],
cmd_data,
),
}
#######################################
# the script
def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset=None):
import argparse
parser = argparse.ArgumentParser(
prog=prog or get_prog(),
)
processors = add_commands_cli(
parser,
commands={k: v[1] for k, v in COMMANDS.items()},
commonspecs=[
add_verbosity_cli,
add_traceback_cli,
],
subset=subset,
)
args = parser.parse_args(argv)
ns = vars(args)
cmd = ns.pop('cmd')
verbosity, traceback_cm = process_args_by_key(
args,
processors[cmd],
['verbosity', 'traceback_cm'],
)
# "verbosity" is sent to the commands, so we put it back.
args.verbosity = verbosity
return cmd, ns, verbosity, traceback_cm
def main(cmd, cmd_kwargs):
try:
run_cmd = COMMANDS[cmd][-1]
except KeyError:
raise ValueError(f'unsupported cmd {cmd!r}')
run_cmd(**cmd_kwargs)
if __name__ == '__main__':
cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
configure_logger(verbosity)
with traceback_cm:
main(cmd, cmd_kwargs)

View File

@ -0,0 +1,307 @@
from c_parser.info import (
KIND,
TypeDeclaration,
POTSType,
FuncPtr,
is_pots,
is_funcptr,
)
from .info import (
IGNORED,
UNKNOWN,
is_system_type,
SystemType,
)
def get_typespecs(typedecls):
typespecs = {}
for decl in typedecls:
if decl.shortkey not in typespecs:
typespecs[decl.shortkey] = [decl]
else:
typespecs[decl.shortkey].append(decl)
return typespecs
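# A small illustration (the stand-in Decl tuple is hypothetical, not one of
# the real decl classes): only the "shortkey" attribute matters here, so
# same-named types from different files get grouped for later disambiguation.
def _demo_get_typespecs():
    from collections import namedtuple
    Decl = namedtuple('Decl', 'shortkey filename')  # minimal stub
    decls = [Decl('spam', 'a.c'), Decl('spam', 'b.c'), Decl('eggs', 'a.c')]
    specs = get_typespecs(decls)
    assert sorted(specs) == ['eggs', 'spam']
    assert len(specs['spam']) == 2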
def analyze_decl(decl, typespecs, knowntypespecs, types, knowntypes, *,
analyze_resolved=None):
resolved = resolve_decl(decl, typespecs, knowntypespecs, types)
if resolved is None:
# The decl is supposed to be skipped or ignored.
return None
if analyze_resolved is None:
return resolved, None
return analyze_resolved(resolved, decl, types, knowntypes)
# This alias helps us avoid name collisions.
_analyze_decl = analyze_decl
def analyze_type_decls(types, analyze_decl, handle_unresolved=True):
unresolved = set(types)
while unresolved:
updated = []
for decl in unresolved:
resolved = analyze_decl(decl)
if resolved is None:
# The decl should be skipped or ignored.
types[decl] = IGNORED
updated.append(decl)
continue
typedeps, _ = resolved
if typedeps is None:
raise NotImplementedError(decl)
if UNKNOWN in typedeps:
# At least one dependency is unknown, so this decl
# is not resolvable.
types[decl] = UNKNOWN
updated.append(decl)
continue
if None in typedeps:
# XXX
# Handle direct recursive types first.
nonrecursive = 1
if decl.kind is KIND.STRUCT or decl.kind is KIND.UNION:
nonrecursive = 0
i = 0
for member, dep in zip(decl.members, typedeps):
if dep is None:
if member.vartype.typespec != decl.shortkey:
nonrecursive += 1
else:
typedeps[i] = decl
i += 1
if nonrecursive:
# We don't have all dependencies resolved yet.
continue
types[decl] = resolved
updated.append(decl)
if updated:
for decl in updated:
unresolved.remove(decl)
else:
# XXX
# Handle indirect recursive types.
...
# We couldn't resolve the rest.
# Let the caller deal with it!
break
if unresolved and handle_unresolved:
if handle_unresolved is True:
handle_unresolved = _handle_unresolved
handle_unresolved(unresolved, types, analyze_decl)
def resolve_decl(decl, typespecs, knowntypespecs, types):
if decl.kind is KIND.ENUM:
typedeps = []
else:
if decl.kind is KIND.VARIABLE:
vartypes = [decl.vartype]
elif decl.kind is KIND.FUNCTION:
vartypes = [decl.signature.returntype]
elif decl.kind is KIND.TYPEDEF:
vartypes = [decl.vartype]
elif decl.kind is KIND.STRUCT or decl.kind is KIND.UNION:
vartypes = [m.vartype for m in decl.members]
else:
# Skip this one!
return None
typedeps = []
for vartype in vartypes:
typespec = vartype.typespec
if is_pots(typespec):
typedecl = POTSType(typespec)
elif is_system_type(typespec):
typedecl = SystemType(typespec)
elif is_funcptr(vartype):
typedecl = FuncPtr(vartype)
else:
typedecl = find_typedecl(decl, typespec, typespecs)
if typedecl is None:
typedecl = find_typedecl(decl, typespec, knowntypespecs)
elif not isinstance(typedecl, TypeDeclaration):
raise NotImplementedError(repr(typedecl))
if typedecl is None:
# We couldn't find it!
typedecl = UNKNOWN
elif typedecl not in types:
# XXX How can this happen?
typedecl = UNKNOWN
elif types[typedecl] is UNKNOWN:
typedecl = UNKNOWN
elif types[typedecl] is IGNORED:
# We don't care if it didn't resolve.
pass
elif types[typedecl] is None:
# The typedecl for the typespec hasn't been resolved yet.
typedecl = None
typedeps.append(typedecl)
return typedeps
def find_typedecl(decl, typespec, typespecs):
specdecls = typespecs.get(typespec)
if not specdecls:
return None
filename = decl.filename
if len(specdecls) == 1:
typedecl, = specdecls
if '-' in typespec and typedecl.filename != filename:
# Inlined types are always in the same file.
return None
return typedecl
# Decide which one to return.
candidates = []
samefile = None
for typedecl in specdecls:
type_filename = typedecl.filename
if type_filename == filename:
if samefile is not None:
# We expect type names to be unique in a file.
raise NotImplementedError((decl, samefile, typedecl))
samefile = typedecl
elif filename.endswith('.c') and not type_filename.endswith('.h'):
# If the decl is in a source file then we expect the
# type to be in the same file or in a header file.
continue
candidates.append(typedecl)
if not candidates:
return None
elif len(candidates) == 1:
winner, = candidates
# XXX Check for inline?
elif '-' in typespec:
# Inlined types are always in the same file.
winner = samefile
elif samefile is not None:
# Favor types in the same file.
winner = samefile
else:
# We don't know which to return.
raise NotImplementedError((decl, candidates))
return winner
#############################
# handling unresolved decls
class Skipped(TypeDeclaration):
def __init__(self):
_file = _name = _data = _parent = None
super().__init__(_file, _name, _data, _parent, _shortkey='<skipped>')
_SKIPPED = Skipped()
del Skipped
def _handle_unresolved(unresolved, types, analyze_decl):
#raise NotImplementedError(unresolved)
dump = True
dump = False
if dump:
print()
for decl in types: # Preserve the original order.
if decl not in unresolved:
assert types[decl] is not None, decl
if types[decl] in (UNKNOWN, IGNORED):
unresolved.add(decl)
if dump:
_dump_unresolved(decl, types, analyze_decl)
print()
else:
assert types[decl][0] is not None, (decl, types[decl])
assert None not in types[decl][0], (decl, types[decl])
else:
assert types[decl] is None
if dump:
_dump_unresolved(decl, types, analyze_decl)
print()
#raise NotImplementedError
for decl in unresolved:
types[decl] = ([_SKIPPED], None)
for decl in types:
assert types[decl]
def _dump_unresolved(decl, types, analyze_decl):
if isinstance(decl, str):
typespec = decl
decl, = (d for d in types if d.shortkey == typespec)
elif type(decl) is tuple:
filename, typespec = decl
if '-' in typespec:
found = [d for d in types
if d.shortkey == typespec and d.filename == filename]
#if not found:
# raise NotImplementedError(decl)
decl, = found
else:
found = [d for d in types if d.shortkey == typespec]
if not found:
print(f'*** {typespec} ???')
return
#raise NotImplementedError(decl)
else:
decl, = found
resolved = analyze_decl(decl)
if resolved:
typedeps, _ = resolved or (None, None)
if decl.kind is KIND.STRUCT or decl.kind is KIND.UNION:
print(f'*** {decl.shortkey} {decl.filename}')
for member, mtype in zip(decl.members, typedeps):
typespec = member.vartype.typespec
if typespec == decl.shortkey:
print(f' ~~~~: {typespec:20} - {member!r}')
continue
status = None
if is_pots(typespec):
mtype = typespec
status = 'okay'
elif is_system_type(typespec):
mtype = typespec
status = 'okay'
elif mtype is None:
if '-' in member.vartype.typespec:
mtype, = [d for d in types
if d.shortkey == member.vartype.typespec
and d.filename == decl.filename]
else:
found = [d for d in types
if d.shortkey == typespec]
if not found:
print(f' ???: {typespec:20}')
continue
mtype, = found
if status is None:
status = 'okay' if types.get(mtype) else 'oops'
if mtype is _SKIPPED:
status = 'okay'
mtype = '<skipped>'
elif isinstance(mtype, FuncPtr):
status = 'okay'
mtype = str(mtype.vartype)
elif not isinstance(mtype, str):
if hasattr(mtype, 'vartype'):
if is_funcptr(mtype.vartype):
status = 'okay'
mtype = str(mtype).rpartition('(')[0].rstrip()
status = ' okay' if status == 'okay' else f'--> {status}'
print(f' {status}: {typespec:20} - {member!r} ({mtype})')
else:
print(f'*** {decl} ({decl.vartype!r})')
if decl.vartype.typespec.startswith('struct ') or is_funcptr(decl.vartype):
_dump_unresolved(
(decl.filename, decl.vartype.typespec),
types,
analyze_decl,
)

View File

@ -1,124 +0,0 @@
import glob
import os
import os.path
# XXX need tests:
# * walk_tree()
# * glob_tree()
# * iter_files_by_suffix()
C_SOURCE_SUFFIXES = ('.c', '.h')
def _walk_tree(root, *,
_walk=os.walk,
):
# A wrapper around os.walk that resolves the filenames.
for parent, _, names in _walk(root):
for name in names:
yield os.path.join(parent, name)
def walk_tree(root, *,
suffix=None,
walk=_walk_tree,
):
"""Yield each file in the tree under the given directory name.
If "suffix" is provided then only files with that suffix will
be included.
"""
if suffix and not isinstance(suffix, str):
raise ValueError('suffix must be a string')
for filename in walk(root):
if suffix and not filename.endswith(suffix):
continue
yield filename
def glob_tree(root, *,
suffix=None,
_glob=glob.iglob,
_escape=glob.escape,
_join=os.path.join,
):
"""Yield each file in the tree under the given directory name.
If "suffix" is provided then only files with that suffix will
be included.
"""
suffix = suffix or ''
if not isinstance(suffix, str):
raise ValueError('suffix must be a string')
for filename in _glob(_join(_escape(root), f'*{suffix}')):
yield filename
for filename in _glob(_join(_escape(root), f'**/*{suffix}')):
yield filename
def iter_files(root, suffix=None, relparent=None, *,
get_files=None,
_glob=glob_tree,
_walk=walk_tree,
):
"""Yield each file in the tree under the given directory name.
If "root" is a non-string iterable then do the same for each of
those trees.
If "suffix" is provided then only files with that suffix will
be included.
if "relparent" is provided then it is used to resolve each
filename as a relative path.
"""
if get_files is None:
get_files = os.walk
if not isinstance(root, str):
roots = root
for root in roots:
yield from iter_files(root, suffix, relparent,
get_files=get_files,
_glob=_glob, _walk=_walk)
return
# Use the right "walk" function.
if get_files in (glob.glob, glob.iglob, glob_tree):
get_files = _glob
else:
_files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))
# Handle a single suffix.
if suffix and not isinstance(suffix, str):
filenames = get_files(root)
suffix = tuple(suffix)
else:
filenames = get_files(root, suffix=suffix)
suffix = None
for filename in filenames:
if suffix and not isinstance(suffix, str): # multiple suffixes
if not filename.endswith(suffix):
continue
if relparent:
filename = os.path.relpath(filename, relparent)
yield filename
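# A usage sketch (illustrative; "fake_walk" is a hypothetical stand-in): any
# callable yielding filenames can be plugged in via "get_files", which makes
# the multi-suffix filtering easy to see without touching the filesystem.
def _demo_iter_files():
    def fake_walk(root, *, suffix=None):
        for name in ('spam.c', 'spam.h', 'notes.txt'):
            yield os.path.join(root, name)
    files = list(iter_files('src', ('.c', '.h'), get_files=fake_walk))
    assert files == [os.path.join('src', 'spam.c'),
                     os.path.join('src', 'spam.h')]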
def iter_files_by_suffix(root, suffixes, relparent=None, *,
walk=walk_tree,
_iter_files=iter_files,
):
"""Yield each file in the tree that has the given suffixes.
Unlike iter_files(), the results are in the original suffix order.
"""
if isinstance(suffixes, str):
suffixes = [suffixes]
# XXX Ignore repeated suffixes?
for suffix in suffixes:
yield from _iter_files(root, suffix, relparent)

View File

@ -1,138 +0,0 @@
from collections import namedtuple
import re
from .util import classonly, _NTBase
# XXX need tests:
# * ID.match()
UNKNOWN = '???'
# Does not start with digit and contains at least one letter.
NAME_RE = re.compile(r'(?!\d)(?=.*?[A-Za-z])\w+', re.ASCII)
class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
"""A unique ID for a single symbol or declaration."""
__slots__ = ()
# XXX Add optional conditions (tuple of strings) field.
#conditions = Slot()
@classonly
def from_raw(cls, raw):
if not raw:
return None
if isinstance(raw, str):
return cls(None, None, raw)
try:
name, = raw
filename = None
except ValueError:
try:
filename, name = raw
except ValueError:
return super().from_raw(raw)
return cls(filename, None, name)
def __new__(cls, filename, funcname, name):
self = super().__new__(
cls,
filename=str(filename) if filename else None,
funcname=str(funcname) if funcname else None,
name=str(name) if name else None,
)
#cls.conditions.set(self, tuple(str(s) if s else None
# for s in conditions or ()))
return self
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
if not self.name:
raise TypeError('missing name')
if not NAME_RE.fullmatch(self.name):
raise ValueError(
f'name must be an identifier, got {self.name!r}')
# Symbols from a binary might not have filename/funcname info.
if self.funcname:
if not self.filename:
raise TypeError('missing filename')
if not NAME_RE.fullmatch(self.funcname) and self.funcname != UNKNOWN:
raise ValueError(
f'funcname must be an identifier, got {self.funcname!r}')
# XXX Require the filename (at least UNKNOWN)?
# XXX Check the filename?
@property
def islocal(self):
return self.funcname is not None
def match(self, other, *,
match_files=(lambda f1, f2: f1 == f2),
):
"""Return True if the two match.
At least one of the two must be completely valid (no UNKNOWN
anywhere). Otherwise False is returned. The remaining one
*may* have UNKNOWN for both funcname and filename. It must
have a valid name though.
The caller is responsible for knowing which of the two is valid
(and which to use if both are valid).
"""
# First check the name.
if self.name is None:
return False
if other.name != self.name:
return False
# Then check the filename.
if self.filename is None:
return False
if other.filename is None:
return False
if self.filename == UNKNOWN:
# "other" must be the valid one.
if other.funcname == UNKNOWN:
return False
elif self.funcname != UNKNOWN:
# XXX Try matching funcname even though we don't
# know the filename?
raise NotImplementedError
else:
return True
elif other.filename == UNKNOWN:
# "self" must be the valid one.
if self.funcname == UNKNOWN:
return False
elif other.funcname != UNKNOWN:
# XXX Try matching funcname even though we don't
# know the filename?
raise NotImplementedError
else:
return True
elif not match_files(self.filename, other.filename):
return False
# Finally, check the funcname.
if self.funcname == UNKNOWN:
# "other" must be the valid one.
if other.funcname == UNKNOWN:
return False
else:
return other.funcname is not None
elif other.funcname == UNKNOWN:
# "self" must be the valid one.
if self.funcname == UNKNOWN:
return False
else:
return self.funcname is not None
elif self.funcname == other.funcname:
# Both are valid.
return True
return False
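# A usage sketch (the _demo_* helper is illustrative, not part of the
# original module): a fully specified ID matches a mostly-UNKNOWN one as
# long as the names agree and one side is completely valid.
def _demo_match():
    full = ID('spam.c', 'func', 'eggs')
    partial = ID(UNKNOWN, UNKNOWN, 'eggs')
    assert full.match(partial)
    assert not full.match(ID(UNKNOWN, UNKNOWN, 'ham'))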

View File

@ -1,11 +0,0 @@
def basic(variables, *,
_print=print):
"""Print each row simply."""
for var in variables:
if var.funcname:
line = f'{var.filename}:{var.funcname}():{var.name}'
else:
line = f'{var.filename}:{var.name}'
line = f'{line:<64} {var.vartype}'
_print(line)

View File

@ -0,0 +1,109 @@
import c_common.tables as _tables
import c_parser.info as _info
import c_parser.datafiles as _parser
from . import analyze as _analyze
#############################
# "known" decls
EXTRA_COLUMNS = [
#'typedecl',
]
def analyze_known(known, *,
analyze_resolved=None,
handle_unresolved=True,
):
knowntypes = knowntypespecs = {}
collated = _info.collate_by_kind_group(known)
types = {decl: None for decl in collated['type']}
typespecs = _analyze.get_typespecs(types)
def analyze_decl(decl):
return _analyze.analyze_decl(
decl,
typespecs,
knowntypespecs,
types,
knowntypes,
analyze_resolved=analyze_resolved,
)
_analyze.analyze_type_decls(types, analyze_decl, handle_unresolved)
return types, typespecs
def get_known(known, extracolumns=None, *,
analyze_resolved=None,
handle_unresolved=True,
relroot=None,
):
if isinstance(known, str):
known = read_known(known, extracolumns, relroot)
return analyze_known(
known,
handle_unresolved=handle_unresolved,
analyze_resolved=analyze_resolved,
)
def read_known(infile, extracolumns=None, relroot=None):
extracolumns = EXTRA_COLUMNS + (
list(extracolumns) if extracolumns else []
)
known = {}
for decl, extra in _parser.iter_decls_tsv(infile, extracolumns, relroot):
known[decl] = extra
return known
def write_known(rows, outfile, extracolumns=None, *,
relroot=None,
backup=True,
):
extracolumns = EXTRA_COLUMNS + (
list(extracolumns) if extracolumns else []
)
_parser.write_decls_tsv(
rows,
outfile,
extracolumns,
relroot=relroot,
backup=backup,
)
#############################
# ignored vars
IGNORED_COLUMNS = [
'filename',
'funcname',
'name',
'reason',
]
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
def read_ignored(infile):
return dict(_iter_ignored(infile))
def _iter_ignored(infile):
for row in _tables.read_table(infile, IGNORED_HEADER, sep='\t'):
*varidinfo, reason = row
varid = _info.DeclID.from_row(varidinfo)
yield varid, reason
def write_ignored(variables, outfile):
raise NotImplementedError
reason = '???'
#if not isinstance(varid, DeclID):
# varid = getattr(varid, 'parsed', varid).id
_tables.write_table(
outfile,
IGNORED_HEADER,
sep='\t',
rows=(r.render_rowdata() + (reason,) for r in variables),
)

View File

@ -0,0 +1,353 @@
from collections import namedtuple
from c_common.clsutil import classonly
import c_common.misc as _misc
from c_parser.info import (
KIND,
HighlevelParsedItem,
Declaration,
TypeDeclaration,
is_type_decl,
is_process_global,
)
IGNORED = _misc.Labeled('IGNORED')
UNKNOWN = _misc.Labeled('UNKNOWN')
# XXX Use known.tsv for these?
SYSTEM_TYPES = {
'int8_t',
'uint8_t',
'int16_t',
'uint16_t',
'int32_t',
'uint32_t',
'int64_t',
'uint64_t',
'size_t',
'ssize_t',
'intptr_t',
'uintptr_t',
'wchar_t',
'',
# OS-specific
'pthread_cond_t',
'pthread_mutex_t',
'pthread_key_t',
'atomic_int',
'atomic_uintptr_t',
'',
# lib-specific
'WINDOW', # curses
'XML_LChar',
'XML_Size',
'XML_Parser',
'enum XML_Error',
'enum XML_Status',
'',
}
def is_system_type(typespec):
return typespec in SYSTEM_TYPES
class SystemType(TypeDeclaration):
def __init__(self, name):
super().__init__(None, name, None, None, _shortkey=name)
class Analyzed:
_locked = False
@classonly
def is_target(cls, raw):
if isinstance(raw, HighlevelParsedItem):
return True
else:
return False
@classonly
def from_raw(cls, raw, **extra):
if isinstance(raw, cls):
if extra:
# XXX ?
raise NotImplementedError((raw, extra))
#return cls(raw.item, raw.typedecl, **raw._extra, **extra)
else:
return raw
elif cls.is_target(raw):
return cls(raw, **extra)
else:
raise NotImplementedError((raw, extra))
@classonly
def from_resolved(cls, item, resolved, **extra):
if isinstance(resolved, TypeDeclaration):
return cls(item, typedecl=resolved, **extra)
else:
typedeps, extra = cls._parse_raw_resolved(item, resolved, extra)
if item.kind is KIND.ENUM:
if typedeps:
raise NotImplementedError((item, resolved, extra))
elif not typedeps:
raise NotImplementedError((item, resolved, extra))
return cls(item, typedeps, **extra or {})
@classonly
def _parse_raw_resolved(cls, item, resolved, extra_extra):
if resolved in (UNKNOWN, IGNORED):
return resolved, None
try:
typedeps, extra = resolved
except (TypeError, ValueError):
typedeps = extra = None
if extra:
# The resolved data takes precedence.
extra = dict(extra_extra, **extra)
if isinstance(typedeps, TypeDeclaration):
return typedeps, extra
elif typedeps in (None, UNKNOWN):
# It is still effectively unresolved.
return UNKNOWN, extra
elif None in typedeps or UNKNOWN in typedeps:
# It is still effectively unresolved.
return typedeps, extra
elif any(not isinstance(td, TypeDeclaration) for td in typedeps):
raise NotImplementedError((item, typedeps, extra))
return typedeps, extra
def __init__(self, item, typedecl=None, **extra):
assert item is not None
self.item = item
if typedecl in (UNKNOWN, IGNORED):
pass
elif item.kind is KIND.STRUCT or item.kind is KIND.UNION:
if isinstance(typedecl, TypeDeclaration):
raise NotImplementedError(item, typedecl)
elif typedecl is None:
typedecl = UNKNOWN
else:
typedecl = [UNKNOWN if d is None else d for d in typedecl]
elif typedecl is None:
typedecl = UNKNOWN
elif typedecl and not isinstance(typedecl, TypeDeclaration):
# All the other decls have a single type decl.
typedecl, = typedecl
if typedecl is None:
typedecl = UNKNOWN
self.typedecl = typedecl
self._extra = extra
self._locked = True
self._validate()
def _validate(self):
item = self.item
extra = self._extra
# Check item.
if not isinstance(item, HighlevelParsedItem):
raise ValueError(f'"item" must be a high-level parsed item, got {item!r}')
# Check extra.
for key, value in extra.items():
if key.startswith('_'):
raise ValueError(f'extra items starting with {"_"!r} not allowed, got {extra!r}')
if hasattr(item, key) and not callable(getattr(item, key)):
raise ValueError(f'extra cannot override item, got {value!r} for key {key!r}')
def __repr__(self):
kwargs = [
f'item={self.item!r}',
f'typedecl={self.typedecl!r}',
*(f'{k}={v!r}' for k, v in self._extra.items())
]
return f'{type(self).__name__}({", ".join(kwargs)})'
def __str__(self):
try:
return self._str
except AttributeError:
self._str, = self.render('line')
return self._str
def __hash__(self):
return hash(self.item)
def __eq__(self, other):
if isinstance(other, Analyzed):
return self.item == other.item
elif isinstance(other, HighlevelParsedItem):
return self.item == other
elif type(other) is tuple:
return self.item == other
else:
return NotImplemented
def __gt__(self, other):
if isinstance(other, Analyzed):
return self.item > other.item
elif isinstance(other, HighlevelParsedItem):
return self.item > other
elif type(other) is tuple:
return self.item > other
else:
return NotImplemented
def __dir__(self):
names = set(super().__dir__())
names.update(self._extra)
names.remove('_locked')
return sorted(names)
def __getattr__(self, name):
if name.startswith('_'):
raise AttributeError(name)
# The item takes precedence over the extra data (except if callable).
try:
value = getattr(self.item, name)
if callable(value):
raise AttributeError(name)
except AttributeError:
try:
value = self._extra[name]
except KeyError:
pass
else:
# Speed things up the next time.
self.__dict__[name] = value
return value
raise # re-raise
else:
return value
def __setattr__(self, name, value):
if self._locked and name != '_str':
raise AttributeError(f'readonly ({name})')
super().__setattr__(name, value)
def __delattr__(self, name):
if self._locked:
raise AttributeError(f'readonly ({name})')
super().__delattr__(name)
@property
def decl(self):
if not isinstance(self.item, Declaration):
raise AttributeError('decl')
return self.item
@property
def signature(self):
# XXX vartype...
...
@property
def istype(self):
return is_type_decl(self.item.kind)
@property
def is_known(self):
if self.typedecl in (UNKNOWN, IGNORED):
return False
elif isinstance(self.typedecl, TypeDeclaration):
return True
else:
return UNKNOWN not in self.typedecl
def fix_filename(self, relroot):
self.item.fix_filename(relroot)
def as_rowdata(self, columns=None):
# XXX finish!
return self.item.as_rowdata(columns)
def render_rowdata(self, columns=None):
# XXX finish!
return self.item.render_rowdata(columns)
def render(self, fmt='line', *, itemonly=False):
if fmt == 'raw':
yield repr(self)
return
rendered = self.item.render(fmt)
if itemonly or not self._extra:
yield from rendered
return
extra = self._render_extra(fmt)
if not extra:
yield from rendered
elif fmt in ('brief', 'line'):
rendered, = rendered
extra, = extra
yield f'{rendered}\t{extra}'
elif fmt == 'summary':
raise NotImplementedError(fmt)
elif fmt == 'full':
yield from rendered
for line in extra:
yield f'\t{line}'
else:
raise NotImplementedError(fmt)
def _render_extra(self, fmt):
if fmt in ('brief', 'line'):
yield str(self._extra)
else:
raise NotImplementedError(fmt)
class Analysis:
_item_class = Analyzed
@classonly
def build_item(cls, info, resolved=None, **extra):
if resolved is None:
return cls._item_class.from_raw(info, **extra)
else:
return cls._item_class.from_resolved(info, resolved, **extra)
@classmethod
def from_results(cls, results):
self = cls()
for info, resolved in results:
self._add_result(info, resolved)
return self
def __init__(self, items=None):
self._analyzed = {type(self).build_item(item): None
for item in items or ()}
def __repr__(self):
return f'{type(self).__name__}({list(self._analyzed.keys())})'
def __iter__(self):
#yield from self.types
#yield from self.functions
#yield from self.variables
yield from self._analyzed
def __len__(self):
return len(self._analyzed)
def __getitem__(self, key):
if type(key) is int:
for i, val in enumerate(self._analyzed):
if i == key:
return val
else:
raise IndexError(key)
else:
return self._analyzed[key]
def fix_filenames(self, relroot):
for item in self._analyzed:
item.fix_filename(relroot)
def _add_result(self, info, resolved):
analyzed = type(self).build_item(info, resolved)
self._analyzed[analyzed] = None
return analyzed

View File

@ -1,339 +0,0 @@
import re
import shlex
import subprocess
from ..common.info import UNKNOWN
from . import source
IDENTIFIER = r'(?:[a-zA-Z]|_+[a-zA-Z0-9]\w*)'
TYPE_QUAL = r'(?:const|volatile)'
VAR_TYPE_SPEC = rf'''(?:
void |
(?:
(?:(?:un)?signed\s+)?
(?:
char |
short |
int |
long |
long\s+int |
long\s+long
) |
) |
float |
double |
{IDENTIFIER} |
(?:struct|union)\s+{IDENTIFIER}
)'''
POINTER = rf'''(?:
(?:\s+const)?\s*[*]
)'''
#STRUCT = r'''(?:
# (?:struct|(struct\s+%s))\s*[{]
# [^}]*
# [}]
# )''' % (IDENTIFIER)
#UNION = r'''(?:
# (?:union|(union\s+%s))\s*[{]
# [^}]*
# [}]
# )''' % (IDENTIFIER)
#DECL_SPEC = rf'''(?:
# ({VAR_TYPE_SPEC}) |
# ({STRUCT}) |
# ({UNION})
# )'''
FUNC_START = rf'''(?:
(?:
(?:
extern |
static |
static\s+inline
)\s+
)?
#(?:const\s+)?
{VAR_TYPE_SPEC}
)'''
#GLOBAL_VAR_START = rf'''(?:
# (?:
# (?:
# extern |
# static
# )\s+
# )?
# (?:
# {TYPE_QUAL}
# (?:\s+{TYPE_QUAL})?
# )?\s+
# {VAR_TYPE_SPEC}
# )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
^
(?:
({FUNC_START})
)
''', re.VERBOSE)
LOCAL_VAR_START = rf'''(?:
(?:
(?:
register |
static
)\s+
)?
(?:
(?:
{TYPE_QUAL}
(?:\s+{TYPE_QUAL})?
)\s+
)?
{VAR_TYPE_SPEC}
{POINTER}?
)'''
LOCAL_STMT_START_RE = re.compile(rf'''
^
(?:
({LOCAL_VAR_START})
)
''', re.VERBOSE)
def iter_global_declarations(lines):
"""Yield (decl, body) for each global declaration in the given lines.
For function definitions the header is reduced to one line and
the body is provided as-is. For other compound declarations (e.g.
struct) the entire declaration is reduced to one line and "body"
is None. Likewise for simple declarations (e.g. variables).
Declarations inside function bodies are ignored, though their text
is provided in the function body.
"""
# XXX Bail out upon bogus syntax.
lines = source.iter_clean_lines(lines)
for line in lines:
if not GLOBAL_DECL_START_RE.match(line):
continue
# We only need functions here, since we only need locals for now.
if line.endswith(';'):
continue
if line.endswith('{') and '(' not in line:
continue
# Capture the function.
# (assume no func is a one-liner)
decl = line
while '{' not in line: # assume no inline structs, etc.
try:
line = next(lines)
except StopIteration:
return
decl += ' ' + line
body, end = _extract_block(lines)
if end is None:
return
assert end == '}'
yield (f'{decl}\n{body}\n{end}', body)
def iter_local_statements(lines):
"""Yield (lines, blocks) for each statement in the given lines.
For simple statements, "blocks" is None and the statement is reduced
to a single line. For compound statements, "blocks" is a pair of
(header, body) for each block in the statement. The headers are
reduced to a single line each, but the bodies are provided as-is.
"""
# XXX Bail out upon bogus syntax.
lines = source.iter_clean_lines(lines)
for line in lines:
if not LOCAL_STMT_START_RE.match(line):
continue
stmt = line
blocks = None
if not line.endswith(';'):
# XXX Support compound & multiline simple statements.
#blocks = []
continue
yield (stmt, blocks)
def _extract_block(lines):
end = None
depth = 1
body = []
for line in lines:
depth += line.count('{') - line.count('}')
if depth == 0:
end = line
break
body.append(line)
return '\n'.join(body), end
def parse_func(stmt, body):
"""Return (name, signature) for the given function definition."""
header, _, end = stmt.partition(body)
assert end.strip() == '}'
assert header.strip().endswith('{')
header, _, _ = header.rpartition('{')
signature = ' '.join(header.strip().splitlines())
_, _, name = signature.split('(')[0].strip().rpartition(' ')
assert name
return name, signature
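# A quick sketch (illustrative only): reducing a trivial definition to its
# (name, signature) pair.
def _demo_parse_func():
    body = '    return 0;'
    stmt = 'static int spam(void) {\n%s\n}' % body
    assert parse_func(stmt, body) == ('spam', 'static int spam(void)')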
#TYPE_SPEC = rf'''(?:
# )'''
#VAR_DECLARATOR = rf'''(?:
# )'''
#VAR_DECL = rf'''(?:
# {TYPE_SPEC}+
# {VAR_DECLARATOR}
# \s*
# )'''
#VAR_DECLARATION = rf'''(?:
# {VAR_DECL}
# (?: = [^=] [^;]* )?
# ;
# )'''
#
#
#def parse_variable(decl, *, inFunc=False):
# """Return [(name, storage, vartype)] for the given variable declaration."""
# ...
def _parse_var(stmt):
"""Return (name, vartype) for the given variable declaration."""
stmt = stmt.rstrip(';')
m = LOCAL_STMT_START_RE.match(stmt)
assert m
vartype = m.group(0)
name = stmt[len(vartype):].partition('=')[0].strip()
if name.startswith('('):
name, _, after = name[1:].partition(')')
assert after
name = name.replace('*', '* ')
inside, _, name = name.strip().rpartition(' ')
vartype = f'{vartype} ({inside.strip()}){after}'
else:
name = name.replace('*', '* ')
before, _, name = name.rpartition(' ')
vartype = f'{vartype} {before}'
vartype = vartype.strip()
while ' ' in vartype:
vartype = vartype.replace(' ', ' ')
return name, vartype
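# A quick sketch (illustrative only): splitting simple declaration
# statements into (name, vartype).
def _demo_parse_var():
    assert _parse_var('static int spam = 1;') == ('spam', 'static int')
    assert _parse_var('char * name;') == ('name', 'char *')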
def extract_storage(decl, *, infunc=None):
"""Return (storage, vartype) based on the given declaration.
The default storage is "implicit" (or "local" if infunc is True).
"""
if decl == UNKNOWN:
return decl
if decl.startswith('static '):
return 'static'
#return 'static', decl.partition(' ')[2].strip()
elif decl.startswith('extern '):
return 'extern'
#return 'extern', decl.partition(' ')[2].strip()
elif re.match(r'.*\b(static|extern)\b', decl):
raise NotImplementedError
elif infunc:
return 'local'
else:
return 'implicit'
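# A quick sketch (illustrative only): a leading storage keyword wins;
# otherwise the storage is inferred from context.
def _demo_extract_storage():
    assert extract_storage('static int spam') == 'static'
    assert extract_storage('int spam', infunc=True) == 'local'
    assert extract_storage('int spam') == 'implicit'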
def parse_compound(stmt, blocks):
"""Return (headers, bodies) for the given compound statement."""
# XXX Identify declarations inside compound statements
# (if/switch/for/while).
raise NotImplementedError
def iter_variables(filename, *,
preprocessed=False,
_iter_source_lines=source.iter_lines,
_iter_global=iter_global_declarations,
_iter_local=iter_local_statements,
_parse_func=parse_func,
_parse_var=_parse_var,
_parse_compound=parse_compound,
):
"""Yield (funcname, name, vartype) for every variable in the given file."""
if preprocessed:
raise NotImplementedError
lines = _iter_source_lines(filename)
for stmt, body in _iter_global(lines):
# At the file top-level we only have to worry about vars & funcs.
if not body:
name, vartype = _parse_var(stmt)
if name:
yield (None, name, vartype)
else:
funcname, _ = _parse_func(stmt, body)
localvars = _iter_locals(body,
_iter_statements=_iter_local,
_parse_var=_parse_var,
_parse_compound=_parse_compound,
)
for name, vartype in localvars:
yield (funcname, name, vartype)
def _iter_locals(lines, *,
_iter_statements=iter_local_statements,
_parse_var=_parse_var,
_parse_compound=parse_compound,
):
compound = [lines]
while compound:
body = compound.pop(0)
bodylines = body.splitlines()
for stmt, blocks in _iter_statements(bodylines):
if not blocks:
name, vartype = _parse_var(stmt)
if name:
yield (name, vartype)
else:
headers, bodies = _parse_compound(stmt, blocks)
for header in headers:
for line in header:
name, vartype = _parse_var(line)
if name:
yield (name, vartype)
compound.extend(bodies)
def iter_all(filename, *,
preprocessed=False,
):
"""Yield a Declaration for each one found.
If there are duplicates, due to preprocessor conditionals, then
they are checked to make sure they are the same.
"""
# XXX For the moment we cheat.
for funcname, name, decl in iter_variables(filename,
preprocessed=preprocessed):
yield 'variable', funcname, name, decl

View File

@ -1,107 +0,0 @@
from ..common.info import UNKNOWN, ID
from . import declarations
# XXX need tests:
# * variables
# * variable
# * variable_from_id
def _iter_vars(filenames, preprocessed, *,
handle_id=None,
_iter_decls=declarations.iter_all,
):
if handle_id is None:
handle_id = ID
for filename in filenames or ():
for kind, funcname, name, decl in _iter_decls(filename,
preprocessed=preprocessed,
):
if kind != 'variable':
continue
varid = handle_id(filename, funcname, name)
yield varid, decl
# XXX Add a "handle_var" arg like we did for get_resolver()?
def variables(*filenames,
perfilecache=None,
preprocessed=False,
known=None, # for types
handle_id=None,
_iter_vars=_iter_vars,
):
"""Yield (varid, decl) for each variable found in the given files.
If "preprocessed" is provided (and not False/None) then it is used
to decide which tool to use to parse the source code after it runs
through the C preprocessor. Otherwise the raw source is parsed directly.
"""
if len(filenames) == 1 and not isinstance(filenames[0], str):
filenames, = filenames
if perfilecache is None:
yield from _iter_vars(filenames, preprocessed)
else:
# XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
raise NotImplementedError
def variable(name, filenames, *,
local=False,
perfilecache=None,
preprocessed=False,
handle_id=None,
_iter_vars=variables,
):
"""Return (varid, decl) for the first found variable that matches.
If "local" is True then the first matching local variable in the
file will always be returned. To avoid that, pass perfilecache and
pop each variable from the cache after using it.
"""
for varid, decl in _iter_vars(filenames,
perfilecache=perfilecache,
preprocessed=preprocessed,
):
if varid.name != name:
continue
if local:
if varid.funcname:
if varid.funcname == UNKNOWN:
raise NotImplementedError
return varid, decl
elif not varid.funcname:
return varid, decl
else:
return None, None # No matching variable was found.
def variable_from_id(id, filenames, *,
perfilecache=None,
preprocessed=False,
handle_id=None,
_get_var=variable,
):
"""Return (varid, decl) for the first found variable that matches."""
local = False
if isinstance(id, str):
name = id
else:
if id.funcname == UNKNOWN:
local = True
elif id.funcname:
raise NotImplementedError
name = id.name
if id.filename and id.filename != UNKNOWN:
filenames = [id.filename]
return _get_var(name, filenames,
local=local,
perfilecache=perfilecache,
preprocessed=preprocessed,
handle_id=handle_id,
)

View File

@ -1,179 +0,0 @@
import re
from ..common.info import UNKNOWN, ID
from .preprocessor import _iter_clean_lines
_NOT_SET = object()
def get_srclines(filename, *,
cache=None,
_open=open,
_iter_lines=_iter_clean_lines,
):
"""Return the file's lines as a list.
Each line will have trailing whitespace removed (including newline).
If a cache is given then it is used.
"""
if cache is not None:
try:
return cache[filename]
except KeyError:
pass
with _open(filename) as srcfile:
srclines = [line
for _, line in _iter_lines(srcfile)
if not line.startswith('#')]
for i, line in enumerate(srclines):
srclines[i] = line.rstrip()
if cache is not None:
cache[filename] = srclines
return srclines
def parse_variable_declaration(srcline):
"""Return (name, decl) for the given declaration line."""
# XXX possible false negatives...
decl, sep, _ = srcline.partition('=')
if not sep:
if not srcline.endswith(';'):
return None, None
decl = decl.strip(';')
decl = decl.strip()
m = re.match(r'.*\b(\w+)\s*(?:\[[^\]]*\])?$', decl)
if not m:
return None, None
name = m.group(1)
return name, decl
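# A quick sketch (illustrative only): the trailing identifier is taken as
# the variable name; non-declarations fall out as (None, None).
def _demo_parse_variable_declaration():
    parsed = parse_variable_declaration('static int eggs = 3;')
    assert parsed == ('eggs', 'static int eggs')
    assert parse_variable_declaration('if (x) {') == (None, None)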
def parse_variable(srcline, funcname=None):
"""Return (varid, decl) for the variable declared on the line (or None)."""
line = srcline.strip()
# XXX Handle more than just static variables.
if line.startswith('static '):
if '(' in line and '[' not in line:
# a function
return None, None
return parse_variable_declaration(line)
else:
return None, None
def iter_variables(filename, *,
srccache=None,
parse_variable=None,
_get_srclines=get_srclines,
_default_parse_variable=parse_variable,
):
"""Yield (varid, decl) for each variable in the given source file."""
if parse_variable is None:
parse_variable = _default_parse_variable
indent = ''
prev = ''
funcname = None
for line in _get_srclines(filename, cache=srccache):
# remember current funcname
if funcname:
if line == indent + '}':
funcname = None
continue
else:
if '(' in prev and line == indent + '{':
if not prev.startswith('__attribute__'):
funcname = prev.split('(')[0].split()[-1]
prev = ''
continue
indent = line[:-len(line.lstrip())]
prev = line
info = parse_variable(line, funcname)
if isinstance(info, list):
for name, _funcname, decl in info:
yield ID(filename, _funcname, name), decl
continue
name, decl = info
if name is None:
continue
yield ID(filename, funcname, name), decl
def _match_varid(variable, name, funcname, ignored=None):
if ignored and variable in ignored:
return False
if variable.name != name:
return False
if funcname == UNKNOWN:
if not variable.funcname:
return False
elif variable.funcname != funcname:
return False
return True
def find_variable(filename, funcname, name, *,
ignored=None,
srccache=None, # {filename: lines}
parse_variable=None,
_iter_variables=iter_variables,
):
"""Return the matching variable.
Return (None, None) if the variable is not found.
"""
for varid, decl in _iter_variables(filename,
srccache=srccache,
parse_variable=parse_variable,
):
if _match_varid(varid, name, funcname, ignored):
return varid, decl
else:
return None, None
def find_variables(varids, filenames=None, *,
srccache=_NOT_SET,
parse_variable=None,
_find_symbol=find_variable,
):
"""Yield (varid, decl) for each ID.
If the variable is not found then its decl will be UNKNOWN. That
way there will be one resulting variable per given ID.
"""
if srccache is _NOT_SET:
srccache = {}
used = set()
for varid in varids:
if varid.filename and varid.filename != UNKNOWN:
srcfiles = [varid.filename]
else:
if not filenames:
yield varid, UNKNOWN
continue
srcfiles = filenames
for filename in srcfiles:
varid, decl = _find_symbol(filename, varid.funcname, varid.name,
ignored=used,
srccache=srccache,
parse_variable=parse_variable,
)
if varid:
yield varid, decl
used.add(varid)
break
else:
yield varid, UNKNOWN

View File

@ -1,511 +0,0 @@
from collections import namedtuple
import shlex
import os
import re
from ..common import util, info
CONTINUATION = '\\' + os.linesep
IDENTIFIER = r'(?:\w*[a-zA-Z]\w*)'
IDENTIFIER_RE = re.compile('^' + IDENTIFIER + '$')
def _coerce_str(value):
if not value:
return ''
return str(value).strip()
#############################
# directives
DIRECTIVE_START = r'''
(?:
^ \s*
[#] \s*
)'''
DIRECTIVE_TEXT = r'''
(?:
(?: \s+ ( .*\S ) )?
\s* $
)'''
DIRECTIVE = rf'''
(?:
{DIRECTIVE_START}
(
include |
error | warning |
pragma |
define | undef |
if | ifdef | ifndef | elseif | else | endif |
__FILE__ | __LINE__ | __DATE__ | __TIME__ | __TIMESTAMP__
)
{DIRECTIVE_TEXT}
)'''
# (?:
# [^\\\n] |
# \\ [^\n] |
# \\ \n
# )+
# ) \n
# )'''
DIRECTIVE_RE = re.compile(DIRECTIVE, re.VERBOSE)
DEFINE = rf'''
(?:
{DIRECTIVE_START} define \s+
(?:
( \w*[a-zA-Z]\w* )
(?: \s* [(] ([^)]*) [)] )?
)
{DIRECTIVE_TEXT}
)'''
DEFINE_RE = re.compile(DEFINE, re.VERBOSE)
def parse_directive(line):
"""Return the appropriate directive for the given line."""
line = line.strip()
if line.startswith('#'):
line = line[1:].lstrip()
line = '#' + line
directive = line
#directive = '#' + line
while ' ' in directive:
directive = directive.replace(' ', ' ')
return _parse_directive(directive)
def _parse_directive(line):
m = DEFINE_RE.match(line)
if m:
name, args, text = m.groups()
if args:
args = [a.strip() for a in args.split(',')]
return Macro(name, args, text)
else:
return Constant(name, text)
m = DIRECTIVE_RE.match(line)
if not m:
raise ValueError(f'unsupported directive {line!r}')
kind, text = m.groups()
if not text:
if kind not in ('else', 'endif'):
raise ValueError(f'missing text in directive {line!r}')
elif kind in ('else', 'endif', 'define'):
raise ValueError(f'unexpected text in directive {line!r}')
if kind == 'include':
directive = Include(text)
elif kind in IfDirective.KINDS:
directive = IfDirective(kind, text)
else:
directive = OtherDirective(kind, text)
directive.validate()
return directive
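# A quick sketch (illustrative only; relies on the directive classes defined
# below): the factory normalizes whitespace and dispatches on the kind.
def _demo_parse_directive():
    assert parse_directive('#define SPAM 1') == Constant('SPAM', '1')
    assert parse_directive('#ifdef SPAM').condition == 'defined(SPAM)'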
class PreprocessorDirective(util._NTBase):
"""The base class for directives."""
__slots__ = ()
KINDS = frozenset([
'include',
'pragma',
'error', 'warning',
'define', 'undef',
'if', 'ifdef', 'ifndef', 'elseif', 'else', 'endif',
'__FILE__', '__DATE__', '__LINE__', '__TIME__', '__TIMESTAMP__',
])
@property
def text(self):
return ' '.join(v for v in self[1:] if v and v.strip()) or None
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
super().validate()
if not self.kind:
raise TypeError('missing kind')
elif self.kind not in self.KINDS:
raise ValueError(f'unsupported kind {self.kind!r}')
# text can be anything, including None.
class Constant(PreprocessorDirective,
namedtuple('Constant', 'kind name value')):
"""A single "constant" directive ("define")."""
__slots__ = ()
def __new__(cls, name, value=None):
self = super().__new__(
cls,
'define',
name=_coerce_str(name) or None,
value=_coerce_str(value) or None,
)
return self
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
super().validate()
if not self.name:
raise TypeError('missing name')
elif not IDENTIFIER_RE.match(self.name):
raise ValueError(f'name must be identifier, got {self.name!r}')
# value can be anything, including None
class Macro(PreprocessorDirective,
namedtuple('Macro', 'kind name args body')):
"""A single "macro" directive ("define")."""
__slots__ = ()
def __new__(cls, name, args, body=None):
# "args" must be a string or an iterable of strings (or "empty").
if isinstance(args, str):
args = [v.strip() for v in args.split(',')]
if args:
args = tuple(_coerce_str(a) or None for a in args)
self = super().__new__(
cls,
kind='define',
name=_coerce_str(name) or None,
args=args if args else (),
body=_coerce_str(body) or None,
)
return self
@property
def text(self):
if self.body:
return f'{self.name}({", ".join(self.args)}) {self.body}'
else:
return f'{self.name}({", ".join(self.args)})'
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
super().validate()
if not self.name:
raise TypeError('missing name')
elif not IDENTIFIER_RE.match(self.name):
raise ValueError(f'name must be identifier, got {self.name!r}')
for arg in self.args:
if not arg:
raise ValueError(f'missing arg in {self.args}')
elif not IDENTIFIER_RE.match(arg):
raise ValueError(f'arg must be identifier, got {arg!r}')
# body can be anything, including None
class IfDirective(PreprocessorDirective,
namedtuple('IfDirective', 'kind condition')):
"""A single conditional directive (e.g. "if", "ifdef").
This only includes directives that actually provide conditions. The
related directives "else" and "endif" are covered by OtherDirective
instead.
"""
__slots__ = ()
KINDS = frozenset([
'if',
'ifdef',
'ifndef',
'elseif',
])
@classmethod
def _condition_from_raw(cls, raw, kind):
#return Condition.from_raw(raw, _kind=kind)
condition = _coerce_str(raw)
if not condition:
return None
if kind == 'ifdef':
condition = f'defined({condition})'
elif kind == 'ifndef':
condition = f'! defined({condition})'
return condition
def __new__(cls, kind, condition):
kind = _coerce_str(kind)
self = super().__new__(
cls,
kind=kind or None,
condition=cls._condition_from_raw(condition, kind),
)
return self
@property
def text(self):
if self.kind == 'ifdef':
return self.condition[8:-1] # strip "defined("
elif self.kind == 'ifndef':
return self.condition[10:-1] # strip "! defined("
else:
return self.condition
#return str(self.condition)
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
super().validate()
if not self.condition:
raise TypeError('missing condition')
#else:
# for cond in self.condition:
# if not cond:
# raise ValueError(f'missing condition in {self.condition}')
# cond.validate()
# if self.kind in ('ifdef', 'ifndef'):
# if len(self.condition) != 1:
# raise ValueError('too many condition')
# if self.kind == 'ifdef':
# if not self.condition[0].startswith('defined '):
# raise ValueError('bad condition')
# else:
# if not self.condition[0].startswith('! defined '):
# raise ValueError('bad condition')
class Include(PreprocessorDirective,
namedtuple('Include', 'kind file')):
"""A single "include" directive.
Supported "file" values are either follow the bracket style
(<stdio>) or double quotes ("spam.h").
"""
__slots__ = ()
def __new__(cls, file):
self = super().__new__(
cls,
kind='include',
file=_coerce_str(file) or None,
)
return self
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
super().validate()
if not self.file:
raise TypeError('missing file')
class OtherDirective(PreprocessorDirective,
namedtuple('OtherDirective', 'kind text')):
"""A single directive not covered by another class.
This includes the "else", "endif", and "undef" directives, which are
otherwise inherently related to the directives covered by the
Constant, Macro, and IfCondition classes.
Note that all directives must have a text value, except for "else"
and "endif" (which must have no text).
"""
__slots__ = ()
KINDS = PreprocessorDirective.KINDS - {'include', 'define'} - IfDirective.KINDS
def __new__(cls, kind, text):
self = super().__new__(
cls,
kind=_coerce_str(kind) or None,
text=_coerce_str(text) or None,
)
return self
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
super().validate()
if self.text:
if self.kind in ('else', 'endif'):
raise ValueError('unexpected text in directive')
elif self.kind not in ('else', 'endif'):
raise TypeError('missing text')
#############################
# iterating lines
def _recompute_conditions(directive, ifstack):
if directive.kind in ('if', 'ifdef', 'ifndef'):
ifstack.append(
([], directive.condition))
elif directive.kind == 'elseif':
if ifstack:
negated, active = ifstack.pop()
if active:
negated.append(active)
else:
negated = []
ifstack.append(
(negated, directive.condition))
elif directive.kind == 'else':
if ifstack:
negated, active = ifstack.pop()
if active:
negated.append(active)
ifstack.append(
(negated, None))
elif directive.kind == 'endif':
if ifstack:
ifstack.pop()
conditions = []
for negated, active in ifstack:
for condition in negated:
conditions.append(f'! ({condition})')
if active:
conditions.append(active)
return tuple(conditions)
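# Hedged sketch of how the ifstack and the resulting conditions evolve for a
# small directive sequence (directives as parsed above):
#
#   #ifdef SPAM  -> ifstack [([], 'defined(SPAM)')]      conditions ('defined(SPAM)',)
#   #else        -> ifstack [(['defined(SPAM)'], None)]  conditions ('! (defined(SPAM))',)
#   #endif       -> ifstack []                           conditions ()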
def _iter_clean_lines(lines):
lines = iter(enumerate(lines, 1))
for lno, line in lines:
# Handle line continuations.
while line.endswith(CONTINUATION):
try:
lno, _line = next(lines)
except StopIteration:
break
line = line[:-len(CONTINUATION)] + ' ' + _line
# Deal with comments.
after = line
line = ''
while True:
# Look for a comment.
before, begin, remainder = after.partition('/*')
if '//' in before:
before, _, _ = before.partition('//')
line += before + ' ' # per the C99 spec
break
line += before
if not begin:
break
line += ' ' # per the C99 spec
# Go until we find the end of the comment.
_, end, after = remainder.partition('*/')
while not end:
try:
lno, remainder = next(lines)
except StopIteration:
raise Exception('unterminated comment')
_, end, after = remainder.partition('*/')
yield lno, line
def iter_lines(lines, *,
_iter_clean_lines=_iter_clean_lines,
_parse_directive=_parse_directive,
_recompute_conditions=_recompute_conditions,
):
"""Yield (lno, line, directive, active conditions) for each given line.
This is effectively a subset of the operations taking place in
translation phases 2-4 from the C99 spec (ISO/IEC 9899:TC2); see
section 5.1.1.2. Line continuations are removed and comments
replaced with a single space. (In both cases "lno" will be the last
line involved.) Otherwise each line is returned as-is.
"lno" is the (1-indexed) line number for the line.
"directive" will be a PreprocessorDirective or None, depending on
whether or not there is a directive on the line.
"active conditions" is the set of preprocessor conditions (e.g.
"defined()") under which the current line of code will be included
in compilation. That set is derived from every conditional
directive block (e.g. "if defined()", "ifdef", "else") containing
that line. That includes nested directives. Note that the
current line does not affect the active conditions for itself.
It only impacts subsequent lines. That applies to directives
that close blocks (e.g. "endif") just as much as conditional
directives. Also note that "else" and "elseif" directives
update the active conditions (for later lines), rather than
adding to them.
"""
ifstack = []
conditions = ()
for lno, line in _iter_clean_lines(lines):
stripped = line.strip()
if not stripped.startswith('#'):
yield lno, line, None, conditions
continue
directive = '#' + stripped[1:].lstrip()
while '  ' in directive:
directive = directive.replace('  ', ' ')
directive = _parse_directive(directive)
yield lno, line, directive, conditions
if directive.kind in ('else', 'endif'):
conditions = _recompute_conditions(directive, ifstack)
elif isinstance(directive, IfDirective):
conditions = _recompute_conditions(directive, ifstack)
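# Hedged doctest-style sketch of iter_lines() (assuming os.linesep == '\n'):
#
#   >>> src = ['#ifdef SPAM\n', 'int x;\n', '#endif\n']
#   >>> for lno, line, directive, conditions in iter_lines(src):
#   ...     print(lno, directive is not None, conditions)
#   1 True ()
#   2 False ('defined(SPAM)',)
#   3 True ('defined(SPAM)',)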
#############################
# running (platform-specific?)
def _gcc(filename, *,
_get_argv=(lambda: _get_gcc_argv()),
_run=util.run_cmd,
):
argv = _get_argv()
argv.extend([
'-E', filename,
])
output = _run(argv)
return output
def _get_gcc_argv(*,
_open=open,
_run=util.run_cmd,
):
with _open('/tmp/print.mk', 'w') as tmpfile:
tmpfile.write('print-%:\n')
#tmpfile.write('\t@echo $* = $($*)\n')
tmpfile.write('\t@echo $($*)\n')
argv = ['/usr/bin/make',
'-f', 'Makefile',
'-f', '/tmp/print.mk',
'print-CC',
'print-PY_CORE_CFLAGS',
]
output = _run(argv)
gcc, cflags = output.strip().splitlines()
argv = shlex.split(gcc.strip())
cflags = shlex.split(cflags.strip())
return argv + cflags
def run(filename, *,
_gcc=_gcc,
):
"""Return the text of the given file after running the preprocessor."""
return _gcc(filename)

View File

@ -1,34 +0,0 @@
from . import preprocessor
def iter_clean_lines(lines):
incomment = False
for line in lines:
# Deal with comments.
if incomment:
_, sep, line = line.partition('*/')
if sep:
incomment = False
continue
line, _, _ = line.partition('//')
line, sep, remainder = line.partition('/*')
if sep:
_, sep, after = remainder.partition('*/')
if not sep:
incomment = True
continue
line += ' ' + after
# Ignore blank lines and leading/trailing whitespace.
line = line.strip()
if not line:
continue
yield line
def iter_lines(filename, *,
preprocess=preprocessor.run,
):
content = preprocess(filename)
return iter(content.splitlines())

View File

@ -1,117 +0,0 @@
import os.path
import shutil
from c_analyzer.common import util, info
from .info import Symbol
# XXX need tests:
# * iter_symbols
NM_KINDS = {
'b': Symbol.KIND.VARIABLE, # uninitialized
'd': Symbol.KIND.VARIABLE, # initialized
#'g': Symbol.KIND.VARIABLE, # uninitialized
#'s': Symbol.KIND.VARIABLE, # initialized
't': Symbol.KIND.FUNCTION,
}
SPECIAL_SYMBOLS = {
# binary format (e.g. ELF)
'__bss_start',
'__data_start',
'__dso_handle',
'_DYNAMIC',
'_edata',
'_end',
'__environ@@GLIBC_2.2.5',
'_GLOBAL_OFFSET_TABLE_',
'__JCR_END__',
'__JCR_LIST__',
'__TMC_END__',
}
def _is_special_symbol(name):
if name in SPECIAL_SYMBOLS:
return True
if '@@GLIBC' in name:
return True
return False
def iter_symbols(binfile, *,
nm=None,
handle_id=None,
_which=shutil.which,
_run=util.run_cmd,
):
"""Yield a Symbol for each relevant entry reported by the "nm" command."""
if nm is None:
nm = _which('nm')
if not nm:
raise NotImplementedError
if handle_id is None:
handle_id = info.ID
argv = [nm,
'--line-numbers',
binfile,
]
try:
output = _run(argv)
except Exception:
if nm is None:
# XXX Use dumpbin.exe /SYMBOLS on Windows.
raise NotImplementedError
raise
for line in output.splitlines():
(name, kind, external, filename, funcname,
) = _parse_nm_line(line)
if kind != Symbol.KIND.VARIABLE:
continue
elif _is_special_symbol(name):
continue
yield Symbol(
id=handle_id(filename, funcname, name),
kind=kind,
external=external,
)
def _parse_nm_line(line):
_origline = line
_, _, line = line.partition(' ') # strip off the address
line = line.strip()
kind, _, line = line.partition(' ')
line = line.strip()
external = kind.isupper()
kind = NM_KINDS.get(kind.lower(), Symbol.KIND.OTHER)
name, _, filename = line.partition('\t')
name = name.strip()
if filename:
filename = os.path.relpath(filename.partition(':')[0])
else:
filename = info.UNKNOWN
name, islocal = _parse_nm_name(name, kind)
funcname = info.UNKNOWN if islocal else None
return name, kind, external, filename, funcname
def _parse_nm_name(name, kind):
if kind != Symbol.KIND.VARIABLE:
return name, None
if _is_special_symbol(name):
return name, None
actual, sep, digits = name.partition('.')
if not sep:
return name, False
if not digits.isdigit():
raise Exception(f'got bogus name {name}')
return actual, True
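# Hedged sketch of parsing one "nm --line-numbers" entry. The exact output
# format varies by platform/toolchain; this assumes GNU nm on Linux, a
# hypothetical symbol, and a working directory of /cpython:
#
#   >>> line = '0000000000621648 d static_arg\t/cpython/Python/getargs.c:145'
#   >>> _parse_nm_line(line)
#   ('static_arg', 'variable', False, 'Python/getargs.c', None)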

View File

@ -1,175 +0,0 @@
import os
import os.path
import shutil
from ..common import files
from ..common.info import UNKNOWN, ID
from ..parser import find as p_find
from . import _nm
from .info import Symbol
# XXX need tests:
# * get_resolver()
# * get_resolver_from_dirs()
# * symbol()
# * symbols()
# * variables()
def _resolve_known(symbol, knownvars):
for varid in knownvars:
if symbol.match(varid):
break
else:
return None
return knownvars.pop(varid)
def get_resolver(filenames=None, known=None, *,
handle_var,
check_filename=None,
perfilecache=None,
preprocessed=False,
_from_source=p_find.variable_from_id,
):
"""Return a "resolver" func for the given known vars/types and filenames.
"handle_var" is a callable that takes (ID, decl) and returns a
Variable. Variable.from_id is a suitable callable.
The returned func takes a single Symbol and returns a corresponding
Variable. If the symbol was located then the variable will be
valid, populated with the corresponding information. Otherwise None
is returned.
"""
knownvars = (known or {}).get('variables')
if knownvars:
knownvars = dict(knownvars) # a copy
if filenames:
if check_filename is None:
filenames = list(filenames)
def check_filename(filename):
return filename in filenames
def resolve(symbol):
# XXX Check "found" instead?
if not check_filename(symbol.filename):
return None
found = _resolve_known(symbol, knownvars)
if found is None:
#return None
varid, decl = _from_source(symbol, filenames,
perfilecache=perfilecache,
preprocessed=preprocessed,
)
found = handle_var(varid, decl)
return found
else:
def resolve(symbol):
return _resolve_known(symbol, knownvars)
elif filenames:
def resolve(symbol):
varid, decl = _from_source(symbol, filenames,
perfilecache=perfilecache,
preprocessed=preprocessed,
)
return handle_var(varid, decl)
else:
def resolve(symbol):
return None
return resolve
def get_resolver_from_dirs(dirnames, known=None, *,
handle_var,
suffixes=('.c',),
perfilecache=None,
preprocessed=False,
_iter_files=files.iter_files_by_suffix,
_get_resolver=get_resolver,
):
"""Return a "resolver" func for the given known vars/types and filenames.
"dirnames" should be absolute paths. If not then they will be
resolved relative to CWD.
See get_resolver().
"""
dirnames = [d if d.endswith(os.path.sep) else d + os.path.sep
for d in dirnames]
filenames = _iter_files(dirnames, suffixes)
def check_filename(filename):
for dirname in dirnames:
if filename.startswith(dirname):
return True
else:
return False
return _get_resolver(filenames, known,
handle_var=handle_var,
check_filename=check_filename,
perfilecache=perfilecache,
preprocessed=preprocessed,
)
def symbol(symbol, filenames, known=None, *,
perfilecache=None,
preprocessed=False,
handle_id=None,
_get_resolver=get_resolver,
):
"""Return a Variable for the one matching the given symbol.
"symbol" can be one of several objects:
* Symbol - use the contained info
* name (str) - look for a global variable with that name
* (filename, name) - look for named global in file
* (filename, funcname, name) - look for named local in file
A name is always required. If the filename is None, "", or
"UNKNOWN" then all files will be searched. If the funcname is
"" or "UNKNOWN" then only local variables will be searched for.
"""
resolve = _get_resolver(filenames, known,
handle_var=handle_id,  # XXX get_resolver() expects "handle_var".
perfilecache=perfilecache,
preprocessed=preprocessed,
)
return resolve(symbol)
def _get_platform_tool():
if os.name == 'nt':
# XXX Support this.
raise NotImplementedError
elif nm := shutil.which('nm'):
return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi)
else:
raise NotImplementedError
def symbols(binfile, *,
handle_id=None,
_file_exists=os.path.exists,
_get_platform_tool=_get_platform_tool,
):
"""Yield a Symbol for each one found in the binary."""
if not _file_exists(binfile):
raise Exception('executable missing (need to build it first?)')
_iter_symbols = _get_platform_tool()
yield from _iter_symbols(binfile, handle_id)
def variables(binfile, *,
resolve,
handle_id=None,
_iter_symbols=symbols,
):
"""Yield (Variable, Symbol) for each found symbol."""
for symbol in _iter_symbols(binfile, handle_id=handle_id):
if symbol.kind != Symbol.KIND.VARIABLE:
continue
var = resolve(symbol) or None
yield var, symbol

View File

@ -1,51 +0,0 @@
from collections import namedtuple
from c_analyzer.common.info import ID
from c_analyzer.common.util import classonly, _NTBase
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
"""Info for a single compilation symbol."""
__slots__ = ()
class KIND:
VARIABLE = 'variable'
FUNCTION = 'function'
OTHER = 'other'
@classonly
def from_name(cls, name, filename=None, kind=KIND.VARIABLE, external=None):
"""Return a new symbol based on the given name."""
id = ID(filename, None, name)
return cls(id, kind, external)
def __new__(cls, id, kind=KIND.VARIABLE, external=None):
self = super().__new__(
cls,
id=ID.from_raw(id),
kind=str(kind) if kind else None,
external=bool(external) if external is not None else None,
)
return self
def __hash__(self):
return hash(self.id)
def __getattr__(self, name):
return getattr(self.id, name)
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
if not self.id:
raise TypeError('missing id')
else:
self.id.validate()
if not self.kind:
raise TypeError('missing kind')
elif self.kind not in vars(self.KIND).values():
raise ValueError(f'unsupported kind {self.kind}')
if self.external is None:
raise TypeError('missing external')

View File

@ -1,75 +0,0 @@
from ..common import files
from ..common.info import UNKNOWN
from ..parser import (
find as p_find,
)
from ..symbols import (
info as s_info,
find as s_find,
)
from .info import Variable
# XXX need tests:
# * vars_from_source
def _remove_cached(cache, var):
if not cache:
return
try:
cached = cache[var.filename]
cached.remove(var)
except (KeyError, IndexError):
pass
def vars_from_binary(binfile, *,
known=None,
filenames=None,
handle_id=None,
check_filename=None,
handle_var=Variable.from_id,
_iter_vars=s_find.variables,
_get_symbol_resolver=s_find.get_resolver,
):
"""Yield a Variable for each found Symbol.
Details are filled in from the given "known" variables and types.
"""
cache = {}
resolve = _get_symbol_resolver(filenames, known,
handle_var=handle_var,
check_filename=check_filename,
perfilecache=cache,
)
for var, symbol in _iter_vars(binfile,
resolve=resolve,
handle_id=handle_id,
):
if var is None:
var = Variable(symbol.id, UNKNOWN, UNKNOWN)
yield var
_remove_cached(cache, var)
def vars_from_source(filenames, *,
preprocessed=None,
known=None,
handle_id=None,
handle_var=Variable.from_id,
iter_vars=p_find.variables,
):
"""Yield a Variable for each declaration in the raw source code.
Details are filled in from the given "known" variables and types.
"""
cache = {}
for varid, decl in iter_vars(filenames or (),
perfilecache=cache,
preprocessed=preprocessed,
known=known,
handle_id=handle_id,
):
var = handle_var(varid, decl)
yield var
_remove_cached(cache, var)

View File

@ -1,93 +0,0 @@
from collections import namedtuple
from ..common.info import ID, UNKNOWN
from ..common.util import classonly, _NTBase
def normalize_vartype(vartype):
"""Return the canonical form for a variable type (or func signature)."""
# We allow the empty string through for semantic reasons.
if vartype is None:
return None
# XXX finish!
# XXX Return (modifiers, type, pointer)?
return str(vartype)
# XXX Variable.vartype -> decl (Declaration).
class Variable(_NTBase,
namedtuple('Variable', 'id storage vartype')):
"""Information about a single variable declaration."""
__slots__ = ()
STORAGE = (
'static',
'extern',
'implicit',
'local',
)
@classonly
def from_parts(cls, filename, funcname, name, decl, storage=None):
varid = ID(filename, funcname, name)
if storage is None:
self = cls.from_id(varid, decl)
else:
self = cls(varid, storage, decl)
return self
@classonly
def from_id(cls, varid, decl):
from ..parser.declarations import extract_storage
storage = extract_storage(decl, infunc=varid.funcname)
return cls(varid, storage, decl)
def __new__(cls, id, storage, vartype):
self = super().__new__(
cls,
id=ID.from_raw(id),
storage=str(storage) if storage else None,
vartype=normalize_vartype(vartype) if vartype else None,
)
return self
def __hash__(self):
return hash(self.id)
def __getattr__(self, name):
return getattr(self.id, name)
def _validate_id(self):
if not self.id:
raise TypeError('missing id')
if not self.filename or self.filename == UNKNOWN:
raise TypeError(f'id missing filename ({self.id})')
if self.funcname and self.funcname == UNKNOWN:
raise TypeError(f'id missing funcname ({self.id})')
self.id.validate()
def validate(self):
"""Fail if the object is invalid (i.e. init with bad data)."""
self._validate_id()
if self.storage is None or self.storage == UNKNOWN:
raise TypeError('missing storage')
elif self.storage not in self.STORAGE:
raise ValueError(f'unsupported storage {self.storage!r}')
if self.vartype is None or self.vartype == UNKNOWN:
raise TypeError('missing vartype')
@property
def isglobal(self):
return self.storage != 'local'
@property
def isconst(self):
return 'const' in self.vartype.split()
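# Hedged doctest-style sketch (hypothetical declaration; storage passed
# explicitly so no parser import is needed):
#
#   >>> v = Variable.from_parts('spam.c', None, 'count',
#   ...                         'static int count', storage='static')
#   >>> v.name, v.storage, v.isglobal, v.isconst
#   ('count', 'static', True, False)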

View File

@ -1,91 +0,0 @@
import csv
from ..common.info import ID, UNKNOWN
from ..common.util import read_tsv
from .info import Variable
# XXX need tests:
# * read_file()
# * look_up_variable()
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
def read_file(infile, *,
_read_tsv=read_tsv,
):
"""Yield (kind, id, decl) for each row in the data file.
The caller is responsible for validating each row.
"""
for row in _read_tsv(infile, HEADER):
filename, funcname, name, kind, declaration = row
if not funcname or funcname == '-':
funcname = None
id = ID(filename, funcname, name)
yield kind, id, declaration
def from_file(infile, *,
handle_var=Variable.from_id,
_read_file=read_file,
):
"""Return the info for known declarations in the given file."""
known = {
'variables': {},
#'types': {},
#'constants': {},
#'macros': {},
}
for kind, id, decl in _read_file(infile):
if kind == 'variable':
values = known['variables']
value = handle_var(id, decl)
else:
raise ValueError(f'unsupported kind {kind!r} (for {id})')
value.validate()
values[id] = value
return known
def look_up_variable(varid, knownvars, *,
match_files=(lambda f1, f2: f1 == f2),
):
"""Return the known Variable matching the given ID.
"knownvars" is a mapping of ID to Variable.
"match_files" is used to verify if two filenames point to
the same file.
If no match is found then None is returned.
"""
if not knownvars:
return None
if varid.funcname == UNKNOWN:
if not varid.filename or varid.filename == UNKNOWN:
for knownid in knownvars:
if not knownid.funcname:
continue
if knownid.name == varid.name:
return knownvars[knownid]
else:
return None
else:
for knownid in knownvars:
if not knownid.funcname:
continue
if not match_files(knownid.filename, varid.filename):
continue
if knownid.name == varid.name:
return knownvars[knownid]
else:
return None
elif not varid.filename or varid.filename == UNKNOWN:
raise NotImplementedError
else:
return knownvars.get(varid.id)

View File

@ -0,0 +1,2 @@
NOT_SET = object()

View File

@ -1,70 +1,7 @@
import csv
import subprocess
_NOT_SET = object()
def run_cmd(argv, **kwargs):
proc = subprocess.run(
argv,
#capture_output=True,
#stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
text=True,
check=True,
**kwargs
)
return proc.stdout
def read_tsv(infile, header, *,
_open=open,
_get_reader=csv.reader,
):
"""Yield each row of the given TSV (tab-separated) file."""
if isinstance(infile, str):
with _open(infile, newline='') as infile:
yield from read_tsv(infile, header,
_open=_open,
_get_reader=_get_reader,
)
return
lines = iter(infile)
# Validate the header.
try:
actualheader = next(lines).strip()
except StopIteration:
actualheader = ''
if actualheader != header:
raise ValueError(f'bad header {actualheader!r}')
for row in _get_reader(lines, delimiter='\t'):
yield tuple(v.strip() for v in row)
def write_tsv(outfile, header, rows, *,
_open=open,
_get_writer=csv.writer,
):
"""Write each of the rows to the given TSV (tab-separated) file."""
if isinstance(outfile, str):
with _open(outfile, 'w', newline='') as outfile:
return write_tsv(outfile, header, rows,
_open=_open,
_get_writer=_get_writer,
)
if isinstance(header, str):
header = header.split('\t')
writer = _get_writer(outfile, delimiter='\t')
writer.writerow(header)
for row in rows:
writer.writerow('' if v is None else str(v)
for v in row)
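# Hedged usage sketch: a round trip through a temporary file.
#
#   >>> import os, tempfile
#   >>> path = os.path.join(tempfile.mkdtemp(), 'known.tsv')
#   >>> write_tsv(path, 'filename\tname', [('spam.c', 'count')])
#   >>> list(read_tsv(path, 'filename\tname'))
#   [('spam.c', 'count')]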
class Slot:
"""A descriptor that provides a slot.
@ -178,66 +115,3 @@ class classonly:
raise AttributeError(self.name)
# called on the class
return self.getter(None, cls)
class _NTBase:
__slots__ = ()
@classonly
def from_raw(cls, raw):
if not raw:
return None
elif isinstance(raw, cls):
return raw
elif isinstance(raw, str):
return cls.from_string(raw)
else:
if hasattr(raw, 'items'):
return cls(**raw)
try:
args = tuple(raw)
except TypeError:
pass
else:
return cls(*args)
raise NotImplementedError
@classonly
def from_string(cls, value):
"""Return a new instance based on the given string."""
raise NotImplementedError
@classmethod
def _make(cls, iterable): # The default _make() is not subclass-friendly.
return cls.__new__(cls, *iterable)
# XXX Always validate?
#def __init__(self, *args, **kwargs):
# self.validate()
# XXX The default __repr__() is not subclass-friendly (where the name changes).
#def __repr__(self):
# _, _, sig = super().__repr__().partition('(')
# return f'{self.__class__.__name__}({sig}'
# To make sorting work with None:
def __lt__(self, other):
try:
return super().__lt__(other)
except TypeError:
if None in self:
return True
elif None in other:
return False
else:
raise
def validate(self):
return
# XXX Always validate?
#def _replace(self, **kwargs):
# obj = super()._replace(**kwargs)
# obj.validate()
# return obj

View File

@ -0,0 +1,388 @@
import fnmatch
import glob
import os
import os.path
import shutil
import stat
from .iterutil import iter_many
C_SOURCE_SUFFIXES = ('.c', '.h')
def create_backup(old, backup=None):
if isinstance(old, str):
filename = old
else:
filename = getattr(old, 'name', None)
if not filename:
return None
if not backup or backup is True:
backup = f'{filename}.bak'
try:
shutil.copyfile(filename, backup)
except FileNotFoundError as exc:
if exc.filename != filename:
raise # re-raise
backup = None
return backup
##################################
# find files
def match_glob(filename, pattern):
if fnmatch.fnmatch(filename, pattern):
return True
# fnmatch doesn't handle ** quite right. It will not match the
# following:
#
# ('x/spam.py', 'x/**/*.py')
# ('spam.py', '**/*.py')
#
# though it *will* match the following:
#
# ('x/y/spam.py', 'x/**/*.py')
# ('x/spam.py', '**/*.py')
if '**/' not in pattern:
return False
# We only accommodate the single-"**" case.
return fnmatch.fnmatch(filename, pattern.replace('**/', '', 1))
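# Hedged doctest-style sketch of the single-"**" accommodation:
#
#   >>> match_glob('x/spam.py', 'x/**/*.py')
#   True
#   >>> match_glob('spam.py', '**/*.py')
#   True
#   >>> match_glob('spam.c', 'x/*.c')
#   False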
def iter_filenames(filenames, *,
start=None,
include=None,
exclude=None,
):
onempty = Exception('no filenames provided')
for filename, solo in iter_many(filenames, onempty):
check, start = _get_check(filename, start, include, exclude)
yield filename, check, solo
# filenames = iter(filenames or ())
# try:
# first = next(filenames)
# except StopIteration:
# raise Exception('no filenames provided')
# try:
# second = next(filenames)
# except StopIteration:
# check, _ = _get_check(first, start, include, exclude)
# yield first, check, False
# return
#
# check, start = _get_check(first, start, include, exclude)
# yield first, check, True
# check, start = _get_check(second, start, include, exclude)
# yield second, check, True
# for filename in filenames:
# check, start = _get_check(filename, start, include, exclude)
# yield filename, check, True
def expand_filenames(filenames):
for filename in filenames:
# XXX Do we need to use glob.escape (a la commit 9355868458, GH-20994)?
if '**/' in filename:
yield from glob.glob(filename.replace('**/', ''))
yield from glob.glob(filename)
def _get_check(filename, start, include, exclude):
if start and filename != start:
return (lambda: '<skipped>'), start
else:
def check():
if _is_excluded(filename, exclude, include):
return '<excluded>'
return None
return check, None
def _is_excluded(filename, exclude, include):
if include:
for included in include:
if match_glob(filename, included):
return False
return True
elif exclude:
for excluded in exclude:
if match_glob(filename, excluded):
return True
return False
else:
return False
def _walk_tree(root, *,
_walk=os.walk,
):
# A wrapper around os.walk that resolves the filenames.
for parent, _, names in _walk(root):
for name in names:
yield os.path.join(parent, name)
def walk_tree(root, *,
suffix=None,
walk=_walk_tree,
):
"""Yield each file in the tree under the given directory name.
If "suffix" is provided then only files with that suffix will
be included.
"""
if suffix and not isinstance(suffix, str):
raise ValueError('suffix must be a string')
for filename in walk(root):
if suffix and not filename.endswith(suffix):
continue
yield filename
def glob_tree(root, *,
suffix=None,
_glob=glob.iglob,
):
"""Yield each file in the tree under the given directory name.
If "suffix" is provided then only files with that suffix will
be included.
"""
suffix = suffix or ''
if not isinstance(suffix, str):
raise ValueError('suffix must be a string')
for filename in _glob(f'{root}/*{suffix}'):
yield filename
for filename in _glob(f'{root}/**/*{suffix}'):
yield filename
def iter_files(root, suffix=None, relparent=None, *,
get_files=os.walk,
_glob=glob_tree,
_walk=walk_tree,
):
"""Yield each file in the tree under the given directory name.
If "root" is a non-string iterable then do the same for each of
those trees.
If "suffix" is provided then only files with that suffix will
be included.
if "relparent" is provided then it is used to resolve each
filename as a relative path.
"""
if not isinstance(root, str):
roots = root
for root in roots:
yield from iter_files(root, suffix, relparent,
get_files=get_files,
_glob=_glob, _walk=_walk)
return
# Use the right "walk" function.
if get_files in (glob.glob, glob.iglob, glob_tree):
get_files = _glob
else:
_files = _walk_tree if get_files in (os.walk, walk_tree) else get_files
get_files = (lambda *a, **k: _walk(*a, walk=_files, **k))
# Handle a single suffix.
if suffix and not isinstance(suffix, str):
filenames = get_files(root)
suffix = tuple(suffix)
else:
filenames = get_files(root, suffix=suffix)
suffix = None
for filename in filenames:
if suffix and not isinstance(suffix, str): # multiple suffixes
if not filename.endswith(suffix):
continue
if relparent:
filename = os.path.relpath(filename, relparent)
yield filename
def iter_files_by_suffix(root, suffixes, relparent=None, *,
walk=walk_tree,
_iter_files=iter_files,
):
"""Yield each file in the tree that has the given suffixes.
Unlike iter_files(), the results are in the original suffix order.
"""
if isinstance(suffixes, str):
suffixes = [suffixes]
# XXX Ignore repeated suffixes?
for suffix in suffixes:
yield from _iter_files(root, suffix, relparent)
##################################
# file info
# XXX posix-only?
S_IRANY = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
S_IWANY = stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH
S_IXANY = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
def is_readable(file, *, user=None, check=False):
filename, st, mode = _get_file_info(file)
if check:
try:
okay = _check_file(filename, S_IRANY)
except NotImplementedError:
okay = NotImplemented
if okay is not NotImplemented:
return okay
# Fall back to checking the mode.
return _check_mode(st, mode, S_IRANY, user)
def is_writable(file, *, user=None, check=False):
filename, st, mode = _get_file_info(file)
if check:
try:
okay = _check_file(filename, S_IWANY)
except NotImplementedError:
okay = NotImplemented
if okay is not NotImplemented:
return okay
# Fall back to checking the mode.
return _check_mode(st, mode, S_IWANY, user)
def is_executable(file, *, user=None, check=False):
filename, st, mode = _get_file_info(file)
if check:
try:
okay = _check_file(filename, S_IXANY)
except NotImplementedError:
okay = NotImplemented
if okay is not NotImplemented:
return okay
# Fall back to checking the mode.
return _check_mode(st, mode, S_IXANY, user)
def _get_file_info(file):
filename = st = mode = None
if isinstance(file, int):
mode = file
elif isinstance(file, os.stat_result):
st = file
else:
if isinstance(file, str):
filename = file
elif hasattr(file, 'name') and os.path.exists(file.name):
filename = file.name
else:
raise NotImplementedError(file)
st = os.stat(filename)
return filename, st, mode or st.st_mode
def _check_file(filename, check):
if not isinstance(filename, str):
raise Exception(f'filename required to check file, got {filename}')
if check & S_IRANY:
flags = os.O_RDONLY
elif check & S_IWANY:
flags = os.O_WRONLY
elif check & S_IXANY:
# We can worry about S_IXANY later
return NotImplemented
else:
raise NotImplementedError(check)
try:
fd = os.open(filename, flags)
except PermissionError:
return False
# We do not ignore other exceptions.
else:
os.close(fd)
return True
def _get_user_info(user):
import pwd
username = uid = gid = groups = None
if user is None:
uid = os.geteuid()
#username = os.getlogin()
username = pwd.getpwuid(uid)[0]
gid = os.getgid()
groups = os.getgroups()
else:
if isinstance(user, int):
uid = user
entry = pwd.getpwuid(uid)
username = entry.pw_name
elif isinstance(user, str):
username = user
entry = pwd.getpwnam(username)
uid = entry.pw_uid
else:
raise NotImplementedError(user)
gid = entry.pw_gid
groups = os.getgrouplist(username, gid)
return username, uid, gid, groups
def _check_mode(st, mode, check, user):
orig = check
_, uid, gid, groups = _get_user_info(user)
if check & S_IRANY:
check -= S_IRANY
matched = False
if mode & stat.S_IRUSR:
if st.st_uid == uid:
matched = True
if mode & stat.S_IRGRP:
if st.st_gid == gid or st.st_gid in groups:
matched = True
if mode & stat.S_IROTH:
matched = True
if not matched:
return False
if check & S_IWANY:
check -= S_IWANY
matched = False
if mode & stat.S_IWUSR:
if st.st_uid == uid:
matched = True
if mode & stat.S_IWGRP:
if st.st_gid == gid or st.st_gid in groups:
matched = True
if mode & stat.S_IWOTH:
matched = True
if not matched:
return False
if check & S_IXANY:
check -= S_IXANY
matched = False
if mode & stat.S_IXUSR:
if st.st_uid == uid:
matched = True
if mode & stat.S_IXGRP:
if st.st_gid == gid or st.st_gid in groups:
matched = True
if mode & stat.S_IXOTH:
matched = True
if not matched:
return False
if check:
raise NotImplementedError((orig, check))
return True

View File

@ -0,0 +1,48 @@
_NOT_SET = object()
def peek_and_iter(items):
if not items:
return None, None
items = iter(items)
try:
peeked = next(items)
except StopIteration:
return None, None
def chain():
yield peeked
yield from items
return chain(), peeked
def iter_many(items, onempty=None):
if not items:
if onempty is None:
return
if not callable(onempty):
raise onempty
items = onempty(items)
yield from iter_many(items, onempty=None)
return
items = iter(items)
try:
first = next(items)
except StopIteration:
if onempty is None:
return
if not callable(onempty):
raise onempty
items = onempty(items)
yield from iter_many(items, onempty=None)
else:
try:
second = next(items)
except StopIteration:
yield first, False
return
else:
yield first, True
yield second, True
for item in items:
yield item, True
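# Hedged doctest-style sketch of the (item, ismany) pairs:
#
#   >>> list(iter_many(['a']))
#   [('a', False)]
#   >>> list(iter_many(['a', 'b', 'c']))
#   [('a', True), ('b', True), ('c', True)]
#   >>> list(iter_many([]))
#   []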

View File

@ -0,0 +1,63 @@
import logging
import sys
VERBOSITY = 3
# The root logger for the whole top-level package:
_logger = logging.getLogger(__name__.rpartition('.')[0])
def configure_logger(logger, verbosity=VERBOSITY, *,
logfile=None,
maxlevel=logging.CRITICAL,
):
level = max(1, # 0 disables it, so we use the next lowest.
min(maxlevel,
maxlevel - verbosity * 10))
logger.setLevel(level)
#logger.propagate = False
if not logger.handlers:
if logfile:
handler = logging.FileHandler(logfile)
else:
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(level)
#handler.setFormatter(logging.Formatter())
logger.addHandler(handler)
# In case the provided logger is in a sub-package...
if logger is not _logger:
configure_logger(
_logger,
verbosity,
logfile=logfile,
maxlevel=maxlevel,
)
def hide_emit_errors():
"""Ignore errors while emitting log entries.
Rather than printing a message describing the error, we show nothing.
"""
# For now we simply ignore all exceptions. If we wanted to ignore
# specific ones (e.g. BrokenPipeError) then we would need to use
# a Handler subclass with a custom handleError() method.
orig = logging.raiseExceptions
logging.raiseExceptions = False
def restore():
logging.raiseExceptions = orig
return restore
class Printer:
def __init__(self, verbosity=VERBOSITY):
self.verbosity = verbosity
def info(self, *args, **kwargs):
if self.verbosity < 3:
return
print(*args, **kwargs)

View File

@ -0,0 +1,7 @@
class Labeled:
__slots__ = ('_label',)
def __init__(self, label):
self._label = label
def __repr__(self):
return f'<{self._label}>'

View File

@ -0,0 +1,577 @@
import argparse
import contextlib
import fnmatch
import logging
import os
import os.path
import shutil
import sys
from . import fsutil, strutil, iterutil, logging as loggingutil
def get_prog(spec=None, *, absolute=False, allowsuffix=True):
if spec is None:
_, spec = _find_script()
# This is more natural for prog than __file__ would be.
filename = sys.argv[0]
elif isinstance(spec, str):
filename = os.path.normpath(spec)
spec = None
else:
filename = spec.origin
if _is_standalone(filename):
# Check if "installed".
if allowsuffix or not filename.endswith('.py'):
basename = os.path.basename(filename)
found = shutil.which(basename)
if found:
script = os.path.abspath(filename)
found = os.path.abspath(found)
if os.path.normcase(script) == os.path.normcase(found):
return basename
# It is only "standalone".
if absolute:
filename = os.path.abspath(filename)
return filename
elif spec is not None:
module = spec.name
if module.endswith('.__main__'):
module = module[:-9]
return f'{sys.executable} -m {module}'
else:
if absolute:
filename = os.path.abspath(filename)
return f'{sys.executable} {filename}'
def _find_script():
frame = sys._getframe(2)
while frame.f_globals['__name__'] != '__main__':
frame = frame.f_back
# This should match sys.argv[0].
filename = frame.f_globals['__file__']
# This will be None if -m wasn't used.
spec = frame.f_globals['__spec__']
return filename, spec
def is_installed(filename, *, allowsuffix=True):
if not allowsuffix and filename.endswith('.py'):
return False
filename = os.path.abspath(os.path.normpath(filename))
found = shutil.which(os.path.basename(filename))
if not found:
return False
if found != filename:
return False
return _is_standalone(filename)
def is_standalone(filename):
filename = os.path.abspath(os.path.normpath(filename))
return _is_standalone(filename)
def _is_standalone(filename):
return fsutil.is_executable(filename)
##################################
# logging
VERBOSITY = 3
TRACEBACK = os.environ.get('SHOW_TRACEBACK', '').strip()
TRACEBACK = bool(TRACEBACK and TRACEBACK.upper() not in ('0', 'FALSE', 'NO'))
logger = logging.getLogger(__name__)
def configure_logger(verbosity, logger=None, **kwargs):
if logger is None:
# Configure the root logger.
logger = logging.getLogger()
loggingutil.configure_logger(logger, verbosity, **kwargs)
##################################
# selections
class UnsupportedSelectionError(Exception):
def __init__(self, values, possible):
self.values = tuple(values)
self.possible = tuple(possible)
super().__init__(f'unsupported selections {self.unique}')
@property
def unique(self):
return tuple(sorted(set(self.values)))
def normalize_selection(selected: str, *, possible=None):
if selected in (None, True, False):
return selected
elif isinstance(selected, str):
selected = [selected]
elif not selected:
return ()
unsupported = []
_selected = set()
for item in selected:
if not item:
continue
for value in item.strip().replace(',', ' ').split():
if not value:
continue
# XXX Handle subtraction (leading "-").
if possible and value not in possible and value != 'all':
unsupported.append(value)
_selected.add(value)
if unsupported:
raise UnsupportedSelectionError(unsupported, tuple(possible))
if 'all' in _selected:
return True
return frozenset(_selected)
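# Hedged doctest-style sketch (set iteration order is unstable, so we sort):
#
#   >>> sorted(normalize_selection('spam,eggs', possible={'spam', 'eggs', 'ham'}))
#   ['eggs', 'spam']
#   >>> normalize_selection(['all'], possible={'spam', 'eggs'})
#   True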
##################################
# CLI parsing helpers
class CLIArgSpec(tuple):
def __new__(cls, *args, **kwargs):
return super().__new__(cls, (args, kwargs))
def __repr__(self):
args, kwargs = self
args = [repr(arg) for arg in args]
for name, value in kwargs.items():
args.append(f'{name}={value!r}')
return f'{type(self).__name__}({", ".join(args)})'
def __call__(self, parser, *, _noop=(lambda a: None)):
self.apply(parser)
return _noop
def apply(self, parser):
args, kwargs = self
parser.add_argument(*args, **kwargs)
def apply_cli_argspecs(parser, specs):
processors = []
for spec in specs:
if callable(spec):
procs = spec(parser)
_add_procs(processors, procs)
else:
args, kwargs = spec
parser.add_argument(*args, **kwargs)
return processors
def _add_procs(flattened, procs):
# XXX Fail on non-empty, non-callable procs?
if not procs:
return
if callable(procs):
flattened.append(procs)
else:
#processors.extend(p for p in procs if callable(p))
for proc in procs:
_add_procs(flattened, proc)
def add_verbosity_cli(parser):
parser.add_argument('-q', '--quiet', action='count', default=0)
parser.add_argument('-v', '--verbose', action='count', default=0)
def process_args(args):
ns = vars(args)
key = 'verbosity'
if key in ns:
parser.error(f'duplicate arg {key!r}')
ns[key] = max(0, VERBOSITY + ns.pop('verbose') - ns.pop('quiet'))
return key
return process_args
def add_traceback_cli(parser):
parser.add_argument('--traceback', '--tb', action='store_true',
default=TRACEBACK)
parser.add_argument('--no-traceback', '--no-tb', dest='traceback',
action='store_const', const=False)
def process_args(args):
ns = vars(args)
key = 'traceback_cm'
if key in ns:
parser.error(f'duplicate arg {key!r}')
showtb = ns.pop('traceback')
@contextlib.contextmanager
def traceback_cm():
restore = loggingutil.hide_emit_errors()
try:
yield
except BrokenPipeError:
# It was piped to "head" or something similar.
pass
except NotImplementedError:
raise # re-raise
except Exception as exc:
if not showtb:
sys.exit(f'ERROR: {exc}')
raise # re-raise
except KeyboardInterrupt:
if not showtb:
sys.exit('\nINTERRUPTED')
raise # re-raise
except BaseException as exc:
if not showtb:
sys.exit(f'{type(exc).__name__}: {exc}')
raise # re-raise
finally:
restore()
ns[key] = traceback_cm()
return key
return process_args
def add_sepval_cli(parser, opt, dest, choices, *, sep=',', **kwargs):
# if opt is True:
# parser.add_argument(f'--{dest}', action='append', **kwargs)
# elif isinstance(opt, str) and opt.startswith('-'):
# parser.add_argument(opt, dest=dest, action='append', **kwargs)
# else:
# arg = dest if not opt else opt
# kwargs.setdefault('nargs', '+')
# parser.add_argument(arg, dest=dest, action='append', **kwargs)
if not isinstance(opt, str):
parser.error(f'opt must be a string, got {opt!r}')
elif opt.startswith('-'):
parser.add_argument(opt, dest=dest, action='append', **kwargs)
else:
kwargs.setdefault('nargs', '+')
#kwargs.setdefault('metavar', opt.upper())
parser.add_argument(opt, dest=dest, action='append', **kwargs)
def process_args(args):
ns = vars(args)
# XXX Use normalize_selection()?
if isinstance(ns[dest], str):
ns[dest] = [ns[dest]]
selections = []
for many in ns[dest] or ():
for value in many.split(sep):
if value not in choices:
parser.error(f'unknown {dest} {value!r}')
selections.append(value)
ns[dest] = selections
return process_args
def add_files_cli(parser, *, excluded=None, nargs=None):
process_files = add_file_filtering_cli(parser, excluded=excluded)
parser.add_argument('filenames', nargs=nargs or '+', metavar='FILENAME')
return [
process_files,
]
def add_file_filtering_cli(parser, *, excluded=None):
parser.add_argument('--start')
parser.add_argument('--include', action='append')
parser.add_argument('--exclude', action='append')
excluded = tuple(excluded or ())
def process_args(args):
ns = vars(args)
key = 'iter_filenames'
if key in ns:
parser.error(f'duplicate arg {key!r}')
_include = tuple(ns.pop('include') or ())
_exclude = excluded + tuple(ns.pop('exclude') or ())
kwargs = dict(
start=ns.pop('start'),
include=tuple(_parse_files(_include)),
exclude=tuple(_parse_files(_exclude)),
# We use the default for "show_header"
)
ns[key] = (lambda files: fsutil.iter_filenames(files, **kwargs))
return process_args
def _parse_files(filenames):
for filename, _ in strutil.parse_entries(filenames):
yield filename.strip()
def add_failure_filtering_cli(parser, pool, *, default=False):
parser.add_argument('--fail', action='append',
metavar=f'"{{all|{"|".join(sorted(pool))}}},..."')
parser.add_argument('--no-fail', dest='fail', action='store_const', const=())
def process_args(args):
ns = vars(args)
fail = ns.pop('fail')
try:
fail = normalize_selection(fail, possible=pool)
except UnsupportedSelectionError as exc:
parser.error(f'invalid --fail values: {", ".join(exc.unique)}')
else:
if fail is None:
fail = default
if fail is True:
def ignore_exc(_exc):
return False
elif fail is False:
def ignore_exc(_exc):
return True
else:
def ignore_exc(exc):
for err in fail:
if type(exc) == pool[err]:
return False
else:
return True
args.ignore_exc = ignore_exc
return process_args
def add_kind_filtering_cli(parser, *, default=None):
parser.add_argument('--kinds', action='append')
def process_args(args):
ns = vars(args)
kinds = []
for kind in ns.pop('kinds') or default or ():
kinds.extend(kind.strip().replace(',', ' ').split())
if not kinds:
match_kind = (lambda k: True)
else:
included = set()
excluded = set()
for kind in kinds:
if kind.startswith('-'):
kind = kind[1:]
excluded.add(kind)
if kind in included:
included.remove(kind)
else:
included.add(kind)
if kind in excluded:
excluded.remove(kind)
if excluded:
if included:
... # XXX fail?
def match_kind(kind, *, _excluded=excluded):
return kind not in _excluded
else:
def match_kind(kind, *, _included=included):
return kind in _included
args.match_kind = match_kind
return process_args
COMMON_CLI = [
add_verbosity_cli,
add_traceback_cli,
#add_dryrun_cli,
]
def add_commands_cli(parser, commands, *, commonspecs=COMMON_CLI, subset=None):
arg_processors = {}
if isinstance(subset, str):
cmdname = subset
try:
_, argspecs, _ = commands[cmdname]
except KeyError:
raise ValueError(f'unsupported subset {subset!r}')
parser.set_defaults(cmd=cmdname)
arg_processors[cmdname] = _add_cmd_cli(parser, commonspecs, argspecs)
else:
if subset is None:
cmdnames = subset = list(commands)
elif not subset:
raise NotImplementedError
elif isinstance(subset, set):
cmdnames = [k for k in commands if k in subset]
subset = sorted(subset)
else:
cmdnames = [n for n in subset if n in commands]
if len(cmdnames) < len(subset):
bad = tuple(n for n in subset if n not in commands)
raise ValueError(f'unsupported subset {bad}')
common = argparse.ArgumentParser(add_help=False)
common_processors = apply_cli_argspecs(common, commonspecs)
subs = parser.add_subparsers(dest='cmd')
for cmdname in cmdnames:
description, argspecs, _ = commands[cmdname]
sub = subs.add_parser(
cmdname,
description=description,
parents=[common],
)
cmd_processors = _add_cmd_cli(sub, (), argspecs)
arg_processors[cmdname] = common_processors + cmd_processors
return arg_processors
def _add_cmd_cli(parser, commonspecs, argspecs):
processors = []
argspecs = list(commonspecs or ()) + list(argspecs or ())
for argspec in argspecs:
if callable(argspec):
procs = argspec(parser)
_add_procs(processors, procs)
else:
if not argspec:
raise NotImplementedError
args = list(argspec)
if not isinstance(args[-1], str):
kwargs = args.pop()
if not isinstance(args[0], str):
try:
args, = args
except (TypeError, ValueError):
parser.error(f'invalid cmd args {argspec!r}')
else:
kwargs = {}
parser.add_argument(*args, **kwargs)
# There will be nothing to process.
return processors
def _flatten_processors(processors):
for proc in processors:
if proc is None:
continue
if callable(proc):
yield proc
else:
yield from _flatten_processors(proc)
def process_args(args, processors, *, keys=None):
processors = _flatten_processors(processors)
ns = vars(args)
extracted = {}
if keys is None:
for process_args in processors:
hanging = process_args(args)
if isinstance(hanging, str):
hanging = [hanging]
for key in hanging or ():
extracted[key] = ns.pop(key)
else:
remainder = set(keys)
for process_args in processors:
hanging = process_args(args)
if isinstance(hanging, str):
hanging = [hanging]
for key in hanging or ():
if key not in remainder:
raise NotImplementedError(key)
extracted[key] = ns.pop(key)
remainder.remove(key)
if remainder:
raise NotImplementedError(sorted(remainder))
return extracted
def process_args_by_key(args, processors, keys):
extracted = process_args(args, processors, keys=keys)
return [extracted[key] for key in keys]
##################################
# commands
def set_command(name, add_cli):
"""A decorator factory to set CLI info."""
def decorator(func):
if hasattr(func, '__cli__'):
raise Exception(f'already set')
func.__cli__ = (name, add_cli)
return func
return decorator
##################################
# main() helpers
def filter_filenames(filenames, iter_filenames=None):
for filename, check, _ in _iter_filenames(filenames, iter_filenames):
if (reason := check()):
logger.debug(f'{filename}: {reason}')
continue
yield filename
def main_for_filenames(filenames, iter_filenames=None):
for filename, check, show in _iter_filenames(filenames, iter_filenames):
if show:
print()
print('-------------------------------------------')
print(filename)
if (reason := check()):
print(reason)
continue
yield filename
def _iter_filenames(filenames, iter_files):
if iter_files is None:
iter_files = fsutil.iter_filenames
yield from iter_files(filenames)
return
onempty = Exception('no filenames provided')
items = iter_files(filenames)
items, peeked = iterutil.peek_and_iter(items)
if not items:
raise onempty
if isinstance(peeked, str):
check = (lambda: None)
for filename, ismany in iterutil.iter_many(items, onempty):
yield filename, check, ismany
elif len(peeked) == 3:
yield from items
else:
raise NotImplementedError
def iter_marks(mark='.', *, group=5, groups=2, lines=10, sep=' '):
mark = mark or ''
sep = f'{mark}{sep}' if sep else mark
end = f'{mark}{os.linesep}'
div = os.linesep
perline = group * groups
perlines = perline * lines
if perline == 1:
yield end
elif group == 1:
yield sep
count = 1
while True:
if count % perline == 0:
yield end
if count % perlines == 0:
yield div
elif count % group == 0:
yield sep
else:
yield mark
count += 1
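# Hedged sketch of the progress-mark stream (assuming os.linesep == '\n'):
#
#   >>> marks = iter_marks()
#   >>> ''.join(next(marks) for _ in range(10))
#   '..... .....\n'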

View File

@ -0,0 +1,42 @@
import logging
logger = logging.getLogger(__name__)
def unrepr(value):
raise NotImplementedError
def parse_entries(entries, *, ignoresep=None):
for entry in entries:
if ignoresep and ignoresep in entry:
subentries = [entry]
else:
subentries = entry.strip().replace(',', ' ').split()
for item in subentries:
if item.startswith('+'):
filename = item[1:]
try:
infile = open(filename)
except FileNotFoundError:
logger.debug(f'ignored in parse_entries(): +{filename}')
continue
with infile:
# We read the entire file here to ensure the file
# gets closed sooner rather than later. Note that
# the file would stay open if this iterator is never
# exhausted.
lines = infile.read().splitlines()
for line in _iter_significant_lines(lines):
yield line, filename
else:
yield item, None
def _iter_significant_lines(lines):
for line in lines:
line = line.partition('#')[0]
if not line.strip():
continue
yield line

View File

@ -0,0 +1,213 @@
import csv
from . import NOT_SET, strutil, fsutil
EMPTY = '-'
UNKNOWN = '???'
def parse_markers(markers, default=None):
if markers is NOT_SET:
return default
if not markers:
return None
if type(markers) is not str:
return markers
if markers == markers[0] * len(markers):
return [markers]
return list(markers)
def fix_row(row, **markers):
if isinstance(row, str):
raise NotImplementedError(row)
empty = parse_markers(markers.pop('empty', ('-',)))
unknown = parse_markers(markers.pop('unknown', ('???',)))
row = (val if val else None for val in row)
if not empty:
if not unknown:
return row
return (UNKNOWN if val in unknown else val for val in row)
elif not unknown:
return (EMPTY if val in empty else val for val in row)
return (EMPTY if val in empty else (UNKNOWN if val in unknown else val)
for val in row)
def _fix_read_default(row):
for value in row:
yield value.strip()
def _fix_write_default(row, empty=''):
for value in row:
yield empty if value is None else str(value)
def _normalize_fix_read(fix):
if fix is None:
fix = ''
if callable(fix):
def fix_row(row):
values = fix(row)
return _fix_read_default(values)
elif isinstance(fix, str):
def fix_row(row):
values = _fix_read_default(row)
return (None if v == fix else v
for v in values)
else:
raise NotImplementedError(fix)
return fix_row
def _normalize_fix_write(fix, empty=''):
if fix is None:
fix = empty
if callable(fix):
def fix_row(row):
values = fix(row)
return _fix_write_default(values, empty)
elif isinstance(fix, str):
def fix_row(row):
return _fix_write_default(row, fix)
else:
raise NotImplementedError(fix)
return fix_row
def read_table(infile, header, *,
sep='\t',
fix=None,
_open=open,
_get_reader=csv.reader,
):
"""Yield each row of the given ???-separated (e.g. tab) file."""
if isinstance(infile, str):
with _open(infile, newline='') as infile:
yield from read_table(
infile,
header,
sep=sep,
fix=fix,
_open=_open,
_get_reader=_get_reader,
)
return
lines = strutil._iter_significant_lines(infile)
# Validate the header.
if not isinstance(header, str):
header = sep.join(header)
try:
actualheader = next(lines).strip()
except StopIteration:
actualheader = ''
if actualheader != header:
raise ValueError(f'bad header {actualheader!r}')
fix_row = _normalize_fix_read(fix)
for row in _get_reader(lines, delimiter=sep or '\t'):
yield tuple(fix_row(row))
def write_table(outfile, header, rows, *,
sep='\t',
fix=None,
backup=True,
_open=open,
_get_writer=csv.writer,
):
"""Write each of the rows to the given ???-separated (e.g. tab) file."""
if backup:
fsutil.create_backup(outfile, backup)
if isinstance(outfile, str):
with _open(outfile, 'w', newline='') as outfile:
return write_table(
outfile,
header,
rows,
sep=sep,
fix=fix,
backup=backup,
_open=_open,
_get_writer=_get_writer,
)
if isinstance(header, str):
header = header.split(sep or '\t')
fix_row = _normalize_fix_write(fix)
writer = _get_writer(outfile, delimiter=sep or '\t')
writer.writerow(header)
for row in rows:
writer.writerow(
tuple(fix_row(row))
)
def parse_table(entries, sep, header=None, rawsep=None, *,
default=NOT_SET,
strict=True,
):
header, sep = _normalize_table_file_props(header, sep)
if not sep:
raise ValueError('missing "sep"')
ncols = None
if header:
if strict:
ncols = len(header.split(sep))
cur_file = None
for line, filename in strutil.parse_entries(entries, ignoresep=sep):
_sep = sep
if filename:
if header and cur_file != filename:
cur_file = filename
# Skip the first line if it's the header.
if line.strip() == header:
continue
else:
# We expected the header.
raise NotImplementedError((header, line))
elif rawsep and sep not in line:
_sep = rawsep
row = _parse_row(line, _sep, ncols, default)
if strict and not ncols:
ncols = len(row)
yield row, filename
def parse_row(line, sep, *, ncols=None, default=NOT_SET):
if not sep:
raise ValueError('missing "sep"')
return _parse_row(line, sep, ncols, default)
def _parse_row(line, sep, ncols, default):
row = tuple(v.strip() for v in line.split(sep))
if (ncols or 0) > 0:
diff = ncols - len(row)
if diff:
if default is NOT_SET or diff < 0:
raise Exception(f'bad row (expected {ncols} columns, got {row!r})')
row += (default,) * diff
return row
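# Hedged doctest-style sketch of splitting and padding:
#
#   >>> parse_row('a:b', ':')
#   ('a', 'b')
#   >>> parse_row('a', ':', ncols=3, default='')
#   ('a', '', '')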
def _normalize_table_file_props(header, sep):
if not header:
return None, sep
if not isinstance(header, str):
if not sep:
raise NotImplementedError(header)
header = sep.join(header)
elif not sep:
for sep in ('\t', ',', ' '):
if sep in header:
break
else:
sep = None
return header, sep

View File

@ -0,0 +1,46 @@
from .parser import parse as _parse
from .preprocessor import get_preprocessor as _get_preprocessor
def parse_file(filename, *,
match_kind=None,
get_file_preprocessor=None,
):
if get_file_preprocessor is None:
get_file_preprocessor = _get_preprocessor()
yield from _parse_file(filename, match_kind, get_file_preprocessor)
def parse_files(filenames, *,
match_kind=None,
get_file_preprocessor=None,
):
if get_file_preprocessor is None:
get_file_preprocessor = _get_preprocessor()
for filename in filenames:
yield from _parse_file(filename, match_kind, get_file_preprocessor)
def _parse_file(filename, match_kind, get_file_preprocessor):
# Preprocess the file.
preprocess = get_file_preprocessor(filename)
preprocessed = preprocess()
if preprocessed is None:
return
# Parse the lines.
srclines = ((l.file, l.data) for l in preprocessed if l.kind == 'source')
for item in _parse(srclines):
if match_kind is not None and not match_kind(item.kind):
continue
if not item.filename:
raise NotImplementedError(repr(item))
yield item
def parse_signature(text):
raise NotImplementedError
# aliases
from .info import resolve_parsed
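# Hypothetical driver, assuming Tools/c-analyzer is on sys.path and that
# KIND comes from c_parser.info (as __main__ uses it):
#
#   from c_parser import parse_file
#   from c_parser.info import KIND
#
#   for item in parse_file('Python/ceval.c',
#                          match_kind=(lambda kind: kind is KIND.VARIABLE)):
#       print(item.filename, item.name)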

View File

@ -0,0 +1,261 @@
import logging
import os.path
import sys
from c_common.scriptutil import (
CLIArgSpec as Arg,
add_verbosity_cli,
add_traceback_cli,
add_kind_filtering_cli,
add_files_cli,
add_commands_cli,
process_args_by_key,
configure_logger,
get_prog,
main_for_filenames,
)
from .preprocessor import get_preprocessor
from .preprocessor.__main__ import (
add_common_cli as add_preprocessor_cli,
)
from .info import KIND
from . import parse_file as _iter_parsed
logger = logging.getLogger(__name__)
def _format_vartype(vartype):
if isinstance(vartype, str):
return vartype
data = vartype
try:
vartype = data['vartype']
except KeyError:
storage, typequal, typespec, abstract = vartype.values()
else:
storage = data.get('storage')
if storage:
_, typequal, typespec, abstract = vartype.values()
else:
storage, typequal, typespec, abstract = vartype.values()
vartype = f'{typespec} {abstract}'
if typequal:
vartype = f'{typequal} {vartype}'
if storage:
vartype = f'{storage} {vartype}'
return vartype
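# For example (illustrative; note that dict insertion order matters here):
#
#   _format_vartype({'storage': 'static', 'typequal': 'const',
#                    'typespec': 'int', 'abstract': '*'})
#   # -> 'static const int *'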
def _get_preprocessor(filename, **kwargs):
return get_preprocessor(filename,
log_err=print,
**kwargs
)
#######################################
# the formats
def fmt_raw(filename, item, *, showfwd=None):
yield str(tuple(item))
def fmt_summary(filename, item, *, showfwd=None):
if item.filename and item.filename != os.path.join('.', filename):
yield f'> {item.filename}'
if showfwd is None:
LINE = ' {lno:>5} {kind:10} {funcname:40} {fwd:1} {name:40} {data}'
else:
LINE = ' {lno:>5} {kind:10} {funcname:40} {name:40} {data}'
lno = kind = funcname = fwd = name = data = ''
MIN_LINE = len(LINE.format(**locals()))
fileinfo, kind, funcname, name, data = item
lno = fileinfo.lno if fileinfo and fileinfo.lno >= 0 else ''
funcname = funcname or ' --'
name = name or ' --'
isforward = False
if kind is KIND.FUNCTION:
storage, inline, params, returntype, isforward = data.values()
returntype = _format_vartype(returntype)
data = returntype + params
if inline:
data = f'inline {data}'
if storage:
data = f'{storage} {data}'
elif kind is KIND.VARIABLE:
data = _format_vartype(data)
elif kind is KIND.STRUCT or kind is KIND.UNION:
if data is None:
isforward = True
else:
fields = data
data = f'({len(data)}) {{ '
indent = ',\n' + ' ' * (MIN_LINE + len(data))
data += ', '.join(f.name for f in fields[:5])
fields = fields[5:]
while fields:
data = f'{data}{indent}{", ".join(f.name for f in fields[:5])}'
fields = fields[5:]
data += ' }'
elif kind is KIND.ENUM:
if data is None:
isforward = True
else:
names = [d if isinstance(d, str) else d.name
for d in data]
data = f'({len(data)}) {{ '
indent = ',\n' + ' ' * (MIN_LINE + len(data))
data += ', '.join(names[:5])
names = names[5:]
while names:
data = f'{data}{indent}{", ".join(names[:5])}'
names = names[5:]
data += ' }'
elif kind is KIND.TYPEDEF:
data = f'typedef {data}'
elif kind == KIND.STATEMENT:
pass
else:
raise NotImplementedError(item)
if isforward:
fwd = '*'
if not showfwd and showfwd is not None:
return
elif showfwd:
return
kind = kind.value
yield LINE.format(**locals())
def fmt_full(filename, item, *, showfwd=None):
raise NotImplementedError
FORMATS = {
'raw': fmt_raw,
'summary': fmt_summary,
'full': fmt_full,
}
def add_output_cli(parser):
parser.add_argument('--format', dest='fmt', default='summary', choices=tuple(FORMATS))
parser.add_argument('--showfwd', action='store_true', default=None)
parser.add_argument('--no-showfwd', dest='showfwd', action='store_false', default=None)
def process_args(args):
pass
return process_args
#######################################
# the commands
def _cli_parse(parser, excluded=None, **prepr_kwargs):
process_output = add_output_cli(parser)
process_kinds = add_kind_filtering_cli(parser)
process_preprocessor = add_preprocessor_cli(parser, **prepr_kwargs)
process_files = add_files_cli(parser, excluded=excluded)
return [
process_output,
process_kinds,
process_preprocessor,
process_files,
]
def cmd_parse(filenames, *,
fmt='summary',
showfwd=None,
iter_filenames=None,
**kwargs
):
if 'get_file_preprocessor' not in kwargs:
kwargs['get_file_preprocessor'] = _get_preprocessor()
try:
do_fmt = FORMATS[fmt]
except KeyError:
raise ValueError(f'unsupported fmt {fmt!r}')
for filename in main_for_filenames(filenames, iter_filenames):
for item in _iter_parsed(filename, **kwargs):
for line in do_fmt(filename, item, showfwd=showfwd):
print(line)
def _cli_data(parser):
...
return []
def cmd_data(filenames,
**kwargs
):
# XXX
raise NotImplementedError
COMMANDS = {
'parse': (
'parse the given C source & header files',
[_cli_parse],
cmd_parse,
),
'data': (
'check/manage local data (e.g. excludes, macros)',
[_cli_data],
cmd_data,
),
}
#######################################
# the script
def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *, subset='parse'):
import argparse
parser = argparse.ArgumentParser(
prog=prog or get_prog(),
)
processors = add_commands_cli(
parser,
commands={k: v[1] for k, v in COMMANDS.items()},
commonspecs=[
add_verbosity_cli,
add_traceback_cli,
],
subset=subset,
)
args = parser.parse_args(argv)
ns = vars(args)
cmd = ns.pop('cmd')
verbosity, traceback_cm = process_args_by_key(
args,
processors[cmd],
['verbosity', 'traceback_cm'],
)
return cmd, ns, verbosity, traceback_cm
def main(cmd, cmd_kwargs):
try:
run_cmd = COMMANDS[cmd][-1]
except KeyError:
raise ValueError(f'unsupported cmd {cmd!r}')
run_cmd(**cmd_kwargs)
if __name__ == '__main__':
cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
configure_logger(verbosity)
with traceback_cm:
main(cmd, cmd_kwargs)
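# For instance (hypothetical invocation, run from Tools/c-analyzer):
#
#   python3 -m c_parser parse --format summary Python/ceval.c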

View File

@ -0,0 +1,244 @@
f'''
struct {ANON_IDENTIFIER};
struct {{ ... }}
struct {IDENTIFIER} {{ ... }}
union {ANON_IDENTIFIER};
union {{ ... }}
union {IDENTIFIER} {{ ... }}
enum {ANON_IDENTIFIER};
enum {{ ... }}
enum {IDENTIFIER} {{ ... }}
typedef {VARTYPE} {IDENTIFIER};
typedef {IDENTIFIER};
typedef {IDENTIFIER};
typedef {IDENTIFIER};
'''
def parse(srclines):
if isinstance(srclines, str): # a filename
raise NotImplementedError
# This only handles at most 10 nested levels.
#MATCHED_PARENS = textwrap.dedent(rf'''
# # matched parens
# (?:
# [(] # level 0
# (?:
# [^()]*
# [(] # level 1
# (?:
# [^()]*
# [(] # level 2
# (?:
# [^()]*
# [(] # level 3
# (?:
# [^()]*
# [(] # level 4
# (?:
# [^()]*
# [(] # level 5
# (?:
# [^()]*
# [(] # level 6
# (?:
# [^()]*
# [(] # level 7
# (?:
# [^()]*
# [(] # level 8
# (?:
# [^()]*
# [(] # level 9
# (?:
# [^()]*
# [(] # level 10
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )*
# [^()]*
# [)]
# )
# # end matched parens
# ''')
'''
# for loop
(?:
\s* \b for
\s* [(]
(
[^;]* ;
[^;]* ;
.*?
) # <header>
[)]
\s*
(?:
(?:
(
{_ind(SIMPLE_STMT, 6)}
) # <stmt>
;
)
|
( {{ ) # <open>
)
)
|
(
(?:
(?:
(?:
{_ind(SIMPLE_STMT, 6)}
)?
return \b \s*
{_ind(INITIALIZER, 5)}
)
|
(?:
(?:
{IDENTIFIER} \s*
(?: [.] | -> ) \s*
)*
{IDENTIFIER}
\s* = \s*
{_ind(INITIALIZER, 5)}
)
|
(?:
{_ind(SIMPLE_STMT, 5)}
)
)
|
# cast compound literal
(?:
(?:
[^'"{{}};]*
{_ind(STRING_LITERAL, 5)}
)*
[^'"{{}};]*?
[^'"{{}};=]
=
\s* [(] [^)]* [)]
\s* {{ [^;]* }}
)
) # <stmt>
# compound statement
(?:
(
(?:
# "for" statements are handled separately above.
(?: (?: else \s+ )? if | switch | while ) \s*
{_ind(COMPOUND_HEAD, 5)}
)
|
(?: else | do )
# We do not worry about compound statements for labels,
# "case", or "default".
)? # <header>
\s*
( {{ ) # <open>
)
(
(?:
[^'"{{}};]*
{_ind(STRING_LITERAL, 5)}
)*
[^'"{{}};]*
# Presumably we will not see "== {{".
[^\s='"{{}};]
)? # <header>
(
\b
(?:
# We don't worry about labels with a compound statement.
(?:
switch \s* [(] [^{{]* [)]
)
|
(?:
case \b \s* [^:]+ [:]
)
|
(?:
default \s* [:]
)
|
(?:
do
)
|
(?:
while \s* [(] [^{{]* [)]
)
|
#(?:
# for \s* [(] [^{{]* [)]
# )
#|
(?:
if \s* [(]
(?: [^{{]* [^)] \s* {{ )* [^{{]*
[)]
)
|
(?:
else
(?:
\s*
if \s* [(]
(?: [^{{]* [^)] \s* {{ )* [^{{]*
[)]
)?
)
)
)? # <header>
'''

View File

@ -0,0 +1,150 @@
import os.path
import c_common.tables as _tables
import c_parser.info as _info
BASE_COLUMNS = [
'filename',
'funcname',
'name',
'kind',
]
END_COLUMNS = {
'parsed': 'data',
'decls': 'declaration',
}
def _get_columns(group, extra=None):
return BASE_COLUMNS + list(extra or ()) + [END_COLUMNS[group]]
#return [
# *BASE_COLUMNS,
# *extra or (),
# END_COLUMNS[group],
#]
#############################
# high-level
def read_parsed(infile):
# XXX Support other formats than TSV?
columns = _get_columns('parsed')
for row in _tables.read_table(infile, columns, sep='\t', fix='-'):
yield _info.ParsedItem.from_row(row, columns)
def write_parsed(items, outfile):
# XXX Support other formats than TSV?
columns = _get_columns('parsed')
rows = (item.as_row(columns) for item in items)
_tables.write_table(outfile, columns, rows, sep='\t', fix='-')
def read_decls(infile, fmt=None):
if fmt is None:
fmt = _get_format(infile)
read_all, _ = _get_format_handlers('decls', fmt)
for decl, _ in read_all(infile):
yield decl
def write_decls(decls, outfile, fmt=None, *, backup=False):
if fmt is None:
fmt = _get_format(outfile)
_, write_all = _get_format_handlers('decls', fmt)
write_all(decls, outfile, backup=backup)
#############################
# formats
def _get_format(file, default='tsv'):
if isinstance(file, str):
filename = file
else:
filename = getattr(file, 'name', '')
_, ext = os.path.splitext(filename)
return ext[1:] if ext else default
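# For example: _get_format('decls.tsv') -> 'tsv'. A file object is keyed
# off its .name attribute, and anything without an extension falls back
# to the default ('tsv').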
def _get_format_handlers(group, fmt):
# XXX Use a registry.
if group != 'decls':
raise NotImplementedError(group)
if fmt == 'tsv':
return (_iter_decls_tsv, _write_decls_tsv)
else:
raise NotImplementedError(fmt)
# tsv
def iter_decls_tsv(infile, extracolumns=None, relroot=None):
for info, extra in _iter_decls_tsv(infile, extracolumns, relroot):
decl = _info.Declaration.from_row(info)
yield decl, extra
def write_decls_tsv(decls, outfile, extracolumns=None, *,
relroot=None,
**kwargs
):
# XXX Move the row rendering here.
_write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs)
def _iter_decls_tsv(infile, extracolumns=None, relroot=None):
columns = _get_columns('decls', extracolumns)
for row in _tables.read_table(infile, columns, sep='\t'):
if extracolumns:
declinfo = row[:4] + row[-1:]
extra = row[4:-1]
else:
declinfo = row
extra = None
if relroot:
# XXX Use something like tables.fix_row() here.
declinfo = [None if v == '-' else v
for v in declinfo]
declinfo[0] = os.path.join(relroot, declinfo[0])
yield declinfo, extra
def _write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs):
columns = _get_columns('decls', extracolumns)
if extracolumns:
def render_decl(decl, relroot):
if type(decl) is tuple:
decl, *extra = decl
else:
extra = ()
extra = tuple(extra) + ('???',) * (len(extracolumns) - len(extra))
*row, declaration = _render_known_decl(decl, relroot)
return (*row, *extra, declaration)
else:
render_decl = _render_known_decl
_tables.write_table(
outfile,
header='\t'.join(columns),
rows=(render_decl(d, relroot) for d in decls),
sep='\t',
**kwargs
)
def _render_known_decl(decl, relroot, *,
# These match BASE_COLUMNS + END_COLUMNS[group].
_columns = 'filename parent name kind data'.split(),
):
if not isinstance(decl, _info.Declaration):
# e.g. Analyzed
decl = decl.decl
rowdata = decl.render_rowdata(_columns)
if relroot:
rowdata['filename'] = os.path.relpath(rowdata['filename'], relroot)
return [rowdata[c] or '-' for c in _columns]
# XXX
#return _tables.fix_row(rowdata[c] for c in columns)

File diff suppressed because it is too large

View File

@ -0,0 +1,212 @@
"""A simple non-validating parser for C99.
The functions and regex patterns here are not entirely suitable for
validating C syntax. Please rely on a proper compiler for that.
Instead our goal here is merely matching and extracting information from
valid C code.
Furthermore, the grammar rules for the C syntax (particularly as
described in the K&R book) actually describe a superset, of which the
full C language is a proper subset. Here are some of the extra
conditions that must be applied when parsing C code:
* ...
(see: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf)
We have taken advantage of the elements of the C grammar that are used
only in a few limited contexts, mostly as delimiters. They allow us to
keep the regex patterns narrowly focused. Here are the relevant tokens
and the grammar rules in which they are used:
separators:
* ";"
+ (decl) struct/union: at end of each member decl
+ (decl) declaration: at end of each (non-compound) decl
+ (stmt) expr stmt: at end of each stmt
+ (stmt) for: between exprs in "header"
+ (stmt) goto: at end
+ (stmt) continue: at end
+ (stmt) break: at end
+ (stmt) return: at end
* ","
+ (decl) struct/union: between member declators
+ (decl) param-list: between params
+ (decl) enum: between enumerators
+ (decl) initializer (compound): between initializers
+ (expr) postfix: between func call args
+ (expr) expression: between "assignment" exprs
* ":"
+ (decl) struct/union: in member declators
+ (stmt) label: between label and stmt
+ (stmt) case: between expression and stmt
+ (stmt) default: between "default" and stmt
* "="
+ (decl) declaration: between decl and initializer
+ (decl) enumerator: between identifier and "initializer"
+ (expr) assignment: between "var" and expr
wrappers:
* "(...)"
+ (decl) declarator (func ptr): to wrap ptr/name
+ (decl) declarator (func ptr): around params
+ (decl) declarator: around sub-declarator (for readability)
+ (expr) postfix (func call): around args
+ (expr) primary: around sub-expr
+ (stmt) if: around condition
+ (stmt) switch: around source expr
+ (stmt) while: around condition
+ (stmt) do-while: around condition
+ (stmt) for: around "header"
* "{...}"
+ (decl) enum: around enumerators
+ (decl) func: around body
+ (stmt) compound: around stmts
* "[...]"
+ (decl) declarator: for arrays
+ (expr) postfix: array access
other:
* "*"
+ (decl) declarator: for pointer types
+ (expr) unary: for pointer deref
To simplify the regular expressions used here, we've taken some
shortcuts and made certain assumptions about the code we are parsing.
Some of these allow us to skip context-sensitive matching (e.g. braces)
or otherwise still match arbitrary C code unambiguously. In a few
corner cases the patterns are ambiguous relative to arbitrary C code,
but they remain unambiguous in the specific code we are parsing.
Here are the cases where we've taken shortcuts or made assumptions:
* there is no overlap syntactically between the local context (func
bodies) and the global context (other than variable decls), so we
do not need to worry about ambiguity due to the overlap:
+ the global context has no expressions or statements
+ the local context has no function definitions or type decls
* no "inline" type declarations (struct, union, enum) in function
parameters (including function pointers)
* no "inline" type decls in function return types
* no superfluous parentheses in declarators
* var decls in for loops are always "simple" (e.g. no inline types)
* only inline struct/union/enum decls may be anonymous (without a name)
* no function pointers in function pointer parameters
* for loop "headers" do not have curly braces (e.g. compound init)
* syntactically, variable decls do not overlap with stmts/exprs, except
in the following case:
spam (*eggs) (...)
This could be either a function pointer variable named "eggs"
or a call to a function named "spam", which returns a function
pointer that gets called. The only differentiator is the
syntax used in the "..." part. It will be comma-separated
parameters for the former and comma-separated expressions for
the latter. Thus, if we expect such decls or calls then we must
parse the decl params.
"""
"""
TODO:
* extract CPython-specific code
* drop include injection (or only add when needed)
* track position instead of slicing "text"
* Parser class instead of the _iter_source() mess
* alt impl using a state machine (& tokenizer or split on delimiters)
"""
from ..info import ParsedItem
from ._info import SourceInfo
def parse(srclines):
if isinstance(srclines, str): # a filename
raise NotImplementedError
anon_name = anonymous_names()
for result in _parse(srclines, anon_name):
yield ParsedItem.from_raw(result)
# XXX Later: Add a separate function to deal with preprocessor directives
# parsed out of raw source.
def anonymous_names():
counter = 1
def anon_name(prefix='anon-'):
nonlocal counter
name = f'{prefix}{counter}'
counter += 1
return name
return anon_name
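# For example:
#
#   anon_name = anonymous_names()
#   anon_name()                # -> 'anon-1'
#   anon_name('struct-field-') # -> 'struct-field-2'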
#############################
# internal impl
import logging
_logger = logging.getLogger(__name__)
def _parse(srclines, anon_name):
from ._global import parse_globals
source = _iter_source(srclines)
#source = _iter_source(srclines, showtext=True)
for result in parse_globals(source, anon_name):
# XXX Handle blocks here instead of in parse_globals().
yield result
def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False):
filestack = []
allinfo = {}
# "lines" should be (fileinfo, data), as produced by the preprocessor code.
for fileinfo, line in lines:
if fileinfo.filename in filestack:
while fileinfo.filename != filestack[-1]:
filename = filestack.pop()
del allinfo[filename]
filename = fileinfo.filename
srcinfo = allinfo[filename]
else:
filename = fileinfo.filename
srcinfo = SourceInfo(filename)
filestack.append(filename)
allinfo[filename] = srcinfo
_logger.debug(f'-> {line}')
srcinfo._add_line(line, fileinfo.lno)
if len(srcinfo.text) > maxtext:
break
if srcinfo.end - srcinfo.start > maxlines:
break
while srcinfo._used():
yield srcinfo
if showtext:
_logger.debug(f'=> {srcinfo.text}')
else:
if not filestack:
srcinfo = SourceInfo('???')
else:
filename = filestack[-1]
srcinfo = allinfo[filename]
while srcinfo._used():
yield srcinfo
if showtext:
_logger.debug(f'=> {srcinfo.text}')
yield srcinfo
if showtext:
_logger.debug(f'=> {srcinfo.text}')
if not srcinfo._ready:
return
# At this point either the file ended prematurely
# or there's "too much" text.
filename, lno, text = srcinfo.filename, srcinfo._start, srcinfo.text
if len(text) > 500:
text = text[:500] + '...'
raise Exception(f'unmatched text ({filename} starting at line {lno}):\n{text}')

View File

@ -0,0 +1,6 @@
def _parse(srclines, anon_name):
text = ' '.join(l for _, l in srclines)
from ._delim import parse
yield from parse(text, anon_name)

View File

@ -0,0 +1,115 @@
import re
from ._regexes import (
_ind,
STRING_LITERAL,
VAR_DECL as _VAR_DECL,
)
def log_match(group, m):
from . import _logger
_logger.debug(f'matched <{group}> ({m.group(0)})')
#############################
# regex utils
def set_capture_group(pattern, group, *, strict=True):
old = f'(?: # <{group}>'
if strict and old not in pattern:
raise ValueError(f'{old!r} not found in pattern')
return pattern.replace(old, f'( # <{group}>', 1)
def set_capture_groups(pattern, groups, *, strict=True):
for group in groups:
pattern = set_capture_group(pattern, group, strict=strict)
return pattern
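# For example (illustrative):
#
#   set_capture_group(r'(?: # <NAME> \w+ )', 'NAME')
#   # -> r'( # <NAME> \w+ )'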
#############################
# syntax-related utils
_PAREN_RE = re.compile(rf'''
(?:
(?:
[^'"()]*
{_ind(STRING_LITERAL, 3)}
)*
[^'"()]*
(?:
( [(] )
|
( [)] )
)
)
''', re.VERBOSE)
def match_paren(text, depth=0):
pos = 0
while (m := _PAREN_RE.match(text, pos)):
pos = m.end()
_open, _close = m.groups()
if _open:
depth += 1
else: # _close
depth -= 1
if depth == 0:
return pos
else:
raise ValueError(f'could not find matching parens for {text!r}')
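# For example: match_paren('(a(b)c) tail') -> 7, the offset just past the
# close paren that balances the opening one.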
VAR_DECL = set_capture_groups(_VAR_DECL, (
'STORAGE',
'TYPE_QUAL',
'TYPE_SPEC',
'DECLARATOR',
'IDENTIFIER',
'WRAPPED_IDENTIFIER',
'FUNC_IDENTIFIER',
))
def parse_var_decl(decl):
m = re.match(VAR_DECL, decl, re.VERBOSE)
(storage, typequal, typespec, declarator,
name,
wrappedname,
funcptrname,
) = m.groups()
if name:
kind = 'simple'
elif wrappedname:
kind = 'wrapped'
name = wrappedname
elif funcptrname:
kind = 'funcptr'
name = funcptrname
else:
raise NotImplementedError
abstract = declarator.replace(name, '')
vartype = {
'storage': storage,
'typequal': typequal,
'typespec': typespec,
'abstract': abstract,
}
return (kind, name, vartype)
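# A hedged example (group layout per VAR_DECL above):
#
#   parse_var_decl('int spam')
#   # -> ('simple', 'spam',
#   #     {'storage': None, 'typequal': None,
#   #      'typespec': 'int', 'abstract': ''})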
#############################
# parser state utils
# XXX Drop this or use it!
def iter_results(results):
if not results:
return
if callable(results):
results = results()
for result, text in results():
if result:
yield result, text

View File

@ -0,0 +1,158 @@
import re
from ._regexes import (
STRUCT_MEMBER_DECL as _STRUCT_MEMBER_DECL,
ENUM_MEMBER_DECL as _ENUM_MEMBER_DECL,
)
from ._common import (
log_match,
parse_var_decl,
set_capture_groups,
)
#############################
# struct / union
STRUCT_MEMBER_DECL = set_capture_groups(_STRUCT_MEMBER_DECL, (
'COMPOUND_TYPE_KIND',
'COMPOUND_TYPE_NAME',
'SPECIFIER_QUALIFIER',
'DECLARATOR',
'SIZE',
'ENDING',
'CLOSE',
))
STRUCT_MEMBER_RE = re.compile(rf'^ \s* {STRUCT_MEMBER_DECL}', re.VERBOSE)
def parse_struct_body(source, anon_name, parent):
done = False
while not done:
done = True
for srcinfo in source:
m = STRUCT_MEMBER_RE.match(srcinfo.text)
if m:
break
else:
# We ran out of lines.
if srcinfo is not None:
srcinfo.done()
return
for item in _parse_struct_next(m, srcinfo, anon_name, parent):
if callable(item):
parse_body = item
yield from parse_body(source)
else:
yield item
done = False
def _parse_struct_next(m, srcinfo, anon_name, parent):
(inline_kind, inline_name,
qualspec, declarator,
size,
ending,
close,
) = m.groups()
remainder = srcinfo.text[m.end():]
if close:
log_match('compound close', m)
srcinfo.advance(remainder)
elif inline_kind:
log_match('compound inline', m)
kind = inline_kind
name = inline_name or anon_name('inline-')
# Immediately emit a forward declaration.
yield srcinfo.resolve(kind, name=name, data=None)
# un-inline the decl. Note that it might not actually be inline.
# We handle the case in the "maybe_inline_actual" branch.
srcinfo.nest(
remainder,
f'{kind} {name}',
)
def parse_body(source):
_parse_body = DECL_BODY_PARSERS[kind]
data = [] # members
ident = f'{kind} {name}'
for item in _parse_body(source, anon_name, ident):
if item.kind == 'field':
data.append(item)
else:
yield item
# XXX Should "parent" really be None for inline type decls?
yield srcinfo.resolve(kind, data, name, parent=None)
srcinfo.resume()
yield parse_body
else:
# not inline (member)
log_match('compound member', m)
if qualspec:
_, name, data = parse_var_decl(f'{qualspec} {declarator}')
if not name:
name = anon_name('struct-field-')
if size:
# data = (data, size)
data['size'] = int(size)
else:
# This shouldn't happen (we expect each field to have a name).
raise NotImplementedError
name = sized_name or anon_name('struct-field-')
data = int(size)
yield srcinfo.resolve('field', data, name, parent) # XXX Restart?
if ending == ',':
remainder = rf'{qualspec} {remainder}'
srcinfo.advance(remainder)
#############################
# enum
ENUM_MEMBER_DECL = set_capture_groups(_ENUM_MEMBER_DECL, (
'CLOSE',
'NAME',
'INIT',
'ENDING',
))
ENUM_MEMBER_RE = re.compile(rf'{ENUM_MEMBER_DECL}', re.VERBOSE)
def parse_enum_body(source, _anon_name, _parent):
ending = None
while ending != '}':
for srcinfo in source:
m = ENUM_MEMBER_RE.match(srcinfo.text)
if m:
break
else:
# We ran out of lines.
if srcinfo is not None:
srcinfo.done()
return
remainder = srcinfo.text[m.end():]
(close,
name, init, ending,
) = m.groups()
if close:
ending = '}'
else:
data = init
yield srcinfo.resolve('field', data, name, _parent)
srcinfo.advance(remainder)
#############################
DECL_BODY_PARSERS = {
'struct': parse_struct_body,
'union': parse_struct_body,
'enum': parse_enum_body,
}

View File

@ -0,0 +1,54 @@
import re
import textwrap
from ._regexes import _ind, STRING_LITERAL
def parse(text, anon_name):
context = None
data = None
for m in DELIMITER_RE.finditer(text):
before, opened, closed = m.groups()
delim = opened or closed
handle_segment = _HANDLERS[context][delim]
result, context, data = handle_segment(before, delim, data)
if result:
yield result
DELIMITER = textwrap.dedent(rf'''
(
(?:
[^'"()\[\]{{}};]*
{_ind(STRING_LITERAL, 3)}
)*
[^'"()\[\]{{}};]+
)? # <before>
(?:
(
[(\[{{]
) # <open>
|
(
[)\]}};]
) # <close>
)?
''')
DELIMITER_RE = re.compile(DELIMITER, re.VERBOSE)
_HANDLERS = {
None: { # global
# opened
'{': ...,
'[': None,
'(': None,
# closed
'}': None,
']': None,
')': None,
';': ...,
},
'': {
},
}

View File

@ -0,0 +1,278 @@
import re
from ._regexes import (
LOCAL as _LOCAL,
LOCAL_STATICS as _LOCAL_STATICS,
)
from ._common import (
log_match,
parse_var_decl,
set_capture_groups,
match_paren,
)
from ._compound_decl_body import DECL_BODY_PARSERS
LOCAL = set_capture_groups(_LOCAL, (
'EMPTY',
'INLINE_LEADING',
'INLINE_PRE',
'INLINE_KIND',
'INLINE_NAME',
'STORAGE',
'VAR_DECL',
'VAR_INIT',
'VAR_ENDING',
'COMPOUND_BARE',
'COMPOUND_LABELED',
'COMPOUND_PAREN',
'BLOCK_LEADING',
'BLOCK_OPEN',
'SIMPLE_STMT',
'SIMPLE_ENDING',
'BLOCK_CLOSE',
))
LOCAL_RE = re.compile(rf'^ \s* {LOCAL}', re.VERBOSE)
# Note that parse_function_body() still has trouble with a few files
# in the CPython codebase.
def parse_function_body(source, name, anon_name):
# XXX
raise NotImplementedError
def parse_function_body(name, text, resolve, source, anon_name, parent):
raise NotImplementedError
# For now we do not worry about locals declared in for loop "headers".
depth = 1
while depth > 0:
m = LOCAL_RE.match(text)
while not m:
text, resolve = continue_text(source, text or '{', resolve)
m = LOCAL_RE.match(text)
text = text[m.end():]
(
empty,
inline_leading, inline_pre, inline_kind, inline_name,
storage, decl,
var_init, var_ending,
compound_bare, compound_labeled, compound_paren,
block_leading, block_open,
simple_stmt, simple_ending,
block_close,
) = m.groups()
if empty:
log_match('', m)
resolve(None, None, None, text)
yield None, text
elif inline_kind:
log_match('', m)
kind = inline_kind
name = inline_name or anon_name('inline-')
data = [] # members
# We must set the internal "text" from _iter_source() to the
# start of the inline compound body.
# Note that this is effectively like a forward reference that
# we do not emit.
resolve(kind, None, name, text, None)
_parse_body = DECL_BODY_PARSERS[kind]
before = []
ident = f'{kind} {name}'
for member, inline, text in _parse_body(text, resolve, source, anon_name, ident):
if member:
data.append(member)
if inline:
yield from inline
# un-inline the decl. Note that it might not actually be inline.
# We handle the case in the "maybe_inline_actual" branch.
text = f'{inline_leading or ""} {inline_pre or ""} {kind} {name} {text}'
# XXX Should "parent" really be None for inline type decls?
yield resolve(kind, data, name, text, None), text
elif block_close:
log_match('', m)
depth -= 1
resolve(None, None, None, text)
# XXX This isn't great. Calling resolve() should have
# cleared the closing bracket. However, some code relies
# on the yielded value instead of the resolved one. That
# needs to be fixed.
yield None, text
elif compound_bare:
log_match('', m)
yield resolve('statement', compound_bare, None, text, parent), text
elif compound_labeled:
log_match('', m)
yield resolve('statement', compound_labeled, None, text, parent), text
elif compound_paren:
log_match('', m)
try:
pos = match_paren(text)
except ValueError:
text = f'{compound_paren} {text}'
#resolve(None, None, None, text)
text, resolve = continue_text(source, text, resolve)
yield None, text
else:
head = text[:pos]
text = text[pos:]
if compound_paren == 'for':
# XXX Parse "head" as a compound statement.
stmt1, stmt2, stmt3 = head.split(';', 2)
data = {
'compound': compound_paren,
'statements': (stmt1, stmt2, stmt3),
}
else:
data = {
'compound': compound_paren,
'statement': head,
}
yield resolve('statement', data, None, text, parent), text
elif block_open:
log_match('', m)
depth += 1
if block_leading:
# An inline block: the last evaluated expression is used
# in place of the block.
# XXX Combine it with the remainder after the block close.
stmt = f'{block_open}{{<expr>}}...;'
yield resolve('statement', stmt, None, text, parent), text
else:
resolve(None, None, None, text)
yield None, text
elif simple_ending:
log_match('', m)
yield resolve('statement', simple_stmt, None, text, parent), text
elif var_ending:
log_match('', m)
kind = 'variable'
_, name, vartype = parse_var_decl(decl)
data = {
'storage': storage,
'vartype': vartype,
}
after = ()
if var_ending == ',':
# It was a multi-declaration, so queue up the next one.
_, qual, typespec, _ = vartype.values()
text = f'{storage or ""} {qual or ""} {typespec} {text}'
yield resolve(kind, data, name, text, parent), text
if var_init:
_data = f'{name} = {var_init.strip()}'
yield resolve('statement', _data, None, text, parent), text
else:
# This should be unreachable.
raise NotImplementedError
#############################
# static local variables
LOCAL_STATICS = set_capture_groups(_LOCAL_STATICS, (
'INLINE_LEADING',
'INLINE_PRE',
'INLINE_KIND',
'INLINE_NAME',
'STATIC_DECL',
'STATIC_INIT',
'STATIC_ENDING',
'DELIM_LEADING',
'BLOCK_OPEN',
'BLOCK_CLOSE',
'STMT_END',
))
LOCAL_STATICS_RE = re.compile(rf'^ \s* {LOCAL_STATICS}', re.VERBOSE)
def parse_function_statics(source, func, anon_name):
# For now we do not worry about locals declared in for loop "headers".
depth = 1
while depth > 0:
for srcinfo in source:
m = LOCAL_STATICS_RE.match(srcinfo.text)
if m:
break
else:
# We ran out of lines.
if srcinfo is not None:
srcinfo.done()
return
for item, depth in _parse_next_local_static(m, srcinfo,
anon_name, func, depth):
if callable(item):
parse_body = item
yield from parse_body(source)
elif item is not None:
yield item
def _parse_next_local_static(m, srcinfo, anon_name, func, depth):
(inline_leading, inline_pre, inline_kind, inline_name,
static_decl, static_init, static_ending,
_delim_leading,
block_open,
block_close,
stmt_end,
) = m.groups()
remainder = srcinfo.text[m.end():]
if inline_kind:
log_match('func inline', m)
kind = inline_kind
name = inline_name or anon_name('inline-')
# Immediately emit a forward declaration.
yield srcinfo.resolve(kind, name=name, data=None), depth
# un-inline the decl. Note that it might not actually be inline.
# We handle the case in the "maybe_inline_actual" branch.
srcinfo.nest(
remainder,
f'{inline_leading or ""} {inline_pre or ""} {kind} {name}'
)
def parse_body(source):
_parse_body = DECL_BODY_PARSERS[kind]
data = [] # members
ident = f'{kind} {name}'
for item in _parse_body(source, anon_name, ident):
if item.kind == 'field':
data.append(item)
else:
yield item
# XXX Should "parent" really be None for inline type decls?
yield srcinfo.resolve(kind, data, name, parent=None)
srcinfo.resume()
yield parse_body, depth
elif static_decl:
log_match('local variable', m)
_, name, data = parse_var_decl(static_decl)
yield srcinfo.resolve('variable', data, name, parent=func), depth
if static_init:
srcinfo.advance(f'{name} {static_init} {remainder}')
elif static_ending == ',':
# It was a multi-declaration, so queue up the next one.
_, qual, typespec, _ = data.values()
srcinfo.advance(f'static {qual or ""} {typespec} {remainder}')
else:
srcinfo.advance('')
else:
log_match('func other', m)
if block_open:
depth += 1
elif block_close:
depth -= 1
elif stmt_end:
pass
else:
# This should be unreachable.
raise NotImplementedError
srcinfo.advance(remainder)
yield None, depth

View File

@ -0,0 +1,179 @@
import re
from ._regexes import (
GLOBAL as _GLOBAL,
)
from ._common import (
log_match,
parse_var_decl,
set_capture_groups,
)
from ._compound_decl_body import DECL_BODY_PARSERS
#from ._func_body import parse_function_body
from ._func_body import parse_function_statics as parse_function_body
GLOBAL = set_capture_groups(_GLOBAL, (
'EMPTY',
'COMPOUND_LEADING',
'COMPOUND_KIND',
'COMPOUND_NAME',
'FORWARD_KIND',
'FORWARD_NAME',
'MAYBE_INLINE_ACTUAL',
'TYPEDEF_DECL',
'TYPEDEF_FUNC_PARAMS',
'VAR_STORAGE',
'FUNC_INLINE',
'VAR_DECL',
'FUNC_PARAMS',
'FUNC_DELIM',
'FUNC_LEGACY_PARAMS',
'VAR_INIT',
'VAR_ENDING',
))
GLOBAL_RE = re.compile(rf'^ \s* {GLOBAL}', re.VERBOSE)
def parse_globals(source, anon_name):
for srcinfo in source:
m = GLOBAL_RE.match(srcinfo.text)
if not m:
# We need more text.
continue
for item in _parse_next(m, srcinfo, anon_name):
if callable(item):
parse_body = item
yield from parse_body(source)
else:
yield item
else:
# We ran out of lines.
if srcinfo is not None:
srcinfo.done()
return
def _parse_next(m, srcinfo, anon_name):
(
empty,
# compound type decl (maybe inline)
compound_leading, compound_kind, compound_name,
forward_kind, forward_name, maybe_inline_actual,
# typedef
typedef_decl, typedef_func_params,
# vars and funcs
storage, func_inline, decl,
func_params, func_delim, func_legacy_params,
var_init, var_ending,
) = m.groups()
remainder = srcinfo.text[m.end():]
if empty:
log_match('global empty', m)
srcinfo.advance(remainder)
elif maybe_inline_actual:
log_match('maybe_inline_actual', m)
# Ignore forward declarations.
# XXX Maybe return them too (with an "isforward" flag)?
if not maybe_inline_actual.strip().endswith(';'):
remainder = maybe_inline_actual + remainder
yield srcinfo.resolve(forward_kind, None, forward_name)
if maybe_inline_actual.strip().endswith('='):
# We use a dummy prefix for a fake typedef.
# XXX Ideally this case would not be caught by MAYBE_INLINE_ACTUAL.
_, name, data = parse_var_decl(f'{forward_kind} {forward_name} fake_typedef_{forward_name}')
yield srcinfo.resolve('typedef', data, name, parent=None)
remainder = f'{name} {remainder}'
srcinfo.advance(remainder)
elif compound_kind:
kind = compound_kind
name = compound_name or anon_name('inline-')
# Immediately emit a forward declaration.
yield srcinfo.resolve(kind, name=name, data=None)
# un-inline the decl. Note that it might not actually be inline.
# We handle the case in the "maybe_inline_actual" branch.
srcinfo.nest(
remainder,
f'{compound_leading or ""} {compound_kind} {name}',
)
def parse_body(source):
_parse_body = DECL_BODY_PARSERS[compound_kind]
data = [] # members
ident = f'{kind} {name}'
for item in _parse_body(source, anon_name, ident):
if item.kind == 'field':
data.append(item)
else:
yield item
# XXX Should "parent" really be None for inline type decls?
yield srcinfo.resolve(kind, data, name, parent=None)
srcinfo.resume()
yield parse_body
elif typedef_decl:
log_match('typedef', m)
kind = 'typedef'
_, name, data = parse_var_decl(typedef_decl)
if typedef_func_params:
return_type = data
# This matches the data for func declarations.
data = {
'storage': None,
'inline': None,
'params': f'({typedef_func_params})',
'returntype': return_type,
'isforward': True,
}
yield srcinfo.resolve(kind, data, name, parent=None)
srcinfo.advance(remainder)
elif func_delim or func_legacy_params:
log_match('function', m)
kind = 'function'
_, name, return_type = parse_var_decl(decl)
func_params = func_params or func_legacy_params
data = {
'storage': storage,
'inline': func_inline,
'params': f'({func_params})',
'returntype': return_type,
'isforward': func_delim == ';',
}
yield srcinfo.resolve(kind, data, name, parent=None)
srcinfo.advance(remainder)
if func_delim == '{' or func_legacy_params:
def parse_body(source):
yield from parse_function_body(source, name, anon_name)
yield parse_body
elif var_ending:
log_match('global variable', m)
kind = 'variable'
_, name, vartype = parse_var_decl(decl)
data = {
'storage': storage,
'vartype': vartype,
}
yield srcinfo.resolve(kind, data, name, parent=None)
if var_ending == ',':
# It was a multi-declaration, so queue up the next one.
_, qual, typespec, _ = vartype.values()
remainder = f'{storage or ""} {qual or ""} {typespec} {remainder}'
srcinfo.advance(remainder)
if var_init:
_data = f'{name} = {var_init.strip()}'
yield srcinfo.resolve('statement', _data, name=None)
else:
# This should be unreachable.
raise NotImplementedError

View File

@ -0,0 +1,168 @@
from ..info import KIND, ParsedItem, FileInfo
class TextInfo:
def __init__(self, text, start=None, end=None):
# immutable:
if not start:
start = 1
self.start = start
# mutable:
lines = text.splitlines() or ['']
self.text = text.strip()
if not end:
end = start + len(lines) - 1
self.end = end
self.line = lines[-1]
def __repr__(self):
args = (f'{a}={getattr(self, a)!r}'
for a in ['text', 'start', 'end'])
return f'{type(self).__name__}({", ".join(args)})'
def add_line(self, line, lno=None):
if lno is None:
lno = self.end + 1
else:
if isinstance(lno, FileInfo):
fileinfo = lno
if fileinfo.filename != self.filename:
raise NotImplementedError((fileinfo, self.filename))
lno = fileinfo.lno
# XXX
#if lno < self.end:
# raise NotImplementedError((lno, self.end))
line = line.lstrip()
self.text += ' ' + line
self.line = line
self.end = lno
class SourceInfo:
_ready = False
def __init__(self, filename, _current=None):
# immutable:
self.filename = filename
# mutable:
if isinstance(_current, str):
_current = TextInfo(_current)
self._current = _current
self._start = _current.start if _current else -1
self._nested = []
self._set_ready()
def __repr__(self):
args = (f'{a}={getattr(self, a)!r}'
for a in ['filename', '_current'])
return f'{type(self).__name__}({", ".join(args)})'
@property
def start(self):
if self._current is None:
return self._start
return self._current.start
@property
def end(self):
if self._current is None:
return self._start
return self._current.end
@property
def text(self):
if self._current is None:
return ''
return self._current.text
def nest(self, text, before, start=None):
if self._current is None:
raise Exception('nesting requires active source text')
current = self._current
current.text = before
self._nested.append(current)
self._replace(text, start)
def resume(self, remainder=None):
if not self._nested:
raise Exception('no nested text to resume')
if self._current is None:
raise Exception('un-nesting requires active source text')
if remainder is None:
remainder = self._current.text
self._clear()
self._current = self._nested.pop()
self._current.text += ' ' + remainder
self._set_ready()
def advance(self, remainder, start=None):
if self._current is None:
raise Exception('advancing requires active source text')
if remainder.strip():
self._replace(remainder, start, fixnested=True)
else:
if self._nested:
self._replace('', start, fixnested=True)
#raise Exception('cannot advance while nesting')
else:
self._clear(start)
def resolve(self, kind, data, name, parent=None):
# "field" isn't a top-level kind, so we leave it as-is.
if kind and kind != 'field':
kind = KIND._from_raw(kind)
fileinfo = FileInfo(self.filename, self._start)
return ParsedItem(fileinfo, kind, parent, name, data)
def done(self):
self._set_ready()
def _set_ready(self):
if self._current is None:
self._ready = False
else:
self._ready = self._current.text.strip() != ''
def _used(self):
ready = self._ready
self._ready = False
return ready
def _clear(self, start=None):
old = self._current
if self._current is not None:
# XXX Fail if self._current wasn't used up?
if start is None:
start = self._current.end
self._current = None
if start is not None:
self._start = start
self._set_ready()
return old
def _replace(self, text, start=None, *, fixnested=False):
end = self._current.end
old = self._clear(start)
self._current = TextInfo(text, self._start, end)
if fixnested and self._nested and self._nested[-1] is old:
self._nested[-1] = self._current
self._set_ready()
def _add_line(self, line, lno=None):
if not line.strip():
# We don't worry about multi-line string literals.
return
if self._current is None:
self._start = lno
self._current = TextInfo(line, lno)
else:
# XXX
#if lno < self._current.end:
# # A circular include?
# raise NotImplementedError((lno, self))
self._current.add_line(line, lno)
self._ready = True
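# For example:
#
#   t = TextInfo('int x;\nint y;', start=10)
#   (t.start, t.end, t.line)  # -> (10, 11, 'int y;')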

View File

@ -0,0 +1,796 @@
# Regular expression patterns for C syntax.
#
# None of these patterns has any capturing. However, a number of them
# have capturing markers compatible with set_capture_groups() in _common.py.
import textwrap
def _ind(text, level=1, edges='both'):
indent = ' ' * level
text = textwrap.indent(text, indent)
if edges == 'pre' or edges == 'both':
text = '\n' + indent + text.lstrip()
if edges == 'post' or edges == 'both':
text = text.rstrip() + '\n' + ' ' * (level - 1)
return text
#######################################
# general
HEX = r'(?: [0-9a-fA-F] )'
STRING_LITERAL = textwrap.dedent(rf'''
(?:
# character literal
(?:
['] [^'] [']
|
['] \\ . [']
|
['] \\x{HEX}{HEX} [']
|
['] \\0\d\d [']
|
(?:
['] \\o[01]\d\d [']
|
['] \\o2[0-4]\d [']
|
['] \\o25[0-5] [']
)
)
|
# string literal
(?:
["] (?: [^"\\]* \\ . )* [^"\\]* ["]
)
# end string literal
)
''')
_KEYWORD = textwrap.dedent(r'''
(?:
\b
(?:
auto |
extern |
register |
static |
typedef |
const |
volatile |
signed |
unsigned |
char |
short |
int |
long |
float |
double |
void |
struct |
union |
enum |
goto |
return |
sizeof |
break |
continue |
if |
else |
for |
do |
while |
switch |
case |
default |
entry
)
\b
)
''')
KEYWORD = rf'''
# keyword
{_KEYWORD}
# end keyword
'''
_KEYWORD = ''.join(_KEYWORD.split())
IDENTIFIER = r'(?: [a-zA-Z_][a-zA-Z0-9_]* )'
# We use a negative lookahead to filter out keywords.
STRICT_IDENTIFIER = rf'(?: (?! {_KEYWORD} ) \b {IDENTIFIER} \b )'
ANON_IDENTIFIER = rf'(?: (?! {_KEYWORD} ) \b {IDENTIFIER} (?: - \d+ )? \b )'
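# Note: ANON_IDENTIFIER also matches the generated placeholder names
# (e.g. "anon-42") produced by anonymous_names() in c_parser.parser.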
#######################################
# types
SIMPLE_TYPE = textwrap.dedent(rf'''
# simple type
(?:
\b
(?:
void
|
(?: signed | unsigned ) # implies int
|
(?:
(?: (?: signed | unsigned ) \s+ )?
(?: (?: long | short ) \s+ )?
(?: char | short | int | long | float | double )
)
)
\b
)
# end simple type
''')
COMPOUND_TYPE_KIND = r'(?: \b (?: struct | union | enum ) \b )'
#######################################
# variable declarations
STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )'
TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )'
PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? )'
TYPE_SPEC = textwrap.dedent(rf'''
# type spec
(?:
{_ind(SIMPLE_TYPE, 2)}
|
(?:
[_]*typeof[_]*
\s* [(]
(?: \s* [*&] )*
\s* {STRICT_IDENTIFIER}
\s* [)]
)
|
# reference to a compound type
(?:
{COMPOUND_TYPE_KIND}
(?: \s* {ANON_IDENTIFIER} )?
)
|
# reference to a typedef
{STRICT_IDENTIFIER}
)
# end type spec
''')
DECLARATOR = textwrap.dedent(rf'''
# declarator (possibly abstract)
(?:
(?: {PTR_QUALIFIER} \s* )*
(?:
(?:
(?: # <IDENTIFIER>
{STRICT_IDENTIFIER}
)
(?: \s* \[ (?: \s* [^\]]+ \s* )? [\]] )* # arrays
)
|
(?:
[(] \s*
(?: # <WRAPPED_IDENTIFIER>
{STRICT_IDENTIFIER}
)
(?: \s* \[ (?: \s* [^\]]+ \s* )? [\]] )* # arrays
\s* [)]
)
|
# func ptr
(?:
[(] (?: \s* {PTR_QUALIFIER} )? \s*
(?: # <FUNC_IDENTIFIER>
{STRICT_IDENTIFIER}
)
(?: \s* \[ (?: \s* [^\]]+ \s* )? [\]] )* # arrays
\s* [)]
# We allow for a single level of paren nesting in parameters.
\s* [(] (?: [^()]* [(] [^)]* [)] )* [^)]* [)]
)
)
)
# end declarator
''')
VAR_DECL = textwrap.dedent(rf'''
# var decl (and typedef and func return type)
(?:
(?:
(?: # <STORAGE>
{STORAGE_CLASS}
)
\s*
)?
(?:
(?: # <TYPE_QUAL>
{TYPE_QUALIFIER}
)
\s*
)?
(?:
(?: # <TYPE_SPEC>
{_ind(TYPE_SPEC, 4)}
)
)
\s*
(?:
(?: # <DECLARATOR>
{_ind(DECLARATOR, 4)}
)
)
)
# end var decl
''')
INITIALIZER = textwrap.dedent(rf'''
# initializer
(?:
(?:
[(]
# no nested parens (e.g. func ptr)
[^)]*
[)]
\s*
)?
(?:
# a string literal
(?:
(?: {_ind(STRING_LITERAL, 4)} \s* )*
{_ind(STRING_LITERAL, 4)}
)
|
# a simple initializer
(?:
(?:
[^'",;{{]*
{_ind(STRING_LITERAL, 4)}
)*
[^'",;{{]*
)
|
# a struct/array literal
(?:
# We only expect compound initializers with
# single-variable declarations.
{{
(?:
[^'";]*?
{_ind(STRING_LITERAL, 5)}
)*
[^'";]*?
}}
(?= \s* ; ) # Note this lookahead.
)
)
)
# end initializer
''')
#######################################
# compound type declarations
STRUCT_MEMBER_DECL = textwrap.dedent(rf'''
(?:
# inline compound type decl
(?:
(?: # <COMPOUND_TYPE_KIND>
{COMPOUND_TYPE_KIND}
)
(?:
\s+
(?: # <COMPOUND_TYPE_NAME>
{STRICT_IDENTIFIER}
)
)?
\s* {{
)
|
(?:
# typed member
(?:
# Technically it doesn't have to have a type...
(?: # <SPECIFIER_QUALIFIER>
(?: {TYPE_QUALIFIER} \s* )?
{_ind(TYPE_SPEC, 5)}
)
(?:
# If it doesn't have a declarator then it will have
# a size and vice versa.
\s*
(?: # <DECLARATOR>
{_ind(DECLARATOR, 6)}
)
)?
)
# sized member
(?:
\s* [:] \s*
(?: # <SIZE>
\d+
)
)?
\s*
(?: # <ENDING>
[,;]
)
)
|
(?:
\s*
(?: # <CLOSE>
}}
)
)
)
''')
ENUM_MEMBER_DECL = textwrap.dedent(rf'''
(?:
(?:
\s*
(?: # <CLOSE>
}}
)
)
|
(?:
\s*
(?: # <NAME>
{IDENTIFIER}
)
(?:
\s* = \s*
(?: # <INIT>
{_ind(STRING_LITERAL, 4)}
|
[^'",}}]+
)
)?
\s*
(?: # <ENDING>
, | }}
)
)
)
''')
#######################################
# statements
SIMPLE_STMT_BODY = textwrap.dedent(rf'''
# simple statement body
(?:
(?:
[^'"{{}};]*
{_ind(STRING_LITERAL, 3)}
)*
[^'"{{}};]*
#(?= [;{{] ) # Note this lookahead.
)
# end simple statement body
''')
SIMPLE_STMT = textwrap.dedent(rf'''
# simple statement
(?:
(?: # <SIMPLE_STMT>
# stmt-inline "initializer"
(?:
return \b
(?:
\s*
{_ind(INITIALIZER, 5)}
)?
)
|
# variable assignment
(?:
(?: [*] \s* )?
(?:
{STRICT_IDENTIFIER} \s*
(?: [.] | -> ) \s*
)*
{STRICT_IDENTIFIER}
(?: \s* \[ \s* \d+ \s* \] )?
\s* = \s*
{_ind(INITIALIZER, 4)}
)
|
# catchall return statement
(?:
return \b
(?:
(?:
[^'";]*
{_ind(STRING_LITERAL, 6)}
)*
\s* [^'";]*
)?
)
|
# simple statement
(?:
{_ind(SIMPLE_STMT_BODY, 4)}
)
)
\s*
(?: # <SIMPLE_ENDING>
;
)
)
# end simple statement
''')
COMPOUND_STMT = textwrap.dedent(rf'''
# compound statement
(?:
\b
(?:
(?:
(?: # <COMPOUND_BARE>
else | do
)
\b
)
|
(?:
(?: # <COMPOUND_LABELED>
(?:
case \b
(?:
[^'":]*
{_ind(STRING_LITERAL, 7)}
)*
\s* [^'":]*
)
|
default
|
{STRICT_IDENTIFIER}
)
\s* [:]
)
|
(?:
(?: # <COMPOUND_PAREN>
for | while | if | switch
)
\s* (?= [(] ) # Note this lookahead.
)
)
\s*
)
# end compound statement
''')
#######################################
# function bodies
LOCAL = textwrap.dedent(rf'''
(?:
# an empty statement
(?: # <EMPTY>
;
)
|
# inline type decl
(?:
(?:
(?: # <INLINE_LEADING>
[^;{{}}]+?
)
\s*
)?
(?: # <INLINE_PRE>
(?: {STORAGE_CLASS} \s* )?
(?: {TYPE_QUALIFIER} \s* )?
)? # </INLINE_PRE>
(?: # <INLINE_KIND>
{COMPOUND_TYPE_KIND}
)
(?:
\s+
(?: # <INLINE_NAME>
{STRICT_IDENTIFIER}
)
)?
\s* {{
)
|
# var decl
(?:
(?: # <STORAGE>
{STORAGE_CLASS}
)? # </STORAGE>
(?:
\s*
(?: # <VAR_DECL>
{_ind(VAR_DECL, 5)}
)
)
(?:
(?:
# initializer
# We expect only basic initializers.
\s* = \s*
(?: # <VAR_INIT>
{_ind(INITIALIZER, 6)}
)
)?
(?:
\s*
(?: # <VAR_ENDING>
[,;]
)
)
)
)
|
{_ind(COMPOUND_STMT, 2)}
|
# start-of-block
(?:
(?: # <BLOCK_LEADING>
(?:
[^'"{{}};]*
{_ind(STRING_LITERAL, 5)}
)*
[^'"{{}};]*
# Presumably we will not see "== {{".
[^\s='"{{}});]
\s*
)? # </BLOCK_LEADING>
(?: # <BLOCK_OPEN>
{{
)
)
|
{_ind(SIMPLE_STMT, 2)}
|
# end-of-block
(?: # <BLOCK_CLOSE>
}}
)
)
''')
LOCAL_STATICS = textwrap.dedent(rf'''
(?:
# inline type decl
(?:
(?:
(?: # <INLINE_LEADING>
[^;{{}}]+?
)
\s*
)?
(?: # <INLINE_PRE>
(?: {STORAGE_CLASS} \s* )?
(?: {TYPE_QUALIFIER} \s* )?
)?
(?: # <INLINE_KIND>
{COMPOUND_TYPE_KIND}
)
(?:
\s+
(?: # <INLINE_NAME>
{STRICT_IDENTIFIER}
)
)?
\s* {{
)
|
# var decl
(?:
# We only look for static variables.
(?: # <STATIC_DECL>
static \b
(?: \s* {TYPE_QUALIFIER} )?
\s* {_ind(TYPE_SPEC, 4)}
\s* {_ind(DECLARATOR, 4)}
)
\s*
(?:
(?: # <STATIC_INIT>
= \s*
{_ind(INITIALIZER, 4)}
\s*
[,;{{]
)
|
(?: # <STATIC_ENDING>
[,;]
)
)
)
|
# everything else
(?:
(?: # <DELIM_LEADING>
(?:
[^'"{{}};]*
{_ind(STRING_LITERAL, 4)}
)*
\s* [^'"{{}};]*
)
(?:
(?: # <BLOCK_OPEN>
{{
)
|
(?: # <BLOCK_CLOSE>
}}
)
|
(?: # <STMT_END>
;
)
)
)
)
''')
#######################################
# global declarations
GLOBAL = textwrap.dedent(rf'''
(?:
# an empty statement
(?: # <EMPTY>
;
)
|
# compound type decl (maybe inline)
(?:
(?:
(?: # <COMPOUND_LEADING>
[^;{{}}]+?
)
\s*
)?
(?: # <COMPOUND_KIND>
{COMPOUND_TYPE_KIND}
)
(?:
\s+
(?: # <COMPOUND_NAME>
{STRICT_IDENTIFIER}
)
)?
\s* {{
)
|
# bogus inline decl artifact
# This simplifies resolving the relative syntactic ambiguity of
# inline structs.
(?:
(?: # <FORWARD_KIND>
{COMPOUND_TYPE_KIND}
)
\s*
(?: # <FORWARD_NAME>
{ANON_IDENTIFIER}
)
(?: # <MAYBE_INLINE_ACTUAL>
[^=,;({{[*\]]*
[=,;({{]
)
)
|
# typedef
(?:
\b typedef \b \s*
(?: # <TYPEDEF_DECL>
{_ind(VAR_DECL, 4)}
)
(?:
# We expect no inline type definitions in the parameters.
\s* [(] \s*
(?: # <TYPEDEF_FUNC_PARAMS>
[^{{;]*
)
\s* [)]
)?
\s* ;
)
|
# func decl/definition & var decls
# XXX dedicated pattern for funcs (more restricted)?
(?:
(?:
(?: # <VAR_STORAGE>
{STORAGE_CLASS}
)
\s*
)?
(?:
(?: # <FUNC_INLINE>
\b inline \b
)
\s*
)?
(?: # <VAR_DECL>
{_ind(VAR_DECL, 4)}
)
(?:
# func decl / definition
(?:
(?:
# We expect no inline type definitions in the parameters.
\s* [(] \s*
(?: # <FUNC_PARAMS>
[^{{;]*
)
\s* [)] \s*
(?: # <FUNC_DELIM>
[{{;]
)
)
|
(?:
# This is some old-school syntax!
\s* [(] \s*
# We throw away the bare names:
{STRICT_IDENTIFIER}
(?: \s* , \s* {STRICT_IDENTIFIER} )*
\s* [)] \s*
# We keep the trailing param declarations:
(?: # <FUNC_LEGACY_PARAMS>
# There's at least one!
(?: {TYPE_QUALIFIER} \s* )?
{_ind(TYPE_SPEC, 7)}
\s*
{_ind(DECLARATOR, 7)}
\s* ;
(?:
\s*
(?: {TYPE_QUALIFIER} \s* )?
{_ind(TYPE_SPEC, 8)}
\s*
{_ind(DECLARATOR, 8)}
\s* ;
)*
)
\s* {{
)
)
|
# var / typedef
(?:
(?:
# initializer
# We expect only basic initializers.
\s* = \s*
(?: # <VAR_INIT>
{_ind(INITIALIZER, 6)}
)
)?
\s*
(?: # <VAR_ENDING>
[,;]
)
)
)
)
)
''')

View File

@ -0,0 +1,190 @@
import contextlib
import distutils.ccompiler
import logging
import os.path
from c_common.fsutil import match_glob as _match_glob
from c_common.tables import parse_table as _parse_table
from ..source import (
resolve as _resolve_source,
good_file as _good_file,
)
from . import errors as _errors
from . import (
pure as _pure,
gcc as _gcc,
)
logger = logging.getLogger(__name__)
# Supported "source":
# * filename (string)
# * lines (iterable)
# * text (string)
# Supported return values:
# * iterator of SourceLine
# * sequence of SourceLine
# * text (string)
# * something that combines all those
# XXX Add the missing support from above.
# XXX Add more low-level functions to handle permutations?
def preprocess(source, *,
incldirs=None,
macros=None,
samefiles=None,
filename=None,
tool=True,
):
"""...
CWD should be the project root and "source" should be relative.
"""
if tool:
logger.debug(f'CWD: {os.getcwd()!r}')
logger.debug(f'incldirs: {incldirs!r}')
logger.debug(f'macros: {macros!r}')
logger.debug(f'samefiles: {samefiles!r}')
_preprocess = _get_preprocessor(tool)
with _good_file(source, filename) as source:
return _preprocess(source, incldirs, macros, samefiles) or ()
else:
source, filename = _resolve_source(source, filename)
# We ignore "includes", "macros", etc.
return _pure.preprocess(source, filename)
# if _run() returns just the lines:
# text = _run(source)
# lines = [line + os.linesep for line in text.splitlines()]
# lines[-1] = lines[-1].splitlines()[0]
#
# conditions = None
# for lno, line in enumerate(lines, 1):
# kind = 'source'
# directive = None
# data = line
# yield lno, kind, data, conditions
def get_preprocessor(*,
file_macros=None,
file_incldirs=None,
file_same=None,
ignore_exc=False,
log_err=None,
):
_preprocess = preprocess
if file_macros:
file_macros = tuple(_parse_macros(file_macros))
if file_incldirs:
file_incldirs = tuple(_parse_incldirs(file_incldirs))
if file_same:
file_same = tuple(file_same)
if not callable(ignore_exc):
ignore_exc = (lambda exc, _ig=ignore_exc: _ig)
def get_file_preprocessor(filename):
filename = filename.strip()
if file_macros:
macros = list(_resolve_file_values(filename, file_macros))
if file_incldirs:
incldirs = [v for v, in _resolve_file_values(filename, file_incldirs)]
def preprocess(**kwargs):
if file_macros and 'macros' not in kwargs:
kwargs['macros'] = macros
if file_incldirs and 'incldirs' not in kwargs:
kwargs['incldirs'] = incldirs
if file_same and 'file_same' not in kwargs:
kwargs['samefiles'] = file_same
kwargs.setdefault('filename', filename)
with handling_errors(ignore_exc, log_err=log_err):
return _preprocess(filename, **kwargs)
return preprocess
return get_file_preprocessor
def _resolve_file_values(filename, file_values):
# We expect the filename and all patterns to be absolute paths.
for pattern, *value in file_values or ():
if _match_glob(filename, pattern):
yield value
def _parse_macros(macros):
for row, srcfile in _parse_table(macros, '\t', 'glob\tname\tvalue', rawsep='=', default=None):
yield row
def _parse_incldirs(incldirs):
for row, srcfile in _parse_table(incldirs, '\t', 'glob\tdirname', default=None):
glob, dirname = row
if dirname is None:
# Match all files.
dirname = glob
row = ('*', dirname.strip())
yield row
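# The incldirs table is tab-separated under a 'glob\tdirname' header, e.g.:
#
#   Modules/*.c<TAB>Include
#
# A row with a single value is treated as a dirname that applies to all files.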
@contextlib.contextmanager
def handling_errors(ignore_exc=None, *, log_err=None):
try:
yield
except _errors.OSMismatchError as exc:
if not ignore_exc(exc):
raise # re-raise
if log_err is not None:
log_err(f'<OS mismatch (expected {" or ".join(exc.expected)})>')
return None
except _errors.MissingDependenciesError as exc:
if not ignore_exc(exc):
raise # re-raise
if log_err is not None:
log_err(f'<missing dependency {exc.missing}>')
return None
except _errors.ErrorDirectiveError as exc:
if not ignore_exc(exc):
raise # re-raise
if log_err is not None:
log_err(exc)
return None
##################################
# tools
_COMPILERS = {
# matching distutils.ccompiler.compiler_class:
'unix': _gcc.preprocess,
'msvc': None,
'cygwin': None,
'mingw32': None,
'bcpp': None,
# aliases/extras:
'gcc': _gcc.preprocess,
'clang': None,
}
def _get_preprocessor(tool):
if tool is True:
tool = distutils.ccompiler.get_default_compiler()
preprocess = _COMPILERS.get(tool)
if preprocess is None:
raise ValueError(f'unsupported tool {tool}')
return preprocess
##################################
# aliases
from .errors import (
PreprocessorError,
PreprocessorFailure,
ErrorDirectiveError,
MissingDependenciesError,
OSMismatchError,
)
from .common import FileInfo, SourceLine
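# Hypothetical usage of the factory above:
#
#   get_file_preprocessor = get_preprocessor(log_err=print)
#   preprocess = get_file_preprocessor('Python/ceval.c')
#   lines = preprocess() or ()  # SourceLine items, or None on an ignored error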

View File

@ -0,0 +1,196 @@
import logging
import sys
from c_common.scriptutil import (
CLIArgSpec as Arg,
add_verbosity_cli,
add_traceback_cli,
add_kind_filtering_cli,
add_files_cli,
add_failure_filtering_cli,
add_commands_cli,
process_args_by_key,
configure_logger,
get_prog,
main_for_filenames,
)
from . import (
errors as _errors,
get_preprocessor as _get_preprocessor,
)
FAIL = {
'err': _errors.ErrorDirectiveError,
'deps': _errors.MissingDependenciesError,
'os': _errors.OSMismatchError,
}
FAIL_DEFAULT = tuple(v for v in FAIL if v != 'os')
logger = logging.getLogger(__name__)
##################################
# CLI helpers
def add_common_cli(parser, *, get_preprocessor=_get_preprocessor):
parser.add_argument('--macros', action='append')
parser.add_argument('--incldirs', action='append')
parser.add_argument('--same', action='append')
process_fail_arg = add_failure_filtering_cli(parser, FAIL)
def process_args(args):
ns = vars(args)
process_fail_arg(args)
ignore_exc = ns.pop('ignore_exc')
# We later pass ignore_exc to _get_preprocessor().
args.get_file_preprocessor = get_preprocessor(
file_macros=ns.pop('macros'),
file_incldirs=ns.pop('incldirs'),
file_same=ns.pop('same'),
ignore_exc=ignore_exc,
log_err=print,
)
return process_args
def _iter_preprocessed(filename, *,
get_preprocessor,
match_kind=None,
pure=False,
):
preprocess = get_preprocessor(filename)
for line in preprocess(tool=not pure) or ():
if match_kind is not None and not match_kind(line.kind):
continue
yield line
#######################################
# the commands
def _cli_preprocess(parser, excluded=None, **prepr_kwargs):
parser.add_argument('--pure', action='store_true')
parser.add_argument('--no-pure', dest='pure', action='store_const', const=False)
process_kinds = add_kind_filtering_cli(parser)
process_common = add_common_cli(parser, **prepr_kwargs)
parser.add_argument('--raw', action='store_true')
process_files = add_files_cli(parser, excluded=excluded)
return [
process_kinds,
process_common,
process_files,
]
def cmd_preprocess(filenames, *,
raw=False,
iter_filenames=None,
**kwargs
):
if 'get_file_preprocessor' not in kwargs:
kwargs['get_file_preprocessor'] = _get_preprocessor()
if raw:
def show_file(filename, lines):
for line in lines:
print(line)
#print(line.raw)
else:
def show_file(filename, lines):
for line in lines:
linefile = ''
if line.filename != filename:
linefile = f' ({line.filename})'
text = line.data
if line.kind == 'comment':
text = '/* ' + line.data.splitlines()[0]
text += r'\n... */' if '\n' in line.data else ' */'
print(f' {line.lno:>4} {line.kind:10} | {text}{linefile}')
filenames = main_for_filenames(filenames, iter_filenames)
for filename in filenames:
lines = _iter_preprocessed(filename, **kwargs)
show_file(filename, lines)
def _cli_data(parser):
...
return []
def cmd_data(filenames,
**kwargs
):
# XXX
raise NotImplementedError
COMMANDS = {
'preprocess': (
'preprocess the given C source & header files',
[_cli_preprocess],
cmd_preprocess,
),
'data': (
'check/manage local data (e.g. excludes, macros)',
[_cli_data],
cmd_data,
),
}
#######################################
# the script
def parse_args(argv=sys.argv[1:], prog=sys.argv[0], *,
subset='preprocess',
excluded=None,
**prepr_kwargs
):
import argparse
parser = argparse.ArgumentParser(
prog=prog or get_prog(),
)
processors = add_commands_cli(
parser,
commands={k: v[1] for k, v in COMMANDS.items()},
commonspecs=[
add_verbosity_cli,
add_traceback_cli,
],
subset=subset,
)
args = parser.parse_args(argv)
ns = vars(args)
cmd = ns.pop('cmd')
verbosity, traceback_cm = process_args_by_key(
args,
processors[cmd],
['verbosity', 'traceback_cm'],
)
return cmd, ns, verbosity, traceback_cm
def main(cmd, cmd_kwargs):
try:
run_cmd = COMMANDS[cmd][-1]
except KeyError:
raise ValueError(f'unsupported cmd {cmd!r}')
run_cmd(**cmd_kwargs)
if __name__ == '__main__':
cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
configure_logger(verbosity)
with traceback_cm:
main(cmd, cmd_kwargs)
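# A hypothetical example run (assuming Tools/c-analyzer is on sys.path;
# the exact invocation is not prescribed by this module):
#
#   $ python -m c_parser.preprocessor preprocess Parser/token.c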


@ -0,0 +1,173 @@
import contextlib
import distutils.ccompiler
import logging
import shlex
import subprocess
import sys
from ..info import FileInfo, SourceLine
from .errors import (
PreprocessorFailure,
ErrorDirectiveError,
MissingDependenciesError,
OSMismatchError,
)
logger = logging.getLogger(__name__)
# XXX Add aggregate "source" class(es)?
# * expose all lines as single text string
# * expose all lines as sequence
# * iterate all lines
def run_cmd(argv, *,
#capture_output=True,
stdout=subprocess.PIPE,
#stderr=subprocess.STDOUT,
stderr=subprocess.PIPE,
text=True,
check=True,
**kwargs
):
if isinstance(stderr, str) and stderr.lower() == 'stdout':
stderr = subprocess.STDOUT
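# Snapshot the keyword defaults above via locals() (minus argv and
# kwargs themselves) so they can be forwarded to subprocess.run()
# without repeating each name.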
kw = dict(locals())
kw.pop('argv')
kw.pop('kwargs')
kwargs.update(kw)
proc = subprocess.run(argv, **kwargs)
return proc.stdout
def preprocess(tool, filename, **kwargs):
argv = _build_argv(tool, filename, **kwargs)
logger.debug(' '.join(shlex.quote(v) for v in argv))
# Make sure the OS is supported for this file.
if (_expected := is_os_mismatch(filename)):
error = None
raise OSMismatchError(filename, _expected, argv, error, tool)
# Run the command.
with converted_error(tool, argv, filename):
# We use subprocess directly here, instead of calling the
# distutil compiler object's preprocess() method, since that
# one writes to stdout/stderr and it's simpler to do it directly
# through subprocess.
return run_cmd(argv)
def _build_argv(
tool,
filename,
incldirs=None,
macros=None,
preargs=None,
postargs=None,
executable=None,
compiler=None,
):
compiler = distutils.ccompiler.new_compiler(
compiler=compiler or tool,
)
if executable:
compiler.set_executable('preprocessor', executable)
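# Capture the argv the compiler would have run by intercepting its
# spawn() hook, instead of actually invoking the preprocessor here.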
argv = None
def _spawn(_argv):
nonlocal argv
argv = _argv
compiler.spawn = _spawn
compiler.preprocess(
filename,
macros=[tuple(v) for v in macros or ()],
include_dirs=incldirs or (),
extra_preargs=preargs or (),
extra_postargs=postargs or (),
)
return argv
@contextlib.contextmanager
def converted_error(tool, argv, filename):
try:
yield
except subprocess.CalledProcessError as exc:
convert_error(
tool,
argv,
filename,
exc.stderr,
exc.returncode,
)
def convert_error(tool, argv, filename, stderr, rc):
error = (stderr.splitlines()[0], rc)
if (_expected := is_os_mismatch(filename, stderr)):
logger.debug(stderr.strip())
raise OSMismatchError(filename, _expected, argv, error, tool)
elif (_missing := is_missing_dep(stderr)):
logger.debug(stderr.strip())
raise MissingDependenciesError(filename, (_missing,), argv, error, tool)
elif '#error' in stderr:
# XXX Ignore incompatible files.
error = (stderr.splitlines()[1], rc)
logger.debug(stderr.strip())
raise ErrorDirectiveError(filename, argv, error, tool)
else:
# Try one more time, with stderr written to the terminal.
try:
output = run_cmd(argv, stderr=None)
except subprocess.CalledProcessError:
raise PreprocessorFailure(filename, argv, error, tool)
def is_os_mismatch(filename, errtext=None):
# See: https://docs.python.org/3/library/sys.html#sys.platform
actual = sys.platform
if actual == 'unknown':
raise NotImplementedError
if errtext is not None:
if (missing := is_missing_dep(errtext)):
matching = get_matching_oses(missing, filename)
if actual not in matching:
return matching
return False
def get_matching_oses(missing, filename):
# OSX
if 'darwin' in filename or 'osx' in filename:
return ('darwin',)
elif missing == 'SystemConfiguration/SystemConfiguration.h':
return ('darwin',)
# Windows
elif missing in ('windows.h', 'winsock2.h'):
return ('win32',)
# other
elif missing == 'sys/ldr.h':
return ('aix',)
elif missing == 'dl.h':
# XXX The existence of Python/dynload_dl.c implies others...
# Note that hpux isn't actually supported anymore.
return ('hpux', '???')
# unrecognized
else:
return ()
def is_missing_dep(errtext):
if 'No such file or directory' in errtext:
missing = errtext.split(': No such file or directory')[0].split()[-1]
return missing
return False


@ -0,0 +1,110 @@
import sys
OS = sys.platform
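# Accept a comma/space separated string, any other iterable, or None.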
def _as_tuple(items):
if isinstance(items, str):
return tuple(items.strip().replace(',', ' ').split())
elif items:
return tuple(items)
else:
return ()
class PreprocessorError(Exception):
"""Something preprocessor-related went wrong."""
@classmethod
def _msg(cls, filename, reason, **ignored):
msg = 'failure while preprocessing'
if reason:
msg = f'{msg} ({reason})'
return msg
def __init__(self, filename, preprocessor=None, reason=None):
if isinstance(reason, str):
reason = reason.strip()
self.filename = filename
self.preprocessor = preprocessor or None
self.reason = str(reason) if reason else None
msg = self._msg(**vars(self))
msg = f'({filename}) {msg}'
if preprocessor:
msg = f'[{preprocessor}] {msg}'
super().__init__(msg)
class PreprocessorFailure(PreprocessorError):
"""The preprocessor command failed."""
@classmethod
def _msg(cls, error, **ignored):
msg = 'preprocessor command failed'
if error:
msg = f'{msg} {error}'
return msg
def __init__(self, filename, argv, error=None, preprocessor=None):
exitcode = -1
if isinstance(error, tuple):
if len(error) == 2:
error, exitcode = error
else:
error = str(error)
if isinstance(error, str):
error = error.strip()
self.argv = _as_tuple(argv) or None
self.error = error if error else None
self.exitcode = exitcode
reason = str(self.error)
super().__init__(filename, preprocessor, reason)
class ErrorDirectiveError(PreprocessorFailure):
"""The file hit a #error directive."""
@classmethod
def _msg(cls, error, **ignored):
return f'#error directive hit ({error})'
def __init__(self, filename, argv, error, *args, **kwargs):
super().__init__(filename, argv, error, *args, **kwargs)
class MissingDependenciesError(PreprocessorFailure):
"""The preprocessor did not have access to all the target's dependencies."""
@classmethod
def _msg(cls, missing, **ignored):
msg = 'preprocessing failed due to missing dependencies'
if missing:
msg = f'{msg} ({", ".join(missing)})'
return msg
def __init__(self, filename, missing=None, *args, **kwargs):
self.missing = _as_tuple(missing) or None
super().__init__(filename, *args, **kwargs)
class OSMismatchError(MissingDependenciesError):
"""The target is not compatible with the host OS."""
@classmethod
def _msg(cls, expected, **ignored):
return f'OS is {OS} but expected {expected or "???"}'
def __init__(self, filename, expected=None, *args, **kwargs):
if isinstance(expected, str):
expected = expected.strip()
self.actual = OS
self.expected = expected if expected else None
super().__init__(filename, None, *args, **kwargs)


@ -0,0 +1,123 @@
import os.path
import re
from . import common as _common
TOOL = 'gcc'
# https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$')
PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*')
COMPILER_DIRECTIVE_RE = re.compile(r'''
^
(.*?) # <before>
(__\w+__) # <directive>
\s*
[(] [(]
(
[^()]*
(?:
[(]
[^()]*
[)]
[^()]*
)*
) # <args>
( [)] [)] )? # <closed>
''', re.VERBOSE)
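# Args appended to every preprocessor invocation; -E makes gcc stop
# after the preprocessing stage.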
POST_ARGS = (
'-pthread',
'-std=c99',
#'-g',
#'-Og',
#'-Wno-unused-result',
#'-Wsign-compare',
#'-Wall',
#'-Wextra',
'-E',
)
def preprocess(filename, incldirs=None, macros=None, samefiles=None):
text = _common.preprocess(
TOOL,
filename,
incldirs=incldirs,
macros=macros,
#preargs=PRE_ARGS,
postargs=POST_ARGS,
executable=['gcc'],
compiler='unix',
)
return _iter_lines(text, filename, samefiles)
def _iter_lines(text, filename, samefiles, *, raw=False):
lines = iter(text.splitlines())
# Build the lines and filter out directives.
partial = 0 # depth
origfile = None
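# gcc -E interleaves '# <lineno> "<file>" [flags]' line markers with
# the preprocessed output; use them to track the originating file.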
for line in lines:
m = LINE_MARKER_RE.match(line)
if m:
lno, origfile = m.groups()
lno = int(lno)
elif _filter_orig_file(origfile, filename, samefiles):
if (m := PREPROC_DIRECTIVE_RE.match(line)):
name, = m.groups()
if name != 'pragma':
raise Exception(f'unexpected surviving directive: {line!r}')
else:
if not raw:
line, partial = _strip_directives(line, partial=partial)
yield _common.SourceLine(
_common.FileInfo(filename, lno),
'source',
line or '',
None,
)
lno += 1
def _strip_directives(line, partial=0):
# We assume there are no string literals with parens in directive bodies.
while partial > 0:
if not (m := re.match(r'[^()]*([()])', line)):
return None, partial
delim, = m.groups()
partial += 1 if delim == '(' else -1 # opened/closed
line = line[m.end():]
line = re.sub(r'__extension__', '', line)
while (m := COMPILER_DIRECTIVE_RE.match(line)):
before, _, _, closed = m.groups()
if closed:
line = f'{before} {line[m.end():]}'
else:
after, partial = _strip_directives(line[m.end():], 2)
line = f'{before} {after or ""}'
if partial:
break
return line, partial
def _filter_orig_file(origfile, current, samefiles):
if origfile == current:
return True
if origfile == '<stdin>':
return True
if os.path.isabs(origfile):
return False
for filename in samefiles or ():
if filename.endswith(os.path.sep):
filename += os.path.basename(current)
if origfile == filename:
return True
return False


@ -0,0 +1,23 @@
from ..source import (
opened as _open_source,
)
from . import common as _common
def preprocess(lines, filename=None):
if isinstance(lines, str):
with _open_source(lines, filename) as (lines, filename):
yield from preprocess(lines, filename)
return
# XXX actually preprocess...
for lno, line in enumerate(lines, 1):
kind = 'source'
data = line
conditions = None
yield _common.SourceLine(
_common.FileInfo(filename, lno),
kind,
data,
conditions,
)


@ -0,0 +1,64 @@
import contextlib
import os.path
def resolve(source, filename):
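# Normalize (source, filename): "source" may be a filename, a string
# of source code, or an iterable of lines.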
if _looks_like_filename(source):
return _resolve_filename(source, filename)
if isinstance(source, str):
source = source.splitlines()
# At this point "source" is not a str.
if not filename:
filename = None
elif not isinstance(filename, str):
raise TypeError(f'filename should be str (or None), got {filename!r}')
else:
filename, _ = _resolve_filename(filename)
return source, filename
@contextlib.contextmanager
def good_file(filename, alt=None):
if not _looks_like_filename(filename):
raise ValueError(f'expected a filename, got {filename}')
filename, _ = _resolve_filename(filename, alt)
try:
yield filename
except Exception:
if not os.path.exists(filename):
raise FileNotFoundError(f'file not found: {filename}')
raise # re-raise
def _looks_like_filename(value):
if not isinstance(value, str):
return False
return value.endswith(('.c', '.h'))
def _resolve_filename(filename, alt=None):
if os.path.isabs(filename):
...
# raise NotImplementedError
else:
filename = os.path.join('.', filename)
if not alt:
alt = filename
elif os.path.abspath(filename) == os.path.abspath(alt):
alt = filename
else:
raise ValueError(f'mismatch: {filename} != {alt}')
return filename, alt
@contextlib.contextmanager
def opened(source, filename=None):
source, filename = resolve(source, filename)
if isinstance(source, str):
with open(source) as srcfile:
yield srcfile, filename
else:
yield source, filename


@ -1,448 +1,35 @@
from cpython.__main__ import main, configure_logger
from collections import namedtuple
import glob
import os.path
import re
import shutil
import sys
import subprocess
VERBOSITY = 2
C_GLOBALS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(C_GLOBALS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)
GLOBALS_FILE = os.path.join(C_GLOBALS_DIR, 'ignored-globals.txt')
SOURCE_DIRS = ['Include', 'Objects', 'Modules', 'Parser', 'Python']
CAPI_REGEX = re.compile(r'^ *PyAPI_DATA\([^)]*\) \W*(_?Py\w+(?:, \w+)*\w).*;.*$')
IGNORED_VARS = {
'_DYNAMIC',
'_GLOBAL_OFFSET_TABLE_',
'__JCR_LIST__',
'__JCR_END__',
'__TMC_END__',
'__bss_start',
'__data_start',
'__dso_handle',
'_edata',
'_end',
}
def find_capi_vars(root):
capi_vars = {}
for dirname in SOURCE_DIRS:
for filename in glob.glob(os.path.join(
glob.escape(os.path.join(ROOT_DIR, dirname)),
'**/*.[hc]'),
recursive=True):
with open(filename) as file:
for name in _find_capi_vars(file):
if name in capi_vars:
assert not filename.endswith('.c')
assert capi_vars[name].endswith('.c')
capi_vars[name] = filename
return capi_vars
def _find_capi_vars(lines):
for line in lines:
if not line.startswith('PyAPI_DATA'):
continue
assert '{' not in line
match = CAPI_REGEX.match(line)
assert match
names, = match.groups()
for name in names.split(', '):
yield name
def _read_global_names(filename):
# These variables are shared between all interpreters in the process.
with open(filename) as file:
return {line.partition('#')[0].strip()
for line in file
if line.strip() and not line.startswith('#')}
def _is_global_var(name, globalnames):
if _is_autogen_var(name):
return True
if _is_type_var(name):
return True
if _is_module(name):
return True
if _is_exception(name):
return True
if _is_compiler(name):
return True
return name in globalnames
def _is_autogen_var(name):
return (
name.startswith('PyId_') or
'.' in name or
# Objects/typeobject.c
name.startswith('op_id.') or
name.startswith('rop_id.') or
# Python/graminit.c
name.startswith('arcs_') or
name.startswith('states_')
)
def _is_type_var(name):
if name.endswith(('Type', '_Type', '_type')): # XXX Always a static type?
return True
if name.endswith('_desc'): # for structseq types
return True
return (
name.startswith('doc_') or
name.endswith(('_doc', '__doc__', '_docstring')) or
name.endswith('_methods') or
name.endswith('_fields') or
name.endswith(('_memberlist', '_members')) or
name.endswith('_slots') or
name.endswith(('_getset', '_getsets', '_getsetlist')) or
name.endswith('_as_mapping') or
name.endswith('_as_number') or
name.endswith('_as_sequence') or
name.endswith('_as_buffer') or
name.endswith('_as_async')
)
def _is_module(name):
if name.endswith(('_functions', 'Methods', '_Methods')):
return True
if name == 'module_def':
return True
if name == 'initialized':
return True
return name.endswith(('module', '_Module'))
def _is_exception(name):
# Other vars are enumerated in globals-core.txt.
if not name.startswith(('PyExc_', '_PyExc_')):
return False
return name.endswith(('Error', 'Warning'))
def _is_compiler(name):
return (
# Python/Python-ast.c
name.endswith('_type') or
name.endswith('_singleton') or
name.endswith('_attributes')
)
class Var(namedtuple('Var', 'name kind scope capi filename')):
@classmethod
def parse_nm(cls, line, expected, ignored, capi_vars, globalnames):
_, _, line = line.partition(' ') # strip off the address
line = line.strip()
kind, _, line = line.partition(' ')
if kind in ignored or ():
return None
elif kind not in expected or ():
raise RuntimeError('unsupported NM type {!r}'.format(kind))
name, _, filename = line.partition('\t')
name = name.strip()
if _is_autogen_var(name):
return None
if _is_global_var(name, globalnames):
scope = 'global'
else:
scope = None
capi = (name in capi_vars or ())
if filename:
filename = os.path.relpath(filename.partition(':')[0])
return cls(name, kind, scope, capi, filename or '~???~')
@property
def external(self):
return self.kind.isupper()
def find_vars(root, globals_filename=GLOBALS_FILE):
python = os.path.join(root, 'python')
if not os.path.exists(python):
raise RuntimeError('python binary missing (need to build it first?)')
capi_vars = find_capi_vars(root)
globalnames = _read_global_names(globals_filename)
nm = shutil.which('nm')
if nm is None:
# XXX Use dumpbin.exe /SYMBOLS on Windows.
raise NotImplementedError
else:
yield from (var
for var in _find_var_symbols(python, nm, capi_vars,
globalnames)
if var.name not in IGNORED_VARS)
NM_FUNCS = set('Tt')
NM_PUBLIC_VARS = set('BD')
NM_PRIVATE_VARS = set('bd')
NM_VARS = NM_PUBLIC_VARS | NM_PRIVATE_VARS
NM_DATA = set('Rr')
NM_OTHER = set('ACGgiINpSsuUVvWw-?')
NM_IGNORED = NM_FUNCS | NM_DATA | NM_OTHER
def _find_var_symbols(python, nm, capi_vars, globalnames):
args = [nm,
'--line-numbers',
python]
out = subprocess.check_output(args)
for line in out.decode('utf-8').splitlines():
var = Var.parse_nm(line, NM_VARS, NM_IGNORED, capi_vars, globalnames)
if var is None:
continue
yield var
#######################################
class Filter(namedtuple('Filter', 'name op value action')):
@classmethod
def parse(cls, raw):
action = '+'
if raw.startswith(('+', '-')):
action = raw[0]
raw = raw[1:]
# XXX Support < and >?
name, op, value = raw.partition('=')
return cls(name, op, value, action)
def check(self, var):
value = getattr(var, self.name, None)
if not self.op:
matched = bool(value)
elif self.op == '=':
matched = (value == self.value)
else:
raise NotImplementedError
if self.action == '+':
return matched
elif self.action == '-':
return not matched
else:
raise NotImplementedError
def filter_var(var, filters):
for filter in filters:
if not filter.check(var):
return False
return True
def make_sort_key(spec):
columns = [(col.strip('_'), '_' if col.startswith('_') else '')
for col in spec]
def sort_key(var):
return tuple(getattr(var, col).lstrip(prefix)
for col, prefix in columns)
return sort_key
def make_groups(allvars, spec):
group = spec
groups = {}
for var in allvars:
value = getattr(var, group)
key = '{}: {}'.format(group, value)
try:
groupvars = groups[key]
except KeyError:
groupvars = groups[key] = []
groupvars.append(var)
return groups
def format_groups(groups, columns, fmts, widths):
for group in sorted(groups):
groupvars = groups[group]
yield '', 0
yield ' # {}'.format(group), 0
yield from format_vars(groupvars, columns, fmts, widths)
def format_vars(allvars, columns, fmts, widths):
fmt = ' '.join(fmts[col] for col in columns)
fmt = ' ' + fmt.replace(' ', ' ') + ' ' # for div margin
header = fmt.replace(':', ':^').format(*(col.upper() for col in columns))
yield header, 0
div = ' '.join('-'*(widths[col]+2) for col in columns)
yield div, 0
for var in allvars:
values = (getattr(var, col) for col in columns)
row = fmt.format(*('X' if val is True else val or ''
for val in values))
yield row, 1
yield div, 0
#######################################
COLUMNS = 'name,external,capi,scope,filename'
COLUMN_NAMES = COLUMNS.split(',')
COLUMN_WIDTHS = {col: len(col)
for col in COLUMN_NAMES}
COLUMN_WIDTHS.update({
'name': 50,
'scope': 7,
'filename': 40,
})
COLUMN_FORMATS = {col: '{:%s}' % width
for col, width in COLUMN_WIDTHS.items()}
for col in COLUMN_FORMATS:
if COLUMN_WIDTHS[col] == len(col):
COLUMN_FORMATS[col] = COLUMN_FORMATS[col].replace(':', ':^')
def _parse_filters_arg(raw, error):
filters = []
for value in raw.split(','):
value = value.strip()
if not value:
continue
try:
filter = Filter.parse(value)
if filter.name not in COLUMN_NAMES:
raise Exception('unsupported column {!r}'.format(filter.name))
except Exception as e:
error('bad filter {!r}: {}'.format(raw, e))
filters.append(filter)
return filters
def _parse_columns_arg(raw, error):
columns = raw.split(',')
for column in columns:
if column not in COLUMN_NAMES:
error('unsupported column {!r}'.format(column))
return columns
def _parse_sort_arg(raw, error):
sort = raw.split(',')
for column in sort:
if column.lstrip('_') not in COLUMN_NAMES:
error('unsupported column {!r}'.format(column))
return sort
def _parse_group_arg(raw, error):
if not raw:
return raw
group = raw
if group not in COLUMN_NAMES:
error('unsupported column {!r}'.format(group))
if group != 'filename':
error('unsupported group {!r}'.format(group))
return group
def parse_args(argv=None):
if argv is None:
argv = sys.argv[1:]
def parse_args():
import argparse
from c_common.scriptutil import (
add_verbosity_cli,
add_traceback_cli,
process_args_by_key,
)
from cpython.__main__ import _cli_check
parser = argparse.ArgumentParser()
processors = [
add_verbosity_cli(parser),
add_traceback_cli(parser),
_cli_check(parser, checks='<globals>'),
]
parser.add_argument('-v', '--verbose', action='count', default=0)
parser.add_argument('-q', '--quiet', action='count', default=0)
args = parser.parse_args()
ns = vars(args)
parser.add_argument('--filters', default='-scope',
help='[[-]<COLUMN>[=<GLOB>]] ...')
cmd = 'check'
verbosity, traceback_cm = process_args_by_key(
args,
processors,
['verbosity', 'traceback_cm'],
)
parser.add_argument('--columns', default=COLUMNS,
help='a comma-separated list of columns to show')
parser.add_argument('--sort', default='filename,_name',
help='a comma-separated list of columns to sort')
parser.add_argument('--group',
help='group by the given column name (- to not group)')
parser.add_argument('--rc-on-match', dest='rc', type=int)
parser.add_argument('filename', nargs='?', default=GLOBALS_FILE)
args = parser.parse_args(argv)
verbose = vars(args).pop('verbose', 0)
quiet = vars(args).pop('quiet', 0)
args.verbosity = max(0, VERBOSITY + verbose - quiet)
if args.sort.startswith('filename') and not args.group:
args.group = 'filename'
if args.rc is None:
if '-scope=core' in args.filters or 'core' not in args.filters:
args.rc = 0
else:
args.rc = 1
args.filters = _parse_filters_arg(args.filters, parser.error)
args.columns = _parse_columns_arg(args.columns, parser.error)
args.sort = _parse_sort_arg(args.sort, parser.error)
args.group = _parse_group_arg(args.group, parser.error)
return args
return cmd, ns, verbosity, traceback_cm
def main(root=ROOT_DIR, filename=GLOBALS_FILE,
filters=None, columns=COLUMN_NAMES, sort=None, group=None,
verbosity=VERBOSITY, rc=1):
log = lambda msg: ...
if verbosity >= 2:
log = lambda msg: print(msg)
allvars = (var
for var in find_vars(root, filename)
if filter_var(var, filters))
if sort:
allvars = sorted(allvars, key=make_sort_key(sort))
if group:
try:
columns.remove(group)
except ValueError:
pass
grouped = make_groups(allvars, group)
lines = format_groups(grouped, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
else:
lines = format_vars(allvars, columns, COLUMN_FORMATS, COLUMN_WIDTHS)
total = 0
for line, count in lines:
total += count
log(line)
log('\ntotal: {}'.format(total))
if total and rc:
print('ERROR: found unsafe globals', file=sys.stderr)
return rc
return 0
if __name__ == '__main__':
args = parse_args()
sys.exit(
main(**vars(args)))
(cmd, cmd_kwargs, verbosity, traceback_cm) = parse_args()
configure_logger(verbosity)
with traceback_cm:
main(cmd, cmd_kwargs)


@ -1,72 +0,0 @@
#######################################
# C Globals and CPython Runtime State.
CPython's C code makes extensive use of global variables (whether static
globals or static locals). Each such variable falls into one of several
categories:
* strictly const data
* used exclusively in main or in the REPL
* process-global state (e.g. managing process-level resources
like signals and file descriptors)
* Python "global" runtime state
* per-interpreter runtime state
The last one can be a problem as soon as anyone creates a second
interpreter (AKA "subinterpreter") in a process. It is definitely a
problem under subinterpreters if they are no longer sharing the GIL,
since the GIL protects us from a lot of race conditions. Keep in mind
that ultimately *all* objects (PyObject) should be treated as
per-interpreter state. This includes "static types", freelists,
_PyIdentifier, and singletons. Take that in for a second. It has
significant implications for where we use static variables!
Be aware that module-global state (stored in C statics) is a kind of
per-interpreter state. There have been efforts over many years, still
ongoing, to provide extension module authors with mechanisms to store
that state safely (see PEPs 3121, 489, etc.).
(Note that there has been discussion around support for running multiple
Python runtimes in the same process. That would end up with the same
problems, relative to static variables, that subinterpreters have.)
Historically we have been bad at keeping per-interpreter state out of
static variables, mostly because until recently subinterpreters were
not widely used nor even factored into solutions. However, the
feature is growing in popularity and use in the community.
Mandate: "Eliminate use of static variables for per-interpreter state."
The "c-statics.py" script in this directory, along with its accompanying
data files, are part of the effort to resolve existing problems with
our use of static variables and to prevent future problems.
#-------------------------
## statics for actually-global state (and runtime state consolidation)
In general, holding any kind of state in static variables
increases the maintenance burden and the complexity of the code (e.g.
we use TSS to identify the active thread state). So it is a good idea
to avoid using statics for state, even for the "global" runtime or
for process-global state.
Relative to maintenance burden, one problem is that the runtime
state is spread throughout the codebase in dozens of individual
globals. Unlike the other globals, the runtime state represents a set
of values that are constantly shifting in a complex way. When they are
spread out, it's harder to get a clear picture of what the runtime
involves. Furthermore, spreading them out complicates efforts to
change the runtime.
Consequently, the globals for Python's runtime state have been
consolidated under a single top-level _PyRuntime global. No new globals
should be added for runtime state. Instead, they should be added to
_PyRuntimeState or one of its sub-structs. The tools in this directory
are run as part of the test suite to ensure that no new globals have
been added. The script can be run manually as well:
./python Lib/test/test_c_statics/c-statics.py check
If it reports any globals then they should be resolved. If the globals
are runtime state then they should be folded into _PyRuntimeState.
Otherwise they should be marked as ignored.


@ -1,11 +1,10 @@
import os.path
import sys
TOOL_ROOT = os.path.abspath(
TOOL_ROOT = os.path.normcase(
os.path.abspath(
os.path.dirname( # c-analyzer/
os.path.dirname(__file__))) # cpython/
DATA_DIR = TOOL_ROOT
os.path.dirname(__file__)))) # cpython/
REPO_ROOT = (
os.path.dirname( # ..
os.path.dirname(TOOL_ROOT))) # Tools/
@ -19,11 +18,3 @@ SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
'Objects',
'Modules',
]]
#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable
# Clean up the namespace.
del sys
del os


@ -1,212 +1,280 @@
import argparse
import re
import logging
import sys
from c_analyzer.common import show
from c_analyzer.common.info import UNKNOWN
from . import SOURCE_DIRS
from .find import supported_vars
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
from c_common.fsutil import expand_filenames, iter_files_by_suffix
from c_common.scriptutil import (
add_verbosity_cli,
add_traceback_cli,
add_commands_cli,
add_kind_filtering_cli,
add_files_cli,
process_args_by_key,
configure_logger,
get_prog,
)
from .supported import IGNORED_FILE
from c_parser.info import KIND
import c_parser.__main__ as c_parser
import c_analyzer.__main__ as c_analyzer
import c_analyzer as _c_analyzer
from c_analyzer.info import UNKNOWN
from . import _analyzer, _parser, REPO_ROOT
def _check_results(unknown, knownvars, used):
def _match_unused_global(variable):
found = []
for varid in knownvars:
if varid in used:
continue
if varid.funcname is not None:
continue
if varid.name != variable.name:
continue
if variable.filename and variable.filename != UNKNOWN:
if variable.filename == varid.filename:
found.append(varid)
logger = logging.getLogger(__name__)
def _resolve_filenames(filenames):
if filenames:
resolved = (_parser.resolve_filename(f) for f in filenames)
else:
found.append(varid)
return found
badknown = set()
for variable in sorted(unknown):
msg = None
if variable.funcname != UNKNOWN:
msg = f'could not find global symbol {variable.id}'
elif m := _match_unused_global(variable):
assert isinstance(m, list)
badknown.update(m)
elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are.
unknown.remove(variable)
else:
msg = f'could not find local symbol {variable.id}'
if msg:
#raise Exception(msg)
print(msg)
if badknown:
print('---')
print(f'{len(badknown)} globals in known.tsv, but may actually be local:')
for varid in sorted(badknown):
print(f'{varid.filename:30} {varid.name}')
unused = sorted(varid
for varid in set(knownvars) - used
if varid.name != 'id') # XXX Figure out where these variables are.
if unused:
print('---')
print(f'did not use {len(unused)} known vars:')
for varid in unused:
print(f'{varid.filename:30} {varid.funcname or "-":20} {varid.name}')
raise Exception('not all known symbols used')
if unknown:
print('---')
raise Exception('could not find all symbols')
resolved = _parser.iter_filenames()
return resolved
# XXX Move this check to its own command.
def cmd_check_cache(cmd, *,
known=KNOWN_FILE,
ignored=IGNORED_FILE,
_known_from_file=known_from_file,
_find=supported_vars,
):
known = _known_from_file(known)
used = set()
unknown = set()
for var, supported in _find(known=known, ignored=ignored):
if supported is None:
unknown.add(var)
continue
used.add(var.id)
_check_results(unknown, known['variables'], used)
def cmd_check(cmd, *,
known=KNOWN_FILE,
ignored=IGNORED_FILE,
_find=supported_vars,
_show=show.basic,
_print=print,
):
"""
Fail if there are unsupported global variables.
In the failure case, the list of unsupported variables
will be printed out.
"""
def fmt_summary(analysis):
# XXX Support sorting and grouping.
supported = []
unsupported = []
for var, supported in _find(known=known, ignored=ignored):
if not supported:
unsupported.append(var)
for item in analysis:
if item.supported:
supported.append(item)
else:
unsupported.append(item)
total = 0
if not unsupported:
#_print('okay')
return
def section(name, groupitems):
nonlocal total
items, render = c_analyzer.build_section(name, groupitems,
relroot=REPO_ROOT)
yield from render()
total += len(items)
_print('ERROR: found unsupported global variables')
_print()
_show(sorted(unsupported))
_print(f' ({len(unsupported)} total)')
sys.exit(1)
yield ''
yield '===================='
yield 'supported'
yield '===================='
yield from section('types', supported)
yield from section('variables', supported)
yield ''
yield '===================='
yield 'unsupported'
yield '===================='
yield from section('types', unsupported)
yield from section('variables', unsupported)
yield ''
yield f'grand total: {total}'
def cmd_show(cmd, *,
known=KNOWN_FILE,
ignored=IGNORED_FILE,
skip_objects=False,
_find=supported_vars,
_show=show.basic,
_print=print,
):
"""
Print out the list of found global variables.
#######################################
# the checks
The variables will be distinguished as "supported" or "unsupported".
"""
allsupported = []
allunsupported = []
for found, supported in _find(known=known,
ignored=ignored,
skip_objects=skip_objects,
):
if supported is None:
CHECKS = dict(c_analyzer.CHECKS, **{
'globals': _analyzer.check_globals,
})
#######################################
# the commands
FILES_KWARGS = dict(excluded=_parser.EXCLUDED, nargs='*')
def _cli_parse(parser):
process_output = c_parser.add_output_cli(parser)
process_kind = add_kind_filtering_cli(parser)
process_preprocessor = c_parser.add_preprocessor_cli(
parser,
get_preprocessor=_parser.get_preprocessor,
)
process_files = add_files_cli(parser, **FILES_KWARGS)
return [
process_output,
process_kind,
process_preprocessor,
process_files,
]
def cmd_parse(filenames=None, **kwargs):
filenames = _resolve_filenames(filenames)
if 'get_file_preprocessor' not in kwargs:
kwargs['get_file_preprocessor'] = _parser.get_preprocessor()
c_parser.cmd_parse(filenames, **kwargs)
def _cli_check(parser, **kwargs):
return c_analyzer._cli_check(parser, CHECKS, **kwargs, **FILES_KWARGS)
def cmd_check(filenames=None, **kwargs):
filenames = _resolve_filenames(filenames)
kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
c_analyzer.cmd_check(
filenames,
relroot=REPO_ROOT,
_analyze=_analyzer.analyze,
_CHECKS=CHECKS,
**kwargs
)
def cmd_analyze(filenames=None, **kwargs):
formats = dict(c_analyzer.FORMATS)
formats['summary'] = fmt_summary
filenames = _resolve_filenames(filenames)
kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
c_analyzer.cmd_analyze(
filenames,
_analyze=_analyzer.analyze,
formats=formats,
**kwargs
)
def _cli_data(parser):
filenames = False
known = True
return c_analyzer._cli_data(parser, filenames, known)
def cmd_data(datacmd, **kwargs):
formats = dict(c_analyzer.FORMATS)
formats['summary'] = fmt_summary
filenames = (file
for file in _resolve_filenames(None)
if file not in _parser.EXCLUDED)
kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
if datacmd == 'show':
types = _analyzer.read_known()
results = []
for decl, info in types.items():
if info is UNKNOWN:
if decl.kind in (KIND.STRUCT, KIND.UNION):
extra = {'unsupported': ['type unknown'] * len(decl.members)}
else:
extra = {'unsupported': ['type unknown']}
info = (info, extra)
results.append((decl, info))
if decl.shortkey == 'struct _object':
tempinfo = info
known = _analyzer.Analysis.from_results(results)
analyze = None
elif datacmd == 'dump':
known = _analyzer.KNOWN_FILE
def analyze(files, **kwargs):
decls = []
for decl in _analyzer.iter_decls(files, **kwargs):
if not KIND.is_type_decl(decl.kind):
continue
(allsupported if supported else allunsupported
).append(found)
_print('supported:')
_print('----------')
_show(sorted(allsupported))
_print(f' ({len(allsupported)} total)')
_print()
_print('unsupported:')
_print('------------')
_show(sorted(allunsupported))
_print(f' ({len(allunsupported)} total)')
if not decl.filename.endswith('.h'):
if decl.shortkey not in _analyzer.KNOWN_IN_DOT_C:
continue
decls.append(decl)
results = _c_analyzer.analyze_decls(
decls,
known={},
analyze_resolved=_analyzer.analyze_resolved,
)
return _analyzer.Analysis.from_results(results)
else:
known = _analyzer.read_known()
def analyze(files, **kwargs):
return _analyzer.iter_decls(files, **kwargs)
extracolumns = None
c_analyzer.cmd_data(
datacmd,
filenames,
known,
_analyze=analyze,
formats=formats,
extracolumns=extracolumns,
relroot=REPO_ROOT,
**kwargs
)
#############################
# the script
# We do not define any other cmd_*() handlers here,
# favoring those defined elsewhere.
COMMANDS = {
'check': cmd_check,
'show': cmd_show,
'check': (
'analyze and fail if the CPython source code has any problems',
[_cli_check],
cmd_check,
),
'analyze': (
'report on the state of the CPython source code',
[(lambda p: c_analyzer._cli_analyze(p, **FILES_KWARGS))],
cmd_analyze,
),
'parse': (
'parse the CPython source files',
[_cli_parse],
cmd_parse,
),
'data': (
'check/manage local data (e.g. known types, ignored vars, caches)',
[_cli_data],
cmd_data,
),
}
PROG = sys.argv[0]
PROG = 'c-globals.py'
#######################################
# the script
def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
common = argparse.ArgumentParser(add_help=False)
common.add_argument('--ignored', metavar='FILE',
default=IGNORED_FILE,
help='path to file that lists ignored vars')
common.add_argument('--known', metavar='FILE',
default=KNOWN_FILE,
help='path to file that lists known types')
#common.add_argument('dirs', metavar='DIR', nargs='*',
# default=SOURCE_DIRS,
# help='a directory to check')
def parse_args(argv=sys.argv[1:], prog=None, *, subset=None):
import argparse
parser = argparse.ArgumentParser(
prog=prog,
prog=prog or get_prog(),
)
subs = parser.add_subparsers(dest='cmd')
check = subs.add_parser('check', parents=[common])
# if subset == 'check' or subset == ['check']:
# if checks is not None:
# commands = dict(COMMANDS)
# commands['check'] = list(commands['check'])
# cli = commands['check'][1][0]
# commands['check'][1][0] = (lambda p: cli(p, checks=checks))
processors = add_commands_cli(
parser,
commands=COMMANDS,
commonspecs=[
add_verbosity_cli,
add_traceback_cli,
],
subset=subset,
)
show = subs.add_parser('show', parents=[common])
show.add_argument('--skip-objects', action='store_true')
if _fail is None:
def _fail(msg):
parser.error(msg)
# Now parse the args.
args = parser.parse_args(argv)
ns = vars(args)
cmd = ns.pop('cmd')
if not cmd:
_fail('missing command')
return cmd, ns
verbosity, traceback_cm = process_args_by_key(
args,
processors[cmd],
['verbosity', 'traceback_cm'],
)
if cmd != 'parse':
# "verbosity" is sent to the commands, so we put it back.
args.verbosity = verbosity
return cmd, ns, verbosity, traceback_cm
def main(cmd, cmdkwargs=None, *, _COMMANDS=COMMANDS):
def main(cmd, cmd_kwargs):
try:
cmdfunc = _COMMANDS[cmd]
run_cmd = COMMANDS[cmd][-1]
except KeyError:
raise ValueError(
f'unsupported cmd {cmd!r}' if cmd else 'missing cmd')
cmdfunc(cmd, **cmdkwargs or {})
raise ValueError(f'unsupported cmd {cmd!r}')
run_cmd(**cmd_kwargs)
if __name__ == '__main__':
cmd, cmdkwargs = parse_args()
main(cmd, cmdkwargs)
cmd, cmd_kwargs, verbosity, traceback_cm = parse_args()
configure_logger(verbosity)
with traceback_cm:
main(cmd, cmd_kwargs)


@ -0,0 +1,348 @@
import os.path
import re
from c_common.clsutil import classonly
from c_parser.info import (
KIND,
DeclID,
Declaration,
TypeDeclaration,
TypeDef,
Struct,
Member,
FIXED_TYPE,
is_type_decl,
is_pots,
is_funcptr,
is_process_global,
is_fixed_type,
is_immutable,
)
import c_analyzer as _c_analyzer
import c_analyzer.info as _info
import c_analyzer.datafiles as _datafiles
from . import _parser, REPO_ROOT
_DATA_DIR = os.path.dirname(__file__)
KNOWN_FILE = os.path.join(_DATA_DIR, 'known.tsv')
IGNORED_FILE = os.path.join(_DATA_DIR, 'ignored.tsv')
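# Type decls that live in .c files rather than headers.  Only
# membership appears to be consulted (see cmd_data in __main__.py);
# the bool values and "???" marks seem to record remaining uncertainty.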
KNOWN_IN_DOT_C = {
'struct _odictobject': False,
'PyTupleObject': False,
'struct _typeobject': False,
'struct _arena': True, # ???
'struct _frame': False,
'struct _ts': True, # ???
'struct PyCodeObject': False,
'struct _is': True, # ???
'PyWideStringList': True, # ???
# recursive
'struct _dictkeysobject': False,
}
# These are loaded from the respective .tsv files upon first use.
_KNOWN = {
# {(file, ID) | ID => info | bool}
#'PyWideStringList': True,
}
#_KNOWN = {(Struct(None, typeid.partition(' ')[-1], None)
# if typeid.startswith('struct ')
# else TypeDef(None, typeid, None)
# ): ([], {'unsupported': None if supported else True})
# for typeid, supported in _KNOWN_IN_DOT_C.items()}
_IGNORED = {
# {ID => reason}
}
KINDS = frozenset((*KIND.TYPES, KIND.VARIABLE))
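# Only type declarations and variables are analyzed; functions (and
# statements) are ignored (see iter_decls() below).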
def read_known():
if not _KNOWN:
# Cache a copy the first time.
extracols = None # XXX
#extracols = ['unsupported']
known = _datafiles.read_known(KNOWN_FILE, extracols, REPO_ROOT)
# For now we ignore known.values() (i.e. "extra").
types, _ = _datafiles.analyze_known(
known,
analyze_resolved=analyze_resolved,
)
_KNOWN.update(types)
return _KNOWN.copy()
def write_known():
raise NotImplementedError
_datafiles.write_known(decls, KNOWN_FILE, ['unsupported'], relroot=REPO_ROOT)
def read_ignored():
if not _IGNORED:
_IGNORED.update(_datafiles.read_ignored(IGNORED_FILE))
return dict(_IGNORED)
def write_ignored():
raise NotImplementedError
_datafiles.write_ignored(variables, IGNORED_FILE)
def analyze(filenames, *,
skip_objects=False,
**kwargs
):
if skip_objects:
# XXX Set up a filter.
raise NotImplementedError
known = read_known()
decls = iter_decls(filenames)
results = _c_analyzer.analyze_decls(
decls,
known,
analyze_resolved=analyze_resolved,
)
analysis = Analysis.from_results(results)
return analysis
def iter_decls(filenames, **kwargs):
decls = _c_analyzer.iter_decls(
filenames,
# We ignore functions (and statements).
kinds=KINDS,
parse_files=_parser.parse_files,
**kwargs
)
for decl in decls:
if not decl.data:
# Ignore forward declarations.
continue
yield decl
def analyze_resolved(resolved, decl, types, knowntypes, extra=None):
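# Normalize the resolved type info for one decl and attach an
# "unsupported" annotation; returns (typedeps, extra), or None to skip.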
if decl.kind not in KINDS:
# Skip it!
return None
typedeps = resolved
if typedeps is _info.UNKNOWN:
if decl.kind in (KIND.STRUCT, KIND.UNION):
typedeps = [typedeps] * len(decl.members)
else:
typedeps = [typedeps]
#assert isinstance(typedeps, (list, TypeDeclaration)), typedeps
if extra is None:
extra = {}
elif 'unsupported' in extra:
raise NotImplementedError((decl, extra))
unsupported = _check_unsupported(decl, typedeps, types, knowntypes)
extra['unsupported'] = unsupported
return typedeps, extra
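# Each _check_*() helper returns a failure reason: a string,
# FIXED_TYPE, a per-member list, or None when the decl is supported.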
def _check_unsupported(decl, typedeps, types, knowntypes):
if typedeps is None:
raise NotImplementedError(decl)
if decl.kind in (KIND.STRUCT, KIND.UNION):
return _check_members(decl, typedeps, types, knowntypes)
elif decl.kind is KIND.ENUM:
if typedeps:
raise NotImplementedError((decl, typedeps))
return None
else:
return _check_typedep(decl, typedeps, types, knowntypes)
def _check_members(decl, typedeps, types, knowntypes):
if isinstance(typedeps, TypeDeclaration):
raise NotImplementedError((decl, typedeps))
#members = decl.members or () # A forward decl has no members.
members = decl.members
if not members:
# A forward decl has no members, but that shouldn't surface here.
raise NotImplementedError(decl)
if len(members) != len(typedeps):
raise NotImplementedError((decl, typedeps))
unsupported = []
for member, typedecl in zip(members, typedeps):
checked = _check_typedep(member, typedecl, types, knowntypes)
unsupported.append(checked)
if any(None if v is FIXED_TYPE else v for v in unsupported):
return unsupported
elif FIXED_TYPE in unsupported:
return FIXED_TYPE
else:
return None
def _check_typedep(decl, typedecl, types, knowntypes):
if not isinstance(typedecl, TypeDeclaration):
if hasattr(type(typedecl), '__len__'):
if len(typedecl) == 1:
typedecl, = typedecl
if typedecl is None:
# XXX Fail?
return 'typespec (missing)'
elif typedecl is _info.UNKNOWN:
# XXX Is this right?
return 'typespec (unknown)'
elif not isinstance(typedecl, TypeDeclaration):
raise NotImplementedError((decl, typedecl))
if isinstance(decl, Member):
return _check_vartype(decl, typedecl, types, knowntypes)
elif not isinstance(decl, Declaration):
raise NotImplementedError(decl)
elif decl.kind is KIND.TYPEDEF:
return _check_vartype(decl, typedecl, types, knowntypes)
elif decl.kind is KIND.VARIABLE:
if not is_process_global(decl):
return None
checked = _check_vartype(decl, typedecl, types, knowntypes)
return 'mutable' if checked is FIXED_TYPE else checked
else:
raise NotImplementedError(decl)
def _check_vartype(decl, typedecl, types, knowntypes):
"""Return failure reason."""
checked = _check_typespec(decl, typedecl, types, knowntypes)
if checked:
return checked
if is_immutable(decl.vartype):
return None
if is_fixed_type(decl.vartype):
return FIXED_TYPE
return 'mutable'
def _check_typespec(decl, typedecl, types, knowntypes):
typespec = decl.vartype.typespec
if typedecl is not None:
found = types.get(typedecl)
if found is None:
found = knowntypes.get(typedecl)
if found is not None:
_, extra = found
if extra is None:
# XXX Under what circumstances does this happen?
extra = {}
unsupported = extra.get('unsupported')
if unsupported is FIXED_TYPE:
unsupported = None
return 'typespec' if unsupported else None
# Fall back to default known types.
if is_pots(typespec):
return None
elif _info.is_system_type(typespec):
return None
elif is_funcptr(decl.vartype):
return None
return 'typespec'
class Analyzed(_info.Analyzed):
@classonly
def is_target(cls, raw):
if not super().is_target(raw):
return False
if raw.kind not in KINDS:
return False
return True
#@classonly
#def _parse_raw_result(cls, result, extra):
# typedecl, extra = super()._parse_raw_result(result, extra)
# if typedecl is None:
# return None, extra
# raise NotImplementedError
def __init__(self, item, typedecl=None, *, unsupported=None, **extra):
if 'unsupported' in extra:
raise NotImplementedError((item, typedecl, unsupported, extra))
if not unsupported:
unsupported = None
elif isinstance(unsupported, (str, TypeDeclaration)):
unsupported = (unsupported,)
elif unsupported is not FIXED_TYPE:
unsupported = tuple(unsupported)
self.unsupported = unsupported
extra['unsupported'] = self.unsupported # ...for __repr__(), etc.
if self.unsupported is None:
#self.supported = None
self.supported = True
elif self.unsupported is FIXED_TYPE:
if item.kind is KIND.VARIABLE:
raise NotImplementedError(item, typedecl, unsupported)
self.supported = True
else:
self.supported = not self.unsupported
super().__init__(item, typedecl, **extra)
def render(self, fmt='line', *, itemonly=False):
if fmt == 'raw':
yield repr(self)
return
rendered = super().render(fmt, itemonly=itemonly)
# XXX ???
#if itemonly:
# yield from rendered
supported = self._supported
if fmt in ('line', 'brief'):
rendered, = rendered
parts = [
'+' if supported else '-' if supported is False else '',
rendered,
]
yield '\t'.join(parts)
elif fmt == 'summary':
raise NotImplementedError(fmt)
elif fmt == 'full':
yield from rendered
if supported:
yield f'\tsupported:\t{supported}'
else:
raise NotImplementedError(fmt)
class Analysis(_info.Analysis):
_item_class = Analyzed
@classonly
def build_item(cls, info, result=None):
if not isinstance(info, Declaration) or info.kind not in KINDS:
raise NotImplementedError((info, result))
return super().build_item(info, result)
def check_globals(analysis):
# yield (data, failure)
ignored = read_ignored()
for item in analysis:
if item.kind != KIND.VARIABLE:
continue
if item.supported:
continue
if item.id in ignored:
continue
reason = item.unsupported
if not reason:
reason = '???'
elif not isinstance(reason, str):
if len(reason) == 1:
reason, = reason
reason = f'({reason})'
yield item, f'not supported {reason:20}\t{item.storage or ""} {item.vartype}'


@ -1,326 +0,0 @@
# The code here consists of hacks for pre-populating the known.tsv file.
from c_analyzer.parser.preprocessor import _iter_clean_lines
from c_analyzer.parser.naive import (
iter_variables, parse_variable_declaration, find_variables,
)
from c_analyzer.common.known import HEADER as KNOWN_HEADER
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import Variable
from c_analyzer.util import write_tsv
from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE
from .files import iter_cpython_files
POTS = ('char ', 'wchar_t ', 'int ', 'Py_ssize_t ')
POTS += tuple('const ' + v for v in POTS)
STRUCTS = ('PyTypeObject', 'PyObject', 'PyMethodDef', 'PyModuleDef', 'grammar')
def _parse_global(line, funcname=None):
line = line.strip()
if line.startswith('static '):
if '(' in line and '[' not in line and ' = ' not in line:
return None, None
name, decl = parse_variable_declaration(line)
elif line.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
name, decl = parse_variable_declaration(line)
elif line.startswith('_Py_static_string('):
decl = line.strip(';').strip()
name = line.split('(')[1].split(',')[0].strip()
elif line.startswith('_Py_IDENTIFIER('):
decl = line.strip(';').strip()
name = 'PyId_' + line.split('(')[1].split(')')[0].strip()
elif funcname:
return None, None
# global-only
elif line.startswith('PyAPI_DATA('): # only in .h files
name, decl = parse_variable_declaration(line)
elif line.startswith('extern '): # only in .h files
name, decl = parse_variable_declaration(line)
elif line.startswith('PyDoc_VAR('):
decl = line.strip(';').strip()
name = line.split('(')[1].split(')')[0].strip()
elif line.startswith(POTS): # implied static
if '(' in line and '[' not in line and ' = ' not in line:
return None, None
name, decl = parse_variable_declaration(line)
elif line.startswith(STRUCTS) and line.endswith(' = {'): # implied static
name, decl = parse_variable_declaration(line)
elif line.startswith(STRUCTS) and line.endswith(' = NULL;'): # implied static
name, decl = parse_variable_declaration(line)
elif line.startswith('struct '):
if not line.endswith(' = {'):
return None, None
if not line.partition(' ')[2].startswith(STRUCTS):
return None, None
# implied static
name, decl = parse_variable_declaration(line)
# file-specific
elif line.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
# Objects/typeobject.c
funcname = line.split('(')[1].split(',')[0]
return [
('op_id', funcname, '_Py_static_string(op_id, OPSTR)'),
('rop_id', funcname, '_Py_static_string(op_id, OPSTR)'),
]
elif line.startswith('WRAP_METHOD('):
# Objects/weakrefobject.c
funcname, name = (v.strip() for v in line.split('(')[1].split(')')[0].split(','))
return [
('PyId_' + name, funcname, f'_Py_IDENTIFIER({name})'),
]
else:
return None, None
return name, decl
def _pop_cached(varcache, filename, funcname, name, *,
_iter_variables=iter_variables,
):
# Look for the file.
try:
cached = varcache[filename]
except KeyError:
cached = varcache[filename] = {}
for variable in _iter_variables(filename,
parse_variable=_parse_global,
):
variable._isglobal = True
cached[variable.id] = variable
for var in cached:
print(' ', var)
# Look for the variable.
if funcname == UNKNOWN:
for varid in cached:
if varid.name == name:
break
else:
return None
return cached.pop(varid)
else:
return cached.pop((filename, funcname, name), None)
def find_matching_variable(varid, varcache, allfilenames, *,
_pop_cached=_pop_cached,
):
if varid.filename and varid.filename != UNKNOWN:
filenames = [varid.filename]
else:
filenames = allfilenames
for filename in filenames:
variable = _pop_cached(varcache, filename, varid.funcname, varid.name)
if variable is not None:
return variable
else:
if varid.filename and varid.filename != UNKNOWN and varid.funcname is None:
for filename in allfilenames:
if not filename.endswith('.h'):
continue
variable = _pop_cached(varcache, filename, None, varid.name)
if variable is not None:
return variable
return None
MULTILINE = {
# Python/Python-ast.c
'Load_singleton': 'PyObject *',
'Store_singleton': 'PyObject *',
'Del_singleton': 'PyObject *',
'AugLoad_singleton': 'PyObject *',
'AugStore_singleton': 'PyObject *',
'Param_singleton': 'PyObject *',
'And_singleton': 'PyObject *',
'Or_singleton': 'PyObject *',
'Add_singleton': 'static PyObject *',
'Sub_singleton': 'static PyObject *',
'Mult_singleton': 'static PyObject *',
'MatMult_singleton': 'static PyObject *',
'Div_singleton': 'static PyObject *',
'Mod_singleton': 'static PyObject *',
'Pow_singleton': 'static PyObject *',
'LShift_singleton': 'static PyObject *',
'RShift_singleton': 'static PyObject *',
'BitOr_singleton': 'static PyObject *',
'BitXor_singleton': 'static PyObject *',
'BitAnd_singleton': 'static PyObject *',
'FloorDiv_singleton': 'static PyObject *',
'Invert_singleton': 'static PyObject *',
'Not_singleton': 'static PyObject *',
'UAdd_singleton': 'static PyObject *',
'USub_singleton': 'static PyObject *',
'Eq_singleton': 'static PyObject *',
'NotEq_singleton': 'static PyObject *',
'Lt_singleton': 'static PyObject *',
'LtE_singleton': 'static PyObject *',
'Gt_singleton': 'static PyObject *',
'GtE_singleton': 'static PyObject *',
'Is_singleton': 'static PyObject *',
'IsNot_singleton': 'static PyObject *',
'In_singleton': 'static PyObject *',
'NotIn_singleton': 'static PyObject *',
# Python/symtable.c
'top': 'static identifier ',
'lambda': 'static identifier ',
'genexpr': 'static identifier ',
'listcomp': 'static identifier ',
'setcomp': 'static identifier ',
'dictcomp': 'static identifier ',
'__class__': 'static identifier ',
# Python/compile.c
'__doc__': 'static PyObject *',
'__annotations__': 'static PyObject *',
# Objects/floatobject.c
'double_format': 'static float_format_type ',
'float_format': 'static float_format_type ',
'detected_double_format': 'static float_format_type ',
'detected_float_format': 'static float_format_type ',
# Python/dtoa.c
'private_mem': 'static double private_mem[PRIVATE_mem]',
'pmem_next': 'static double *',
# Modules/_weakref.c
'weakref_functions': 'static PyMethodDef ',
}
INLINE = {
# Modules/_tracemalloc.c
'allocators': 'static struct { PyMemAllocatorEx mem; PyMemAllocatorEx raw; PyMemAllocatorEx obj; } ',
# Modules/faulthandler.c
'fatal_error': 'static struct { int enabled; PyObject *file; int fd; int all_threads; PyInterpreterState *interp; void *exc_handler; } ',
'thread': 'static struct { PyObject *file; int fd; PY_TIMEOUT_T timeout_us; int repeat; PyInterpreterState *interp; int exit; char *header; size_t header_len; PyThread_type_lock cancel_event; PyThread_type_lock running; } ',
# Modules/signalmodule.c
'Handlers': 'static volatile struct { _Py_atomic_int tripped; PyObject *func; } Handlers[NSIG]',
'wakeup': 'static volatile struct { SOCKET_T fd; int warn_on_full_buffer; int use_send; } ',
# Python/dynload_shlib.c
'handles': 'static struct { dev_t dev; ino_t ino; void *handle; } handles[128]',
# Objects/obmalloc.c
'_PyMem_Debug': 'static struct { debug_alloc_api_t raw; debug_alloc_api_t mem; debug_alloc_api_t obj; } ',
# Python/bootstrap_hash.c
'urandom_cache': 'static struct { int fd; dev_t st_dev; ino_t st_ino; } ',
}
FUNC = {
# Objects/object.c
'_Py_abstract_hack': 'Py_ssize_t (*_Py_abstract_hack)(PyObject *)',
# Parser/myreadline.c
'PyOS_InputHook': 'int (*PyOS_InputHook)(void)',
# Python/pylifecycle.c
'_PyOS_mystrnicmp_hack': 'int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t)',
# Parser/myreadline.c
'PyOS_ReadlineFunctionPointer': 'char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *)',
}
IMPLIED = {
# Objects/boolobject.c
'_Py_FalseStruct': 'static struct _longobject ',
'_Py_TrueStruct': 'static struct _longobject ',
# Modules/config.c
'_PyImport_Inittab': 'struct _inittab _PyImport_Inittab[]',
}
GLOBALS = {}
GLOBALS.update(MULTILINE)
GLOBALS.update(INLINE)
GLOBALS.update(FUNC)
GLOBALS.update(IMPLIED)
LOCALS = {
'buildinfo': ('Modules/getbuildinfo.c',
'Py_GetBuildInfo',
'static char buildinfo[50 + sizeof(GITVERSION) + ((sizeof(GITTAG) > sizeof(GITBRANCH)) ? sizeof(GITTAG) : sizeof(GITBRANCH))]'),
'methods': ('Python/codecs.c',
'_PyCodecRegistry_Init',
'static struct { char *name; PyMethodDef def; } methods[]'),
}
def _known(symbol):
if symbol.funcname:
if symbol.funcname != UNKNOWN or symbol.filename != UNKNOWN:
raise KeyError(symbol.name)
filename, funcname, decl = LOCALS[symbol.name]
varid = ID(filename, funcname, symbol.name)
elif not symbol.filename or symbol.filename == UNKNOWN:
raise KeyError(symbol.name)
else:
varid = symbol.id
try:
decl = GLOBALS[symbol.name]
except KeyError:
if symbol.name.endswith('_methods'):
decl = 'static PyMethodDef '
elif symbol.filename == 'Objects/exceptions.c' and symbol.name.startswith(('PyExc_', '_PyExc_')):
decl = 'static PyTypeObject '
else:
raise
if symbol.name not in decl:
decl = decl + symbol.name
return Variable(varid, 'static', decl)
def known_row(varid, decl):
return (
varid.filename,
varid.funcname or '-',
varid.name,
'variable',
decl,
)
def known_rows(symbols, *,
cached=True,
_get_filenames=iter_cpython_files,
_find_match=find_matching_variable,
_find_symbols=find_variables,
_as_known=known_row,
):
filenames = list(_get_filenames())
cache = {}
if cached:
for symbol in symbols:
try:
found = _known(symbol)
except KeyError:
found = _find_match(symbol, cache, filenames)
if found is None:
found = Variable(symbol.id, UNKNOWN, UNKNOWN)
yield _as_known(found.id, found.vartype)
else:
raise NotImplementedError # XXX incorporate KNOWN
for variable in _find_symbols(symbols, filenames,
srccache=cache,
parse_variable=_parse_global,
):
#variable = variable._replace(
# filename=os.path.relpath(variable.filename, REPO_ROOT))
if variable.funcname == UNKNOWN:
print(variable)
if variable.vartype == UNKNOWN:
print(variable)
yield _as_known(variable.id, variable.vartype)
def generate(symbols, filename=None, *,
_generate_rows=known_rows,
_write_tsv=write_tsv,
):
if not filename:
filename = KNOWN_FILE + '.new'
rows = _generate_rows(symbols)
_write_tsv(filename, KNOWN_HEADER, rows)
if __name__ == '__main__':
from c_symbols import binary
symbols = binary.iter_symbols(
binary.PYTHON,
find_local_symbol=None,
)
generate(symbols)


@ -0,0 +1,308 @@
import os.path
import re
from c_common.fsutil import expand_filenames, iter_files_by_suffix
from c_parser.preprocessor import (
get_preprocessor as _get_preprocessor,
)
from c_parser import (
parse_file as _parse_file,
parse_files as _parse_files,
)
from . import REPO_ROOT, INCLUDE_DIRS, SOURCE_DIRS
GLOB_ALL = '**/*'
def clean_lines(text):
"""Clear out comments, blank lines, and leading/trailing whitespace."""
lines = (line.strip() for line in text.splitlines())
lines = (line.partition('#')[0].rstrip()
for line in lines
if line and not line.startswith('#'))
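# A leading "* " is shorthand for matching anywhere under the repo root.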
glob_all = f'{GLOB_ALL} '
lines = (re.sub(r'^[*] ', glob_all, line) for line in lines)
lines = (os.path.join(REPO_ROOT, line) for line in lines)
return list(lines)
'''
@begin=sh@
./python ../c-parser/cpython.py
--exclude '+../c-parser/EXCLUDED'
--macros '+../c-parser/MACROS'
--incldirs '+../c-parser/INCL_DIRS'
--same './Include/cpython/'
Include/*.h
Include/internal/*.h
Modules/**/*.c
Objects/**/*.c
Parser/**/*.c
Python/**/*.c
@end=sh@
'''
GLOBS = [
'Include/*.h',
'Include/internal/*.h',
'Modules/**/*.c',
'Objects/**/*.c',
'Parser/**/*.c',
'Python/**/*.c',
]
EXCLUDED = clean_lines('''
# @begin=conf@
# Rather than fixing the parser for this one file, we manually make sure it's okay.
Modules/_sha3/kcp/KeccakP-1600-opt64.c
# OSX
#Modules/_ctypes/darwin/*.c
#Modules/_ctypes/libffi_osx/*.c
Modules/_scproxy.c # SystemConfiguration/SystemConfiguration.h
# Windows
Modules/_winapi.c # windows.h
Modules/overlapped.c # winsock.h
Python/dynload_win.c # windows.h
# other OS-dependent
Python/dynload_dl.c # dl.h
Python/dynload_hpux.c # dl.h
Python/dynload_aix.c # sys/ldr.h
# @end=conf@
''')
# XXX Fix the parser.
EXCLUDED += clean_lines('''
# The tool should be able to parse these...
Modules/_dbmmodule.c
Modules/cjkcodecs/_codecs_*.c
Modules/expat/xmlrole.c
Modules/expat/xmlparse.c
Python/initconfig.c
''')
INCL_DIRS = clean_lines('''
# @begin=tsv@
glob dirname
* .
* ./Include
* ./Include/internal
Modules/_tkinter.c /usr/include/tcl8.6
Modules/tkappinit.c /usr/include/tcl
Modules/_decimal/**/*.c Modules/_decimal/libmpdec
# @end=tsv@
''')[1:]
MACROS = clean_lines('''
# @begin=tsv@
glob name value
Include/internal/*.h Py_BUILD_CORE 1
Python/**/*.c Py_BUILD_CORE 1
Parser/**/*.c Py_BUILD_CORE 1
Objects/**/*.c Py_BUILD_CORE 1
Modules/faulthandler.c Py_BUILD_CORE 1
Modules/_functoolsmodule.c Py_BUILD_CORE 1
Modules/gcmodule.c Py_BUILD_CORE 1
Modules/getpath.c Py_BUILD_CORE 1
Modules/_io/*.c Py_BUILD_CORE 1
Modules/itertoolsmodule.c Py_BUILD_CORE 1
Modules/_localemodule.c Py_BUILD_CORE 1
Modules/main.c Py_BUILD_CORE 1
Modules/posixmodule.c Py_BUILD_CORE 1
Modules/signalmodule.c Py_BUILD_CORE 1
Modules/_threadmodule.c Py_BUILD_CORE 1
Modules/_tracemalloc.c Py_BUILD_CORE 1
Modules/_asynciomodule.c Py_BUILD_CORE 1
Modules/mathmodule.c Py_BUILD_CORE 1
Modules/cmathmodule.c Py_BUILD_CORE 1
Modules/_weakref.c Py_BUILD_CORE 1
Modules/sha256module.c Py_BUILD_CORE 1
Modules/sha512module.c Py_BUILD_CORE 1
Modules/_datetimemodule.c Py_BUILD_CORE 1
Modules/_ctypes/cfield.c Py_BUILD_CORE 1
Modules/_heapqmodule.c Py_BUILD_CORE 1
Modules/_posixsubprocess.c Py_BUILD_CORE 1
Modules/_json.c Py_BUILD_CORE_BUILTIN 1
Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1
Modules/_testinternalcapi.c Py_BUILD_CORE_BUILTIN 1
Include/cpython/abstract.h Py_CPYTHON_ABSTRACTOBJECT_H 1
Include/cpython/bytearrayobject.h Py_CPYTHON_BYTEARRAYOBJECT_H 1
Include/cpython/bytesobject.h Py_CPYTHON_BYTESOBJECT_H 1
Include/cpython/ceval.h Py_CPYTHON_CEVAL_H 1
Include/cpython/code.h Py_CPYTHON_CODE_H 1
Include/cpython/dictobject.h Py_CPYTHON_DICTOBJECT_H 1
Include/cpython/fileobject.h Py_CPYTHON_FILEOBJECT_H 1
Include/cpython/fileutils.h Py_CPYTHON_FILEUTILS_H 1
Include/cpython/frameobject.h Py_CPYTHON_FRAMEOBJECT_H 1
Include/cpython/import.h Py_CPYTHON_IMPORT_H 1
Include/cpython/interpreteridobject.h Py_CPYTHON_INTERPRETERIDOBJECT_H 1
Include/cpython/listobject.h Py_CPYTHON_LISTOBJECT_H 1
Include/cpython/methodobject.h Py_CPYTHON_METHODOBJECT_H 1
Include/cpython/object.h Py_CPYTHON_OBJECT_H 1
Include/cpython/objimpl.h Py_CPYTHON_OBJIMPL_H 1
Include/cpython/pyerrors.h Py_CPYTHON_ERRORS_H 1
Include/cpython/pylifecycle.h Py_CPYTHON_PYLIFECYCLE_H 1
Include/cpython/pymem.h Py_CPYTHON_PYMEM_H 1
Include/cpython/pystate.h Py_CPYTHON_PYSTATE_H 1
Include/cpython/sysmodule.h Py_CPYTHON_SYSMODULE_H 1
Include/cpython/traceback.h Py_CPYTHON_TRACEBACK_H 1
Include/cpython/tupleobject.h Py_CPYTHON_TUPLEOBJECT_H 1
Include/cpython/unicodeobject.h Py_CPYTHON_UNICODEOBJECT_H 1
# implied include of pyport.h
Include/**/*.h PyAPI_DATA(RTYPE) extern RTYPE
Include/**/*.h PyAPI_FUNC(RTYPE) RTYPE
Include/**/*.h Py_DEPRECATED(VER) /* */
Include/**/*.h _Py_NO_RETURN /* */
Include/**/*.h PYLONG_BITS_IN_DIGIT 30
Modules/**/*.c PyMODINIT_FUNC PyObject*
Objects/unicodeobject.c PyMODINIT_FUNC PyObject*
Python/marshal.c PyMODINIT_FUNC PyObject*
Python/_warnings.c PyMODINIT_FUNC PyObject*
Python/Python-ast.c PyMODINIT_FUNC PyObject*
Python/import.c PyMODINIT_FUNC PyObject*
Modules/_testcapimodule.c PyAPI_FUNC(RTYPE) RTYPE
Python/getargs.c PyAPI_FUNC(RTYPE) RTYPE
# implied include of exports.h
#Modules/_io/bytesio.c Py_EXPORTED_SYMBOL /* */
# implied include of object.h
Include/**/*.h PyObject_HEAD PyObject ob_base;
Include/**/*.h PyObject_VAR_HEAD PyVarObject ob_base;
# implied include of pyconfig.h
Include/**/*.h SIZEOF_WCHAR_T 4
# implied include of <unistd.h>
Include/**/*.h _POSIX_THREADS 1
# from Makefile
Modules/getpath.c PYTHONPATH 1
Modules/getpath.c PREFIX ...
Modules/getpath.c EXEC_PREFIX ...
Modules/getpath.c VERSION ...
Modules/getpath.c VPATH ...
# from Modules/_sha3/sha3module.c
Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c PLATFORM_BYTE_ORDER 4321 # force big-endian
Modules/_sha3/kcp/*.c KeccakOpt 64
Modules/_sha3/kcp/*.c KeccakP200_excluded 1
Modules/_sha3/kcp/*.c KeccakP400_excluded 1
Modules/_sha3/kcp/*.c KeccakP800_excluded 1
# See: setup.py
Modules/_decimal/**/*.c CONFIG_64 1
Modules/_decimal/**/*.c ASM 1
Modules/expat/xmlparse.c HAVE_EXPAT_CONFIG_H 1
Modules/expat/xmlparse.c XML_POOR_ENTROPY 1
Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1
# @end=tsv@
''')[1:]
# -pthread
# -Wno-unused-result
# -Wsign-compare
# -g
# -Og
# -Wall
# -std=c99
# -Wextra
# -Wno-unused-result -Wno-unused-parameter
# -Wno-missing-field-initializers
# -Werror=implicit-function-declaration
SAME = [
'./Include/cpython/',
]
def resolve_filename(filename):
orig = filename
filename = os.path.normcase(os.path.normpath(filename))
if os.path.isabs(filename):
if os.path.relpath(filename, REPO_ROOT).startswith('.'):
raise Exception(f'{orig!r} is outside the repo ({REPO_ROOT})')
return filename
else:
return os.path.join(REPO_ROOT, filename)
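# Behavior sketch (hypothetical paths): relative names are anchored at
# REPO_ROOT, while absolute names must already live inside the repo.
#
#   >>> resolve_filename('Python/ceval.c')   # -> os.path.join(REPO_ROOT, 'Python/ceval.c')
#   >>> resolve_filename('/tmp/other.c')     # raises Exception: outside the repo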
def iter_filenames(*, search=False):
if search:
yield from iter_files_by_suffix(INCLUDE_DIRS, ('.h',))
yield from iter_files_by_suffix(SOURCE_DIRS, ('.c',))
else:
globs = (os.path.join(REPO_ROOT, file) for file in GLOBS)
yield from expand_filenames(globs)
def get_preprocessor(*,
file_macros=None,
file_incldirs=None,
file_same=None,
**kwargs
):
macros = tuple(MACROS)
if file_macros:
macros += tuple(file_macros)
incldirs = tuple(INCL_DIRS)
if file_incldirs:
incldirs += tuple(file_incldirs)
return _get_preprocessor(
file_macros=macros,
file_incldirs=incldirs,
file_same=file_same,
**kwargs
)
def parse_file(filename, *,
match_kind=None,
ignore_exc=None,
log_err=None,
):
get_file_preprocessor = get_preprocessor(
ignore_exc=ignore_exc,
log_err=log_err,
)
yield from _parse_file(
filename,
match_kind=match_kind,
get_file_preprocessor=get_file_preprocessor,
)
def parse_files(filenames=None, *,
match_kind=None,
ignore_exc=None,
log_err=None,
get_file_preprocessor=None,
**file_kwargs
):
if get_file_preprocessor is None:
get_file_preprocessor = get_preprocessor(
ignore_exc=ignore_exc,
log_err=log_err,
)
yield from _parse_files(
filenames,
match_kind=match_kind,
get_file_preprocessor=get_file_preprocessor,
**file_kwargs
)
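# A minimal usage sketch (not part of the original file).  parse_files()
# is a generator, so declarations stream out as each file is
# preprocessed and parsed:
#
#   for decl in parse_files(iter_filenames(), log_err=print):
#       print(decl)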

View File

@ -1,29 +0,0 @@
import os.path
from c_analyzer.common.files import (
C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix,
)
from . import SOURCE_DIRS, REPO_ROOT
# XXX need tests:
# * iter_files()
def iter_files(*,
walk=walk_tree,
_files=iter_files_by_suffix,
):
"""Yield each file in the tree for each of the given directory names."""
excludedtrees = [
os.path.join('Include', 'cpython', ''),
]
def is_excluded(filename):
for root in excludedtrees:
if filename.startswith(root):
return True
return False
for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
walk=walk,
):
if is_excluded(filename):
continue
yield filename

View File

@ -1,101 +0,0 @@
import os.path
from c_analyzer.common import files
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import find as _common
from . import SOURCE_DIRS, PYTHON, REPO_ROOT
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from .supported import (
ignored_from_file, IGNORED_FILE, is_supported, _is_object,
)
# XXX need tests:
# * vars_from_binary()
# * vars_from_source()
# * supported_vars()
def _handle_id(filename, funcname, name, *,
_relpath=os.path.relpath,
):
filename = _relpath(filename, REPO_ROOT)
return ID(filename, funcname, name)
def vars_from_binary(*,
known=KNOWN_FILE,
_known_from_file=known_from_file,
_iter_files=files.iter_files_by_suffix,
_iter_vars=_common.vars_from_binary,
):
"""Yield a Variable for each found Symbol.
Details are filled in from the given "known" variables and types.
"""
if isinstance(known, str):
known = _known_from_file(known)
dirnames = SOURCE_DIRS
suffixes = ('.c',)
filenames = _iter_files(dirnames, suffixes)
# XXX For now we only use known variables (no source lookup).
filenames = None
yield from _iter_vars(PYTHON,
known=known,
filenames=filenames,
handle_id=_handle_id,
check_filename=(lambda n: True),
)
def vars_from_source(*,
preprocessed=None,
known=KNOWN_FILE,
_known_from_file=known_from_file,
_iter_files=files.iter_files_by_suffix,
_iter_vars=_common.vars_from_source,
):
"""Yield a Variable for each declaration in the raw source code.
Details are filled in from the given "known" variables and types.
"""
if isinstance(known, str):
known = _known_from_file(known)
dirnames = SOURCE_DIRS
suffixes = ('.c',)
filenames = _iter_files(dirnames, suffixes)
yield from _iter_vars(filenames,
preprocessed=preprocessed,
known=known,
handle_id=_handle_id,
)
def supported_vars(*,
known=KNOWN_FILE,
ignored=IGNORED_FILE,
skip_objects=False,
_known_from_file=known_from_file,
_ignored_from_file=ignored_from_file,
_iter_vars=vars_from_binary,
_is_supported=is_supported,
):
"""Yield (var, is supported) for each found variable."""
if isinstance(known, str):
known = _known_from_file(known)
if isinstance(ignored, str):
ignored = _ignored_from_file(ignored)
for var in _iter_vars(known=known):
if not var.isglobal:
continue
elif var.vartype == UNKNOWN:
yield var, None
# XXX Support proper filters instead.
elif skip_objects and _is_object(var.vartype):
continue
else:
yield var, _is_supported(var, ignored, known)

View File

@ -0,0 +1,2 @@
filename funcname name reason
#??? - somevar ???

View File

@ -1,66 +0,0 @@
import csv
import os.path
from c_analyzer.parser.declarations import extract_storage
from c_analyzer.variables import known as _common
from c_analyzer.variables.info import Variable
from . import DATA_DIR
# XXX need tests:
# * from_file()
# * look_up_variable()
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
def _get_storage(decl, infunc):
# statics
if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
return 'static'
if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
return 'static'
if decl.startswith('PyDoc_VAR('):
return 'static'
if decl.startswith(('SLOT1BINFULL(', 'SLOT1BIN(')):
return 'static'
if decl.startswith('WRAP_METHOD('):
return 'static'
# public extern
if decl.startswith('PyAPI_DATA('):
return 'extern'
# Fall back to the normal handler.
return extract_storage(decl, infunc=infunc)
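# Examples (hypothetical declarations) of how storage is classified:
#
#   >>> _get_storage('_Py_IDENTIFIER(name)', None)
#   'static'
#   >>> _get_storage('PyAPI_DATA(PyObject *) PyExc_ValueError', None)
#   'extern'
#
# Anything else falls through to extract_storage().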
def _handle_var(varid, decl):
# if varid.name == 'id' and decl == UNKNOWN:
# # None of these are variables.
# decl = 'int id';
storage = _get_storage(decl, varid.funcname)
return Variable(varid, storage, decl)
def from_file(infile=DATA_FILE, *,
_from_file=_common.from_file,
_handle_var=_handle_var,
):
"""Return the info for known declarations in the given file."""
return _from_file(infile, handle_var=_handle_var)
def look_up_variable(varid, knownvars, *,
_lookup=_common.look_up_variable,
):
"""Return the known variable matching the given ID.
"knownvars" is a mapping of ID to Variable.
"match_files" is used to verify if two filenames point to
the same file.
If no match is found then None is returned.
"""
return _lookup(varid, knownvars)
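# Usage sketch (hypothetical; ID would come from c_analyzer.common.info,
# which this module does not import).  Elsewhere the tool treats the
# result of from_file() as a mapping with a 'variables' table keyed by ID:
#
#   known = from_file()
#   var = look_up_variable(ID('Python/ceval.c', None, 'lltrace'),
#   known['variables'])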

View File

@ -0,0 +1,3 @@
filename funcname name kind declaration
#filename funcname name kind is_supported declaration
#??? - PyWideStringList typedef ???

View File

@ -1,398 +0,0 @@
import os.path
import re
from c_analyzer.common.info import ID
from c_analyzer.common.util import read_tsv, write_tsv
from . import DATA_DIR
# XXX need tests:
# * generate / script
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
IGNORED_COLUMNS = ('filename', 'funcname', 'name', 'kind', 'reason')
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
# XXX Move these to ignored.tsv.
IGNORED = {
# global
'PyImport_FrozenModules': 'process-global',
'M___hello__': 'process-global',
'inittab_copy': 'process-global',
'PyHash_Func': 'process-global',
'_Py_HashSecret_Initialized': 'process-global',
'_TARGET_LOCALES': 'process-global',
# startup (only changed before/during)
'_PyRuntime': 'runtime startup',
'runtime_initialized': 'runtime startup',
'static_arg_parsers': 'runtime startup',
'orig_argv': 'runtime startup',
'opt_ptr': 'runtime startup',
'_preinit_warnoptions': 'runtime startup',
'_Py_StandardStreamEncoding': 'runtime startup',
'Py_FileSystemDefaultEncoding': 'runtime startup',
'_Py_StandardStreamErrors': 'runtime startup',
'Py_FileSystemDefaultEncodeErrors': 'runtime startup',
'Py_BytesWarningFlag': 'runtime startup',
'Py_DebugFlag': 'runtime startup',
'Py_DontWriteBytecodeFlag': 'runtime startup',
'Py_FrozenFlag': 'runtime startup',
'Py_HashRandomizationFlag': 'runtime startup',
'Py_IgnoreEnvironmentFlag': 'runtime startup',
'Py_InspectFlag': 'runtime startup',
'Py_InteractiveFlag': 'runtime startup',
'Py_IsolatedFlag': 'runtime startup',
'Py_NoSiteFlag': 'runtime startup',
'Py_NoUserSiteDirectory': 'runtime startup',
'Py_OptimizeFlag': 'runtime startup',
'Py_QuietFlag': 'runtime startup',
'Py_UTF8Mode': 'runtime startup',
'Py_UnbufferedStdioFlag': 'runtime startup',
'Py_VerboseFlag': 'runtime startup',
'_Py_path_config': 'runtime startup',
'_PyOS_optarg': 'runtime startup',
'_PyOS_opterr': 'runtime startup',
'_PyOS_optind': 'runtime startup',
'_Py_HashSecret': 'runtime startup',
# REPL
'_PyOS_ReadlineLock': 'repl',
'_PyOS_ReadlineTState': 'repl',
# effectively const
'tracemalloc_empty_traceback': 'const',
'_empty_bitmap_node': 'const',
'posix_constants_pathconf': 'const',
'posix_constants_confstr': 'const',
'posix_constants_sysconf': 'const',
'_PySys_ImplCacheTag': 'const',
'_PySys_ImplName': 'const',
'PyImport_Inittab': 'const',
'_PyImport_DynLoadFiletab': 'const',
'_PyParser_Grammar': 'const',
'Py_hexdigits': 'const',
'_PyImport_Inittab': 'const',
'_PyByteArray_empty_string': 'const',
'_PyLong_DigitValue': 'const',
'_Py_SwappedOp': 'const',
'PyStructSequence_UnnamedField': 'const',
# signals are main-thread only
'faulthandler_handlers': 'signals are main-thread only',
'user_signals': 'signals are main-thread only',
'wakeup': 'signals are main-thread only',
# hacks
'_PySet_Dummy': 'only used as a placeholder',
}
BENIGN = 'races here are benign and unlikely'
def is_supported(variable, ignored=None, known=None, *,
_ignored=(lambda *a, **k: _is_ignored(*a, **k)),
_vartype_okay=(lambda *a, **k: _is_vartype_okay(*a, **k)),
):
"""Return True if the given global variable is okay in CPython."""
if _ignored(variable,
ignored and ignored.get('variables')):
return True
elif _vartype_okay(variable.vartype,
ignored and ignored.get('types')):
return True
else:
return False
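# Example (hypothetical variable; Variable comes from
# c_analyzer.variables.info and is not imported here): with no ignore
# data loaded, a const global is reported as supported:
#
#   >>> v = Variable(ID('Objects/spam.c', None, 'v'), 'static',
#   ...              'static const int v')
#   >>> is_supported(v)
#   True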
def _is_ignored(variable, ignoredvars=None, *,
_IGNORED=IGNORED,
):
"""Return the reason if the variable is a supported global.
Return None if the variable is not a supported global.
"""
if ignoredvars and (reason := ignoredvars.get(variable.id)):
return reason
if variable.funcname is None:
if reason := _IGNORED.get(variable.name):
return reason
# compiler
if variable.filename == 'Python/graminit.c':
if variable.vartype.startswith('static state '):
return 'compiler'
if variable.filename == 'Python/symtable.c':
if variable.vartype.startswith('static identifier '):
return 'compiler'
if variable.filename == 'Python/Python-ast.c':
# These should be const.
if variable.name.endswith('_field'):
return 'compiler'
if variable.name.endswith('_attribute'):
return 'compiler'
# other
if variable.filename == 'Python/dtoa.c':
# guarded by lock?
if variable.name in ('p5s', 'freelist'):
return 'dtoa is thread-safe?'
if variable.name in ('private_mem', 'pmem_next'):
return 'dtoa is thread-safe?'
if variable.filename == 'Python/thread.c':
# Threads do not become an issue until after these have been set
# and these never get changed after that.
if variable.name in ('initialized', 'thread_debug'):
return 'thread-safe'
if variable.filename == 'Python/getversion.c':
if variable.name == 'version':
# Races are benign here, as well as unlikely.
return BENIGN
if variable.filename == 'Python/fileutils.c':
if variable.name == 'force_ascii':
return BENIGN
if variable.name == 'ioctl_works':
return BENIGN
if variable.name == '_Py_open_cloexec_works':
return BENIGN
if variable.filename == 'Python/codecs.c':
if variable.name == 'ucnhash_CAPI':
return BENIGN
if variable.filename == 'Python/bootstrap_hash.c':
if variable.name == 'getrandom_works':
return BENIGN
if variable.filename == 'Objects/unicodeobject.c':
if variable.name == 'ucnhash_CAPI':
return BENIGN
if variable.name == 'bloom_linebreak':
# *mostly* benign
return BENIGN
if variable.filename == 'Modules/getbuildinfo.c':
if variable.name == 'buildinfo':
# The static is used for pre-allocation.
return BENIGN
if variable.filename == 'Modules/posixmodule.c':
if variable.name == 'ticks_per_second':
return BENIGN
if variable.name == 'dup3_works':
return BENIGN
if variable.filename == 'Modules/timemodule.c':
if variable.name == 'ticks_per_second':
return BENIGN
if variable.filename == 'Objects/longobject.c':
if variable.name == 'log_base_BASE':
return BENIGN
if variable.name == 'convwidth_base':
return BENIGN
if variable.name == 'convmultmax_base':
return BENIGN
return None
def _is_vartype_okay(vartype, ignoredtypes=None):
if _is_object(vartype):
return None
if vartype.startswith('static const '):
return 'const'
if vartype.startswith('const '):
return 'const'
# components for TypeObject definitions
for name in ('PyMethodDef', 'PyGetSetDef', 'PyMemberDef'):
if name in vartype:
return 'const'
for name in ('PyNumberMethods', 'PySequenceMethods', 'PyMappingMethods',
'PyBufferProcs', 'PyAsyncMethods'):
if name in vartype:
return 'const'
for name in ('slotdef', 'newfunc'):
if name in vartype:
return 'const'
# structseq
for name in ('PyStructSequence_Desc', 'PyStructSequence_Field'):
if name in vartype:
return 'const'
# other definitions
if 'PyModuleDef' in vartype:
return 'const'
# thread-safe
if '_Py_atomic_int' in vartype:
return 'thread-safe'
if 'pthread_condattr_t' in vartype:
return 'thread-safe'
# startup
if '_Py_PreInitEntry' in vartype:
return 'startup'
# global
# if 'PyMemAllocatorEx' in vartype:
# return True
# others
# if 'PyThread_type_lock' in vartype:
# return True
# XXX ???
# _Py_tss_t
# _Py_hashtable_t
# stack_t
# _PyUnicode_Name_CAPI
# functions
if '(' in vartype and '[' not in vartype:
return 'function pointer'
# XXX finish!
# * allow const values?
#raise NotImplementedError
return None
PYOBJECT_RE = re.compile(r'''
^
(
# must start with "static "
static \s+
(
identifier
)
\b
) |
(
# may start with "static "
( static \s+ )?
(
.*
(
PyObject |
PyTypeObject |
_? Py \w+ Object |
_PyArg_Parser |
_Py_Identifier |
traceback_t |
PyAsyncGenASend |
_PyAsyncGenWrappedValue |
PyContext |
method_cache_entry
)
\b
) |
(
(
_Py_IDENTIFIER |
_Py_static_string
)
[(]
)
)
''', re.VERBOSE)
def _is_object(vartype):
if 'PyDictKeysObject' in vartype:
return False
if PYOBJECT_RE.match(vartype):
return True
if vartype.endswith((' _Py_FalseStruct', ' _Py_TrueStruct')):
return True
# XXX Add more?
#for part in vartype.split():
# # XXX const is automatic True?
# if part == 'PyObject' or part.startswith('PyObject['):
# return True
return False
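# Spot checks (hypothetical vartype strings) for the regex above:
#
#   >>> _is_object('static PyObject *cached')
#   True
#   >>> _is_object('static PyDictKeysObject *keys')   # explicitly excluded
#   False
#   >>> _is_object('static int counter')
#   False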
def ignored_from_file(infile, *,
_read_tsv=read_tsv,
):
"""Yield a Variable for each ignored var in the file."""
ignored = {
'variables': {},
#'types': {},
#'constants': {},
#'macros': {},
}
for row in _read_tsv(infile, IGNORED_HEADER):
filename, funcname, name, kind, reason = row
if not funcname or funcname == '-':
funcname = None
id = ID(filename, funcname, name)
if kind == 'variable':
values = ignored['variables']
else:
raise ValueError(f'unsupported kind in row {row}')
values[id] = reason
return ignored
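# Sketch of the expected TSV input (hypothetical row):
#
#   filename        funcname    name      kind        reason
#   Python/ceval.c  -           lltrace   variable    debugging only
#
# which would yield:
#
#   {'variables': {ID('Python/ceval.c', None, 'lltrace'): 'debugging only'}}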
##################################
# generate
def _get_row(varid, reason):
return (
varid.filename,
varid.funcname or '-',
varid.name,
'variable',
str(reason),
)
def _get_rows(variables, ignored=None, *,
_as_row=_get_row,
_is_ignored=_is_ignored,
_vartype_okay=_is_vartype_okay,
):
count = 0
for variable in variables:
reason = _is_ignored(variable,
ignored and ignored.get('variables'),
)
if not reason:
reason = _vartype_okay(variable.vartype,
ignored and ignored.get('types'))
if not reason:
continue
print(' ', variable, repr(reason))
yield _as_row(variable.id, reason)
count += 1
print(f'total: {count}')
def _generate_ignored_file(variables, filename=None, *,
_generate_rows=_get_rows,
_write_tsv=write_tsv,
):
if not filename:
filename = IGNORED_FILE + '.new'
rows = _generate_rows(variables)
_write_tsv(filename, IGNORED_HEADER, rows)
if __name__ == '__main__':
from cpython import SOURCE_DIRS
from cpython.known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
# XXX This is wrong!
from . import find
known = known_from_file(KNOWN_FILE)
knownvars = (known or {}).get('variables')
variables = find.globals_from_binary(knownvars=knownvars,
dirnames=SOURCE_DIRS)
_generate_ignored_file(variables)

View File

@ -1,492 +0,0 @@
# All variables declared here are shared between all interpreters
# in a single process. That means that they must not be changed
# unless that change should apply to all interpreters.
#
# See check-c-globals.py.
#
# Many generic names are handled via the script:
#
# * most exceptions and all warnings handled via _is_exception()
# * for builtin modules, generic names are handled via _is_module()
# * generic names for static types handled via _is_type_var()
# * AST vars handled via _is_compiler()
#######################################
# main
# Modules/getpath.c
exec_prefix
module_search_path
prefix
progpath
# Modules/main.c
orig_argc
orig_argv
# Python/getopt.c
opt_ptr
_PyOS_optarg
_PyOS_opterr
_PyOS_optind
#######################################
# REPL
# Parser/myreadline.c
PyOS_InputHook
PyOS_ReadlineFunctionPointer
_PyOS_ReadlineLock
_PyOS_ReadlineTState
#######################################
# state
# Python/dtoa.c
p5s
pmem_next # very slight race
private_mem # very slight race
# Python/import.c
# For the moment the import lock stays global. Ultimately there should
# be a global lock for extension modules and a per-interpreter lock.
import_lock
import_lock_level
import_lock_thread
# Python/pylifecycle.c
_PyRuntime
#---------------------------------
# module globals (PyObject)
# Modules/_functoolsmodule.c
kwd_mark
# Modules/_localemodule.c
Error
# Modules/_threadmodule.c
ThreadError
# Modules/_tracemalloc.c
unknown_filename
# Modules/gcmodule.c
gc_str
# Modules/posixmodule.c
billion
posix_putenv_garbage
# Modules/signalmodule.c
DefaultHandler
IgnoreHandler
IntHandler
ItimerError
# Modules/zipimport.c
ZipImportError
zip_directory_cache
#---------------------------------
# module globals (other)
# Modules/_tracemalloc.c
allocators
tables_lock
tracemalloc_config
tracemalloc_empty_traceback
tracemalloc_filenames
tracemalloc_peak_traced_memory
tracemalloc_reentrant_key
tracemalloc_traceback
tracemalloc_tracebacks
tracemalloc_traced_memory
tracemalloc_traces
# Modules/faulthandler.c
fatal_error
faulthandler_handlers
old_stack
stack
thread
user_signals
# Modules/posixmodule.c
posix_constants_confstr
posix_constants_pathconf
posix_constants_sysconf
structseq_new
ticks_per_second
# Modules/signalmodule.c
Handlers # main thread only
is_tripped # main thread only
main_pid
main_thread
old_siginthandler
wakeup_fd # main thread only
# Modules/zipimport.c
zip_searchorder
# Python/bltinmodule.c
Py_FileSystemDefaultEncodeErrors
Py_FileSystemDefaultEncoding
Py_HasFileSystemDefaultEncoding
# Python/sysmodule.c
_PySys_ImplCacheTag
_PySys_ImplName
#---------------------------------
# freelists
# Modules/_collectionsmodule.c
freeblocks
numfreeblocks
# Objects/classobject.c
free_list
numfree
# Objects/dictobject.c
free_list
keys_free_list
numfree
numfreekeys
# Objects/exceptions.c
memerrors_freelist
memerrors_numfree
# Objects/floatobject.c
free_list
numfree
# Objects/frameobject.c
free_list
numfree
# Objects/genobject.c
ag_asend_freelist
ag_asend_freelist_free
ag_value_freelist
ag_value_freelist_free
# Objects/listobject.c
free_list
numfree
# Objects/methodobject.c
free_list
numfree
# Objects/sliceobject.c
slice_cache # slight race
# Objects/tupleobject.c
free_list
numfree
# Python/dtoa.c
freelist # very slight race
#---------------------------------
# caches (PyObject)
# Objects/typeobject.c
method_cache # only for static types
next_version_tag # only for static types
# Python/dynload_shlib.c
handles # slight race during import
nhandles # slight race during import
# Python/import.c
extensions # slight race on init during import
#---------------------------------
# caches (other)
# Python/bootstrap_hash.c
urandom_cache
# Python/modsupport.c
_Py_PackageContext # Slight race during import! Move to PyThreadState?
#---------------------------------
# counters
# Objects/bytesobject.c
null_strings
one_strings
# Objects/dictobject.c
pydict_global_version
# Objects/moduleobject.c
max_module_number # slight race during import
#######################################
# constants
#---------------------------------
# singletons
# Objects/boolobject.c
_Py_FalseStruct
_Py_TrueStruct
# Objects/object.c
_Py_NoneStruct
_Py_NotImplementedStruct
# Objects/sliceobject.c
_Py_EllipsisObject
#---------------------------------
# constants (other)
# Modules/config.c
_PyImport_Inittab
# Objects/bytearrayobject.c
_PyByteArray_empty_string
# Objects/dictobject.c
empty_keys_struct
empty_values
# Objects/floatobject.c
detected_double_format
detected_float_format
double_format
float_format
# Objects/longobject.c
_PyLong_DigitValue
# Objects/object.c
_Py_SwappedOp
# Objects/obmalloc.c
_PyMem_Debug
# Objects/setobject.c
_dummy_struct
# Objects/structseq.c
PyStructSequence_UnnamedField
# Objects/typeobject.c
name_op
slotdefs # almost
slotdefs_initialized # almost
subtype_getsets_dict_only
subtype_getsets_full
subtype_getsets_weakref_only
tp_new_methoddef
# Objects/unicodeobject.c
bloom_linebreak
static_strings # slight race
# Parser/tokenizer.c
_PyParser_TokenNames
# Python/Python-ast.c
alias_fields
# Python/codecs.c
Py_hexdigits
ucnhash_CAPI # slight performance-only race
# Python/dynload_shlib.c
_PyImport_DynLoadFiletab
# Python/fileutils.c
_Py_open_cloexec_works
force_ascii
# Python/frozen.c
M___hello__
PyImport_FrozenModules
# Python/graminit.c
_PyParser_Grammar
dfas
labels
# Python/import.c
PyImport_Inittab
# Python/pylifecycle.c
_TARGET_LOCALES
#---------------------------------
# initialized (PyObject)
# Objects/bytesobject.c
characters
nullstring
# Objects/exceptions.c
PyExc_RecursionErrorInst
errnomap
# Objects/longobject.c
_PyLong_One
_PyLong_Zero
small_ints
# Objects/setobject.c
emptyfrozenset
# Objects/unicodeobject.c
interned # slight race on init in PyUnicode_InternInPlace()
unicode_empty
unicode_latin1
#---------------------------------
# initialized (other)
# Python/getargs.c
static_arg_parsers
# Python/pyhash.c
PyHash_Func
_Py_HashSecret
_Py_HashSecret_Initialized
# Python/pylifecycle.c
_Py_StandardStreamEncoding
_Py_StandardStreamErrors
default_home
env_home
progname
Py_BytesWarningFlag
Py_DebugFlag
Py_DontWriteBytecodeFlag
Py_FrozenFlag
Py_HashRandomizationFlag
Py_IgnoreEnvironmentFlag
Py_InspectFlag
Py_InteractiveFlag
Py_IsolatedFlag
Py_NoSiteFlag
Py_NoUserSiteDirectory
Py_OptimizeFlag
Py_QuietFlag
Py_UnbufferedStdioFlag
Py_VerboseFlag
#---------------------------------
# types
# Modules/_threadmodule.c
Locktype
RLocktype
localdummytype
localtype
# Objects/exceptions.c
PyExc_BaseException
PyExc_Exception
PyExc_GeneratorExit
PyExc_KeyboardInterrupt
PyExc_StopAsyncIteration
PyExc_StopIteration
PyExc_SystemExit
_PyExc_BaseException
_PyExc_Exception
_PyExc_GeneratorExit
_PyExc_KeyboardInterrupt
_PyExc_StopAsyncIteration
_PyExc_StopIteration
_PyExc_SystemExit
# Objects/structseq.c
_struct_sequence_template
#---------------------------------
# interned strings/bytes
# Modules/_io/_iomodule.c
_PyIO_empty_bytes
_PyIO_empty_str
_PyIO_str_close
_PyIO_str_closed
_PyIO_str_decode
_PyIO_str_encode
_PyIO_str_fileno
_PyIO_str_flush
_PyIO_str_getstate
_PyIO_str_isatty
_PyIO_str_newlines
_PyIO_str_nl
_PyIO_str_read
_PyIO_str_read1
_PyIO_str_readable
_PyIO_str_readall
_PyIO_str_readinto
_PyIO_str_readline
_PyIO_str_reset
_PyIO_str_seek
_PyIO_str_seekable
_PyIO_str_setstate
_PyIO_str_tell
_PyIO_str_truncate
_PyIO_str_writable
_PyIO_str_write
# Modules/_threadmodule.c
str_dict
# Objects/boolobject.c
false_str
true_str
# Objects/listobject.c
indexerr
# Python/symtable.c
__class__
dictcomp
genexpr
lambda
listcomp
setcomp
top
# Python/sysmodule.c
whatstrings
#######################################
# hacks
# Objects/object.c
_Py_abstract_hack
# Objects/setobject.c
_PySet_Dummy
# Python/pylifecycle.c
_PyOS_mystrnicmp_hack

View File

@ -1 +0,0 @@
filename funcname name kind reason
