bpo-36876: [c-analyzer tool] Tighten up the results and output. (GH-23431)

We also update the "ignored" file with a temporary list of all known globals.
This commit is contained in:
Eric Snow 2020-11-20 15:39:28 -07:00 committed by GitHub
parent a993e901eb
commit 9f02b479e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 3203 additions and 110 deletions

View File

@ -5,6 +5,7 @@ import os.path
import re
import sys
from c_common import fsutil
from c_common.logging import VERBOSITY, Printer
from c_common.scriptutil import (
add_verbosity_cli,
@ -298,9 +299,9 @@ def cmd_check(filenames, *,
checks=None,
ignored=None,
fmt=None,
relroot=None,
failfast=False,
iter_filenames=None,
relroot=fsutil.USE_CWD,
track_progress=None,
verbosity=VERBOSITY,
_analyze=_analyze,
@ -317,14 +318,14 @@ def cmd_check(filenames, *,
(handle_failure, handle_after, div
) = _get_check_handlers(fmt, printer, verbosity)
filenames = filter_filenames(filenames, iter_filenames)
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
filenames = filter_filenames(filenames, iter_filenames, relroot)
if track_progress:
filenames = track_progress(filenames)
logger.info('analyzing files...')
analyzed = _analyze(filenames, **kwargs)
if relroot:
analyzed.fix_filenames(relroot)
analyzed.fix_filenames(relroot, normalize=False)
decls = filter_forward(analyzed, markpublic=True)
logger.info('checking analysis results...')
@ -374,6 +375,7 @@ def _cli_analyze(parser, **kwargs):
def cmd_analyze(filenames, *,
fmt=None,
iter_filenames=None,
relroot=fsutil.USE_CWD,
track_progress=None,
verbosity=None,
_analyze=_analyze,
@ -387,12 +389,14 @@ def cmd_analyze(filenames, *,
except KeyError:
raise ValueError(f'unsupported fmt {fmt!r}')
filenames = filter_filenames(filenames, iter_filenames)
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
filenames = filter_filenames(filenames, iter_filenames, relroot)
if track_progress:
filenames = track_progress(filenames)
logger.info('analyzing files...')
analyzed = _analyze(filenames, **kwargs)
analyzed.fix_filenames(relroot, normalize=False)
decls = filter_forward(analyzed, markpublic=True)
for line in do_fmt(decls):
@ -434,7 +438,7 @@ def cmd_data(datacmd, filenames, known=None, *,
_analyze=_analyze,
formats=FORMATS,
extracolumns=None,
relroot=None,
relroot=fsutil.USE_CWD,
track_progress=None,
**kwargs
):
@ -447,9 +451,11 @@ def cmd_data(datacmd, filenames, known=None, *,
for line in do_fmt(known):
print(line)
elif datacmd == 'dump':
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
if track_progress:
filenames = track_progress(filenames)
analyzed = _analyze(filenames, **kwargs)
analyzed.fix_filenames(relroot, normalize=False)
if known is None or usestdout:
outfile = io.StringIO()
_datafiles.write_known(analyzed, outfile, extracolumns,

View File

@ -1,3 +1,6 @@
import os.path
from c_common import fsutil
import c_common.tables as _tables
import c_parser.info as _info
import c_parser.match as _match
@ -13,6 +16,30 @@ EXTRA_COLUMNS = [
]
def get_known(known, extracolumns=None, *,
              analyze_resolved=None,
              handle_unresolved=True,
              relroot=fsutil.USE_CWD,
              ):
    """Return the result of analyze_known() for the given "known" data.

    If "known" is a string then it is first loaded with read_known()
    (along with "extracolumns" and "relroot").  Any other value is
    handed to analyze_known() untouched.
    """
    loaded = known
    if isinstance(loaded, str):
        # NOTE(review): presumably the name of the "known" data file.
        loaded = read_known(loaded, extracolumns, relroot)
    return analyze_known(
        loaded,
        analyze_resolved=analyze_resolved,
        handle_unresolved=handle_unresolved,
    )
def read_known(infile, extracolumns=None, relroot=fsutil.USE_CWD):
    """Return a dict mapping each decl read from "infile" to its extra data.

    The rows are read with _parser.iter_decls_tsv(), using EXTRA_COLUMNS
    followed by any caller-provided "extracolumns".
    """
    columns = list(EXTRA_COLUMNS)
    if extracolumns:
        columns.extend(extracolumns)
    rows = _parser.iter_decls_tsv(infile, columns, relroot)
    # A later row for the same decl replaces the earlier one.
    return {decl: extra for decl, extra in rows}
def analyze_known(known, *,
analyze_resolved=None,
handle_unresolved=True,
@ -34,32 +61,8 @@ def analyze_known(known, *,
return types, typespecs
def get_known(known, extracolumns=None, *,
analyze_resolved=None,
handle_unresolved=True,
relroot=None,
):
if isinstance(known, str):
known = read_known(known, extracolumns, relroot)
return analyze_known(
known,
handle_unresolved=handle_unresolved,
analyze_resolved=analyze_resolved,
)
def read_known(infile, extracolumns=None, relroot=None):
extracolumns = EXTRA_COLUMNS + (
list(extracolumns) if extracolumns else []
)
known = {}
for decl, extra in _parser.iter_decls_tsv(infile, extracolumns, relroot):
known[decl] = extra
return known
def write_known(rows, outfile, extracolumns=None, *,
relroot=None,
relroot=fsutil.USE_CWD,
backup=True,
):
extracolumns = EXTRA_COLUMNS + (
@ -86,22 +89,34 @@ IGNORED_COLUMNS = [
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
def read_ignored(infile):
return dict(_iter_ignored(infile))
def read_ignored(infile, relroot=fsutil.USE_CWD):
return dict(_iter_ignored(infile, relroot))
def _iter_ignored(infile):
def _iter_ignored(infile, relroot):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
bogus = {_tables.EMPTY, _tables.UNKNOWN}
for row in _tables.read_table(infile, IGNORED_HEADER, sep='\t'):
*varidinfo, reason = row
if _tables.EMPTY in varidinfo or _tables.UNKNOWN in varidinfo:
varidinfo = tuple(None if v in bogus else v
for v in varidinfo)
if reason in bogus:
reason = None
varid = _info.DeclID.from_row(varidinfo)
varid = varid.fix_filename(relroot, formatted=False, fixroot=False)
yield varid, reason
def write_ignored(variables, outfile):
def write_ignored(variables, outfile, relroot=fsutil.USE_CWD):
raise NotImplementedError
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
reason = '???'
#if not isinstance(varid, DeclID):
# varid = getattr(varid, 'parsed', varid).id
decls = (d.fix_filename(relroot, fixroot=False) for d in decls)
_tables.write_table(
outfile,
IGNORED_HEADER,

View File

@ -1,5 +1,7 @@
from collections import namedtuple
import os.path
from c_common import fsutil
from c_common.clsutil import classonly
import c_common.misc as _misc
from c_parser.info import (
@ -223,8 +225,9 @@ class Analyzed:
else:
return UNKNOWN not in self.typedecl
def fix_filename(self, relroot):
self.item.fix_filename(relroot)
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
self.item.fix_filename(relroot, **kwargs)
return self
def as_rowdata(self, columns=None):
# XXX finish!
@ -309,9 +312,11 @@ class Analysis:
else:
return self._analyzed[key]
def fix_filenames(self, relroot):
def fix_filenames(self, relroot=fsutil.USE_CWD, **kwargs):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
for item in self._analyzed:
item.fix_filename(relroot)
item.fix_filename(relroot, fixroot=False, **kwargs)
def _add_result(self, info, resolved):
analyzed = type(self).build_item(info, resolved)

View File

@ -8,6 +8,9 @@ import stat
from .iterutil import iter_many
USE_CWD = object()
C_SOURCE_SUFFIXES = ('.c', '.h')
@ -29,6 +32,78 @@ def create_backup(old, backup=None):
return backup
##################################
# filenames
def fix_filename(filename, relroot=USE_CWD, *,
                 fixroot=True,
                 _badprefix=f'..{os.path.sep}',
                 ):
    """Return a normalized, absolute-path copy of the given filename.

    With a false "relroot", or with USE_CWD, the filename is simply
    passed through os.path.abspath().  Otherwise it is resolved against
    "relroot" by _fix_filename(); the root itself is made absolute
    first unless "fixroot" is false.
    """
    if relroot and relroot is not USE_CWD:
        root = os.path.abspath(relroot) if fixroot else relroot
        return _fix_filename(filename, root)
    return os.path.abspath(filename)
def _fix_filename(filename, relroot, *,
                  _badprefix=f'..{os.path.sep}',
                  ):
    """Return "filename" as a normalized path located under "relroot".

    A relative filename is joined onto "relroot".  An absolute filename
    must already be located at or under "relroot".

    "relroot" is used as given; callers in this module pass it through
    os.path.abspath() first.

    Raises ValueError if the filename resolves to somewhere outside
    of "relroot".
    """
    orig = filename

    # First we normalize.
    filename = os.path.normpath(filename)
    # A bare ".." escapes the root just like "../<something>" does.
    if filename == os.path.pardir or filename.startswith(_badprefix):
        raise ValueError(f'bad filename {orig!r} (resolves beyond relative root)')

    # Now make sure it is absolute (relative to relroot).
    if not os.path.isabs(filename):
        filename = os.path.join(relroot, filename)
    else:
        relpath = os.path.relpath(filename, relroot)
        # The root itself yields "." here, which would never survive
        # the round-trip through os.path.join(), so we allow it explicitly.
        if (relpath != os.path.curdir
                and os.path.join(relroot, relpath) != filename):
            raise ValueError(f'expected {relroot!r} as root, got {orig!r}')
    return filename
def fix_filenames(filenames, relroot=USE_CWD):
    """Return (filenames, relroot) with the filenames made absolute.

    The filenames come back as a lazy generator.  With a false "relroot",
    or with USE_CWD, each one is passed through os.path.abspath() and
    "relroot" is returned unchanged.  Otherwise "relroot" is made
    absolute and each filename is resolved against it with
    _fix_filename().
    """
    if relroot and relroot is not USE_CWD:
        relroot = os.path.abspath(relroot)
        fixed = (_fix_filename(name, relroot) for name in filenames)
    else:
        fixed = (os.path.abspath(name) for name in filenames)
    return fixed, relroot
def format_filename(filename, relroot=USE_CWD, *,
                    fixroot=True,
                    normalize=True,
                    _badprefix=f'..{os.path.sep}',
                    ):
    """Return a consistent relative-path representation of the filename.

    If "normalize" is true then the filename is first passed through
    os.path.normpath().  What happens next depends on "relroot":

    * None: the (possibly normalized) filename is returned as-is
    * USE_CWD: the filename is made relative to the current directory
    * anything else: the filename is made relative to "relroot", which
      is made absolute first unless "fixroot" is false

    Raises ValueError if "relroot" is empty while "fixroot" is false,
    or if the resulting relative path points outside of the root.
    """
    orig = filename
    if normalize:
        filename = os.path.normpath(filename)
    if relroot is None:
        # Otherwise leave it as-is.
        return filename
    elif relroot is USE_CWD:
        # Make it relative to CWD.
        filename = os.path.relpath(filename)
    else:
        # Make it relative to "relroot".
        if fixroot:
            relroot = os.path.abspath(relroot)
        elif not relroot:
            raise ValueError('missing relroot')
        filename = os.path.relpath(filename, relroot)
    # A bare ".." (the parent of the root) is outside the root too,
    # even though it does not start with "../".
    if filename == os.path.pardir or filename.startswith(_badprefix):
        raise ValueError(f'bad filename {orig!r} (resolves beyond relative root)')
    return filename
##################################
# find files
@ -54,34 +129,29 @@ def match_glob(filename, pattern):
return fnmatch.fnmatch(filename, pattern.replace('**/', '', 1))
def iter_filenames(filenames, *,
def process_filenames(filenames, *,
start=None,
include=None,
exclude=None,
relroot=USE_CWD,
):
if relroot and relroot is not USE_CWD:
relroot = os.path.abspath(relroot)
if start:
start = fix_filename(start, relroot, fixroot=False)
if include:
include = set(fix_filename(v, relroot, fixroot=False)
for v in include)
if exclude:
exclude = set(fix_filename(v, relroot, fixroot=False)
for v in exclude)
onempty = Exception('no filenames provided')
for filename, solo in iter_many(filenames, onempty):
filename = fix_filename(filename, relroot, fixroot=False)
relfile = format_filename(filename, relroot, fixroot=False, normalize=False)
check, start = _get_check(filename, start, include, exclude)
yield filename, check, solo
# filenames = iter(filenames or ())
# try:
# first = next(filenames)
# except StopIteration:
# raise Exception('no filenames provided')
# try:
# second = next(filenames)
# except StopIteration:
# check, _ = _get_check(first, start, include, exclude)
# yield first, check, False
# return
#
# check, start = _get_check(first, start, include, exclude)
# yield first, check, True
# check, start = _get_check(second, start, include, exclude)
# yield second, check, True
# for filename in filenames:
# check, start = _get_check(filename, start, include, exclude)
# yield filename, check, True
yield filename, relfile, check, solo
def expand_filenames(filenames):

View File

@ -307,7 +307,9 @@ def add_file_filtering_cli(parser, *, excluded=None):
exclude=tuple(_parse_files(_exclude)),
# We use the default for "show_header"
)
ns[key] = (lambda files: fsutil.iter_filenames(files, **kwargs))
def process_filenames(filenames, relroot=None):
return fsutil.process_filenames(filenames, relroot=relroot, **kwargs)
ns[key] = process_filenames
return process_args
@ -529,42 +531,46 @@ def set_command(name, add_cli):
##################################
# main() helpers
def filter_filenames(filenames, iter_filenames=None):
for filename, check, _ in _iter_filenames(filenames, iter_filenames):
def filter_filenames(filenames, process_filenames=None, relroot=fsutil.USE_CWD):
# We expect each filename to be a normalized, absolute path.
for filename, _, check, _ in _iter_filenames(filenames, process_filenames, relroot):
if (reason := check()):
logger.debug(f'{filename}: {reason}')
continue
yield filename
def main_for_filenames(filenames, iter_filenames=None):
for filename, check, show in _iter_filenames(filenames, iter_filenames):
def main_for_filenames(filenames, process_filenames=None, relroot=fsutil.USE_CWD):
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
for filename, relfile, check, show in _iter_filenames(filenames, process_filenames, relroot):
if show:
print()
print(relfile)
print('-------------------------------------------')
print(filename)
if (reason := check()):
print(reason)
continue
yield filename
yield filename, relfile
def _iter_filenames(filenames, iter_files):
if iter_files is None:
iter_files = fsutil.iter_filenames
yield from iter_files(filenames)
def _iter_filenames(filenames, process, relroot):
if process is None:
yield from fsutil.process_filenames(filenames, relroot=relroot)
return
onempty = Exception('no filenames provided')
items = iter_files(filenames)
items = process(filenames, relroot=relroot)
items, peeked = iterutil.peek_and_iter(items)
if not items:
raise onempty
if isinstance(peeked, str):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
check = (lambda: True)
for filename, ismany in iterutil.iter_many(items, onempty):
yield filename, check, ismany
elif len(peeked) == 3:
relfile = fsutil.format_filename(filename, relroot, fixroot=False)
yield filename, relfile, check, ismany
elif len(peeked) == 4:
yield from items
else:
raise NotImplementedError

View File

@ -26,13 +26,14 @@ def fix_row(row, **markers):
unknown = parse_markers(markers.pop('unknown', ('???',)))
row = (val if val else None for val in row)
if not empty:
if not unknown:
return row
return (UNKNOWN if val in unknown else val for val in row)
if unknown:
row = (UNKNOWN if val in unknown else val for val in row)
elif not unknown:
return (EMPTY if val in empty else val for val in row)
return (EMPTY if val in empty else (UNKNOWN if val in unknown else val)
row = (EMPTY if val in empty else val for val in row)
else:
row = (EMPTY if val in empty else (UNKNOWN if val in unknown else val)
for val in row)
return tuple(row)
def _fix_read_default(row):

View File

@ -2,6 +2,7 @@ import logging
import os.path
import sys
from c_common import fsutil
from c_common.scriptutil import (
CLIArgSpec as Arg,
add_verbosity_cli,
@ -64,8 +65,9 @@ def fmt_raw(filename, item, *, showfwd=None):
def fmt_summary(filename, item, *, showfwd=None):
if item.filename and item.filename != os.path.join('.', filename):
if item.filename != filename:
yield f'> {item.filename}'
if showfwd is None:
LINE = ' {lno:>5} {kind:10} {funcname:40} {fwd:1} {name:40} {data}'
else:
@ -172,6 +174,7 @@ def cmd_parse(filenames, *,
fmt='summary',
showfwd=None,
iter_filenames=None,
relroot=None,
**kwargs
):
if 'get_file_preprocessor' not in kwargs:
@ -180,9 +183,10 @@ def cmd_parse(filenames, *,
do_fmt = FORMATS[fmt]
except KeyError:
raise ValueError(f'unsupported fmt {fmt!r}')
for filename in main_for_filenames(filenames, iter_filenames):
for filename, relfile in main_for_filenames(filenames, iter_filenames, relroot):
for item in _iter_parsed(filename, **kwargs):
for line in do_fmt(filename, item, showfwd=showfwd):
item = item.fix_filename(relroot, fixroot=False, normalize=False)
for line in do_fmt(relfile, item, showfwd=showfwd):
print(line)

View File

@ -1,5 +1,6 @@
import os.path
from c_common import fsutil
import c_common.tables as _tables
import c_parser.info as _info
@ -81,21 +82,27 @@ def _get_format_handlers(group, fmt):
# tsv
def iter_decls_tsv(infile, extracolumns=None, relroot=None):
for info, extra in _iter_decls_tsv(infile, extracolumns, relroot):
def iter_decls_tsv(infile, extracolumns=None, relroot=fsutil.USE_CWD):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
for info, extra in _iter_decls_tsv(infile, extracolumns):
decl = _info.Declaration.from_row(info)
decl = decl.fix_filename(relroot, formatted=False, fixroot=False)
yield decl, extra
def write_decls_tsv(decls, outfile, extracolumns=None, *,
relroot=None,
relroot=fsutil.USE_CWD,
**kwargs
):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
decls = (d.fix_filename(relroot, fixroot=False) for d in decls)
# XXX Move the row rendering here.
_write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs)
_write_decls_tsv(decls, outfile, extracolumns, kwargs)
def _iter_decls_tsv(infile, extracolumns=None, relroot=None):
def _iter_decls_tsv(infile, extracolumns=None):
columns = _get_columns('decls', extracolumns)
for row in _tables.read_table(infile, columns, sep='\t'):
if extracolumns:
@ -104,15 +111,13 @@ def _iter_decls_tsv(infile, extracolumns=None, relroot=None):
else:
declinfo = row
extra = None
if relroot:
# XXX Use something like tables.fix_row() here.
declinfo = [None if v == '-' else v
for v in declinfo]
declinfo[0] = os.path.join(relroot, declinfo[0])
yield declinfo, extra
def _write_decls_tsv(decls, outfile, extracolumns, relroot,kwargs):
def _write_decls_tsv(decls, outfile, extracolumns, kwargs):
columns = _get_columns('decls', extracolumns)
if extracolumns:
def render_decl(decl):
@ -121,7 +126,7 @@ def _write_decls_tsv(decls, outfile, extracolumns, relroot,kwargs):
else:
extra = ()
extra += ('???',) * (len(extraColumns) - len(extra))
*row, declaration = _render_known_row(decl, relroot)
*row, declaration = _render_known_row(decl)
row += extra + (declaration,)
return row
else:
@ -129,13 +134,13 @@ def _write_decls_tsv(decls, outfile, extracolumns, relroot,kwargs):
_tables.write_table(
outfile,
header='\t'.join(columns),
rows=(render_decl(d, relroot) for d in decls),
rows=(render_decl(d) for d in decls),
sep='\t',
**kwargs
)
def _render_known_decl(decl, relroot, *,
def _render_known_decl(decl, *,
# These match BASE_COLUMNS + END_COLUMNS[group].
_columns = 'filename parent name kind data'.split(),
):
@ -143,8 +148,6 @@ def _render_known_decl(decl, relroot, *,
# e.g. Analyzed
decl = decl.decl
rowdata = decl.render_rowdata(_columns)
if relroot:
rowdata['filename'] = os.path.relpath(rowdata['filename'], relroot)
return [rowdata[c] or '-' for c in _columns]
# XXX
#return _tables.fix_row(rowdata[c] for c in columns)

View File

@ -3,6 +3,7 @@ import enum
import os.path
import re
from c_common import fsutil
from c_common.clsutil import classonly
import c_common.misc as _misc
import c_common.strutil as _strutil
@ -148,6 +149,16 @@ def get_kind_group(item):
#############################
# low-level
def _fix_filename(filename, relroot, *,
                  formatted=True,
                  **kwargs):
    """Return the filename as fixed up by the matching fsutil helper.

    fsutil.format_filename() is used when "formatted" is true and
    fsutil.fix_filename() otherwise.  Any extra keyword arguments are
    passed through to whichever helper is used.
    """
    fix = fsutil.format_filename if formatted else fsutil.fix_filename
    return fix(filename, relroot=relroot, **kwargs)
class FileInfo(namedtuple('FileInfo', 'filename lno')):
@classmethod
def from_raw(cls, raw):
@ -165,8 +176,10 @@ class FileInfo(namedtuple('FileInfo', 'filename lno')):
def __str__(self):
return self.filename
def fix_filename(self, relroot):
filename = os.path.relpath(self.filename, relroot)
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
filename = _fix_filename(self.filename, relroot, **kwargs)
if filename == self.filename:
return self
return self._replace(filename=filename)
@ -194,6 +207,16 @@ class DeclID(namedtuple('DeclID', 'filename funcname name')):
row = _tables.fix_row(row, **markers)
return cls(*row)
# We have to provide _make() because we implemented __new__().
@classmethod
def _make(cls, iterable):
    """Return a new instance built from the values in "iterable".

    The values are passed positionally to the class (and so through
    the custom __new__()).  If that fails then the exception is
    re-raised, unless the inherited _make() raises one of its own first.
    """
    try:
        return cls(*iterable)
    except Exception:
        # NOTE(review): presumably this gives the base namedtuple's
        # _make() a chance to raise its own (more specific) error
        # for the same input -- confirm.  Its return value is unused.
        super()._make(iterable)
        raise  # re-raise
def __new__(cls, filename, funcname, name):
self = super().__new__(
cls,
@ -221,6 +244,12 @@ class DeclID(namedtuple('DeclID', 'filename funcname name')):
return NotImplemented
return self._compare > other
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
    """Return a copy with the filename fixed up relative to "relroot".

    The work is delegated to the module-level _fix_filename(), which
    also gets any extra keyword arguments.  If the filename comes back
    unchanged then this same object is returned instead of a copy.
    """
    fixed = _fix_filename(self.filename, relroot, **kwargs)
    return self if fixed == self.filename else self._replace(filename=fixed)
class ParsedItem(namedtuple('ParsedItem', 'file kind parent name data')):
@ -290,6 +319,12 @@ class ParsedItem(namedtuple('ParsedItem', 'file kind parent name data')):
else:
return self.parent.name
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
    """Return a copy whose "file" has been fixed up relative to "relroot".

    The work is delegated to self.file.fix_filename(), which also gets
    any extra keyword arguments.  If the result compares equal to the
    current "file" then this same object is returned instead of a copy.
    """
    newfile = self.file.fix_filename(relroot, **kwargs)
    return self if newfile == self.file else self._replace(file=newfile)
def as_row(self, columns=None):
if not columns:
columns = self._fields
@ -591,9 +626,10 @@ class HighlevelParsedItem:
)
return self._parsed
def fix_filename(self, relroot):
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
if self.file:
self.file = self.file.fix_filename(relroot)
self.file = self.file.fix_filename(relroot, **kwargs)
return self
def as_rowdata(self, columns=None):
columns, datacolumns, colnames = self._parse_columns(columns)

View File

@ -105,7 +105,11 @@ def cmd_parse(filenames=None, **kwargs):
filenames = _resolve_filenames(filenames)
if 'get_file_preprocessor' not in kwargs:
kwargs['get_file_preprocessor'] = _parser.get_preprocessor()
c_parser.cmd_parse(filenames, **kwargs)
c_parser.cmd_parse(
filenames,
relroot=REPO_ROOT,
**kwargs
)
def _cli_check(parser, **kwargs):
@ -131,6 +135,7 @@ def cmd_analyze(filenames=None, **kwargs):
kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
c_analyzer.cmd_analyze(
filenames,
relroot=REPO_ROOT,
_analyze=_analyzer.analyze,
formats=formats,
**kwargs

View File

@ -84,13 +84,13 @@ def write_known():
def read_ignored():
if not _IGNORED:
_IGNORED.update(_datafiles.read_ignored(IGNORED_FILE))
_IGNORED.update(_datafiles.read_ignored(IGNORED_FILE, relroot=REPO_ROOT))
return dict(_IGNORED)
def write_ignored():
raise NotImplementedError
datafiles.write_ignored(variables, IGNORED_FILE)
_datafiles.write_ignored(variables, IGNORED_FILE, relroot=REPO_ROOT)
def analyze(filenames, *,

View File

@ -162,6 +162,12 @@ Modules/_datetimemodule.c Py_BUILD_CORE 1
Modules/_ctypes/cfield.c Py_BUILD_CORE 1
Modules/_heapqmodule.c Py_BUILD_CORE 1
Modules/_posixsubprocess.c Py_BUILD_CORE 1
Modules/_sre.c Py_BUILD_CORE 1
Modules/_collectionsmodule.c Py_BUILD_CORE 1
Modules/_zoneinfo.c Py_BUILD_CORE 1
Modules/unicodedata.c Py_BUILD_CORE 1
Modules/_cursesmodule.c Py_BUILD_CORE 1
Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1
Objects/stringlib/codecs.h Py_BUILD_CORE 1
Python/ceval_gil.h Py_BUILD_CORE 1
Python/condvar.h Py_BUILD_CORE 1

File diff suppressed because it is too large Load Diff