bpo-36876: [c-analyzer tool] Tighten up the results and output. (GH-23431)

We also update the "ignored" file with a temporary list of all known globals.
This commit is contained in:
Eric Snow 2020-11-20 15:39:28 -07:00 committed by GitHub
parent a993e901eb
commit 9f02b479e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 3203 additions and 110 deletions

View File

@ -5,6 +5,7 @@ import os.path
import re
import sys
from c_common import fsutil
from c_common.logging import VERBOSITY, Printer
from c_common.scriptutil import (
add_verbosity_cli,
@ -298,9 +299,9 @@ def cmd_check(filenames, *,
checks=None,
ignored=None,
fmt=None,
relroot=None,
failfast=False,
iter_filenames=None,
relroot=fsutil.USE_CWD,
track_progress=None,
verbosity=VERBOSITY,
_analyze=_analyze,
@ -317,14 +318,14 @@ def cmd_check(filenames, *,
(handle_failure, handle_after, div
) = _get_check_handlers(fmt, printer, verbosity)
filenames = filter_filenames(filenames, iter_filenames)
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
filenames = filter_filenames(filenames, iter_filenames, relroot)
if track_progress:
filenames = track_progress(filenames)
logger.info('analyzing files...')
analyzed = _analyze(filenames, **kwargs)
if relroot:
analyzed.fix_filenames(relroot)
analyzed.fix_filenames(relroot, normalize=False)
decls = filter_forward(analyzed, markpublic=True)
logger.info('checking analysis results...')
@ -374,6 +375,7 @@ def _cli_analyze(parser, **kwargs):
def cmd_analyze(filenames, *,
fmt=None,
iter_filenames=None,
relroot=fsutil.USE_CWD,
track_progress=None,
verbosity=None,
_analyze=_analyze,
@ -387,12 +389,14 @@ def cmd_analyze(filenames, *,
except KeyError:
raise ValueError(f'unsupported fmt {fmt!r}')
filenames = filter_filenames(filenames, iter_filenames)
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
filenames = filter_filenames(filenames, iter_filenames, relroot)
if track_progress:
filenames = track_progress(filenames)
logger.info('analyzing files...')
analyzed = _analyze(filenames, **kwargs)
analyzed.fix_filenames(relroot, normalize=False)
decls = filter_forward(analyzed, markpublic=True)
for line in do_fmt(decls):
@ -434,7 +438,7 @@ def cmd_data(datacmd, filenames, known=None, *,
_analyze=_analyze,
formats=FORMATS,
extracolumns=None,
relroot=None,
relroot=fsutil.USE_CWD,
track_progress=None,
**kwargs
):
@ -447,9 +451,11 @@ def cmd_data(datacmd, filenames, known=None, *,
for line in do_fmt(known):
print(line)
elif datacmd == 'dump':
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
if track_progress:
filenames = track_progress(filenames)
analyzed = _analyze(filenames, **kwargs)
analyzed.fix_filenames(relroot, normalize=False)
if known is None or usestdout:
outfile = io.StringIO()
_datafiles.write_known(analyzed, outfile, extracolumns,

View File

@ -1,3 +1,6 @@
import os.path
from c_common import fsutil
import c_common.tables as _tables
import c_parser.info as _info
import c_parser.match as _match
@ -13,6 +16,30 @@ EXTRA_COLUMNS = [
]
def get_known(known, extracolumns=None, *,
              analyze_resolved=None,
              handle_unresolved=True,
              relroot=fsutil.USE_CWD,
              ):
    """Return the result of analyze_known() for the given "known" data.

    If "known" is a str then it is first passed to read_known(), along
    with "extracolumns" and "relroot", and the loaded data is analyzed.
    Anything else is handed to analyze_known() unchanged.
    """
    loaded = known
    if isinstance(loaded, str):
        loaded = read_known(loaded, extracolumns, relroot)
    return analyze_known(
        loaded,
        analyze_resolved=analyze_resolved,
        handle_unresolved=handle_unresolved,
    )
def read_known(infile, extracolumns=None, relroot=fsutil.USE_CWD):
    """Return a dict mapping each decl read from "infile" to its extra data.

    The rows come from _parser.iter_decls_tsv().  EXTRA_COLUMNS is always
    requested, followed by any caller-supplied "extracolumns".
    """
    columns = EXTRA_COLUMNS + [*(extracolumns or ())]
    rows = _parser.iter_decls_tsv(infile, columns, relroot)
    return {decl: extra for decl, extra in rows}
def analyze_known(known, *,
analyze_resolved=None,
handle_unresolved=True,
@ -34,32 +61,8 @@ def analyze_known(known, *,
return types, typespecs
def get_known(known, extracolumns=None, *,
analyze_resolved=None,
handle_unresolved=True,
relroot=None,
):
if isinstance(known, str):
known = read_known(known, extracolumns, relroot)
return analyze_known(
known,
handle_unresolved=handle_unresolved,
analyze_resolved=analyze_resolved,
)
def read_known(infile, extracolumns=None, relroot=None):
extracolumns = EXTRA_COLUMNS + (
list(extracolumns) if extracolumns else []
)
known = {}
for decl, extra in _parser.iter_decls_tsv(infile, extracolumns, relroot):
known[decl] = extra
return known
def write_known(rows, outfile, extracolumns=None, *,
relroot=None,
relroot=fsutil.USE_CWD,
backup=True,
):
extracolumns = EXTRA_COLUMNS + (
@ -86,22 +89,34 @@ IGNORED_COLUMNS = [
IGNORED_HEADER = '\t'.join(IGNORED_COLUMNS)
def read_ignored(infile):
return dict(_iter_ignored(infile))
def read_ignored(infile, relroot=fsutil.USE_CWD):
    """Return a dict of the (varid, reason) pairs from _iter_ignored()."""
    return {
        varid: reason
        for varid, reason in _iter_ignored(infile, relroot)
    }
def _iter_ignored(infile):
def _iter_ignored(infile, relroot):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
bogus = {_tables.EMPTY, _tables.UNKNOWN}
for row in _tables.read_table(infile, IGNORED_HEADER, sep='\t'):
*varidinfo, reason = row
if _tables.EMPTY in varidinfo or _tables.UNKNOWN in varidinfo:
varidinfo = tuple(None if v in bogus else v
for v in varidinfo)
if reason in bogus:
reason = None
varid = _info.DeclID.from_row(varidinfo)
varid = varid.fix_filename(relroot, formatted=False, fixroot=False)
yield varid, reason
def write_ignored(variables, outfile):
def write_ignored(variables, outfile, relroot=fsutil.USE_CWD):
raise NotImplementedError
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
reason = '???'
#if not isinstance(varid, DeclID):
# varid = getattr(varid, 'parsed', varid).id
decls = (d.fix_filename(relroot, fixroot=False) for d in decls)
_tables.write_table(
outfile,
IGNORED_HEADER,

View File

@ -1,5 +1,7 @@
from collections import namedtuple
import os.path
from c_common import fsutil
from c_common.clsutil import classonly
import c_common.misc as _misc
from c_parser.info import (
@ -223,8 +225,9 @@ class Analyzed:
else:
return UNKNOWN not in self.typedecl
def fix_filename(self, relroot):
self.item.fix_filename(relroot)
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
self.item.fix_filename(relroot, **kwargs)
return self
def as_rowdata(self, columns=None):
# XXX finish!
@ -309,9 +312,11 @@ class Analysis:
else:
return self._analyzed[key]
def fix_filenames(self, relroot):
def fix_filenames(self, relroot=fsutil.USE_CWD, **kwargs):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
for item in self._analyzed:
item.fix_filename(relroot)
item.fix_filename(relroot, fixroot=False, **kwargs)
def _add_result(self, info, resolved):
analyzed = type(self).build_item(info, resolved)

View File

@ -8,6 +8,9 @@ import stat
from .iterutil import iter_many
USE_CWD = object()
C_SOURCE_SUFFIXES = ('.c', '.h')
@ -29,6 +32,78 @@ def create_backup(old, backup=None):
return backup
##################################
# filenames
def fix_filename(filename, relroot=USE_CWD, *,
                 fixroot=True,
                 _badprefix=f'..{os.path.sep}',
                 ):
    """Return a normalized, absolute-path copy of the given filename.

    With no relroot (or USE_CWD) the filename is resolved with
    os.path.abspath().  Otherwise the work is delegated to
    _fix_filename(), with relroot first made absolute unless "fixroot"
    is false.
    """
    if relroot and relroot is not USE_CWD:
        root = os.path.abspath(relroot) if fixroot else relroot
        return _fix_filename(filename, root)
    return os.path.abspath(filename)
def _fix_filename(filename, relroot, *,
                  _badprefix=f'..{os.path.sep}',
                  ):
    """Return "filename" as a normalized path anchored at "relroot".

    A relative filename is joined onto relroot.  An absolute filename
    must already be located under relroot and is returned normalized.
    relroot is used as-is (the callers in this module pass an absolute
    path).

    Raises ValueError if the filename resolves outside of relroot.
    """
    orig = filename

    # First we normalize.
    filename = os.path.normpath(filename)
    # A bare ".." escapes the root just as much as "../spam" does, but it
    # does not start with the "../" prefix, so check for it explicitly.
    if filename == os.path.pardir or filename.startswith(_badprefix):
        raise ValueError(f'bad filename {orig!r} (resolves beyond relative root)')

    # Now make sure it is absolute (relative to relroot).
    if not os.path.isabs(filename):
        filename = os.path.join(relroot, filename)
    else:
        # Round-trip through relpath(): the result only matches when the
        # filename sits under relroot.
        relpath = os.path.relpath(filename, relroot)
        if os.path.join(relroot, relpath) != filename:
            raise ValueError(f'expected {relroot!r} as root, got {orig!r}')

    return filename
def fix_filenames(filenames, relroot=USE_CWD):
    """Return (filenames, relroot) with the filenames made absolute.

    The returned filenames are a lazy generator.  When a real relroot is
    given it is made absolute and each filename is resolved against it
    with _fix_filename(); otherwise each filename goes through
    os.path.abspath() and relroot is returned unchanged.
    """
    if relroot and relroot is not USE_CWD:
        relroot = os.path.abspath(relroot)
        fixed = (_fix_filename(name, relroot) for name in filenames)
    else:
        fixed = (os.path.abspath(name) for name in filenames)
    return fixed, relroot
def format_filename(filename, relroot=USE_CWD, *,
                    fixroot=True,
                    normalize=True,
                    _badprefix=f'..{os.path.sep}',
                    ):
    """Return a consistent relative-path representation of the filename.

    If "normalize" is true the filename is first passed through
    os.path.normpath().  Then:

    * relroot is None: the filename is returned without being made
      relative;
    * relroot is USE_CWD: the result is relative to the current
      working directory;
    * otherwise: the result is relative to relroot, which is made
      absolute first unless "fixroot" is false.

    Raises ValueError if "fixroot" is false and relroot is empty, or if
    the resulting path points outside of the root.
    """
    orig = filename
    if normalize:
        filename = os.path.normpath(filename)
    if relroot is None:
        # Otherwise leave it as-is.
        return filename
    elif relroot is USE_CWD:
        # Make it relative to CWD.
        filename = os.path.relpath(filename)
    else:
        # Make it relative to "relroot".
        if fixroot:
            relroot = os.path.abspath(relroot)
        elif not relroot:
            raise ValueError('missing relroot')
        filename = os.path.relpath(filename, relroot)
    # A bare ".." (the parent of the root) is outside the root too, but
    # does not start with the "../" prefix, so check for it explicitly.
    if filename == os.path.pardir or filename.startswith(_badprefix):
        raise ValueError(f'bad filename {orig!r} (resolves beyond relative root)')
    return filename
##################################
# find files
@ -54,34 +129,29 @@ def match_glob(filename, pattern):
return fnmatch.fnmatch(filename, pattern.replace('**/', '', 1))
def iter_filenames(filenames, *,
start=None,
include=None,
exclude=None,
):
def process_filenames(filenames, *,
start=None,
include=None,
exclude=None,
relroot=USE_CWD,
):
if relroot and relroot is not USE_CWD:
relroot = os.path.abspath(relroot)
if start:
start = fix_filename(start, relroot, fixroot=False)
if include:
include = set(fix_filename(v, relroot, fixroot=False)
for v in include)
if exclude:
exclude = set(fix_filename(v, relroot, fixroot=False)
for v in exclude)
onempty = Exception('no filenames provided')
for filename, solo in iter_many(filenames, onempty):
filename = fix_filename(filename, relroot, fixroot=False)
relfile = format_filename(filename, relroot, fixroot=False, normalize=False)
check, start = _get_check(filename, start, include, exclude)
yield filename, check, solo
# filenames = iter(filenames or ())
# try:
# first = next(filenames)
# except StopIteration:
# raise Exception('no filenames provided')
# try:
# second = next(filenames)
# except StopIteration:
# check, _ = _get_check(first, start, include, exclude)
# yield first, check, False
# return
#
# check, start = _get_check(first, start, include, exclude)
# yield first, check, True
# check, start = _get_check(second, start, include, exclude)
# yield second, check, True
# for filename in filenames:
# check, start = _get_check(filename, start, include, exclude)
# yield filename, check, True
yield filename, relfile, check, solo
def expand_filenames(filenames):

View File

@ -307,7 +307,9 @@ def add_file_filtering_cli(parser, *, excluded=None):
exclude=tuple(_parse_files(_exclude)),
# We use the default for "show_header"
)
ns[key] = (lambda files: fsutil.iter_filenames(files, **kwargs))
def process_filenames(filenames, relroot=None):
return fsutil.process_filenames(filenames, relroot=relroot, **kwargs)
ns[key] = process_filenames
return process_args
@ -529,42 +531,46 @@ def set_command(name, add_cli):
##################################
# main() helpers
def filter_filenames(filenames, iter_filenames=None):
for filename, check, _ in _iter_filenames(filenames, iter_filenames):
def filter_filenames(filenames, process_filenames=None, relroot=fsutil.USE_CWD):
    """Yield each filename whose check() reports no reason to skip it.

    Skipped files are logged at debug level along with the reason.
    """
    # We expect each filename to be a normalized, absolute path.
    entries = _iter_filenames(filenames, process_filenames, relroot)
    for filename, _, check, _ in entries:
        reason = check()
        if reason:
            logger.debug(f'{filename}: {reason}')
        else:
            yield filename
def main_for_filenames(filenames, iter_filenames=None):
for filename, check, show in _iter_filenames(filenames, iter_filenames):
def main_for_filenames(filenames, process_filenames=None, relroot=fsutil.USE_CWD):
filenames, relroot = fsutil.fix_filenames(filenames, relroot=relroot)
for filename, relfile, check, show in _iter_filenames(filenames, process_filenames, relroot):
if show:
print()
print(relfile)
print('-------------------------------------------')
print(filename)
if (reason := check()):
print(reason)
continue
yield filename
yield filename, relfile
def _iter_filenames(filenames, iter_files):
if iter_files is None:
iter_files = fsutil.iter_filenames
yield from iter_files(filenames)
def _iter_filenames(filenames, process, relroot):
if process is None:
yield from fsutil.process_filenames(filenames, relroot=relroot)
return
onempty = Exception('no filenames provided')
items = iter_files(filenames)
items = process(filenames, relroot=relroot)
items, peeked = iterutil.peek_and_iter(items)
if not items:
raise onempty
if isinstance(peeked, str):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
check = (lambda: True)
for filename, ismany in iterutil.iter_many(items, onempty):
yield filename, check, ismany
elif len(peeked) == 3:
relfile = fsutil.format_filename(filename, relroot, fixroot=False)
yield filename, relfile, check, ismany
elif len(peeked) == 4:
yield from items
else:
raise NotImplementedError

View File

@ -26,13 +26,14 @@ def fix_row(row, **markers):
unknown = parse_markers(markers.pop('unknown', ('???',)))
row = (val if val else None for val in row)
if not empty:
if not unknown:
return row
return (UNKNOWN if val in unknown else val for val in row)
if unknown:
row = (UNKNOWN if val in unknown else val for val in row)
elif not unknown:
return (EMPTY if val in empty else val for val in row)
return (EMPTY if val in empty else (UNKNOWN if val in unknown else val)
for val in row)
row = (EMPTY if val in empty else val for val in row)
else:
row = (EMPTY if val in empty else (UNKNOWN if val in unknown else val)
for val in row)
return tuple(row)
def _fix_read_default(row):

View File

@ -2,6 +2,7 @@ import logging
import os.path
import sys
from c_common import fsutil
from c_common.scriptutil import (
CLIArgSpec as Arg,
add_verbosity_cli,
@ -64,8 +65,9 @@ def fmt_raw(filename, item, *, showfwd=None):
def fmt_summary(filename, item, *, showfwd=None):
if item.filename and item.filename != os.path.join('.', filename):
if item.filename != filename:
yield f'> {item.filename}'
if showfwd is None:
LINE = ' {lno:>5} {kind:10} {funcname:40} {fwd:1} {name:40} {data}'
else:
@ -172,6 +174,7 @@ def cmd_parse(filenames, *,
fmt='summary',
showfwd=None,
iter_filenames=None,
relroot=None,
**kwargs
):
if 'get_file_preprocessor' not in kwargs:
@ -180,9 +183,10 @@ def cmd_parse(filenames, *,
do_fmt = FORMATS[fmt]
except KeyError:
raise ValueError(f'unsupported fmt {fmt!r}')
for filename in main_for_filenames(filenames, iter_filenames):
for filename, relfile in main_for_filenames(filenames, iter_filenames, relroot):
for item in _iter_parsed(filename, **kwargs):
for line in do_fmt(filename, item, showfwd=showfwd):
item = item.fix_filename(relroot, fixroot=False, normalize=False)
for line in do_fmt(relfile, item, showfwd=showfwd):
print(line)

View File

@ -1,5 +1,6 @@
import os.path
from c_common import fsutil
import c_common.tables as _tables
import c_parser.info as _info
@ -81,21 +82,27 @@ def _get_format_handlers(group, fmt):
# tsv
def iter_decls_tsv(infile, extracolumns=None, relroot=None):
for info, extra in _iter_decls_tsv(infile, extracolumns, relroot):
def iter_decls_tsv(infile, extracolumns=None, relroot=fsutil.USE_CWD):
    """Yield (decl, extra) for each row read by _iter_decls_tsv().

    Each row is turned into a Declaration and its filename is fixed
    against relroot (made absolute once, up front, when one is given).
    """
    explicit_root = relroot and relroot is not fsutil.USE_CWD
    if explicit_root:
        relroot = os.path.abspath(relroot)
    for info, extra in _iter_decls_tsv(infile, extracolumns):
        parsed = _info.Declaration.from_row(info)
        yield parsed.fix_filename(relroot, formatted=False, fixroot=False), extra
def write_decls_tsv(decls, outfile, extracolumns=None, *,
relroot=None,
relroot=fsutil.USE_CWD,
**kwargs
):
if relroot and relroot is not fsutil.USE_CWD:
relroot = os.path.abspath(relroot)
decls = (d.fix_filename(relroot, fixroot=False) for d in decls)
# XXX Move the row rendering here.
_write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs)
_write_decls_tsv(decls, outfile, extracolumns, kwargs)
def _iter_decls_tsv(infile, extracolumns=None, relroot=None):
def _iter_decls_tsv(infile, extracolumns=None):
columns = _get_columns('decls', extracolumns)
for row in _tables.read_table(infile, columns, sep='\t'):
if extracolumns:
@ -104,15 +111,13 @@ def _iter_decls_tsv(infile, extracolumns=None, relroot=None):
else:
declinfo = row
extra = None
if relroot:
# XXX Use something like tables.fix_row() here.
declinfo = [None if v == '-' else v
for v in declinfo]
declinfo[0] = os.path.join(relroot, declinfo[0])
# XXX Use something like tables.fix_row() here.
declinfo = [None if v == '-' else v
for v in declinfo]
yield declinfo, extra
def _write_decls_tsv(decls, outfile, extracolumns, relroot,kwargs):
def _write_decls_tsv(decls, outfile, extracolumns, kwargs):
columns = _get_columns('decls', extracolumns)
if extracolumns:
def render_decl(decl):
@ -121,7 +126,7 @@ def _write_decls_tsv(decls, outfile, extracolumns, relroot,kwargs):
else:
extra = ()
extra += ('???',) * (len(extraColumns) - len(extra))
*row, declaration = _render_known_row(decl, relroot)
*row, declaration = _render_known_row(decl)
row += extra + (declaration,)
return row
else:
@ -129,13 +134,13 @@ def _write_decls_tsv(decls, outfile, extracolumns, relroot,kwargs):
_tables.write_table(
outfile,
header='\t'.join(columns),
rows=(render_decl(d, relroot) for d in decls),
rows=(render_decl(d) for d in decls),
sep='\t',
**kwargs
)
def _render_known_decl(decl, relroot, *,
def _render_known_decl(decl, *,
# These match BASE_COLUMNS + END_COLUMNS[group].
_columns = 'filename parent name kind data'.split(),
):
@ -143,8 +148,6 @@ def _render_known_decl(decl, relroot, *,
# e.g. Analyzed
decl = decl.decl
rowdata = decl.render_rowdata(_columns)
if relroot:
rowdata['filename'] = os.path.relpath(rowdata['filename'], relroot)
return [rowdata[c] or '-' for c in _columns]
# XXX
#return _tables.fix_row(rowdata[c] for c in columns)

View File

@ -3,6 +3,7 @@ import enum
import os.path
import re
from c_common import fsutil
from c_common.clsutil import classonly
import c_common.misc as _misc
import c_common.strutil as _strutil
@ -148,6 +149,16 @@ def get_kind_group(item):
#############################
# low-level
def _fix_filename(filename, relroot, *,
                  formatted=True,
                  **kwargs):
    """Dispatch to fsutil.format_filename() or fsutil.fix_filename().

    "formatted" selects format_filename(); otherwise fix_filename() is
    used.  Any extra keyword arguments are passed through unchanged.
    """
    fix = fsutil.format_filename if formatted else fsutil.fix_filename
    return fix(filename, relroot=relroot, **kwargs)
class FileInfo(namedtuple('FileInfo', 'filename lno')):
@classmethod
def from_raw(cls, raw):
@ -165,8 +176,10 @@ class FileInfo(namedtuple('FileInfo', 'filename lno')):
def __str__(self):
return self.filename
def fix_filename(self, relroot):
filename = os.path.relpath(self.filename, relroot)
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
filename = _fix_filename(self.filename, relroot, **kwargs)
if filename == self.filename:
return self
return self._replace(filename=filename)
@ -194,6 +207,16 @@ class DeclID(namedtuple('DeclID', 'filename funcname name')):
row = _tables.fix_row(row, **markers)
return cls(*row)
# We have to provide _make() because we implemented __new__().
@classmethod
def _make(cls, iterable):
# Build an instance from the values in "iterable" by calling
# cls(*iterable), so the class's own __new__() is used.
try:
return cls(*iterable)
except Exception:
# Construction failed.  Call the inherited _make() with the same
# iterable; if that call raises, its exception propagates instead
# of the original one.  Its return value is discarded.
# NOTE(review): presumably this is meant to surface a more specific
# error from the base class -- confirm.  If "iterable" is a one-shot
# iterator it was already consumed by the cls(*iterable) attempt.
super()._make(iterable)
raise # re-raise
def __new__(cls, filename, funcname, name):
self = super().__new__(
cls,
@ -221,6 +244,12 @@ class DeclID(namedtuple('DeclID', 'filename funcname name')):
return NotImplemented
return self._compare > other
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
    """Return a DeclID with the filename fixed via _fix_filename().

    The same object is returned when the filename does not change.
    """
    fixed = _fix_filename(self.filename, relroot, **kwargs)
    return self if fixed == self.filename else self._replace(filename=fixed)
class ParsedItem(namedtuple('ParsedItem', 'file kind parent name data')):
@ -290,6 +319,12 @@ class ParsedItem(namedtuple('ParsedItem', 'file kind parent name data')):
else:
return self.parent.name
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
    """Return an item whose "file" has had its filename fixed.

    The work is delegated to self.file.fix_filename().  The same object
    is returned when the file info does not change.
    """
    newfile = self.file.fix_filename(relroot, **kwargs)
    return self if newfile == self.file else self._replace(file=newfile)
def as_row(self, columns=None):
if not columns:
columns = self._fields
@ -591,9 +626,10 @@ class HighlevelParsedItem:
)
return self._parsed
def fix_filename(self, relroot):
def fix_filename(self, relroot=fsutil.USE_CWD, **kwargs):
if self.file:
self.file = self.file.fix_filename(relroot)
self.file = self.file.fix_filename(relroot, **kwargs)
return self
def as_rowdata(self, columns=None):
columns, datacolumns, colnames = self._parse_columns(columns)

View File

@ -105,7 +105,11 @@ def cmd_parse(filenames=None, **kwargs):
filenames = _resolve_filenames(filenames)
if 'get_file_preprocessor' not in kwargs:
kwargs['get_file_preprocessor'] = _parser.get_preprocessor()
c_parser.cmd_parse(filenames, **kwargs)
c_parser.cmd_parse(
filenames,
relroot=REPO_ROOT,
**kwargs
)
def _cli_check(parser, **kwargs):
@ -131,6 +135,7 @@ def cmd_analyze(filenames=None, **kwargs):
kwargs['get_file_preprocessor'] = _parser.get_preprocessor(log_err=print)
c_analyzer.cmd_analyze(
filenames,
relroot=REPO_ROOT,
_analyze=_analyzer.analyze,
formats=formats,
**kwargs

View File

@ -84,13 +84,13 @@ def write_known():
def read_ignored():
if not _IGNORED:
_IGNORED.update(_datafiles.read_ignored(IGNORED_FILE))
_IGNORED.update(_datafiles.read_ignored(IGNORED_FILE, relroot=REPO_ROOT))
return dict(_IGNORED)
def write_ignored():
raise NotImplementedError
datafiles.write_ignored(variables, IGNORED_FILE)
_datafiles.write_ignored(variables, IGNORED_FILE, relroot=REPO_ROOT)
def analyze(filenames, *,

View File

@ -162,6 +162,12 @@ Modules/_datetimemodule.c Py_BUILD_CORE 1
Modules/_ctypes/cfield.c Py_BUILD_CORE 1
Modules/_heapqmodule.c Py_BUILD_CORE 1
Modules/_posixsubprocess.c Py_BUILD_CORE 1
Modules/_sre.c Py_BUILD_CORE 1
Modules/_collectionsmodule.c Py_BUILD_CORE 1
Modules/_zoneinfo.c Py_BUILD_CORE 1
Modules/unicodedata.c Py_BUILD_CORE 1
Modules/_cursesmodule.c Py_BUILD_CORE 1
Modules/_ctypes/_ctypes.c Py_BUILD_CORE 1
Objects/stringlib/codecs.h Py_BUILD_CORE 1
Python/ceval_gil.h Py_BUILD_CORE 1
Python/condvar.h Py_BUILD_CORE 1

File diff suppressed because it is too large Load Diff