bpo-36876: [c-analyzer tool] Additional CLI updates for "capi" command. (gh-23929)

https://bugs.python.org/issue36876
2020-12-25 15:57:30 -07:00 · 2020-12-25 15:57:30 -07:00 · 5ae9be68d9
parent c1ae21c965
commit 5ae9be68d9
3 changed files with 380 additions and 93 deletions
--- a/Tools/c-analyzer/cpython/main.py
+++ b/Tools/c-analyzer/cpython/main.py
@ -241,7 +241,7 @@ def _cli_capi(parser):
        for raw in args.kinds or ():
            for kind in raw.replace(',', ' ').strip().split():
                if kind in _capi.KINDS:
-                    kind.append(kind)
+                    kinds.append(kind)
                else:
                    parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}')
        args.kinds = set(kinds)
@ -249,7 +249,7 @@ def _cli_capi(parser):
    parser.add_argument('--group-by', dest='groupby',
                        choices=['level', 'kind'])

-    parser.add_argument('--format', default='brief')
+    parser.add_argument('--format', default='table')
    parser.add_argument('--summary', dest='format',
                        action='store_const', const='summary')
    def process_format(args, *, argv=None):
@ -259,12 +259,27 @@ def _cli_capi(parser):
            if args.format not in _capi._FORMATS:
                parser.error(f'unsupported format {orig!r}')

+    parser.add_argument('--show-empty', dest='showempty', action='store_true')
+    parser.add_argument('--no-show-empty', dest='showempty', action='store_false')
+    parser.set_defaults(showempty=None)
+
+    # XXX Add --sort-by, --sort and --no-sort.
+
+    parser.add_argument('--ignore', dest='ignored', action='append')
+    def process_ignored(args, *, argv=None):
+        ignored = []
+        for raw in args.ignored or ():
+            ignored.extend(raw.replace(',', ' ').strip().split())
+        args.ignored = ignored or None
+
    parser.add_argument('filenames', nargs='*', metavar='FILENAME')
    process_progress = add_progress_cli(parser)

    return [
        process_levels,
+        process_kinds,
        process_format,
+        process_ignored,
        process_progress,
    ]

@ -273,7 +288,9 @@ def cmd_capi(filenames=None, *,
             levels=None,
             kinds=None,
             groupby='kind',
-             format='brief',
+             format='table',
+             showempty=None,
+             ignored=None,
             track_progress=None,
             verbosity=VERBOSITY,
             **kwargs
@ -282,7 +299,7 @@ def cmd_capi(filenames=None, *,

    filenames = _files.iter_header_files(filenames, levels=levels)
    #filenames = (file for file, _ in main_for_filenames(filenames))
-    if track_progress is not None:
+    if track_progress:
        filenames = track_progress(filenames)
    items = _capi.iter_capi(filenames)
    if levels:
@ -290,7 +307,16 @@ def cmd_capi(filenames=None, *,
    if kinds:
        items = (item for item in items if item.kind in kinds)

-    lines = render(items, groupby=groupby, verbose=verbosity > VERBOSITY)
+    filter = _capi.resolve_filter(ignored)
+    if filter:
+        items = (item for item in items if filter(item, log=lambda msg: logger.log(1, msg)))
+
+    lines = render(
+        items,
+        groupby=groupby,
+        showempty=showempty,
+        verbose=verbosity > VERBOSITY,
+    )
    print()
    for line in lines:
        print(line)
--- a/Tools/c-analyzer/cpython/_capi.py
+++ b/Tools/c-analyzer/cpython/_capi.py
@ -1,4 +1,5 @@
 from collections import namedtuple
+import logging
 import os
 import os.path
 import re
@ -10,6 +11,9 @@ from ._files import iter_header_files, resolve_filename
 from . import REPO_ROOT


+logger = logging.getLogger(__name__)
+
+
 INCLUDE_ROOT = os.path.join(REPO_ROOT, 'Include')
 INCLUDE_CPYTHON = os.path.join(INCLUDE_ROOT, 'cpython')
 INCLUDE_INTERNAL = os.path.join(INCLUDE_ROOT, 'internal')
@ -122,31 +126,34 @@ def _parse_line(line, prev=None):
    results = zip(KINDS, m.groups())
    for kind, name in results:
        if name:
-            clean = last.split('//')[0].strip()
+            clean = last.split('//')[0].rstrip()
            if clean.endswith('*/'):
                clean = clean.split('/*')[0].rstrip()
+
            if kind == 'macro' or kind == 'constant':
-                if clean.endswith('\\'):
-                    return line  # the new "prev"
+                if not clean.endswith('\\'):
+                    return name, kind
            elif kind == 'inline':
-                if not prev:
-                    if not clean.endswith('}'):
-                        return line  # the new "prev"
-                elif clean != '}':
-                    return line  # the new "prev"
-            elif not clean.endswith(';'):
-                return line  # the new "prev"
-            return name, kind
+                if clean.endswith('}'):
+                    if not prev or clean == '}':
+                        return name, kind
+            elif kind == 'func' or kind == 'data':
+                if clean.endswith(';'):
+                    return name, kind
+            else:
+                # This should not be reached.
+                raise NotImplementedError
+            return line  # the new "prev"
    # It was a plain #define.
    return None


-LEVELS = {
+LEVELS = [
    'stable',
    'cpython',
    'private',
    'internal',
-}
+]

 def _get_level(filename, name, *,
               _cpython=INCLUDE_CPYTHON + os.path.sep,
@ -165,6 +172,12 @@ def _get_level(filename, name, *,
    #return '???'


+GROUPINGS = {
+    'kind': KINDS,
+    'level': LEVELS,
+}
+
+
 class CAPIItem(namedtuple('CAPIItem', 'file lno name kind level')):

    @classmethod
@ -231,34 +244,70 @@ def _parse_groupby(raw):
    else:
        raise NotImplementedError

-    if not all(v in ('kind', 'level') for v in groupby):
+    if not all(v in GROUPINGS for v in groupby):
        raise ValueError(f'invalid groupby value {raw!r}')
    return groupby


-def summarize(items, *, groupby='kind'):
-    summary = {}
+def _resolve_full_groupby(groupby):
+    if isinstance(groupby, str):
+        groupby = [groupby]
+    groupings = []
+    for grouping in groupby + list(GROUPINGS):
+        if grouping not in groupings:
+            groupings.append(grouping)
+    return groupings
+
+
+def summarize(items, *, groupby='kind', includeempty=True, minimize=None):
+    if minimize is None:
+        if includeempty is None:
+            minimize = True
+            includeempty = False
+        else:
+            minimize = includeempty
+    elif includeempty is None:
+        includeempty = minimize
+    elif minimize and includeempty:
+        raise ValueError(f'cannot minimize and includeempty at the same time')

    groupby = _parse_groupby(groupby)[0]
-    if groupby == 'kind':
-        outers = KINDS
-        inners = LEVELS
-        def increment(item):
-            summary[item.kind][item.level] += 1
-    elif groupby == 'level':
-        outers = LEVELS
-        inners = KINDS
-        def increment(item):
-            summary[item.level][item.kind] += 1
-    else:
-        raise NotImplementedError
+    _outer, _inner = _resolve_full_groupby(groupby)
+    outers = GROUPINGS[_outer]
+    inners = GROUPINGS[_inner]
+
+    summary = {
+        'totals': {
+            'all': 0,
+            'subs': {o: 0 for o in outers},
+            'bygroup': {o: {i: 0 for i in inners}
+                        for o in outers},
+        },
+    }

-    for outer in outers:
-        summary[outer] = _outer = {}
-        for inner in inners:
-            _outer[inner] = 0
    for item in items:
-        increment(item)
+        outer = getattr(item, _outer)
+        inner = getattr(item, _inner)
+        # Update totals.
+        summary['totals']['all'] += 1
+        summary['totals']['subs'][outer] += 1
+        summary['totals']['bygroup'][outer][inner] += 1
+
+    if not includeempty:
+        subtotals = summary['totals']['subs']
+        bygroup = summary['totals']['bygroup']
+        for outer in outers:
+            if subtotals[outer] == 0:
+                del subtotals[outer]
+                del bygroup[outer]
+                continue
+
+            for inner in inners:
+                if bygroup[outer][inner] == 0:
+                    del bygroup[outer][inner]
+            if minimize:
+                if len(bygroup[outer]) == 1:
+                    del bygroup[outer]

    return summary

@ -289,20 +338,72 @@ def iter_capi(filenames=None):
                yield item


-def _collate(items, groupby):
+def resolve_filter(ignored):
+    if not ignored:
+        return None
+    ignored = set(_resolve_ignored(ignored))
+    def filter(item, *, log=None):
+        if item.name not in ignored:
+            return True
+        if log is not None:
+            log(f'ignored {item.name!r}')
+        return False
+    return filter
+
+
+def _resolve_ignored(ignored):
+    if isinstance(ignored, str):
+        ignored = [ignored]
+    for raw in ignored:
+        if isinstance(raw, str):
+            if raw.startswith('|'):
+                yield raw[1:]
+            elif raw.startswith('<') and raw.endswith('>'):
+                filename = raw[1:-1]
+                try:
+                    infile = open(filename)
+                except Exception as exc:
+                    logger.error(f'ignore file failed: {exc}')
+                    continue
+                logger.log(1, f'reading ignored names from {filename!r}')
+                with infile:
+                    for line in infile:
+                        if not line:
+                            continue
+                        if line[0].isspace():
+                            continue
+                        line = line.partition('#')[0].rstrip()
+                        if line:
+                            # XXX Recurse?
+                            yield line
+            else:
+                raw = raw.strip()
+                if raw:
+                    yield raw
+        else:
+            raise NotImplementedError
+
+
+def _collate(items, groupby, includeempty):
    groupby = _parse_groupby(groupby)[0]
    maxfilename = maxname = maxkind = maxlevel = 0
+
    collated = {}
+    groups = GROUPINGS[groupby]
+    for group in groups:
+        collated[group] = []
+
    for item in items:
        key = getattr(item, groupby)
-        if key in collated:
-            collated[key].append(item)
-        else:
-            collated[key] = [item]
+        collated[key].append(item)
        maxfilename = max(len(item.relfile), maxfilename)
        maxname = max(len(item.name), maxname)
        maxkind = max(len(item.kind), maxkind)
        maxlevel = max(len(item.level), maxlevel)
+    if not includeempty:
+        for group in groups:
+            if not collated[group]:
+                del collated[group]
    maxextra = {
        'kind': maxkind,
        'level': maxlevel,
@ -310,27 +411,55 @@ def _collate(items, groupby):
    return collated, groupby, maxfilename, maxname, maxextra


+def _get_sortkey(sort, _groupby, _columns):
+    if sort is True or sort is None:
+        # For now:
+        def sortkey(item):
+            return (
+                item.level == 'private',
+                LEVELS.index(item.level),
+                KINDS.index(item.kind),
+                os.path.dirname(item.file),
+                os.path.basename(item.file),
+                item.name,
+            )
+        return sortkey
+
+        sortfields = 'not-private level kind dirname basename name'.split()
+    elif isinstance(sort, str):
+        sortfields = sort.replace(',', ' ').strip().split()
+    elif callable(sort):
+        return sort
+    else:
+        raise NotImplementedError
+
+    # XXX Build a sortkey func from sortfields.
+    raise NotImplementedError
+
+
 ##################################
 # CLI rendering

-_LEVEL_MARKERS = {
-    'S': 'stable',
-    'C': 'cpython',
-    'P': 'private',
-    'I': 'internal',
-}
-_KIND_MARKERS = {
-    'F': 'func',
-    'D': 'data',
-    'I': 'inline',
-    'M': 'macro',
-    'C': 'constant',
+_MARKERS = {
+    'level': {
+        'S': 'stable',
+        'C': 'cpython',
+        'P': 'private',
+        'I': 'internal',
+    },
+    'kind': {
+        'F': 'func',
+        'D': 'data',
+        'I': 'inline',
+        'M': 'macro',
+        'C': 'constant',
+    },
 }


 def resolve_format(format):
    if not format:
-        return 'brief'
+        return 'table'
    elif isinstance(format, str) and format in _FORMATS:
        return format
    else:
@ -350,19 +479,29 @@ def get_renderer(format):
        return render


-def render_table(items, *, columns=None, groupby='kind', verbose=False):
+def render_table(items, *,
+                 columns=None,
+                 groupby='kind',
+                 sort=True,
+                 showempty=False,
+                 verbose=False,
+                 ):
+    if groupby is None:
+        groupby = 'kind'
+    if showempty is None:
+        showempty = False
+
    if groupby:
-        collated, groupby, maxfilename, maxname, maxextra = _collate(items, groupby)
-        if groupby == 'kind':
-            groups = KINDS
-            extras = ['level']
-            markers = {'level': _LEVEL_MARKERS}
-        elif groupby == 'level':
-            groups = LEVELS
-            extras = ['kind']
-            markers = {'kind': _KIND_MARKERS}
-        else:
-            raise NotImplementedError
+        (collated, groupby, maxfilename, maxname, maxextra,
+         ) = _collate(items, groupby, showempty)
+        for grouping in GROUPINGS:
+            maxextra[grouping] = max(len(g) for g in GROUPINGS[grouping])
+
+        _, extra = _resolve_full_groupby(groupby)
+        extras = [extra]
+        markers = {extra: _MARKERS[extra]}
+
+        groups = GROUPINGS[groupby]
    else:
        # XXX Support no grouping?
        raise NotImplementedError
@ -373,8 +512,6 @@ def render_table(items, *, columns=None, groupby='kind', verbose=False):
                    for extra in ('kind', 'level')}
    else:
        if verbose:
-            maxextra['kind'] = max(len(kind) for kind in KINDS)
-            maxextra['level'] = max(len(level) for level in LEVELS)
            extracols = [f'{extra}:{maxextra[extra]}'
                         for extra in extras]
            def get_extra(item):
@ -404,32 +541,51 @@ def render_table(items, *, columns=None, groupby='kind', verbose=False):
        ]
    header, div, fmt = build_table(columns)

+    if sort:
+        sortkey = _get_sortkey(sort, groupby, columns)
+
    total = 0
-    for group in groups:
-        if group not in collated:
+    for group, grouped in collated.items():
+        if not showempty and group not in collated:
            continue
        yield ''
        yield f' === {group} ==='
        yield ''
        yield header
        yield div
-        for item in collated[group]:
-            yield fmt.format(
-                filename=item.relfile,
-                name=item.name,
-                **get_extra(item),
-            )
+        if grouped:
+            if sort:
+                grouped = sorted(grouped, key=sortkey)
+            for item in grouped:
+                yield fmt.format(
+                    filename=item.relfile,
+                    name=item.name,
+                    **get_extra(item),
+                )
        yield div
-        subtotal = len(collated[group])
+        subtotal = len(grouped)
        yield f'  sub-total: {subtotal}'
        total += subtotal
    yield ''
    yield f'total: {total}'


-def render_full(items, *, groupby=None, verbose=False):
+def render_full(items, *,
+                groupby='kind',
+                sort=None,
+                showempty=None,
+                verbose=False,
+                ):
+    if groupby is None:
+        groupby = 'kind'
+    if showempty is None:
+        showempty = False
+
+    if sort:
+        sortkey = _get_sortkey(sort, groupby, None)
+
    if groupby:
-        collated, groupby, _, _, _ = _collate(items, groupby)
+        collated, groupby, _, _, _ = _collate(items, groupby, showempty)
        for group, grouped in collated.items():
            yield '#' * 25
            yield f'# {group} ({len(grouped)})'
@ -437,10 +593,14 @@ def render_full(items, *, groupby=None, verbose=False):
            yield ''
            if not grouped:
                continue
+            if sort:
+                grouped = sorted(grouped, key=sortkey)
            for item in grouped:
                yield from _render_item_full(item, groupby, verbose)
                yield ''
    else:
+        if sort:
+            items = sorted(items, key=sortkey)
        for item in items:
            yield from _render_item_full(item, None, verbose)
            yield ''
@ -459,21 +619,47 @@ def _render_item_full(item, groupby, verbose):
        print('  ---------------------------------------')


-def render_summary(items, *, groupby='kind', verbose=False):
-    total = 0
-    summary = summarize(items, groupby=groupby)
-    # XXX Stablize the sorting to match KINDS/LEVELS.
-    for outer, counts in summary.items():
-        subtotal = sum(c for _, c in counts.items())
-        yield f'{outer + ":":20} ({subtotal})'
-        for inner, count in counts.items():
-            yield f'   {inner + ":":9} {count}'
-        total += subtotal
-    yield f'{"total:":20} ({total})'
+def render_summary(items, *,
+                   groupby='kind',
+                   sort=None,
+                   showempty=None,
+                   verbose=False,
+                   ):
+    if groupby is None:
+        groupby = 'kind'
+    summary = summarize(
+        items,
+        groupby=groupby,
+        includeempty=showempty,
+        minimize=None if showempty else not verbose,
+    )
+
+    subtotals = summary['totals']['subs']
+    bygroup = summary['totals']['bygroup']
+    lastempty = False
+    for outer, subtotal in subtotals.items():
+        if bygroup:
+            subtotal = f'({subtotal})'
+            yield f'{outer + ":":20} {subtotal:>8}'
+        else:
+            yield f'{outer + ":":10} {subtotal:>8}'
+        if outer in bygroup:
+            for inner, count in bygroup[outer].items():
+                yield f'   {inner + ":":9} {count}'
+            lastempty = False
+        else:
+            lastempty = True
+
+    total = f'*{summary["totals"]["all"]}*'
+    label = '*total*:'
+    if bygroup:
+        yield f'{label:20} {total:>8}'
+    else:
+        yield f'{label:10} {total:>9}'


 _FORMATS = {
-    'brief': render_table,
+    'table': render_table,
    'full': render_full,
    'summary': render_summary,
 }
--- a/Tools/c-analyzer/must-resolve.sh
+++ b/Tools/c-analyzer/must-resolve.sh
@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+# Any PyObject exposed via the public API is problematic since it must
+# be made per-interpreter.  This involves the following:
+#
+# singletons:
+#  - None
+#  - True
+#  - False
+#  - NotImplemented
+#  - Ellipsis
+# PyTypeObject:
+#  - PyExc*  [97]
+#  - static types  [81]
+#
+# In the non-stable API we could use #defines to do the conversion
+# transparently (though Py_None is perhaps problematic for performance
+# reasons).  However, we can't take that approach with the stable API.
+# That means we must find all functions (& macros) in the stable API
+# (and probably the full public API, for sanity sake) and adjust them.
+# This will involve internally converting from the public object to the
+# corresponding per-interpreter object.
+#
+# Note that the only place this solution fails is with direct pointer
+# equality checks with the public objects.
+
+# XXX What about saying that the stable API is not sub-interpreter
+# compatible?
+
+
+function run_capi() {
+    ./python Tools/c-analyzer/c-analyzer.py capi \
+        --no-progress \
+        --group-by kind \
+        --func --inline --macro \
+        --no-show-empty \
+        --ignore '<must-resolve.ignored>' \
+        $@
+}
+
+echo ''
+echo '#################################################'
+echo '# All API'
+echo '#################################################'
+run_capi --format summary Include/*.h Include/cpython/*.h
+run_capi --format table Include/*.h Include/cpython/*.h
+echo ''
+echo ''
+echo '#################################################'
+echo '# stable API'
+echo '#################################################'
+echo ''
+echo '# public:'
+run_capi --format summary --public --no-show-empty Include/*.h
+echo ''
+echo '# private:'
+run_capi --format summary --private --no-show-empty Include/*.h
+echo ''
+run_capi --format full -v Include/*.h
+#run_capi --format full -v --public Include/*.h
+#run_capi --format full -v --private Include/*.h
+echo ''
+echo '#################################################'
+echo '# cpython API'
+echo '#################################################'
+echo ''
+echo '# public:'
+run_capi --format summary --public --no-show-empty Include/cpython/*.h
+echo ''
+echo '# private:'
+run_capi --format summary --private --no-show-empty Include/cpython/*.h
+echo ''
+run_capi --format full -v Include/cpython/*.h
+#run_capi --format full -v --public Include/cpython/*.h
+#run_capi --format full -v --private Include/cpython/*.h