From 5ae9be68d9f1a628fdc920b647257f94afb77887 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Fri, 25 Dec 2020 15:57:30 -0700
Subject: [PATCH] bpo-36876: [c-analyzer tool] Additional CLI updates for
 "capi" command. (gh-23929)

https://bugs.python.org/issue36876
---
 Tools/c-analyzer/cpython/__main__.py |  36 ++-
 Tools/c-analyzer/cpython/_capi.py    | 362 ++++++++++++++++++++-------
 Tools/c-analyzer/must-resolve.sh     |  75 ++++++
 3 files changed, 380 insertions(+), 93 deletions(-)
 create mode 100755 Tools/c-analyzer/must-resolve.sh

diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py
index 9d29b13ed8f..a11b687214d 100644
--- a/Tools/c-analyzer/cpython/__main__.py
+++ b/Tools/c-analyzer/cpython/__main__.py
@@ -241,7 +241,7 @@ def _cli_capi(parser):
         for raw in args.kinds or ():
             for kind in raw.replace(',', ' ').strip().split():
                 if kind in _capi.KINDS:
-                    kind.append(kind)
+                    kinds.append(kind)
                 else:
                     parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}')
         args.kinds = set(kinds)
@@ -249,7 +249,7 @@ def _cli_capi(parser):
     parser.add_argument('--group-by', dest='groupby',
                         choices=['level', 'kind'])
 
-    parser.add_argument('--format', default='brief')
+    parser.add_argument('--format', default='table')
     parser.add_argument('--summary', dest='format',
                         action='store_const', const='summary')
     def process_format(args, *, argv=None):
@@ -259,12 +259,27 @@ def _cli_capi(parser):
             if args.format not in _capi._FORMATS:
                 parser.error(f'unsupported format {orig!r}')
 
+    parser.add_argument('--show-empty', dest='showempty', action='store_true')
+    parser.add_argument('--no-show-empty', dest='showempty', action='store_false')
+    parser.set_defaults(showempty=None)
+
+    # XXX Add --sort-by, --sort and --no-sort.
+
+    parser.add_argument('--ignore', dest='ignored', action='append')
+    def process_ignored(args, *, argv=None):
+        ignored = []
+        for raw in args.ignored or ():
+            ignored.extend(raw.replace(',', ' ').strip().split())
+        args.ignored = ignored or None
+
     parser.add_argument('filenames', nargs='*', metavar='FILENAME')
     process_progress = add_progress_cli(parser)
 
     return [
         process_levels,
+        process_kinds,
         process_format,
+        process_ignored,
         process_progress,
     ]
 
@@ -273,7 +288,9 @@ def cmd_capi(filenames=None, *,
              levels=None,
              kinds=None,
              groupby='kind',
-             format='brief',
+             format='table',
+             showempty=None,
+             ignored=None,
              track_progress=None,
              verbosity=VERBOSITY,
              **kwargs
@@ -282,7 +299,7 @@ def cmd_capi(filenames=None, *,
 
     filenames = _files.iter_header_files(filenames, levels=levels)
     #filenames = (file for file, _ in main_for_filenames(filenames))
-    if track_progress is not None:
+    if track_progress:
         filenames = track_progress(filenames)
     items = _capi.iter_capi(filenames)
     if levels:
@@ -290,7 +307,16 @@ def cmd_capi(filenames=None, *,
     if kinds:
         items = (item for item in items if item.kind in kinds)
 
-    lines = render(items, groupby=groupby, verbose=verbosity > VERBOSITY)
+    filter = _capi.resolve_filter(ignored)
+    if filter:
+        items = (item for item in items if filter(item, log=lambda msg: logger.log(1, msg)))
+
+    lines = render(
+        items,
+        groupby=groupby,
+        showempty=showempty,
+        verbose=verbosity > VERBOSITY,
+    )
     print()
     for line in lines:
         print(line)
diff --git a/Tools/c-analyzer/cpython/_capi.py b/Tools/c-analyzer/cpython/_capi.py
index 38d7cd3c514..df8159a8cc1 100644
--- a/Tools/c-analyzer/cpython/_capi.py
+++ b/Tools/c-analyzer/cpython/_capi.py
@@ -1,4 +1,5 @@
 from collections import namedtuple
+import logging
 import os
 import os.path
 import re
@@ -10,6 +11,9 @@ from ._files import iter_header_files, resolve_filename
 from . import REPO_ROOT
 
 
+logger = logging.getLogger(__name__)
+
+
 INCLUDE_ROOT = os.path.join(REPO_ROOT, 'Include')
 INCLUDE_CPYTHON = os.path.join(INCLUDE_ROOT, 'cpython')
 INCLUDE_INTERNAL = os.path.join(INCLUDE_ROOT, 'internal')
@@ -122,31 +126,34 @@ def _parse_line(line, prev=None):
     results = zip(KINDS, m.groups())
     for kind, name in results:
         if name:
-            clean = last.split('//')[0].strip()
+            clean = last.split('//')[0].rstrip()
             if clean.endswith('*/'):
                 clean = clean.split('/*')[0].rstrip()
+
             if kind == 'macro' or kind == 'constant':
-                if clean.endswith('\\'):
-                    return line  # the new "prev"
+                if not clean.endswith('\\'):
+                    return name, kind
             elif kind == 'inline':
-                if not prev:
-                    if not clean.endswith('}'):
-                        return line  # the new "prev"
-                elif clean != '}':
-                    return line  # the new "prev"
-            elif not clean.endswith(';'):
-                return line  # the new "prev"
-            return name, kind
+                if clean.endswith('}'):
+                    if not prev or clean == '}':
+                        return name, kind
+            elif kind == 'func' or kind == 'data':
+                if clean.endswith(';'):
+                    return name, kind
+            else:
+                # This should not be reached.
+                raise NotImplementedError
+            return line  # the new "prev"
     # It was a plain #define.
     return None
 
 
-LEVELS = {
+LEVELS = [
     'stable',
     'cpython',
     'private',
     'internal',
-}
+]
 
 def _get_level(filename, name, *,
                _cpython=INCLUDE_CPYTHON + os.path.sep,
@@ -165,6 +172,12 @@ def _get_level(filename, name, *,
     #return '???'
 
 
+GROUPINGS = {
+    'kind': KINDS,
+    'level': LEVELS,
+}
+
+
 class CAPIItem(namedtuple('CAPIItem', 'file lno name kind level')):
 
     @classmethod
@@ -231,34 +244,70 @@ def _parse_groupby(raw):
     else:
         raise NotImplementedError
 
-    if not all(v in ('kind', 'level') for v in groupby):
+    if not all(v in GROUPINGS for v in groupby):
         raise ValueError(f'invalid groupby value {raw!r}')
     return groupby
 
 
-def summarize(items, *, groupby='kind'):
-    summary = {}
+def _resolve_full_groupby(groupby):  # Return the requested grouping(s) followed by the remaining GROUPINGS keys.
+    if isinstance(groupby, str):
+        groupby = [groupby]  # a bare name is treated as a one-item list
+    groupings = []
+    for grouping in groupby + list(GROUPINGS):  # list concatenation, so a non-str groupby must already be a list
+        if grouping not in groupings:  # drop duplicates, keeping first-seen order
+            groupings.append(grouping)
+    return groupings
+
+
+def summarize(items, *, groupby='kind', includeempty=True, minimize=None):
+    if minimize is None:
+        if includeempty is None:
+            minimize = True
+            includeempty = False
+        else:
+            minimize = includeempty
+    elif includeempty is None:
+        includeempty = minimize
+    elif minimize and includeempty:
+        raise ValueError(f'cannot minimize and includeempty at the same time')
 
     groupby = _parse_groupby(groupby)[0]
-    if groupby == 'kind':
-        outers = KINDS
-        inners = LEVELS
-        def increment(item):
-            summary[item.kind][item.level] += 1
-    elif groupby == 'level':
-        outers = LEVELS
-        inners = KINDS
-        def increment(item):
-            summary[item.level][item.kind] += 1
-    else:
-        raise NotImplementedError
+    _outer, _inner = _resolve_full_groupby(groupby)
+    outers = GROUPINGS[_outer]
+    inners = GROUPINGS[_inner]
+
+    summary = {
+        'totals': {
+            'all': 0,
+            'subs': {o: 0 for o in outers},
+            'bygroup': {o: {i: 0 for i in inners}
+                        for o in outers},
+        },
+    }
 
-    for outer in outers:
-        summary[outer] = _outer = {}
-        for inner in inners:
-            _outer[inner] = 0
     for item in items:
-        increment(item)
+        outer = getattr(item, _outer)
+        inner = getattr(item, _inner)
+        # Update totals.
+        summary['totals']['all'] += 1
+        summary['totals']['subs'][outer] += 1
+        summary['totals']['bygroup'][outer][inner] += 1
+
+    if not includeempty:
+        subtotals = summary['totals']['subs']
+        bygroup = summary['totals']['bygroup']
+        for outer in outers:
+            if subtotals[outer] == 0:
+                del subtotals[outer]
+                del bygroup[outer]
+                continue
+
+            for inner in inners:
+                if bygroup[outer][inner] == 0:
+                    del bygroup[outer][inner]
+            if minimize:
+                if len(bygroup[outer]) == 1:
+                    del bygroup[outer]
 
     return summary
 
@@ -289,20 +338,72 @@ def iter_capi(filenames=None):
                 yield item
 
 
-def _collate(items, groupby):
+def resolve_filter(ignored):  # Build a predicate that rejects items whose name is ignored; None if "ignored" is empty.
+    if not ignored:
+        return None
+    ignored = set(_resolve_ignored(ignored))  # expand the raw specs into a set of names
+    def filter(item, *, log=None):  # True -> keep the item, False -> drop it
+        if item.name not in ignored:
+            return True
+        if log is not None:
+            log(f'ignored {item.name!r}')  # report each dropped name through the caller's callback
+        return False
+    return filter
+
+
+def _resolve_ignored(ignored):  # Generate the names to ignore from raw specs (a str or an iterable of str).
+    if isinstance(ignored, str):
+        ignored = [ignored]
+    for raw in ignored:
+        if isinstance(raw, str):
+            if raw.startswith('|'):  # "|NAME": yield the rest as-is (no stripping)
+                yield raw[1:]
+            elif raw.startswith('<') and raw.endswith('>'):  # "<FILE>": read names from that file
+                filename = raw[1:-1]
+                try:
+                    infile = open(filename)
+                except Exception as exc:
+                    logger.error(f'ignore file failed: {exc}')  # best-effort: log and move on to the next spec
+                    continue
+                logger.log(1, f'reading ignored names from {filename!r}')
+                with infile:
+                    for line in infile:
+                        if not line:
+                            continue
+                        if line[0].isspace():  # skips blank lines and lines that start with whitespace
+                            continue
+                        line = line.partition('#')[0].rstrip()  # drop everything from the first "#" on
+                        if line:
+                            # XXX Recurse?
+                            yield line
+            else:
+                raw = raw.strip()
+                if raw:
+                    yield raw
+        else:
+            raise NotImplementedError  # only str specs are supported
+
+
+def _collate(items, groupby, includeempty):
     groupby = _parse_groupby(groupby)[0]
     maxfilename = maxname = maxkind = maxlevel = 0
+
     collated = {}
+    groups = GROUPINGS[groupby]
+    for group in groups:
+        collated[group] = []
+
     for item in items:
         key = getattr(item, groupby)
-        if key in collated:
-            collated[key].append(item)
-        else:
-            collated[key] = [item]
+        collated[key].append(item)
         maxfilename = max(len(item.relfile), maxfilename)
         maxname = max(len(item.name), maxname)
         maxkind = max(len(item.kind), maxkind)
         maxlevel = max(len(item.level), maxlevel)
+    if not includeempty:
+        for group in groups:
+            if not collated[group]:
+                del collated[group]
     maxextra = {
         'kind': maxkind,
         'level': maxlevel,
@@ -310,27 +411,55 @@ def _collate(items, groupby):
     return collated, groupby, maxfilename, maxname, maxextra
 
 
+def _get_sortkey(sort, _groupby, _columns):  # Return a sort-key function for items; _groupby and _columns are not used here.
+    if sort is True or sort is None:
+        # For now:
+        def sortkey(item):
+            return (
+                item.level == 'private',  # False sorts before True, so non-private items come first
+                LEVELS.index(item.level),
+                KINDS.index(item.kind),
+                os.path.dirname(item.file),
+                os.path.basename(item.file),
+                item.name,
+            )
+        return sortkey
+
+        sortfields = 'not-private level kind dirname basename name'.split()  # NOTE(review): unreachable after the return above
+    elif isinstance(sort, str):
+        sortfields = sort.replace(',', ' ').strip().split()
+    elif callable(sort):
+        return sort  # a caller-supplied key function is used unchanged
+    else:
+        raise NotImplementedError
+
+    # XXX Build a sortkey func from sortfields.
+    raise NotImplementedError  # so a str "sort" is parsed but not supported yet
+
+
 ##################################
 # CLI rendering
 
-_LEVEL_MARKERS = {
-    'S': 'stable',
-    'C': 'cpython',
-    'P': 'private',
-    'I': 'internal',
-}
-_KIND_MARKERS = {
-    'F': 'func',
-    'D': 'data',
-    'I': 'inline',
-    'M': 'macro',
-    'C': 'constant',
+_MARKERS = {
+    'level': {
+        'S': 'stable',
+        'C': 'cpython',
+        'P': 'private',
+        'I': 'internal',
+    },
+    'kind': {
+        'F': 'func',
+        'D': 'data',
+        'I': 'inline',
+        'M': 'macro',
+        'C': 'constant',
+    },
 }
 
 
 def resolve_format(format):
     if not format:
-        return 'brief'
+        return 'table'
     elif isinstance(format, str) and format in _FORMATS:
         return format
     else:
@@ -350,19 +479,29 @@ def get_renderer(format):
         return render
 
 
-def render_table(items, *, columns=None, groupby='kind', verbose=False):
+def render_table(items, *,
+                 columns=None,
+                 groupby='kind',
+                 sort=True,
+                 showempty=False,
+                 verbose=False,
+                 ):
+    if groupby is None:
+        groupby = 'kind'
+    if showempty is None:
+        showempty = False
+
     if groupby:
-        collated, groupby, maxfilename, maxname, maxextra = _collate(items, groupby)
-        if groupby == 'kind':
-            groups = KINDS
-            extras = ['level']
-            markers = {'level': _LEVEL_MARKERS}
-        elif groupby == 'level':
-            groups = LEVELS
-            extras = ['kind']
-            markers = {'kind': _KIND_MARKERS}
-        else:
-            raise NotImplementedError
+        (collated, groupby, maxfilename, maxname, maxextra,
+         ) = _collate(items, groupby, showempty)
+        for grouping in GROUPINGS:
+            maxextra[grouping] = max(len(g) for g in GROUPINGS[grouping])
+
+        _, extra = _resolve_full_groupby(groupby)
+        extras = [extra]
+        markers = {extra: _MARKERS[extra]}
+
+        groups = GROUPINGS[groupby]
     else:
         # XXX Support no grouping?
         raise NotImplementedError
@@ -373,8 +512,6 @@ def render_table(items, *, columns=None, groupby='kind', verbose=False):
                     for extra in ('kind', 'level')}
     else:
         if verbose:
-            maxextra['kind'] = max(len(kind) for kind in KINDS)
-            maxextra['level'] = max(len(level) for level in LEVELS)
             extracols = [f'{extra}:{maxextra[extra]}'
                          for extra in extras]
             def get_extra(item):
@@ -404,32 +541,51 @@ def render_table(items, *, columns=None, groupby='kind', verbose=False):
         ]
     header, div, fmt = build_table(columns)
 
+    if sort:
+        sortkey = _get_sortkey(sort, groupby, columns)
+
     total = 0
-    for group in groups:
-        if group not in collated:
+    for group, grouped in collated.items():
+        if not showempty and group not in collated:
             continue
         yield ''
         yield f' === {group} ==='
         yield ''
         yield header
         yield div
-        for item in collated[group]:
-            yield fmt.format(
-                filename=item.relfile,
-                name=item.name,
-                **get_extra(item),
-            )
+        if grouped:
+            if sort:
+                grouped = sorted(grouped, key=sortkey)
+            for item in grouped:
+                yield fmt.format(
+                    filename=item.relfile,
+                    name=item.name,
+                    **get_extra(item),
+                )
         yield div
-        subtotal = len(collated[group])
+        subtotal = len(grouped)
         yield f'  sub-total: {subtotal}'
         total += subtotal
     yield ''
     yield f'total: {total}'
 
 
-def render_full(items, *, groupby=None, verbose=False):
+def render_full(items, *,
+                groupby='kind',
+                sort=None,
+                showempty=None,
+                verbose=False,
+                ):
+    if groupby is None:
+        groupby = 'kind'
+    if showempty is None:
+        showempty = False
+
+    if sort:
+        sortkey = _get_sortkey(sort, groupby, None)
+
     if groupby:
-        collated, groupby, _, _, _ = _collate(items, groupby)
+        collated, groupby, _, _, _ = _collate(items, groupby, showempty)
         for group, grouped in collated.items():
             yield '#' * 25
             yield f'# {group} ({len(grouped)})'
@@ -437,10 +593,14 @@ def render_full(items, *, groupby=None, verbose=False):
             yield ''
             if not grouped:
                 continue
+            if sort:
+                grouped = sorted(grouped, key=sortkey)
             for item in grouped:
                 yield from _render_item_full(item, groupby, verbose)
                 yield ''
     else:
+        if sort:
+            items = sorted(items, key=sortkey)
         for item in items:
             yield from _render_item_full(item, None, verbose)
             yield ''
@@ -459,21 +619,47 @@ def _render_item_full(item, groupby, verbose):
         print('  ---------------------------------------')
 
 
-def render_summary(items, *, groupby='kind', verbose=False):
-    total = 0
-    summary = summarize(items, groupby=groupby)
-    # XXX Stablize the sorting to match KINDS/LEVELS.
-    for outer, counts in summary.items():
-        subtotal = sum(c for _, c in counts.items())
-        yield f'{outer + ":":20} ({subtotal})'
-        for inner, count in counts.items():
-            yield f'   {inner + ":":9} {count}'
-        total += subtotal
-    yield f'{"total:":20} ({total})'
+def render_summary(items, *,  # Yield the lines of a count summary: one per outer group, optional inner counts, then a total.
+                   groupby='kind',
+                   sort=None,  # accepted but not referenced in this function
+                   showempty=None,
+                   verbose=False,
+                   ):
+    if groupby is None:
+        groupby = 'kind'
+    summary = summarize(
+        items,
+        groupby=groupby,
+        includeempty=showempty,
+        minimize=None if showempty else not verbose,  # leave it to summarize() when showing empty groups
+    )
+
+    subtotals = summary['totals']['subs']
+    bygroup = summary['totals']['bygroup']
+    lastempty = False  # NOTE(review): assigned here and in the loop but never read in this function
+    for outer, subtotal in subtotals.items():
+        if bygroup:  # some group still has a breakdown: use the wide layout with "(N)" sub-totals
+            subtotal = f'({subtotal})'
+            yield f'{outer + ":":20} {subtotal:>8}'
+        else:
+            yield f'{outer + ":":10} {subtotal:>8}'
+        if outer in bygroup:
+            for inner, count in bygroup[outer].items():
+                yield f'   {inner + ":":9} {count}'
+            lastempty = False
+        else:
+            lastempty = True
+
+    total = f'*{summary["totals"]["all"]}*'
+    label = '*total*:'
+    if bygroup:
+        yield f'{label:20} {total:>8}'
+    else:
+        yield f'{label:10} {total:>9}'
 
 
 _FORMATS = {
-    'brief': render_table,
+    'table': render_table,
     'full': render_full,
     'summary': render_summary,
 }
diff --git a/Tools/c-analyzer/must-resolve.sh b/Tools/c-analyzer/must-resolve.sh
new file mode 100755
index 00000000000..191e42241f5
--- /dev/null
+++ b/Tools/c-analyzer/must-resolve.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+# Any PyObject exposed via the public API is problematic since it must
+# be made per-interpreter.  This involves the following:
+#
+# singletons:
+#  - None
+#  - True
+#  - False
+#  - NotImplemented
+#  - Ellipsis
+# PyTypeObject:
+#  - PyExc*  [97]
+#  - static types  [81]
+#
+# In the non-stable API we could use #defines to do the conversion
+# transparently (though Py_None is perhaps problematic for performance
+# reasons).  However, we can't take that approach with the stable API.
+# That means we must find all functions (& macros) in the stable API
+# (and probably the full public API, for sanity's sake) and adjust them.
+# This will involve internally converting from the public object to the
+# corresponding per-interpreter object.
+#
+# Note that the only place this solution fails is with direct pointer
+# equality checks with the public objects.
+
+# XXX What about saying that the stable API is not sub-interpreter
+# compatible?
+
+
+function run_capi() {  # Run the "capi" command with the shared options; extra arguments are passed through.
+    ./python Tools/c-analyzer/c-analyzer.py capi \
+        --no-progress \
+        --group-by kind \
+        --func --inline --macro \
+        --no-show-empty \
+        --ignore '<must-resolve.ignored>' \
+        "$@"  # quoted so each argument (e.g. a path with spaces) stays one word
+}
+
+echo ''
+echo '#################################################'
+echo '# All API'
+echo '#################################################'
+run_capi --format summary Include/*.h Include/cpython/*.h
+run_capi --format table Include/*.h Include/cpython/*.h
+echo ''
+echo ''
+echo '#################################################'
+echo '# stable API'
+echo '#################################################'
+echo ''
+echo '# public:'
+run_capi --format summary --public --no-show-empty Include/*.h
+echo ''
+echo '# private:'
+run_capi --format summary --private --no-show-empty Include/*.h
+echo ''
+run_capi --format full -v Include/*.h
+#run_capi --format full -v --public Include/*.h
+#run_capi --format full -v --private Include/*.h
+echo ''
+echo '#################################################'
+echo '# cpython API'
+echo '#################################################'
+echo ''
+echo '# public:'
+run_capi --format summary --public --no-show-empty Include/cpython/*.h
+echo ''
+echo '# private:'
+run_capi --format summary --private --no-show-empty Include/cpython/*.h
+echo ''
+run_capi --format full -v Include/cpython/*.h
+#run_capi --format full -v --public Include/cpython/*.h
+#run_capi --format full -v --private Include/cpython/*.h