gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065)

* Only show GitHub check annotations on changed doc paragraphs
* Improve check-warnings script arg parsing following Hugo's suggestions
* Factor filtering warnings by modified diffs into helper function
* Build docs on unmerged branch so warning lines match & avoid deep clone

---------

Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
C.A.M. Gerlach 2023-08-18 19:43:28 -05:00 committed by GitHub
parent dc7b630b23
commit eb953d6e44
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 208 additions and 31 deletions

View File

@ -16,8 +16,30 @@ jobs:
name: 'Docs' name: 'Docs'
runs-on: ubuntu-latest runs-on: ubuntu-latest
timeout-minutes: 60 timeout-minutes: 60
env:
branch_base: 'origin/${{ github.event.pull_request.base.ref }}'
branch_pr: 'origin/${{ github.event.pull_request.head.ref }}'
refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}'
refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}'
steps: steps:
- uses: actions/checkout@v3 - name: 'Check out latest PR branch commit'
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
# Adapted from https://github.com/actions/checkout/issues/520#issuecomment-1167205721
- name: 'Fetch commits to get branch diff'
run: |
# Fetch enough history to find a common ancestor commit (aka merge-base):
git fetch origin ${{ env.refspec_pr }} --depth=$(( ${{ github.event.pull_request.commits }} + 1 )) \
--no-tags --prune --no-recurse-submodules
# This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from):
COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 ${{ env.branch_pr }} )
DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" )
# Get all commits since that commit date from the base branch (eg: master or main):
git fetch origin ${{ env.refspec_base }} --shallow-since="${DATE}" \
--no-tags --prune --no-recurse-submodules
- name: 'Set up Python' - name: 'Set up Python'
uses: actions/setup-python@v4 uses: actions/setup-python@v4
with: with:
@ -28,13 +50,6 @@ jobs:
run: make -C Doc/ venv run: make -C Doc/ venv
# To annotate PRs with Sphinx nitpicks (missing references) # To annotate PRs with Sphinx nitpicks (missing references)
- name: 'Get list of changed files'
if: github.event_name == 'pull_request'
id: changed_files
uses: Ana06/get-changed-files@v2.2.0
with:
filter: "Doc/**"
format: csv # works for paths with spaces
- name: 'Build HTML documentation' - name: 'Build HTML documentation'
continue-on-error: true continue-on-error: true
run: | run: |
@ -45,7 +60,7 @@ jobs:
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'
run: | run: |
python Doc/tools/check-warnings.py \ python Doc/tools/check-warnings.py \
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \ --annotate-diff '${{ env.branch_base }}' '${{ env.branch_pr }}' \
--fail-if-regression \ --fail-if-regression \
--fail-if-improved --fail-if-improved

View File

@ -2,12 +2,16 @@
""" """
Check the output of running Sphinx in nit-picky mode (missing references). Check the output of running Sphinx in nit-picky mode (missing references).
""" """
from __future__ import annotations
import argparse import argparse
import csv import itertools
import os import os
import re import re
import subprocess
import sys import sys
from pathlib import Path from pathlib import Path
from typing import TextIO
# Exclude these whether they're dirty or clean, # Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files. # because they trigger a rebuild of dirty files.
@ -24,28 +28,178 @@ EXCLUDE_SUBDIRS = {
"venv", "venv",
} }
PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)") # Regex pattern to match the parts of a Sphinx warning
WARNING_PATTERN = re.compile(
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
)
# Regex pattern to match the line numbers in a Git unified diff
DIFF_PATTERN = re.compile(
r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
flags=re.MULTILINE,
)
def check_and_annotate(warnings: list[str], files_to_check: str) -> None: def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
"""List the files changed between two Git refs, filtered by change type."""
added_files_result = subprocess.run(
[
"git",
"diff",
f"--diff-filter={filter_mode}",
"--name-only",
f"{ref_a}...{ref_b}",
"--",
],
stdout=subprocess.PIPE,
check=True,
text=True,
encoding="UTF-8",
)
added_files = added_files_result.stdout.strip().split("\n")
return {Path(file.strip()) for file in added_files if file.strip()}
def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
"""List the lines changed between two Git refs for a specific file."""
diff_output = subprocess.run(
[
"git",
"diff",
"--unified=0",
f"{ref_a}...{ref_b}",
"--",
str(file),
],
stdout=subprocess.PIPE,
check=True,
text=True,
encoding="UTF-8",
)
# Scrape line offsets + lengths from diff and convert to line numbers
line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
# Removed and added line counts are 1 if not printed
line_match_values = [
line_match.groupdict(default=1) for line_match in line_matches
]
line_ints = [
(int(match_value["lineb"]), int(match_value["added"]))
for match_value in line_match_values
]
line_ranges = [
range(line_b, line_b + added) for line_b, added in line_ints
]
line_numbers = list(itertools.chain(*line_ranges))
return line_numbers
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
"""Get the line numbers of text in a file object, grouped by paragraph."""
paragraphs = []
prev_line = None
for lineno, line in enumerate(file_obj):
lineno = lineno + 1
if prev_line is None or (line.strip() and not prev_line.strip()):
paragraph = [lineno - 1]
paragraphs.append(paragraph)
paragraph.append(lineno)
prev_line = line
return paragraphs
def filter_and_parse_warnings(
warnings: list[str], files: set[Path]
) -> list[re.Match[str]]:
"""Get the warnings matching passed files and parse them with regex."""
filtered_warnings = [
warning
for warning in warnings
if any(str(file) in warning for file in files)
]
warning_matches = [
WARNING_PATTERN.fullmatch(warning.strip())
for warning in filtered_warnings
]
non_null_matches = [warning for warning in warning_matches if warning]
return non_null_matches
def filter_warnings_by_diff(
warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
) -> list[re.Match[str]]:
"""Filter the passed per-file warnings to just those on changed lines."""
diff_lines = get_diff_lines(ref_a, ref_b, file)
with file.open(encoding="UTF-8") as file_obj:
paragraphs = get_para_line_numbers(file_obj)
touched_paras = [
para_lines
for para_lines in paragraphs
if set(diff_lines) & set(para_lines)
]
touched_para_lines = set(itertools.chain(*touched_paras))
warnings_infile = [
warning for warning in warnings if str(file) in warning["file"]
]
warnings_touched = [
warning
for warning in warnings_infile
if int(warning["line"]) in touched_para_lines
]
return warnings_touched
def process_touched_warnings(
warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match[str]]:
"""Filter a list of Sphinx warnings to those affecting touched lines."""
added_files, modified_files = tuple(
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
)
warnings_added = filter_and_parse_warnings(warnings, added_files)
warnings_modified = filter_and_parse_warnings(warnings, modified_files)
modified_files_warned = {
file
for file in modified_files
if any(str(file) in warning["file"] for warning in warnings_modified)
}
warnings_modified_touched = [
filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)
for file in modified_files_warned
]
warnings_touched = warnings_added + list(
itertools.chain(*warnings_modified_touched)
)
return warnings_touched
def annotate_diff(
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
""" """
Convert Sphinx warning messages to GitHub Actions. Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
Converts lines like: Converts lines like:
.../Doc/library/cgi.rst:98: WARNING: reference target not found .../Doc/library/cgi.rst:98: WARNING: reference target not found
to: to:
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
Non-matching lines are echoed unchanged. See:
see:
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
""" """
files_to_check = next(csv.reader([files_to_check])) warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
for warning in warnings: print("Emitting doc warnings matching modified lines:")
if any(filename in warning for filename in files_to_check): for warning in warnings_touched:
if match := PATTERN.fullmatch(warning): print("::warning file={file},line={line}::{msg}".format_map(warning))
print("::warning file={file},line={line}::{msg}".format_map(match)) print(warning[0])
if not warnings_touched:
print("None")
def fail_if_regression( def fail_if_regression(
@ -68,7 +222,7 @@ def fail_if_regression(
print(filename) print(filename)
for warning in warnings: for warning in warnings:
if filename in warning: if filename in warning:
if match := PATTERN.fullmatch(warning): if match := WARNING_PATTERN.fullmatch(warning):
print(" {line}: {msg}".format_map(match)) print(" {line}: {msg}".format_map(match))
return -1 return -1
return 0 return 0
@ -91,12 +245,14 @@ def fail_if_improved(
return 0 return 0
def main() -> int: def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
"--check-and-annotate", "--annotate-diff",
help="Comma-separated list of files to check, " nargs="*",
"and annotate those with warnings on GitHub Actions", metavar=("BASE_REF", "HEAD_REF"),
help="Add GitHub Actions annotations on the diff for warnings on "
"lines changed between the given refs (main and HEAD, by default)",
) )
parser.add_argument( parser.add_argument(
"--fail-if-regression", "--fail-if-regression",
@ -108,13 +264,19 @@ def main() -> int:
action="store_true", action="store_true",
help="Fail if new files with no nits are found", help="Fail if new files with no nits are found",
) )
args = parser.parse_args()
args = parser.parse_args(argv)
if args.annotate_diff is not None and len(args.annotate_diff) > 2:
parser.error(
"--annotate-diff takes between 0 and 2 ref args, not "
f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"
)
exit_code = 0 exit_code = 0
wrong_directory_msg = "Must run this script from the repo root" wrong_directory_msg = "Must run this script from the repo root"
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
with Path("Doc/sphinx-warnings.txt").open() as f: with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
warnings = f.read().splitlines() warnings = f.read().splitlines()
cwd = str(Path.cwd()) + os.path.sep cwd = str(Path.cwd()) + os.path.sep
@ -124,15 +286,15 @@ def main() -> int:
if "Doc/" in warning if "Doc/" in warning
} }
with Path("Doc/tools/.nitignore").open() as clean_files: with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
files_with_expected_nits = { files_with_expected_nits = {
filename.strip() filename.strip()
for filename in clean_files for filename in clean_files
if filename.strip() and not filename.startswith("#") if filename.strip() and not filename.startswith("#")
} }
if args.check_and_annotate: if args.annotate_diff is not None:
check_and_annotate(warnings, args.check_and_annotate) annotate_diff(warnings, *args.annotate_diff)
if args.fail_if_regression: if args.fail_if_regression:
exit_code += fail_if_regression( exit_code += fail_if_regression(