cpython/Doc/tools/check-warnings.py

#!/usr/bin/env python3
"""
Check the output of running Sphinx in nit-picky mode (missing references).
"""
from __future__ import annotations

import argparse
import itertools
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import TextIO

# Exclude these whether they're dirty or clean,
# because they trigger a rebuild of dirty files.
EXCLUDE_FILES = {
    "Doc/whatsnew/changelog.rst",
}

# Subdirectories of Doc/ to exclude.
EXCLUDE_SUBDIRS = {
    ".env",
    ".venv",
    "env",
    "includes",
    "venv",
}

# Regex pattern to match the parts of a Sphinx warning
WARNING_PATTERN = re.compile(
    r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
)

# Regex pattern to match the line numbers in a Git unified diff
DIFF_PATTERN = re.compile(
    r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
    flags=re.MULTILINE,
)


def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
    """List the files changed between two Git refs, filtered by change type."""
    added_files_result = subprocess.run(
        [
            "git",
            "diff",
            f"--diff-filter={filter_mode}",
            "--name-only",
            f"{ref_a}...{ref_b}",
            "--",
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    added_files = added_files_result.stdout.strip().split("\n")
    return {Path(file.strip()) for file in added_files if file.strip()}


def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
    """List the lines changed between two Git refs for a specific file."""
    diff_output = subprocess.run(
        [
            "git",
            "diff",
            "--unified=0",
            f"{ref_a}...{ref_b}",
            "--",
            str(file),
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    # Scrape line offsets + lengths from diff and convert to line numbers
    line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
    # Removed and added line counts are 1 if not printed
    line_match_values = [
        line_match.groupdict(default=1) for line_match in line_matches
    ]
    line_ints = [
        (int(match_value["lineb"]), int(match_value["added"]))
        for match_value in line_match_values
    ]
    line_ranges = [
        range(line_b, line_b + added) for line_b, added in line_ints
    ]
    line_numbers = list(itertools.chain(*line_ranges))

    return line_numbers


def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
    """Get the line numbers of text in a file object, grouped by paragraph."""
    paragraphs = []
    prev_line = None
    for lineno, line in enumerate(file_obj):
        lineno = lineno + 1
        if prev_line is None or (line.strip() and not prev_line.strip()):
            paragraph = [lineno - 1]
            paragraphs.append(paragraph)
        paragraph.append(lineno)
        prev_line = line
    return paragraphs


def filter_and_parse_warnings(
    warnings: list[str], files: set[Path]
) -> list[re.Match[str]]:
    """Get the warnings matching passed files and parse them with regex."""
    filtered_warnings = [
        warning
        for warning in warnings
        if any(str(file) in warning for file in files)
    ]
    warning_matches = [
        WARNING_PATTERN.fullmatch(warning.strip())
        for warning in filtered_warnings
    ]
    non_null_matches = [warning for warning in warning_matches if warning]
    return non_null_matches


def filter_warnings_by_diff(
    warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
) -> list[re.Match[str]]:
    """Filter the passed per-file warnings to just those on changed lines."""
    diff_lines = get_diff_lines(ref_a, ref_b, file)
    with file.open(encoding="UTF-8") as file_obj:
        paragraphs = get_para_line_numbers(file_obj)
    touched_paras = [
        para_lines
        for para_lines in paragraphs
        if set(diff_lines) & set(para_lines)
    ]
    touched_para_lines = set(itertools.chain(*touched_paras))
    warnings_infile = [
        warning for warning in warnings if str(file) in warning["file"]
    ]
    warnings_touched = [
        warning
        for warning in warnings_infile
        if int(warning["line"]) in touched_para_lines
    ]
    return warnings_touched


def process_touched_warnings(
    warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match[str]]:
    """Filter a list of Sphinx warnings to those affecting touched lines."""
    added_files, modified_files = tuple(
        get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
    )

    warnings_added = filter_and_parse_warnings(warnings, added_files)
    warnings_modified = filter_and_parse_warnings(warnings, modified_files)

    modified_files_warned = {
        file
        for file in modified_files
        if any(str(file) in warning["file"] for warning in warnings_modified)
    }

    warnings_modified_touched = [
        filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)
        for file in modified_files_warned
    ]
    warnings_touched = warnings_added + list(
        itertools.chain(*warnings_modified_touched)
    )

    return warnings_touched


def annotate_diff(
    warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
    """
    Convert Sphinx warning messages to GitHub Actions for changed paragraphs.

    Converts lines like:
        .../Doc/library/cgi.rst:98: WARNING: reference target not found
    to:
        ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found

    See:
    https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
    """
    warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
    print("Emitting doc warnings matching modified lines:")
    for warning in warnings_touched:
        print("::warning file={file},line={line}::{msg}".format_map(warning))
        print(warning[0])
    if not warnings_touched:
        print("None")


def fail_if_regression(
    warnings: list[str], files_with_expected_nits: set[str], files_with_nits: set[str]
) -> int:
    """
    Ensure some files always pass Sphinx nit-picky mode (no missing references).
    These are files which are *not* in .nitignore.
    """
    all_rst = {
        str(rst)
        for rst in Path("Doc/").rglob("*.rst")
        if rst.parts[1] not in EXCLUDE_SUBDIRS
    }
    should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES
    problem_files = sorted(should_be_clean & files_with_nits)
    if problem_files:
        print("\nError: must not contain warnings:\n")
        for filename in problem_files:
            print(filename)
            for warning in warnings:
                if filename in warning:
                    if match := WARNING_PATTERN.fullmatch(warning):
                        print("  {line}: {msg}".format_map(match))
        return -1
    return 0


def fail_if_improved(
    files_with_expected_nits: set[str], files_with_nits: set[str]
) -> int:
    """
    We may have fixed warnings in some files so that the files are now completely clean.
    Good news! Let's add them to .nitignore to prevent regression.
    """
    files_with_no_nits = files_with_expected_nits - files_with_nits
    if files_with_no_nits:
        print("\nCongratulations! You improved:\n")
        for filename in sorted(files_with_no_nits):
            print(filename)
        print("\nPlease remove from Doc/tools/.nitignore\n")
        return -1
    return 0


def main(argv: list[str] | None = None) -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--annotate-diff",
        nargs="*",
        metavar=("BASE_REF", "HEAD_REF"),
        help="Add GitHub Actions annotations on the diff for warnings on "
        "lines changed between the given refs (main and HEAD, by default)",
    )
    parser.add_argument(
        "--fail-if-regression",
        action="store_true",
        help="Fail if known-good files have warnings",
    )
    parser.add_argument(
        "--fail-if-improved",
        action="store_true",
        help="Fail if new files with no nits are found",
    )

    args = parser.parse_args(argv)
    if args.annotate_diff is not None and len(args.annotate_diff) > 2:
        parser.error(
            "--annotate-diff takes between 0 and 2 ref args, not "
            f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"
        )
    exit_code = 0

    wrong_directory_msg = "Must run this script from the repo root"
    assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg

    with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
        warnings = f.read().splitlines()

    cwd = str(Path.cwd()) + os.path.sep
    files_with_nits = {
        warning.removeprefix(cwd).split(":")[0]
        for warning in warnings
        if "Doc/" in warning
    }

    with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
        files_with_expected_nits = {
            filename.strip()
            for filename in clean_files
            if filename.strip() and not filename.startswith("#")
        }

    if args.annotate_diff is not None:
        annotate_diff(warnings, *args.annotate_diff)

    if args.fail_if_regression:
        exit_code += fail_if_regression(
            warnings, files_with_expected_nits, files_with_nits
        )

    if args.fail_if_improved:
        exit_code += fail_if_improved(files_with_expected_nits, files_with_nits)

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`#!/usr/bin/env python3`
			`"""`
			`Check the output of running Sphinx in nit-picky mode (missing references).`
			`"""`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`from __future__ import annotations`

gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`import argparse`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`import itertools`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`import os`
			`import re`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`import subprocess`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`import sys`
			`from pathlib import Path`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`from typing import TextIO`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00
			`# Exclude these whether they're dirty or clean,`
			`# because they trigger a rebuild of dirty files.`
			`EXCLUDE_FILES = {`
			`"Doc/whatsnew/changelog.rst",`
			`}`

			`# Subdirectories of Doc/ to exclude.`
			`EXCLUDE_SUBDIRS = {`
			`".env",`
			`".venv",`
			`"env",`
			`"includes",`
			`"venv",`
			`}`

gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`# Regex pattern to match the parts of a Sphinx warning`
			`WARNING_PATTERN = re.compile(`
			`r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"`
			`)`

			`# Regex pattern to match the line numbers in a Git unified diff`
			`DIFF_PATTERN = re.compile(`
			`r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",`
			`flags=re.MULTILINE,`
			`)`


			`def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:`
			`"""List the files changed between two Git refs, filtered by change type."""`
			`added_files_result = subprocess.run(`
			`[`
			`"git",`
			`"diff",`
			`f"--diff-filter={filter_mode}",`
			`"--name-only",`
			`f"{ref_a}...{ref_b}",`
			`"--",`
			`],`
			`stdout=subprocess.PIPE,`
			`check=True,`
			`text=True,`
			`encoding="UTF-8",`
			`)`

			`added_files = added_files_result.stdout.strip().split("\n")`
			`return {Path(file.strip()) for file in added_files if file.strip()}`


			`def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:`
			`"""List the lines changed between two Git refs for a specific file."""`
			`diff_output = subprocess.run(`
			`[`
			`"git",`
			`"diff",`
			`"--unified=0",`
			`f"{ref_a}...{ref_b}",`
			`"--",`
			`str(file),`
			`],`
			`stdout=subprocess.PIPE,`
			`check=True,`
			`text=True,`
			`encoding="UTF-8",`
			`)`

			`# Scrape line offsets + lengths from diff and convert to line numbers`
			`line_matches = DIFF_PATTERN.finditer(diff_output.stdout)`
			`# Removed and added line counts are 1 if not printed`
			`line_match_values = [`
			`line_match.groupdict(default=1) for line_match in line_matches`
			`]`
			`line_ints = [`
			`(int(match_value["lineb"]), int(match_value["added"]))`
			`for match_value in line_match_values`
			`]`
			`line_ranges = [`
			`range(line_b, line_b + added) for line_b, added in line_ints`
			`]`
			`line_numbers = list(itertools.chain(*line_ranges))`

			`return line_numbers`


			`def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:`
			`"""Get the line numbers of text in a file object, grouped by paragraph."""`
			`paragraphs = []`
			`prev_line = None`
			`for lineno, line in enumerate(file_obj):`
			`lineno = lineno + 1`
			`if prev_line is None or (line.strip() and not prev_line.strip()):`
			`paragraph = [lineno - 1]`
			`paragraphs.append(paragraph)`
			`paragraph.append(lineno)`
			`prev_line = line`
			`return paragraphs`


			`def filter_and_parse_warnings(`
			`warnings: list[str], files: set[Path]`
			`) -> list[re.Match[str]]:`
			`"""Get the warnings matching passed files and parse them with regex."""`
			`filtered_warnings = [`
			`warning`
			`for warning in warnings`
			`if any(str(file) in warning for file in files)`
			`]`
			`warning_matches = [`
			`WARNING_PATTERN.fullmatch(warning.strip())`
			`for warning in filtered_warnings`
			`]`
			`non_null_matches = [warning for warning in warning_matches if warning]`
			`return non_null_matches`


			`def filter_warnings_by_diff(`
			`warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path`
			`) -> list[re.Match[str]]:`
			`"""Filter the passed per-file warnings to just those on changed lines."""`
			`diff_lines = get_diff_lines(ref_a, ref_b, file)`
			`with file.open(encoding="UTF-8") as file_obj:`
			`paragraphs = get_para_line_numbers(file_obj)`
			`touched_paras = [`
			`para_lines`
			`for para_lines in paragraphs`
			`if set(diff_lines) & set(para_lines)`
			`]`
			`touched_para_lines = set(itertools.chain(*touched_paras))`
			`warnings_infile = [`
			`warning for warning in warnings if str(file) in warning["file"]`
			`]`
			`warnings_touched = [`
			`warning`
			`for warning in warnings_infile`
			`if int(warning["line"]) in touched_para_lines`
			`]`
			`return warnings_touched`

gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`def process_touched_warnings(`
			`warnings: list[str], ref_a: str, ref_b: str`
			`) -> list[re.Match[str]]:`
			`"""Filter a list of Sphinx warnings to those affecting touched lines."""`
			`added_files, modified_files = tuple(`
			`get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")`
			`)`

			`warnings_added = filter_and_parse_warnings(warnings, added_files)`
			`warnings_modified = filter_and_parse_warnings(warnings, modified_files)`

			`modified_files_warned = {`
			`file`
			`for file in modified_files`
			`if any(str(file) in warning["file"] for warning in warnings_modified)`
			`}`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`warnings_modified_touched = [`
			`filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)`
			`for file in modified_files_warned`
			`]`
			`warnings_touched = warnings_added + list(`
			`itertools.chain(*warnings_modified_touched)`
			`)`

			`return warnings_touched`


			`def annotate_diff(`
			`warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"`
			`) -> None:`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`"""`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`Convert Sphinx warning messages to GitHub Actions for changed paragraphs.`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00
			`Converts lines like:`
			`.../Doc/library/cgi.rst:98: WARNING: reference target not found`
			`to:`
			`::warning file=.../Doc/library/cgi.rst,line=98::reference target not found`

gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`See:`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message`
			`"""`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)`
			`print("Emitting doc warnings matching modified lines:")`
			`for warning in warnings_touched:`
			`print("::warning file={file},line={line}::{msg}".format_map(warning))`
			`print(warning[0])`
			`if not warnings_touched:`
			`print("None")`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00

			`def fail_if_regression(`
			`warnings: list[str], files_with_expected_nits: set[str], files_with_nits: set[str]`
			`) -> int:`
			`"""`
			`Ensure some files always pass Sphinx nit-picky mode (no missing references).`
			`These are files which are not in .nitignore.`
			`"""`
			`all_rst = {`
			`str(rst)`
			`for rst in Path("Doc/").rglob("*.rst")`
			`if rst.parts[1] not in EXCLUDE_SUBDIRS`
			`}`
			`should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES`
			`problem_files = sorted(should_be_clean & files_with_nits)`
			`if problem_files:`
			`print("\nError: must not contain warnings:\n")`
			`for filename in problem_files:`
			`print(filename)`
			`for warning in warnings:`
			`if filename in warning:`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`if match := WARNING_PATTERN.fullmatch(warning):`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`print(" {line}: {msg}".format_map(match))`
			`return -1`
			`return 0`


			`def fail_if_improved(`
			`files_with_expected_nits: set[str], files_with_nits: set[str]`
			`) -> int:`
			`"""`
			`We may have fixed warnings in some files so that the files are now completely clean.`
			`Good news! Let's add them to .nitignore to prevent regression.`
			`"""`
			`files_with_no_nits = files_with_expected_nits - files_with_nits`
			`if files_with_no_nits:`
			`print("\nCongratulations! You improved:\n")`
			`for filename in sorted(files_with_no_nits):`
			`print(filename)`
			`print("\nPlease remove from Doc/tools/.nitignore\n")`
			`return -1`
			`return 0`


gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`def main(argv: list[str] \| None = None) -> int:`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`parser = argparse.ArgumentParser()`
			`parser.add_argument(`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`"--annotate-diff",`
			`nargs="*",`
			`metavar=("BASE_REF", "HEAD_REF"),`
			`help="Add GitHub Actions annotations on the diff for warnings on "`
			`"lines changed between the given refs (main and HEAD, by default)",`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`)`
			`parser.add_argument(`
			`"--fail-if-regression",`
			`action="store_true",`
			`help="Fail if known-good files have warnings",`
			`)`
			`parser.add_argument(`
			`"--fail-if-improved",`
			`action="store_true",`
			`help="Fail if new files with no nits are found",`
			`)`
gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00
			`args = parser.parse_args(argv)`
			`if args.annotate_diff is not None and len(args.annotate_diff) > 2:`
			`parser.error(`
			`"--annotate-diff takes between 0 and 2 ref args, not "`
			`f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"`
			`)`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`exit_code = 0`

			`wrong_directory_msg = "Must run this script from the repo root"`
			`assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg`

gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`warnings = f.read().splitlines()`

			`cwd = str(Path.cwd()) + os.path.sep`
			`files_with_nits = {`
			`warning.removeprefix(cwd).split(":")[0]`
			`for warning in warnings`
			`if "Doc/" in warning`
			`}`

gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00			`files_with_expected_nits = {`
			`filename.strip()`
			`for filename in clean_files`
			`if filename.strip() and not filename.startswith("#")`
			`}`

gh-101100: Only show GitHub check annotations on changed doc paragraphs (#108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-08-18 21:43:28 -03:00			`if args.annotate_diff is not None:`
			`annotate_diff(warnings, *args.annotate_diff)`
gh-101100: Docs: Check Sphinx warnings and fail if improved (#106460) 2023-07-22 05:12:43 -03:00
			`if args.fail_if_regression:`
			`exit_code += fail_if_regression(`
			`warnings, files_with_expected_nits, files_with_nits`
			`)`

			`if args.fail_if_improved:`
			`exit_code += fail_if_improved(files_with_expected_nits, files_with_nits)`

			`return exit_code`


			`if __name__ == "__main__":`
			`sys.exit(main())`