cpython/Tools/clinic/cpp.py

import dataclasses as dc
import re
import sys
from typing import NoReturn


# A (token, condition) pair as stored on Monitor.stack: the directive token
# that opened the branch ('if' or 'elif') and the condition text for it.
TokenAndCondition = tuple[str, str]
# The stack of currently open conditional branches; entries are appended and
# popped at the end of the list.
TokenStack = list[TokenAndCondition]

def negate(condition: str) -> str:
    """
    Return the CPP conditional that is the logical opposite of *condition*.

    A single leading "!" is stripped when present; otherwise a "!" is
    prepended.  The text is not parsed any further than that.
    """
    already_negated = condition.startswith('!')
    return condition[1:] if already_negated else "!" + condition


# Matcher for a condition that is exactly one ``defined(NAME)`` test.
# Monitor.writeline() uses it to decide whether an #if / #elif condition
# must be wrapped in parentheses before being pushed on the stack.
is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match


@dc.dataclass(repr=False)
class Monitor:
    """
    A simple C preprocessor that scans C source and computes, line by line,
    what the current C preprocessor #if state is.

    Doesn't handle everything--for example, if you have /* inside a C string,
    without a matching */ (also inside a C string), or with a */ inside a C
    string but on another line and with preprocessor macros in between...
    the parser will get lost.

    Anyway this implementation seems to work well enough for the CPython sources.
    """
    filename: str | None = None
    _: dc.KW_ONLY
    verbose: bool = False

    def __post_init__(self) -> None:
        self.stack: TokenStack = []
        self.in_comment = False
        self.continuation: str | None = None
        self.line_number = 0

    def __repr__(self) -> str:
        parts = (
            str(id(self)),
            f"line={self.line_number}",
            f"condition={self.condition()!r}"
        )
        return f"<clinic.Monitor {' '.join(parts)}>"

    def status(self) -> str:
        return str(self.line_number).rjust(4) + ": " + self.condition()

    def condition(self) -> str:
        """
        Returns the current preprocessor state, as a single #if condition.
        """
        return " && ".join(condition for token, condition in self.stack)

    def fail(self, *a: object) -> NoReturn:
        if self.filename:
            filename = " " + self.filename
        else:
            filename = ''
        print("Error at" + filename, "line", self.line_number, ":")
        print("   ", ' '.join(str(x) for x in a))
        sys.exit(-1)

    def writeline(self, line: str) -> None:
        self.line_number += 1
        line = line.strip()

        def pop_stack() -> TokenAndCondition:
            if not self.stack:
                self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
            return self.stack.pop()

        if self.continuation:
            line = self.continuation + line
            self.continuation = None

        if not line:
            return

        if line.endswith('\\'):
            self.continuation = line[:-1].rstrip() + " "
            return

        # we have to ignore preprocessor commands inside comments
        #
        # we also have to handle this:
        #     /* start
        #     ...
        #     */   /*    <-- tricky!
        #     ...
        #     */
        # and this:
        #     /* start
        #     ...
        #     */   /* also tricky! */
        if self.in_comment:
            if '*/' in line:
                # snip out the comment and continue
                #
                # GCC allows
                #    /* comment
                #    */ #include <stdio.h>
                # maybe other compilers too?
                _, _, line = line.partition('*/')
                self.in_comment = False

        while True:
            if '/*' in line:
                if self.in_comment:
                    self.fail("Nested block comment!")

                before, _, remainder = line.partition('/*')
                comment, comment_ends, after = remainder.partition('*/')
                if comment_ends:
                    # snip out the comment
                    line = before.rstrip() + ' ' + after.lstrip()
                    continue
                # comment continues to eol
                self.in_comment = True
                line = before.rstrip()
            break

        # we actually have some // comments
        # (but block comments take precedence)
        before, line_comment, comment = line.partition('//')
        if line_comment:
            line = before.rstrip()

        if not line.startswith('#'):
            return

        line = line[1:].lstrip()
        assert line

        fields = line.split()
        token = fields[0].lower()
        condition = ' '.join(fields[1:]).strip()

        if token in {'if', 'ifdef', 'ifndef', 'elif'}:
            if not condition:
                self.fail("Invalid format for #" + token + " line: no argument!")
            if token in {'if', 'elif'}:
                if not is_a_simple_defined(condition):
                    condition = "(" + condition + ")"
                if token == 'elif':
                    previous_token, previous_condition = pop_stack()
                    self.stack.append((previous_token, negate(previous_condition)))
            else:
                fields = condition.split()
                if len(fields) != 1:
                    self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
                symbol = fields[0]
                condition = 'defined(' + symbol + ')'
                if token == 'ifndef':
                    condition = '!' + condition
                token = 'if'

            self.stack.append((token, condition))

        elif token == 'else':
            previous_token, previous_condition = pop_stack()
            self.stack.append((previous_token, negate(previous_condition)))

        elif token == 'endif':
            while pop_stack()[0] != 'if':
                pass

        else:
            return

        if self.verbose:
            print(self.status())


def _main(filenames: list[str] | None = None) -> None:
    """
    Run a verbose Monitor over each file and print its #if state.

    When *filenames* is None or empty, the paths are taken from
    sys.argv[1:].  For every file a blank line and the filename are printed
    first; the Monitor then prints a status line for each conditional
    directive it processes.
    """
    if not filenames:
        filenames = sys.argv[1:]
    for path in filenames:
        with open(path) as source:
            monitor = Monitor(path, verbose=True)
            print()
            print(path)
            for text in source:
                monitor.writeline(text)

# When run as a script, process the files named on the command line.
if __name__ == '__main__':
    _main()
gh-104683: Argument clinic: modernise `cpp.Monitor` (#106698) 2023-07-12 19:48:36 -03:00			`import dataclasses as dc`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`import re`
			`import sys`
gh-104050: Add more annotations to `Tools/clinic.py` (#104544) 2023-05-16 14:18:28 -03:00			`from typing import NoReturn`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00
gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00
			`TokenAndCondition = tuple[str, str]`
			`TokenStack = list[TokenAndCondition]`

			`def negate(condition: str) -> str:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`"""`
			`Returns a CPP conditional that is the opposite of the conditional passed in.`
			`"""`
			`if condition.startswith('!'):`
			`return condition[1:]`
			`return "!" + condition`

gh-104683: Argument clinic: modernise `cpp.Monitor` (#106698) 2023-07-12 19:48:36 -03:00
			`is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match`


			`@dc.dataclass(repr=False)`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`class Monitor:`
			`"""`
			`A simple C preprocessor that scans C source and computes, line by line,`
			`what the current C preprocessor #if state is.`

			`Doesn't handle everything--for example, if you have /* inside a C string,`
			`without a matching */ (also inside a C string), or with a */ inside a C`
			`string but on another line and with preprocessor macros in between...`
			`the parser will get lost.`

			`Anyway this implementation seems to work well enough for the CPython sources.`
			`"""`
gh-104683: Argument clinic: modernise `cpp.Monitor` (#106698) 2023-07-12 19:48:36 -03:00			`filename: str \| None = None`
			`_: dc.KW_ONLY`
			`verbose: bool = False`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00
gh-104683: Argument clinic: modernise `cpp.Monitor` (#106698) 2023-07-12 19:48:36 -03:00			`def __post_init__(self) -> None:`
gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`self.stack: TokenStack = []`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`self.in_comment = False`
gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`self.continuation: str \| None = None`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`self.line_number = 0`

gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`def __repr__(self) -> str:`
gh-104683: Improve consistency and test coverage of argument-clinic `__repr__` functions (#107667) 2023-08-05 17:58:38 -03:00			`parts = (`
			`str(id(self)),`
			`f"line={self.line_number}",`
			`f"condition={self.condition()!r}"`
gh-104683: Argument clinic: modernise `cpp.Monitor` (#106698) 2023-07-12 19:48:36 -03:00			`)`
gh-104683: Improve consistency and test coverage of argument-clinic `__repr__` functions (#107667) 2023-08-05 17:58:38 -03:00			`return f"<clinic.Monitor {' '.join(parts)}>"`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00
gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`def status(self) -> str:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`return str(self.line_number).rjust(4) + ": " + self.condition()`

gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`def condition(self) -> str:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`"""`
			`Returns the current preprocessor state, as a single #if condition.`
			`"""`
			`return " && ".join(condition for token, condition in self.stack)`

gh-104050: Add more annotations to `Tools/clinic.py` (#104544) 2023-05-16 14:18:28 -03:00			`def fail(self, *a: object) -> NoReturn:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`if self.filename:`
			`filename = " " + self.filename`
			`else:`
			`filename = ''`
			`print("Error at" + filename, "line", self.line_number, ":")`
			`print(" ", ' '.join(str(x) for x in a))`
			`sys.exit(-1)`

gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`def writeline(self, line: str) -> None:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`self.line_number += 1`
			`line = line.strip()`

gh-104050: Run mypy on `clinic.py` in CI (#104421) * Add basic mypy workflow to CI * Make the type check pass --------- Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> Co-authored-by: Nikita Sobolev <mail@sobolevn.me> Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> 2023-05-15 05:49:28 -03:00			`def pop_stack() -> TokenAndCondition:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`if not self.stack:`
			`self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")`
			`return self.stack.pop()`

			`if self.continuation:`
			`line = self.continuation + line`
			`self.continuation = None`

			`if not line:`
			`return`

			`if line.endswith('\\'):`
			`self.continuation = line[:-1].rstrip() + " "`
			`return`

			`# we have to ignore preprocessor commands inside comments`
			`#`
			`# we also have to handle this:`
			`# /* start`
			`# ...`
			`# */ /* <-- tricky!`
			`# ...`
			`# */`
			`# and this:`
			`# /* start`
			`# ...`
			`# */ /* also tricky! */`
			`if self.in_comment:`
			`if '*/' in line:`
			`# snip out the comment and continue`
			`#`
			`# GCC allows`
			`# /* comment`
			`# */ #include <stdio.h>`
			`# maybe other compilers too?`
			`_, _, line = line.partition('*/')`
			`self.in_comment = False`

			`while True:`
			`if '/*' in line:`
			`if self.in_comment:`
			`self.fail("Nested block comment!")`

			`before, _, remainder = line.partition('/*')`
			`comment, comment_ends, after = remainder.partition('*/')`
			`if comment_ends:`
			`# snip out the comment`
			`line = before.rstrip() + ' ' + after.lstrip()`
			`continue`
			`# comment continues to eol`
			`self.in_comment = True`
			`line = before.rstrip()`
			`break`

			`# we actually have some // comments`
			`# (but block comments take precedence)`
			`before, line_comment, comment = line.partition('//')`
			`if line_comment:`
			`line = before.rstrip()`

			`if not line.startswith('#'):`
			`return`

			`line = line[1:].lstrip()`
			`assert line`

			`fields = line.split()`
			`token = fields[0].lower()`
			`condition = ' '.join(fields[1:]).strip()`

bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`if token in {'if', 'ifdef', 'ifndef', 'elif'}:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`if not condition:`
			`self.fail("Invalid format for #" + token + " line: no argument!")`
bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`if token in {'if', 'elif'}:`
gh-104683: Argument clinic: modernise `cpp.Monitor` (#106698) 2023-07-12 19:48:36 -03:00			`if not is_a_simple_defined(condition):`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`condition = "(" + condition + ")"`
bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`if token == 'elif':`
			`previous_token, previous_condition = pop_stack()`
			`self.stack.append((previous_token, negate(previous_condition)))`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`else:`
			`fields = condition.split()`
			`if len(fields) != 1:`
			`self.fail("Invalid format for #" + token + " line: should be exactly one argument!")`
			`symbol = fields[0]`
			`condition = 'defined(' + symbol + ')'`
			`if token == 'ifndef':`
			`condition = '!' + condition`
bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`token = 'if'`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00
bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`self.stack.append((token, condition))`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00
bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`elif token == 'else':`
			`previous_token, previous_condition = pop_stack()`
			`self.stack.append((previous_token, negate(previous_condition)))`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00
			`elif token == 'endif':`
bpo-40179: Fix translation of #elif in Argument Clinic (GH-19364) Co-authored-by: Ammar Askar <ammar@ammaraskar.com> 2020-04-18 11:52:48 -03:00			`while pop_stack()[0] != 'if':`
			`pass`

			`else:`
			`return`

#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`if self.verbose:`
			`print(self.status())`

gh-106368: Improve coverage reports for argument clinic (#107693) 2023-08-06 16:40:55 -03:00
			`def _main(filenames: list[str] \| None = None) -> None:`
			`filenames = filenames or sys.argv[1:]`
			`for filename in filenames:`
gh-104683: Modernise Tools/clinic/ (#104684) - Make some string interpolations more readable using f-strings or explicit parametrisation - Remove unneeded open() mode specifiers Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com> 2023-05-20 17:16:49 -03:00			`with open(filename) as f:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`cpp = Monitor(filename, verbose=True)`
			`print()`
			`print(filename)`
gh-104683: Remove unused variables from `Tools/clinic` and tests for `Tools/clinic` (#107771) 2023-08-09 07:24:05 -03:00			`for line in f:`
#Issue 20456: Several improvements and bugfixes for Argument Clinic, including correctly generating code for Clinic blocks inside C preprocessor conditional blocks. 2014-02-01 02:03:12 -04:00			`cpp.writeline(line)`
gh-106368: Improve coverage reports for argument clinic (#107693) 2023-08-06 16:40:55 -03:00

			`if __name__ == '__main__':`
			`_main()`