2023-07-12 19:48:36 -03:00
|
|
|
import dataclasses as dc
|
2014-02-01 02:03:12 -04:00
|
|
|
import re
|
|
|
|
import sys
|
2023-05-16 14:18:28 -03:00
|
|
|
from typing import NoReturn
|
2014-02-01 02:03:12 -04:00
|
|
|
|
2023-05-15 05:49:28 -03:00
|
|
|
|
|
|
|
# One open preprocessor conditional: the directive token that opened it
# (stored as 'if' or 'elif') paired with its condition text.
TokenAndCondition = tuple[str, str]
# The conditionals currently open, outermost first.
TokenStack = list[TokenAndCondition]
|
|
|
|
|
|
|
|
def negate(condition: str) -> str:
    """
    Return the CPP conditional that is the logical opposite of *condition*.

    A leading '!' is stripped if present; otherwise one is prepended.
    """
    already_negated = condition[:1] == '!'
    return condition[1:] if already_negated else "!" + condition
|
|
|
|
|
2023-07-12 19:48:36 -03:00
|
|
|
|
|
|
|
# Matcher for a condition that is exactly one `defined(NAME)` test (optional
# whitespace allowed around the parentheses and the name) and nothing else.
is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match
|
|
|
|
|
|
|
|
|
|
|
|
@dc.dataclass(repr=False)
|
2014-02-01 02:03:12 -04:00
|
|
|
class Monitor:
|
|
|
|
"""
|
|
|
|
A simple C preprocessor that scans C source and computes, line by line,
|
|
|
|
what the current C preprocessor #if state is.
|
|
|
|
|
|
|
|
Doesn't handle everything--for example, if you have /* inside a C string,
|
|
|
|
without a matching */ (also inside a C string), or with a */ inside a C
|
|
|
|
string but on another line and with preprocessor macros in between...
|
|
|
|
the parser will get lost.
|
|
|
|
|
|
|
|
Anyway this implementation seems to work well enough for the CPython sources.
|
|
|
|
"""
|
2023-07-12 19:48:36 -03:00
|
|
|
filename: str | None = None
|
|
|
|
_: dc.KW_ONLY
|
|
|
|
verbose: bool = False
|
2014-02-01 02:03:12 -04:00
|
|
|
|
2023-07-12 19:48:36 -03:00
|
|
|
def __post_init__(self) -> None:
|
2023-05-15 05:49:28 -03:00
|
|
|
self.stack: TokenStack = []
|
2014-02-01 02:03:12 -04:00
|
|
|
self.in_comment = False
|
2023-05-15 05:49:28 -03:00
|
|
|
self.continuation: str | None = None
|
2014-02-01 02:03:12 -04:00
|
|
|
self.line_number = 0
|
|
|
|
|
2023-05-15 05:49:28 -03:00
|
|
|
def __repr__(self) -> str:
|
2023-08-05 17:58:38 -03:00
|
|
|
parts = (
|
|
|
|
str(id(self)),
|
|
|
|
f"line={self.line_number}",
|
|
|
|
f"condition={self.condition()!r}"
|
2023-07-12 19:48:36 -03:00
|
|
|
)
|
2023-08-05 17:58:38 -03:00
|
|
|
return f"<clinic.Monitor {' '.join(parts)}>"
|
2014-02-01 02:03:12 -04:00
|
|
|
|
2023-05-15 05:49:28 -03:00
|
|
|
def status(self) -> str:
|
2014-02-01 02:03:12 -04:00
|
|
|
return str(self.line_number).rjust(4) + ": " + self.condition()
|
|
|
|
|
2023-05-15 05:49:28 -03:00
|
|
|
def condition(self) -> str:
|
2014-02-01 02:03:12 -04:00
|
|
|
"""
|
|
|
|
Returns the current preprocessor state, as a single #if condition.
|
|
|
|
"""
|
|
|
|
return " && ".join(condition for token, condition in self.stack)
|
|
|
|
|
2023-05-16 14:18:28 -03:00
|
|
|
def fail(self, *a: object) -> NoReturn:
|
2014-02-01 02:03:12 -04:00
|
|
|
if self.filename:
|
|
|
|
filename = " " + self.filename
|
|
|
|
else:
|
|
|
|
filename = ''
|
|
|
|
print("Error at" + filename, "line", self.line_number, ":")
|
|
|
|
print(" ", ' '.join(str(x) for x in a))
|
|
|
|
sys.exit(-1)
|
|
|
|
|
2023-05-15 05:49:28 -03:00
|
|
|
def writeline(self, line: str) -> None:
|
2014-02-01 02:03:12 -04:00
|
|
|
self.line_number += 1
|
|
|
|
line = line.strip()
|
|
|
|
|
2023-05-15 05:49:28 -03:00
|
|
|
def pop_stack() -> TokenAndCondition:
|
2014-02-01 02:03:12 -04:00
|
|
|
if not self.stack:
|
|
|
|
self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
|
|
|
|
return self.stack.pop()
|
|
|
|
|
|
|
|
if self.continuation:
|
|
|
|
line = self.continuation + line
|
|
|
|
self.continuation = None
|
|
|
|
|
|
|
|
if not line:
|
|
|
|
return
|
|
|
|
|
|
|
|
if line.endswith('\\'):
|
|
|
|
self.continuation = line[:-1].rstrip() + " "
|
|
|
|
return
|
|
|
|
|
|
|
|
# we have to ignore preprocessor commands inside comments
|
|
|
|
#
|
|
|
|
# we also have to handle this:
|
|
|
|
# /* start
|
|
|
|
# ...
|
|
|
|
# */ /* <-- tricky!
|
|
|
|
# ...
|
|
|
|
# */
|
|
|
|
# and this:
|
|
|
|
# /* start
|
|
|
|
# ...
|
|
|
|
# */ /* also tricky! */
|
|
|
|
if self.in_comment:
|
|
|
|
if '*/' in line:
|
|
|
|
# snip out the comment and continue
|
|
|
|
#
|
|
|
|
# GCC allows
|
|
|
|
# /* comment
|
|
|
|
# */ #include <stdio.h>
|
|
|
|
# maybe other compilers too?
|
|
|
|
_, _, line = line.partition('*/')
|
|
|
|
self.in_comment = False
|
|
|
|
|
|
|
|
while True:
|
|
|
|
if '/*' in line:
|
|
|
|
if self.in_comment:
|
|
|
|
self.fail("Nested block comment!")
|
|
|
|
|
|
|
|
before, _, remainder = line.partition('/*')
|
|
|
|
comment, comment_ends, after = remainder.partition('*/')
|
|
|
|
if comment_ends:
|
|
|
|
# snip out the comment
|
|
|
|
line = before.rstrip() + ' ' + after.lstrip()
|
|
|
|
continue
|
|
|
|
# comment continues to eol
|
|
|
|
self.in_comment = True
|
|
|
|
line = before.rstrip()
|
|
|
|
break
|
|
|
|
|
|
|
|
# we actually have some // comments
|
|
|
|
# (but block comments take precedence)
|
|
|
|
before, line_comment, comment = line.partition('//')
|
|
|
|
if line_comment:
|
|
|
|
line = before.rstrip()
|
|
|
|
|
|
|
|
if not line.startswith('#'):
|
|
|
|
return
|
|
|
|
|
|
|
|
line = line[1:].lstrip()
|
|
|
|
assert line
|
|
|
|
|
|
|
|
fields = line.split()
|
|
|
|
token = fields[0].lower()
|
|
|
|
condition = ' '.join(fields[1:]).strip()
|
|
|
|
|
2020-04-18 11:52:48 -03:00
|
|
|
if token in {'if', 'ifdef', 'ifndef', 'elif'}:
|
2014-02-01 02:03:12 -04:00
|
|
|
if not condition:
|
|
|
|
self.fail("Invalid format for #" + token + " line: no argument!")
|
2020-04-18 11:52:48 -03:00
|
|
|
if token in {'if', 'elif'}:
|
2023-07-12 19:48:36 -03:00
|
|
|
if not is_a_simple_defined(condition):
|
2014-02-01 02:03:12 -04:00
|
|
|
condition = "(" + condition + ")"
|
2020-04-18 11:52:48 -03:00
|
|
|
if token == 'elif':
|
|
|
|
previous_token, previous_condition = pop_stack()
|
|
|
|
self.stack.append((previous_token, negate(previous_condition)))
|
2014-02-01 02:03:12 -04:00
|
|
|
else:
|
|
|
|
fields = condition.split()
|
|
|
|
if len(fields) != 1:
|
|
|
|
self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
|
|
|
|
symbol = fields[0]
|
|
|
|
condition = 'defined(' + symbol + ')'
|
|
|
|
if token == 'ifndef':
|
|
|
|
condition = '!' + condition
|
2020-04-18 11:52:48 -03:00
|
|
|
token = 'if'
|
2014-02-01 02:03:12 -04:00
|
|
|
|
2020-04-18 11:52:48 -03:00
|
|
|
self.stack.append((token, condition))
|
2014-02-01 02:03:12 -04:00
|
|
|
|
2020-04-18 11:52:48 -03:00
|
|
|
elif token == 'else':
|
|
|
|
previous_token, previous_condition = pop_stack()
|
|
|
|
self.stack.append((previous_token, negate(previous_condition)))
|
2014-02-01 02:03:12 -04:00
|
|
|
|
|
|
|
elif token == 'endif':
|
2020-04-18 11:52:48 -03:00
|
|
|
while pop_stack()[0] != 'if':
|
|
|
|
pass
|
|
|
|
|
|
|
|
else:
|
|
|
|
return
|
|
|
|
|
2014-02-01 02:03:12 -04:00
|
|
|
if self.verbose:
|
|
|
|
print(self.status())
|
|
|
|
|
2023-08-06 16:40:55 -03:00
|
|
|
|
|
|
|
def _main(filenames: list[str] | None = None) -> None:
    """
    Print the preprocessor state of each file, directive by directive.

    *filenames* defaults to the command-line arguments (sys.argv[1:]).
    Each file is fed line by line to a verbose Monitor, which prints its
    status after every conditional directive.
    """
    filenames = filenames or sys.argv[1:]
    for filename in filenames:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # locale's default encoding.
        with open(filename, encoding="utf-8") as f:
            cpp = Monitor(filename, verbose=True)
            print()
            print(filename)
            for line in f:
                cpp.writeline(line)
|
2023-08-06 16:40:55 -03:00
|
|
|
|
|
|
|
|
|
|
|
# When run as a script, monitor the files named on the command line.
if __name__ == '__main__':
    _main()
|