296 lines
7.9 KiB
Python
296 lines
7.9 KiB
Python
|
import re
|
||
|
import shlex
|
||
|
import subprocess
|
||
|
|
||
|
from . import source
|
||
|
|
||
|
|
||
|
# The regex fragments below are meant to be embedded in patterns compiled
# with re.VERBOSE, so whitespace and line breaks inside them are layout only.

# A C identifier: a letter followed by word characters, or one or more
# underscores followed by an alphanumeric and then word characters.
# (The character range is "A-Z"; "A-z" would also admit "[", "\", "]",
# "^", "_" and "`".)
IDENTIFIER = r'(?:[a-zA-Z]\w*|_+[a-zA-Z0-9]\w*)'

TYPE_QUAL = r'(?:const|volatile)'

# The type specifier of a declaration.  This must be an f-string so that
# the {IDENTIFIER} placeholders are actually interpolated.
#
# NOTE(review): the trailing "|" inside the integer group leaves an empty
# alternative, so this fragment can match the empty string.  When it is
# used for prefix matching (as in the *_START_RE patterns below) that
# empty alternative wins before "float", "double", {IDENTIFIER} or
# "struct/union ..." are ever tried, which means those patterns accept
# any line.  The parsing functions in this module rely on that, so the
# alternative is kept as-is.
# NOTE(review): likewise "long" precedes "long\s+int" and "long\s+long",
# so a prefix match stops after the first "long".
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          ) |
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

# A single pointer marker, optionally preceded by "const".
POINTER = r'''(?:
        (?:\s+const)?\s*[*]
        )'''

#STRUCT = r'''(?:
#        (?:struct|(struct\s+%s))\s*[{]
#        [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#UNION = r'''(?:
#        (?:union|(union\s+%s))\s*[{]
#        [^}]*
#        [}]
#        )''' % (IDENTIFIER)
#DECL_SPEC = rf'''(?:
#        ({VAR_TYPE_SPEC}) |
#        ({STRUCT}) |
#        ({UNION})
#        )'''

# The start of a function definition/declaration: an optional storage
# class followed by the return type specifier.
FUNC_START = rf'''(?:
        (?:
          (?:
            extern |
            static |
            static\s+inline
           )\s+
          )?
        #(?:const\s+)?
        {VAR_TYPE_SPEC}
        )'''
#GLOBAL_VAR_START = rf'''(?:
#        (?:
#          (?:
#            extern |
#            static
#           )\s+
#          )?
#        (?:
#           {TYPE_QUAL}
#           (?:\s+{TYPE_QUAL})?
#           )?\s+
#        {VAR_TYPE_SPEC}
#        )'''
GLOBAL_DECL_START_RE = re.compile(rf'''
        ^
        (?:
            ({FUNC_START})
         )
        ''', re.VERBOSE)

# The start of a local variable declaration: optional storage class,
# optional type qualifier(s), the type specifier and an optional pointer.
LOCAL_VAR_START = rf'''(?:
        (?:
          (?:
            register |
            static
           )\s+
          )?
        (?:
          (?:
            {TYPE_QUAL}
            (?:\s+{TYPE_QUAL})?
           )\s+
          )?
        {VAR_TYPE_SPEC}
        {POINTER}?
        )'''
LOCAL_STMT_START_RE = re.compile(rf'''
        ^
        (?:
            ({LOCAL_VAR_START})
         )
        ''', re.VERBOSE)
|
||
|
|
||
|
|
||
|
def iter_global_declarations(lines):
    """Yield (decl, body) for each top-level function definition.

    "decl" is the whole definition: the header collapsed onto a single
    line, then the body, then the closing brace, joined by newlines.
    "body" is the text between the braces, provided as-is.

    Only function definitions are produced for now.  Simple declarations
    (lines ending in ";") and compound declarations that are not
    functions (a line ending in "{" with no "(" in it) are skipped.
    Declarations inside function bodies are not yielded on their own,
    though their text is part of the enclosing function's body.
    """
    # XXX Bail out upon bogus syntax.
    cleaned = source.iter_clean_lines(lines)
    for first in cleaned:
        if GLOBAL_DECL_START_RE.match(first) is None:
            continue

        # We only need functions here, since we only need locals for now.
        is_simple = first.endswith(';')
        is_non_func_compound = first.endswith('{') and '(' not in first
        if is_simple or is_non_func_compound:
            continue

        # Gather the header lines up to (and including) the one that
        # opens the body.  (assume no func is a one-liner and that there
        # are no inline structs, etc.)
        header = [first]
        current = first
        while '{' not in current:
            current = next(cleaned, None)
            if current is None:
                # Ran out of input before the body started.
                return
            header.append(current)
        decl = ' '.join(header)

        # The same iterator is handed on, so the body lines are consumed
        # here and scanning resumes after the closing brace.
        body, closer = _extract_block(cleaned)
        if closer is None:
            return
        assert closer == '}'
        yield (f'{decl}\n{body}\n{closer}', body)
|
||
|
|
||
|
|
||
|
def iter_local_statements(lines):
    """Yield (stmt, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is a
    single line.  For compound statements, "blocks" is meant to be a
    pair of (header, body) for each block in the statement, with each
    header reduced to a single line and the bodies provided as-is.

    Compound and multi-line statements are not supported yet: only
    single lines ending in ";" are yielded, always with blocks=None.
    """
    # XXX Bail out upon bogus syntax.
    for stmt in source.iter_clean_lines(lines):
        # XXX Support compound & multiline simple statements.
        if not stmt.endswith(';'):
            continue
        if LOCAL_STMT_START_RE.match(stmt):
            yield (stmt, None)
|
||
|
|
||
|
|
||
|
def _extract_block(lines):
|
||
|
end = None
|
||
|
depth = 1
|
||
|
body = []
|
||
|
for line in lines:
|
||
|
depth += line.count('{') - line.count('}')
|
||
|
if depth == 0:
|
||
|
end = line
|
||
|
break
|
||
|
body.append(line)
|
||
|
return '\n'.join(body), end
|
||
|
|
||
|
|
||
|
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the full text of the definition (header, "{", body and the
    closing "}") and "body" is the text between the braces.  The
    signature is the header, without the opening brace, collapsed onto a
    single line.  The name is the last space-separated word before the
    first "(", without any leading "*".

    An AssertionError is raised if "stmt" does not have that shape.
    """
    # Drop the closing brace first so that the body is located from the
    # end of the text.  That keeps this working when the body is empty or
    # when its text also happens to occur in the header.
    text = stmt.rstrip()
    assert text.endswith('}')
    text = text[:-1]
    if body:
        header, found, trailing = text.rpartition(body)
        assert found and not trailing.strip()
    else:
        header = text

    header = header.strip()
    assert header.endswith('{')
    header, _, _ = header.rpartition('{')

    signature = ' '.join(header.strip().splitlines())

    _, _, name = signature.split('(')[0].strip().rpartition(' ')
    # A pointer return type may be written right against the name
    # (e.g. "char *spam(void)"); the star is not part of the name.
    name = name.lstrip('*')
    assert name

    return name, signature
|
||
|
|
||
|
|
||
|
def parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    A trailing ";" and any initializer (everything from the first "=")
    are ignored.  The text matched by LOCAL_STMT_START_RE seeds the
    type; whatever precedes the last word of the remainder (e.g. pointer
    stars) is appended to the type and that last word is the name.  A
    parenthesized declarator such as "(*spam)(void)" keeps its
    parentheses and trailing part in the type.

    Whitespace in the returned type is normalized to single spaces.
    """
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        # Parenthesized declarator, e.g. a function pointer.
        name, _, after = name[1:].partition(')')
        assert after
        # Detach stars from the name so the split on the last space works.
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        # Detach stars from the name so the split on the last space works.
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    # Collapse every run of whitespace to one space (this also strips the
    # ends).  A replace loop keyed on a single space would never finish
    # for any type that contains a space.
    vartype = ' '.join(vartype.split())

    return name, vartype
|
||
|
|
||
|
|
||
|
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    Not implemented yet; always raises NotImplementedError.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
|
||
|
|
||
|
|
||
|
def iter_variables(filename, *,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    For variables declared at the top level of the file "funcname" is
    None; for locals it is the name of the enclosing function.  The
    underscore-prefixed keyword arguments are the helpers used for each
    step and can be replaced by the caller.
    """
    for stmt, body in _iter_global(_iter_source_lines(filename)):
        # At the file top-level we only have to worry about vars & funcs.
        if body:
            funcname, _ = _parse_func(stmt, body)
            localvars = _iter_locals(
                body,
                _iter_statements=_iter_local,
                _parse_var=_parse_var,
                _parse_compound=_parse_compound,
            )
            for name, vartype in localvars:
                yield (funcname, name, vartype)
            continue

        # NOTE(review): any falsy body lands here, so a definition whose
        # body is the empty string is parsed as a variable as well.
        name, vartype = _parse_var(stmt)
        if name:
            yield (None, name, vartype)
|
||
|
|
||
|
|
||
|
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable found in the given body text.

    "lines" is the text of a function body as one string.  Bodies of
    compound statements are queued and processed after the current body,
    in the order they were found.
    """
    pending = [lines]
    while pending:
        text = pending.pop(0)
        for stmt, blocks in _iter_statements(text.splitlines()):
            if blocks:
                headers, bodies = _parse_compound(stmt, blocks)
                # Variables may be declared in the headers themselves.
                for header in headers:
                    for line in header:
                        name, vartype = _parse_var(line)
                        if name:
                            yield (name, vartype)
                pending.extend(bodies)
                continue

            # A simple statement.
            name, vartype = _parse_var(stmt)
            if name:
                yield (name, vartype)
|
||
|
|
||
|
|
||
|
def iter_all(dirnames):
    """Yield a Declaration for each one found.

    If there are duplicates, due to preprocessor conditionals, then
    they are checked to make sure they are the same.

    Not implemented yet; always raises NotImplementedError.
    """
    raise NotImplementedError
|
||
|
|
||
|
|
||
|
def iter_preprocessed(dirnames):
    """Yield a Declaration for each one found.

    All source files are run through the preprocessor first.

    Not implemented yet; always raises NotImplementedError.
    """
    raise NotImplementedError
|