340 lines
9.2 KiB
Python
340 lines
9.2 KiB
Python
import re
|
|
import shlex
|
|
import subprocess
|
|
|
|
from ..common.info import UNKNOWN
|
|
|
|
from . import source
|
|
|
|
|
|
# A C identifier: a letter, or one or more underscores followed by an
# alphanumeric character, and then any run of word characters.
IDENTIFIER = r'(?:(?:[a-zA-Z]|_+[a-zA-Z0-9])\w*)'

# A single C type qualifier.
TYPE_QUAL = r'(?:const|volatile)'

# A type specifier, written for use with re.VERBOSE (the whitespace in
# the pattern is not significant).
# NOTE(review): the integer group ends with an empty alternative
# ("... long\s+long ) | )"), so this pattern can match the empty string
# and the alternatives listed after that group are only tried when
# something following the specifier forces backtracking.
VAR_TYPE_SPEC = rf'''(?:
        void |
        (?:
         (?:(?:un)?signed\s+)?
         (?:
          char |
          short |
          int |
          long |
          long\s+int |
          long\s+long
          ) |
         ) |
        float |
        double |
        {IDENTIFIER} |
        (?:struct|union)\s+{IDENTIFIER}
        )'''

# A pointer marker: an optional "const" followed by "*".
POINTER = rf'''(?:
        (?:\s+const)?\s*[*]
        )'''
|
|
|
|
#STRUCT = r'''(?:
|
|
# (?:struct|(struct\s+%s))\s*[{]
|
|
# [^}]*
|
|
# [}]
|
|
# )''' % (IDENTIFIER)
|
|
#UNION = r'''(?:
|
|
# (?:union|(union\s+%s))\s*[{]
|
|
# [^}]*
|
|
# [}]
|
|
# )''' % (IDENTIFIER)
|
|
#DECL_SPEC = rf'''(?:
|
|
# ({VAR_TYPE_SPEC}) |
|
|
# ({STRUCT}) |
|
|
# ({UNION})
|
|
# )'''
|
|
|
|
FUNC_START = rf'''(?:
|
|
(?:
|
|
(?:
|
|
extern |
|
|
static |
|
|
static\s+inline
|
|
)\s+
|
|
)?
|
|
#(?:const\s+)?
|
|
{VAR_TYPE_SPEC}
|
|
)'''
|
|
#GLOBAL_VAR_START = rf'''(?:
|
|
# (?:
|
|
# (?:
|
|
# extern |
|
|
# static
|
|
# )\s+
|
|
# )?
|
|
# (?:
|
|
# {TYPE_QUAL}
|
|
# (?:\s+{TYPE_QUAL})?
|
|
# )?\s+
|
|
# {VAR_TYPE_SPEC}
|
|
# )'''
|
|
GLOBAL_DECL_START_RE = re.compile(rf'''
|
|
^
|
|
(?:
|
|
({FUNC_START})
|
|
)
|
|
''', re.VERBOSE)
|
|
|
|
LOCAL_VAR_START = rf'''(?:
|
|
(?:
|
|
(?:
|
|
register |
|
|
static
|
|
)\s+
|
|
)?
|
|
(?:
|
|
(?:
|
|
{TYPE_QUAL}
|
|
(?:\s+{TYPE_QUAL})?
|
|
)\s+
|
|
)?
|
|
{VAR_TYPE_SPEC}
|
|
{POINTER}?
|
|
)'''
|
|
LOCAL_STMT_START_RE = re.compile(rf'''
|
|
^
|
|
(?:
|
|
({LOCAL_VAR_START})
|
|
)
|
|
''', re.VERBOSE)
|
|
|
|
|
|
def iter_global_declarations(lines):
    """Yield (text, body) for each function definition in the given lines.

    The lines are first passed through source.iter_clean_lines().  A
    line that matches GLOBAL_DECL_START_RE starts a candidate; lines
    ending in ";" and lines ending in "{" without a "(" are skipped,
    since only functions are handled for now.

    For each function, "text" is the header joined onto a single line,
    followed by the body and the closing "}" on separate lines, and
    "body" is the text between the braces as returned by
    _extract_block().  Iteration stops early if the input runs out
    before the header or the body is complete.
    """
    # XXX Bail out upon bogus syntax.
    exhausted = object()
    cleaned = source.iter_clean_lines(lines)
    for first in cleaned:
        if GLOBAL_DECL_START_RE.match(first) is None:
            continue
        # We only need functions here, since we only need locals for now.
        is_simple = first.endswith(';')
        is_nonfunc_block = first.endswith('{') and '(' not in first
        if is_simple or is_nonfunc_block:
            continue

        # Collect the header up to the line holding the opening brace.
        # (assume no func is a one-liner and no inline structs, etc.)
        header_parts = [first]
        current = first
        while '{' not in current:
            current = next(cleaned, exhausted)
            if current is exhausted:
                return
            header_parts.append(current)
        header = ' '.join(header_parts)

        body, closer = _extract_block(cleaned)
        if closer is None:
            return
        assert closer == '}'
        yield (f'{header}\n{body}\n{closer}', body)
|
|
|
|
|
|
def iter_local_statements(lines):
    """Yield (stmt, blocks) for each statement in the given lines.

    For simple statements, "blocks" is None and the statement is a
    single line.  For compound statements, "blocks" is meant to be a
    pair of (header, body) for each block in the statement, with the
    headers reduced to a single line each and the bodies provided
    as-is.  Compound and multi-line statements are not supported yet,
    so only single lines ending in ";" are yielded.
    """
    # XXX Bail out upon bogus syntax.
    for candidate in source.iter_clean_lines(lines):
        if LOCAL_STMT_START_RE.match(candidate) is None:
            continue
        # XXX Support compound & multiline simple statements.
        if candidate.endswith(';'):
            yield (candidate, None)
|
|
|
|
|
|
def _extract_block(lines):
|
|
end = None
|
|
depth = 1
|
|
body = []
|
|
for line in lines:
|
|
depth += line.count('{') - line.count('}')
|
|
if depth == 0:
|
|
end = line
|
|
break
|
|
body.append(line)
|
|
return '\n'.join(body), end
|
|
|
|
|
|
def parse_func(stmt, body):
    """Return (name, signature) for the given function definition.

    "stmt" is the full text of the definition (header, "{", body, "}")
    and "body" is the text between the braces, exactly as it appears in
    "stmt".  The signature is the header collapsed onto one line.  The
    name is the last space-separated word before the first "(", without
    any leading "*" left over from a pointer return type.

    AssertionError is raised if "stmt" does not have that layout or if
    no name can be found.
    """
    # The body text may also occur inside the header (or be empty), so
    # try each occurrence until one splits "stmt" into a header ending
    # with "{" and a tail that is just the closing "}".
    start = stmt.find(body)
    while start != -1:
        header = stmt[:start]
        tail = stmt[start + len(body):]
        if tail.strip() == '}' and header.strip().endswith('{'):
            break
        start = stmt.find(body, start + 1)
    else:
        raise AssertionError('not a function definition with the given body')
    header, _, _ = header.rpartition('{')

    signature = ' '.join(header.strip().splitlines())

    _, _, name = signature.split('(')[0].strip().rpartition(' ')
    # "char *spam(void)" would otherwise give "*spam".
    name = name.lstrip('*')
    if not name:
        raise AssertionError('no function name found')

    return name, signature
|
|
|
|
|
|
#TYPE_SPEC = rf'''(?:
|
|
# )'''
|
|
#VAR_DECLARATOR = rf'''(?:
|
|
# )'''
|
|
#VAR_DECL = rf'''(?:
|
|
# {TYPE_SPEC}+
|
|
# {VAR_DECLARATOR}
|
|
# \s*
|
|
# )'''
|
|
#VAR_DECLARATION = rf'''(?:
|
|
# {VAR_DECL}
|
|
# (?: = [^=] [^;]* )?
|
|
# ;
|
|
# )'''
|
|
#
|
|
#
|
|
#def parse_variable(decl, *, inFunc=False):
|
|
# """Return [(name, storage, vartype)] for the given variable declaration."""
|
|
# ...
|
|
|
|
|
|
def _parse_var(stmt):
    """Return (name, vartype) for the given variable declaration.

    Any trailing ";" is dropped and the leading part of the type is
    taken from LOCAL_STMT_START_RE.  The remainder, up to any "=", is
    split into the rest of the type and the name (the last
    space-separated word).  A parenthesized declarator such as
    "(*name)(args)" keeps its parentheses and trailing part in the type.
    Runs of spaces in the resulting type are collapsed to one space.
    """
    stmt = stmt.rstrip(';')
    m = LOCAL_STMT_START_RE.match(stmt)
    assert m
    vartype = m.group(0)
    # Whatever follows the matched prefix, minus any initializer.
    name = stmt[len(vartype):].partition('=')[0].strip()

    if name.startswith('('):
        name, _, after = name[1:].partition(')')
        assert after
        # Put a space after each "*" so it splits off from the name.
        name = name.replace('*', '* ')
        inside, _, name = name.strip().rpartition(' ')
        vartype = f'{vartype} ({inside.strip()}){after}'
    else:
        name = name.replace('*', '* ')
        before, _, name = name.rpartition(' ')
        vartype = f'{vartype} {before}'

    # Collapse runs of spaces in a single pass; this always terminates,
    # unlike a loop that replaces a string with itself.
    vartype = re.sub(' {2,}', ' ', vartype.strip())

    return name, vartype
|
|
|
|
|
|
def extract_storage(decl, *, infunc=None):
    """Return the storage class for the given declaration.

    If "decl" is UNKNOWN it is returned unchanged.  A declaration that
    starts with "static " or "extern " gives "static" or "extern".
    Otherwise the default storage is "implicit" (or "local" if infunc
    is true).

    NotImplementedError is raised if "static" or "extern" appears as a
    word anywhere other than at the very start of the declaration.
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        return 'static'
        #return 'static', decl.partition(' ')[2].strip()
    elif decl.startswith('extern '):
        return 'extern'
        #return 'extern', decl.partition(' ')[2].strip()
    # The pattern must be a raw string: in a regular string literal
    # "\b" is a backspace character rather than a word boundary.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'
|
|
|
|
|
|
def parse_compound(stmt, blocks):
    """Return (headers, bodies) for the given compound statement.

    Not implemented yet: calling this always raises NotImplementedError.
    """
    # XXX Identify declarations inside compound statements
    # (if/switch/for/while).
    raise NotImplementedError
|
|
|
|
|
|
def iter_variables(filename, *,
                   preprocessed=False,
                   _iter_source_lines=source.iter_lines,
                   _iter_global=iter_global_declarations,
                   _iter_local=iter_local_statements,
                   _parse_func=parse_func,
                   _parse_var=_parse_var,
                   _parse_compound=parse_compound,
                   ):
    """Yield (funcname, name, vartype) for every variable in the given file.

    "funcname" is None for variables found at the top level of the
    file and the enclosing function's name for locals.  Preprocessed
    input is not supported yet (NotImplementedError).  The underscore
    keyword arguments are the helpers used for each step and may be
    replaced by the caller.
    """
    if preprocessed:
        raise NotImplementedError
    source_lines = _iter_source_lines(filename)
    for stmt, body in _iter_global(source_lines):
        # At the file top-level we only have to worry about vars & funcs.
        if body:
            # A function: report each of its locals under its name.
            funcname, _ = _parse_func(stmt, body)
            for name, vartype in _iter_locals(
                    body,
                    _iter_statements=_iter_local,
                    _parse_var=_parse_var,
                    _parse_compound=_parse_compound,
                    ):
                yield (funcname, name, vartype)
            continue
        name, vartype = _parse_var(stmt)
        if name:
            yield (None, name, vartype)
|
|
|
|
|
|
def _iter_locals(lines, *,
                 _iter_statements=iter_local_statements,
                 _parse_var=_parse_var,
                 _parse_compound=parse_compound,
                 ):
    """Yield (name, vartype) for each variable declared in the given text.

    "lines" is the text of a block.  It is split into lines and scanned
    statement by statement.  Bodies of compound statements are queued
    and scanned after the text they were found in, in the order found.
    """
    pending = [lines]
    while pending:
        text = pending.pop(0)
        for stmt, blocks in _iter_statements(text.splitlines()):
            if blocks:
                headers, bodies = _parse_compound(stmt, blocks)
                # Declarations may appear in the headers themselves.
                for header in headers:
                    for decl in header:
                        name, vartype = _parse_var(decl)
                        if name:
                            yield (name, vartype)
                pending.extend(bodies)
                continue
            name, vartype = _parse_var(stmt)
            if name:
                yield (name, vartype)
|
|
|
|
|
|
def iter_all(filename, *,
             preprocessed=False,
             ):
    """Yield ('variable', funcname, name, decl) for each variable found.

    If there are duplicates, due to preprocessor conditionals, then
    they are meant to be checked to make sure they are the same.
    """
    # XXX For the moment we cheat: only variables are reported, taken
    # directly from iter_variables().
    variables = iter_variables(filename, preprocessed=preprocessed)
    yield from (
        ('variable', funcname, name, decl)
        for funcname, name, decl in variables
    )
|