2022-11-03 01:31:26 -03:00
|
|
|
"""Parser for bytecodes.inst."""
|
|
|
|
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
from typing import NamedTuple, Callable, TypeVar
|
|
|
|
|
|
|
|
import lexer as lx
|
|
|
|
from plexer import PLexer
|
|
|
|
|
|
|
|
|
|
|
|
P = TypeVar("P", bound="Parser")
|
|
|
|
N = TypeVar("N", bound="Node")
|
|
|
|
def contextual(func: Callable[[P], N|None]) -> Callable[[P], N|None]:
|
|
|
|
# Decorator to wrap grammar methods.
|
|
|
|
# Resets position if `func` returns None.
|
|
|
|
def contextual_wrapper(self: P) -> N|None:
|
|
|
|
begin = self.getpos()
|
|
|
|
res = func(self)
|
|
|
|
if res is None:
|
|
|
|
self.setpos(begin)
|
|
|
|
return
|
|
|
|
end = self.getpos()
|
|
|
|
res.context = Context(begin, end, self)
|
|
|
|
return res
|
|
|
|
return contextual_wrapper
|
|
|
|
|
|
|
|
|
|
|
|
class Context(NamedTuple):
    """Span of parser positions attached to a node by `contextual`.

    `begin` and `end` are the values of the owner's `getpos()` taken before
    and after the grammar method ran; `owner` is the parser itself.
    """

    # Position at which the node starts.
    begin: int
    # Position reached once the node had been parsed.
    end: int
    # Object whose `tokens` the two positions index into.
    owner: PLexer

    def __repr__(self):
        # Only the span is shown; the owner is omitted.
        return "<{}-{}>".format(self.begin, self.end)
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Node:
    """Base class for parse-tree nodes.

    `context` is not a constructor argument; it starts out as None and is
    assigned by the `contextual` decorator once a node has been parsed.
    """

    context: Context|None = field(init=False, default=None)

    @property
    def text(self) -> str:
        """Source text of this node with no dedent applied."""
        return self.to_text()

    def to_text(self, dedent: int = 0) -> str:
        """Return the text of the tokens this node spans.

        Yields "" when the node has no context.  Otherwise the owner's
        tokens between the context's `begin` and `end` are handed to
        `lx.to_text` together with `dedent`.
        """
        ctx = self.context
        if not ctx:
            return ""
        span = ctx.owner.tokens[ctx.begin:ctx.end]
        return lx.to_text(span, dedent)
|
2022-11-03 01:31:26 -03:00
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class Block(Node):
    """Node for the code block that follows an instruction header."""

    # Tokens collected for the block by `Parser.c_blob()`.
    tokens: list[lx.Token]
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class InstHeader(Node):
    """Node for `inst(NAME)` or `inst(NAME, (inputs -- outputs))`."""

    # Identifier given as the first argument of `inst(...)`.
    name: str
    # Stack-effect names before the `--`; empty when no effect is given.
    inputs: list[str]
    # Stack-effect names after the `--`; empty when no effect is given.
    outputs: list[str]
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class InstDef(Node):
    """Node for a full instruction definition: a header plus its block.

    `name`, `inputs` and `outputs` are read-only shortcuts that forward to
    the header.
    """

    header: InstHeader
    block: Block

    @property
    def name(self) -> str:
        """Name taken from the header."""
        hdr = self.header
        return hdr.name

    @property
    def inputs(self) -> list[str]:
        """Input stack effect taken from the header."""
        hdr = self.header
        return hdr.inputs

    @property
    def outputs(self) -> list[str]:
        """Output stack effect taken from the header."""
        hdr = self.header
        return hdr.outputs
|
2022-11-03 01:31:26 -03:00
|
|
|
|
|
|
|
|
2022-11-06 13:40:47 -04:00
|
|
|
@dataclass
class Super(Node):
    """Node for `super(NAME) = OP + OP + ... ;`."""

    # Identifier inside `super(...)`.
    name: str
    # Identifiers joined by `+` on the right-hand side, in source order.
    ops: list[str]
|
|
|
|
|
|
|
|
|
2022-11-03 01:31:26 -03:00
|
|
|
@dataclass
class Family(Node):
    """Node for `family(NAME) = MEMBER, MEMBER, ... ;`."""

    # Identifier inside `family(...)`.
    name: str
    # Comma-separated identifiers on the right-hand side, in source order.
    members: list[str]
|
|
|
|
|
|
|
|
|
|
|
|
class Parser(PLexer):
    """Parser for instruction, super-instruction and family definitions.

    Grammar methods return a node (or a plain value) on success and None
    when the input does not match.  Methods decorated with `@contextual`
    additionally get the token position restored on a None result and a
    `Context` attached to the node they return.

    The token primitives used below (`expect`, `require`, `peek`, `next`,
    `getpos`, `setpos`, `make_syntax_error`) are not defined in this class;
    presumably they are inherited from `PLexer`.
    """

    @contextual
    def inst_def(self) -> InstDef | None:
        """Parse an instruction header followed by its block.

        Returns None if no header matches.  Raises the error built by
        `make_syntax_error("Expected block")` if a header matched but no
        block follows it.
        """
        header = self.inst_header()
        if not header:
            return None
        block = self.block()
        if not block:
            raise self.make_syntax_error("Expected block")
        return InstDef(header, block)

    @contextual
    def inst_header(self) -> InstHeader | None:
        """Parse `inst(NAME)` or `inst(NAME, (inputs -- outputs))`.

        The form with a stack effect is only accepted when the token after
        the closing parenthesis is `{`; the effect is then validated with
        `check_overlaps()`.
        """
        # inst(NAME) | inst(NAME, (inputs -- outputs))
        # TODO: Error out when there is something unexpected.
        # TODO: Make INST a keyword in the lexer.
        if not ((tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "inst"):
            return None
        if not (self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER))):
            return None
        name = tkn.text
        if self.expect(lx.COMMA):
            inp, outp = self.stack_effect()
            if (
                self.expect(lx.RPAREN)
                and (tkn := self.peek())
                and tkn.kind == lx.LBRACE
            ):
                self.check_overlaps(inp, outp)
                return InstHeader(name, inp, outp)
        elif self.expect(lx.RPAREN):
            return InstHeader(name, [], [])
        return None

    def check_overlaps(self, inp: list[str], outp: list[str]) -> None:
        """Reject an input name that reappears in the outputs at another index.

        For each input the first occurrence of the same name in `outp` is
        looked up; if it exists at a different index, the error built by
        `make_syntax_error` is raised.
        """
        for i, name in enumerate(inp):
            try:
                j = outp.index(name)
            except ValueError:
                # Name is not an output at all: nothing to check.
                continue
            else:
                if i != j:
                    raise self.make_syntax_error(
                        f"Input {name!r} at pos {i} repeated in output at different pos {j}")

    def stack_effect(self) -> tuple[list[str], list[str]]:
        """Parse a parenthesized stack effect and return `(inputs, outputs)`.

        Either list may be empty.  Raises the error built by
        `make_syntax_error("Expected stack effect")` on any mismatch.
        """
        # '(' [inputs] '--' [outputs] ')'
        if self.expect(lx.LPAREN):
            inp = self.inputs() or []
            if self.expect(lx.MINUSMINUS):
                outp = self.outputs() or []
                if self.expect(lx.RPAREN):
                    return inp, outp
        raise self.make_syntax_error("Expected stack effect")

    def _comma_separated(
        self, parse_item: Callable[[], str | None]
    ) -> list[str] | None:
        """Parse `item (, item)*` where `parse_item` parses a single item.

        Returns None, with the position restored, when not even one item
        can be parsed.  A comma that is not followed by another item is not
        consumed: the position is set back to just after the last item.
        """
        start = self.getpos()
        first = parse_item()
        if not first:
            self.setpos(start)
            return None
        items = [first]
        while True:
            # Position just after the last accepted item.
            mark = self.getpos()
            if not self.expect(lx.COMMA):
                break
            item = parse_item()
            if not item:
                break
            items.append(item)
        self.setpos(mark)
        return items

    def inputs(self) -> list[str] | None:
        """Parse a comma-separated list of inputs; None if there are none."""
        # input (, input)*
        return self._comma_separated(self.input)

    def input(self) -> str | None:
        """Parse one stack-effect item and return its textual form.

        Returns `NAME`, `NAME[ARG]` or `NAME[ARG*NUM]` for the identifier
        forms, "??" for a run of one or more CONDOP tokens, and None if
        neither form matches.  Raises the error built by `make_syntax_error`
        when `NAME[` is not followed by a well-formed argument and `]`.
        """
        # IDENTIFIER | IDENTIFIER '[' IDENTIFIER ['*' NUMBER] ']' | CONDOP+
        if tkn := self.expect(lx.IDENTIFIER):
            if self.expect(lx.LBRACKET):
                if arg := self.expect(lx.IDENTIFIER):
                    if self.expect(lx.RBRACKET):
                        return f"{tkn.text}[{arg.text}]"
                    if self.expect(lx.TIMES):
                        if num := self.expect(lx.NUMBER):
                            if self.expect(lx.RBRACKET):
                                return f"{tkn.text}[{arg.text}*{num.text}]"
                raise self.make_syntax_error("Expected argument in brackets", tkn)

            return tkn.text
        if self.expect(lx.CONDOP):
            # Swallow any further CONDOP tokens; they all collapse to "??".
            while self.expect(lx.CONDOP):
                pass
            return "??"
        return None

    def outputs(self) -> list[str] | None:
        """Parse a comma-separated list of outputs; None if there are none."""
        # output (, output)*
        return self._comma_separated(self.output)

    def output(self) -> str | None:
        """Parse one output; currently the same syntax as `input()`."""
        return self.input()  # TODO: They're not quite the same.

    @contextual
    def super_def(self) -> Super | None:
        """Parse `super(NAME) = OP + OP ... ;` into a `Super` node."""
        if (
            (tkn := self.expect(lx.IDENTIFIER))
            and tkn.text == "super"
            and self.expect(lx.LPAREN)
            and (tkn := self.expect(lx.IDENTIFIER))
            and self.expect(lx.RPAREN)
            and self.expect(lx.EQUALS)
            and (ops := self.ops())
        ):
            return Super(tkn.text, ops)
        return None

    def ops(self) -> list[str] | None:
        """Parse `NAME (+ NAME)* ;` and return the names.

        Returns None if the first token is not an identifier.  After the
        first name, a following identifier after each `+` and the final
        `;` are demanded with `require()` (presumably raising on mismatch).
        """
        if tkn := self.expect(lx.IDENTIFIER):
            ops = [tkn.text]
            while self.expect(lx.PLUS):
                if tkn := self.require(lx.IDENTIFIER):
                    ops.append(tkn.text)
            self.require(lx.SEMI)
            return ops
        return None

    @contextual
    def family_def(self) -> Family | None:
        """Parse `family(NAME) = MEMBER, MEMBER ... ;` into a `Family` node."""
        if (
            (tkn := self.expect(lx.IDENTIFIER))
            and tkn.text == "family"
            and self.expect(lx.LPAREN)
            and (tkn := self.expect(lx.IDENTIFIER))
            and self.expect(lx.RPAREN)
            and self.expect(lx.EQUALS)
            and (members := self.members())
            and self.expect(lx.SEMI)
        ):
            return Family(tkn.text, members)
        return None

    def members(self) -> list[str] | None:
        """Parse a comma-separated list of identifiers; None if there are none."""
        # IDENTIFIER (, IDENTIFIER)*
        def member() -> str | None:
            tkn = self.expect(lx.IDENTIFIER)
            return tkn.text if tkn else None

        return self._comma_separated(member)

    @contextual
    def block(self) -> Block | None:
        """Parse the code block of an instruction.

        Returns None when `c_blob()` collects no tokens, so that the caller
        can report the missing block; a `Block` instance is always truthy,
        even when its token list is empty.
        """
        tokens = self.c_blob()
        if not tokens:
            return None
        return Block(tokens)

    def c_blob(self) -> list[lx.Token]:
        """Collect tokens up to the bracket that closes the outermost group.

        Tokens are fetched with `next(raw=True)`.  Opening brackets
        (`{`, `(`, `[`) raise the nesting level and closing ones lower it;
        collection stops at the closing bracket that brings the level to
        zero or below, or when no token is left.  That final closing
        bracket is consumed but not included in the returned list.
        """
        tokens: list[lx.Token] = []
        level = 0
        while tkn := self.next(raw=True):
            if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):
                level += 1
            elif tkn.kind in (lx.RBRACE, lx.RPAREN, lx.RBRACKET):
                level -= 1
                if level <= 0:
                    break
            tokens.append(tkn)
        return tokens
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test.  Usage:
    #   parser.py              parse a built-in sample string
    #   parser.py -c SOURCE    parse SOURCE given on the command line
    #   parser.py FILE         parse the part of FILE between the
    #                          "// BEGIN BYTECODES //" and
    #                          "// END BYTECODES //" marker lines
    import sys

    args = sys.argv[1:]
    if not args:
        filename = None
        src = "if (x) { x.foo; // comment\n}"
    elif args[0] == "-c" and len(args) > 1:
        filename = None
        src = args[1]
    else:
        filename = args[0]
        with open(filename) as f:
            src = f.read()
        srclines = src.splitlines()
        # Both markers must be present as whole lines; index() raises
        # ValueError otherwise.
        begin = srclines.index("// BEGIN BYTECODES //")
        end = srclines.index("// END BYTECODES //")
        src = "\n".join(srclines[begin+1 : end])

    # Parse a single instruction definition and show the resulting node.
    parser = Parser(src, filename)
    x = parser.inst_def()
    print(x)
|