from pygments.lexer import RegexLexer, bygroups, include from pygments.token import Comment, Keyword, Name, Operator, Punctuation, Text class PEGLexer(RegexLexer): """Pygments Lexer for PEG grammar (.gram) files This lexer strips the following elements from the grammar: - Meta-tags - Variable assignments - Actions - Lookaheads - Rule types - Rule options - Rules named `invalid_*` or `incorrect_*` """ name = "PEG" aliases = ["peg"] filenames = ["*.gram"] _name = r"([^\W\d]\w*)" _text_ws = r"(\s*)" tokens = { "ws": [(r"\n", Text), (r"\s+", Text), (r"#.*$", Comment.Singleline),], "lookaheads": [ # Forced tokens (r"(&&)(?=\w+\s?)", bygroups(None)), (r"(&&)(?='.+'\s?)", bygroups(None)), (r'(&&)(?=".+"\s?)', bygroups(None)), (r"(&&)(?=\(.+\)\s?)", bygroups(None)), (r"(?<=\|\s)(&\w+\s?)", bygroups(None)), (r"(?<=\|\s)(&'.+'\s?)", bygroups(None)), (r'(?<=\|\s)(&".+"\s?)', bygroups(None)), (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)), ], "metas": [ (r"(@\w+ '''(.|\n)+?''')", bygroups(None)), (r"^(@.*)$", bygroups(None)), ], "actions": [ (r"{(.|\n)+?}", bygroups(None)), ], "strings": [ (r"'\w+?'", Keyword), (r'"\w+?"', Keyword), (r"'\W+?'", Text), (r'"\W+?"', Text), ], "variables": [ (_name + _text_ws + "(=)", bygroups(None, None, None),), (_name + _text_ws + r"(\[[\w\d_\*]+?\])" + _text_ws + "(=)", bygroups(None, None, None, None, None),), ], "invalids": [ (r"^(\s+\|\s+.*invalid_\w+.*\n)", bygroups(None)), (r"^(\s+\|\s+.*incorrect_\w+.*\n)", bygroups(None)), (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None),), ], "root": [ include("invalids"), include("ws"), include("lookaheads"), include("metas"), include("actions"), include("strings"), include("variables"), (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text,), ( r"^\s*" + _name + r"\s*" + r"(\[.*\])?" + r"\s*" + r"(\(.+\))?" + r"\s*(:)", bygroups(Name.Function, None, None, Punctuation), ), (_name, Name.Function), (r"[\||\.|\+|\*|\?]", Operator), (r"{|}|\(|\)|\[|\]", Punctuation), (r".", Text), ], }