gh-125588: Teach the python PEG generator the new f-string tokens (#125589)

Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
This commit is contained in:
Pablo Galindo Salgado 2024-10-29 23:40:12 +00:00 committed by GitHub
parent 0e45b1fd0f
commit 9dfef4e5f4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 61 additions and 2 deletions

View File

@ -506,6 +506,14 @@ class TestPegen(unittest.TestCase):
val = eval(code) val = eval(code)
self.assertEqual(val, 3.0) self.assertEqual(val, 3.0)
# Checks that a grammar action may be written as an f-string: the action
# below interpolates the matched NAME token's text after "name -> ".
def test_f_string_in_action(self) -> None:
grammar = """
start: n=NAME NEWLINE? $ { f"name -> {n.string}" }
"""
# Presumably make_parser turns the grammar text into a parser class and
# parse_string runs that parser over the input "a" -- confirm against the
# test-support helpers, which are defined outside this view.
parser_class = make_parser(grammar)
node = parse_string("a", parser_class)
# The result is expected to be the string produced by the f-string action;
# strip() makes the comparison ignore leading/trailing whitespace.
self.assertEqual(node.strip(), "name -> a")
def test_nullable(self) -> None: def test_nullable(self) -> None:
grammar_source = """ grammar_source = """
start: sign NUMBER start: sign NUMBER

View File

@ -0,0 +1,2 @@
The Python PEG generator can now use f-strings in the grammar actions. Patch
by Pablo Galindo.

View File

@ -575,7 +575,7 @@ class GeneratedParser(Parser):
@memoize @memoize
def target_atom(self) -> Optional[str]: def target_atom(self) -> Optional[str]:
# target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" | ":" | !"}" !"]" OP # target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | FSTRING_START | FSTRING_MIDDLE | FSTRING_END | "?" | ":" | !"}" !"]" OP
mark = self._mark() mark = self._mark()
cut = False cut = False
if ( if (
@ -625,6 +625,21 @@ class GeneratedParser(Parser):
): ):
return string . string return string . string
self._reset(mark) self._reset(mark)
if (
(fstring_start := self.fstring_start())
):
return fstring_start . string
self._reset(mark)
if (
(fstring_middle := self.fstring_middle())
):
return fstring_middle . string
self._reset(mark)
if (
(fstring_end := self.fstring_end())
):
return fstring_end . string
self._reset(mark)
if ( if (
(literal := self.expect("?")) (literal := self.expect("?"))
): ):

View File

@ -126,6 +126,9 @@ target_atom[str]:
| NAME { name.string } | NAME { name.string }
| NUMBER { number.string } | NUMBER { number.string }
| STRING { string.string } | STRING { string.string }
| FSTRING_START { fstring_start.string }
| FSTRING_MIDDLE { fstring_middle.string }
| FSTRING_END { fstring_end.string }
| "?" { "?" } | "?" { "?" }
| ":" { ":" } | ":" { ":" }
| !"}" !"]" OP { op.string } | !"}" !"]" OP { op.string }

View File

@ -205,6 +205,36 @@ class Parser:
return self._tokenizer.getnext() return self._tokenizer.getnext()
return None return None
@memoize
def fstring_start(self) -> Optional[tokenize.TokenInfo]:
    """Consume and return the next token if it is an FSTRING_START token.

    Returns:
        The token obtained from ``self._tokenizer.getnext()`` when the
        peeked token has type ``token.FSTRING_START``; ``None`` when the
        peeked token has another type, or when the running interpreter's
        ``token`` module does not define ``FSTRING_START`` at all.
    """
    # The explicit default matters: without it getattr() raises
    # AttributeError when the constant is missing, so the guard below
    # could never take effect.
    FSTRING_START = getattr(token, "FSTRING_START", None)
    if FSTRING_START is None:
        return None
    tok = self._tokenizer.peek()
    if tok.type == FSTRING_START:
        return self._tokenizer.getnext()
    return None
@memoize
def fstring_middle(self) -> Optional[tokenize.TokenInfo]:
    """Consume and return the next token if it is an FSTRING_MIDDLE token.

    Returns:
        The token obtained from ``self._tokenizer.getnext()`` when the
        peeked token has type ``token.FSTRING_MIDDLE``; ``None`` when the
        peeked token has another type, or when the running interpreter's
        ``token`` module does not define ``FSTRING_MIDDLE`` at all.
    """
    # The explicit default matters: without it getattr() raises
    # AttributeError when the constant is missing, so the guard below
    # could never take effect.
    FSTRING_MIDDLE = getattr(token, "FSTRING_MIDDLE", None)
    if FSTRING_MIDDLE is None:
        return None
    tok = self._tokenizer.peek()
    if tok.type == FSTRING_MIDDLE:
        return self._tokenizer.getnext()
    return None
@memoize
def fstring_end(self) -> Optional[tokenize.TokenInfo]:
    """Consume and return the next token if it is an FSTRING_END token.

    Returns:
        The token obtained from ``self._tokenizer.getnext()`` when the
        peeked token has type ``token.FSTRING_END``; ``None`` when the
        peeked token has another type, or when the running interpreter's
        ``token`` module does not define ``FSTRING_END`` at all.
    """
    # The explicit default matters: without it getattr() raises
    # AttributeError when the constant is missing, so the guard below
    # could never take effect.
    FSTRING_END = getattr(token, "FSTRING_END", None)
    if FSTRING_END is None:
        return None
    tok = self._tokenizer.peek()
    if tok.type == FSTRING_END:
        return self._tokenizer.getnext()
    return None
@memoize @memoize
def op(self) -> Optional[tokenize.TokenInfo]: def op(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek() tok = self._tokenizer.peek()

View File

@ -99,7 +99,8 @@ class PythonCallMakerVisitor(GrammarVisitor):
name = node.value name = node.value
if name == "SOFT_KEYWORD": if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()" return "soft_keyword", "self.soft_keyword()"
if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"): if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT",
"FSTRING_END", "FSTRING_MIDDLE", "FSTRING_START"):
name = name.lower() name = name.lower()
return name, f"self.{name}()" return name, f"self.{name}()"
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER"): if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER"):