From abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9 Mon Sep 17 00:00:00 2001 From: SKO <41810398+uyw4687@users.noreply.github.com> Date: Wed, 16 Aug 2023 16:43:45 +0900 Subject: [PATCH] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612) Restore the global Input Stream pointer after trying to match a sub-pattern. Co-authored-by: Ma Lin --- Lib/re/_compiler.py | 7 ------- Lib/test/test_re.py | 14 +++++++++++--- .../2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst | 2 ++ Modules/_sre/sre_lib.h | 4 ++++ 4 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index f5fd160ba00..d0a4c55caf6 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -100,13 +100,6 @@ def _compile(code, pattern, flags): emit(ANY_ALL) else: emit(ANY) - elif op is POSSESSIVE_REPEAT: - # gh-106052: Possessive quantifiers do not work when the - # subpattern contains backtracking, i.e. "(?:ab?c)*+". - # Implement it as equivalent greedy qualifier in atomic group. 
- p = [(MAX_REPEAT, av)] - p = [(ATOMIC_GROUP, p)] - _compile(code, p, flags) elif op in REPEATING_CODES: if _simple(av[2]): emit(REPEATING_CODES[op][2]) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index bf3698ac78a..042f97f57ec 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2342,7 +2342,17 @@ class ReTests(unittest.TestCase): self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer self.assertTrue(re.fullmatch(r'(?s:(?=(?P.*?\.))(?P=g0).*)\Z', "a.txt")) - def test_bug_gh106052(self): + def test_bug_gh100061(self): + # gh-100061 + self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2)) + # gh-106052 self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2)) self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2)) @@ -2451,7 +2461,6 @@ ATOMIC_GROUP 17: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat_one(self): self.assertEqual(get_debug_out(r'a?+'), '''\ POSSESSIVE_REPEAT 0 1 @@ -2464,7 +2473,6 @@ POSSESSIVE_REPEAT 0 1 12: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat(self): self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ POSSESSIVE_REPEAT 0 1 diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst new file mode 100644 index 00000000000..dfed34f6ae9 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst @@ -0,0 +1,2 @@ +Fix a bug that causes wrong matches for regular expressions with possessive +quantifier. diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index c1a774f6909..ae80009fd63 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1336,6 +1336,10 @@ dispatch: MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); + /* Restore the global Input Stream pointer + since it can change after jumps. */ + state->ptr = ptr; + /* We have sufficient matches, so exit loop. */ break; }