diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 3d38673c322..ab37fd3fe2f 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -395,7 +395,7 @@ def _escape(source, escape, state): len(escape)) state.checklookbehindgroup(group, source) return GROUPREF, group - raise source.error("invalid group reference", len(escape)) + raise source.error("invalid group reference %d" % group, len(escape) - 1) if len(escape) == 2: if c in ASCIILETTERS: raise source.error("bad escape %s" % escape, len(escape)) @@ -725,8 +725,8 @@ def _parse(source, state, verbose): raise source.error("bad group number", len(condname) + 1) if condgroup >= MAXGROUPS: - raise source.error("invalid group reference", - len(condname) + 1) + msg = "invalid group reference %d" % condgroup + raise source.error(msg, len(condname) + 1) state.checklookbehindgroup(condgroup, source) elif char in FLAGS or char == "-": # flags @@ -883,7 +883,9 @@ def parse_template(source, pattern): literals = [] literal = [] lappend = literal.append - def addgroup(index): + def addgroup(index, pos): + if index > pattern.groups: + raise s.error("invalid group reference %d" % index, pos) if literal: literals.append(''.join(literal)) del literal[:] @@ -916,9 +918,9 @@ def parse_template(source, pattern): raise s.error("bad character in group name %r" % name, len(name) + 1) from None if index >= MAXGROUPS: - raise s.error("invalid group reference", + raise s.error("invalid group reference %d" % index, len(name) + 1) - addgroup(index) + addgroup(index, len(name) + 1) elif c == "0": if s.next in OCTDIGITS: this += sget() @@ -939,7 +941,7 @@ def parse_template(source, pattern): 'range 0-0o377' % this, len(this)) lappend(chr(c)) if not isoctal: - addgroup(int(this[1:])) + addgroup(int(this[1:]), len(this) - 1) else: try: this = chr(ESCAPES[this][1]) @@ -966,5 +968,5 @@ def expand_template(template, match): for index, group in groups: literals[index] = g(group) or empty except IndexError: - raise error("invalid group reference") + raise error("invalid group reference %d" % index) return empty.join(literals) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 4cdc59127a1..3bd6d7b461c 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -5,7 +5,6 @@ import locale import re from re import Scanner import sre_compile -import sre_constants import sys import string import traceback @@ -186,18 +185,19 @@ class ReTests(unittest.TestCase): r'octal escape value \777 outside of ' r'range 0-0o377', 0) - self.checkTemplateError('x', r'\1', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\8', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\9', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\11', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\18', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\90', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\99', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8' - self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference') - self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1' - self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0' + self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1) + self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1) + self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1) + self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1) + self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1) + self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1) + self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1) + self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1) + self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1) + self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1) + self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1) # in python2.3 (etc), these loop endlessly in sre_parser.py self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') @@ -271,9 +271,9 @@ class ReTests(unittest.TestCase): self.checkTemplateError('(?Px)', r'\g<1a1>', 'xx', "bad character in group name '1a1'", 3) self.checkTemplateError('(?Px)', r'\g<2>', 'xx', - 'invalid group reference') + 'invalid group reference 2', 3) self.checkTemplateError('(?Px)', r'\2', 'xx', - 'invalid group reference') + 'invalid group reference 2', 1) with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"): re.sub('(?Px)', r'\g', 'xx') self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '') @@ -558,10 +558,11 @@ class ReTests(unittest.TestCase): 'two branches', 10) def test_re_groupref_overflow(self): - self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx', - 'invalid group reference', 3) - self.checkPatternError(r'(?P)(?(%d))' % sre_constants.MAXGROUPS, - 'invalid group reference', 10) + from sre_constants import MAXGROUPS + self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx', + 'invalid group reference %d' % MAXGROUPS, 3) + self.checkPatternError(r'(?P)(?(%d))' % MAXGROUPS, + 'invalid group reference %d' % MAXGROUPS, 10) def test_re_groupref(self): self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), @@ -1007,7 +1008,7 @@ class ReTests(unittest.TestCase): self.checkPatternError(r"\567", r'octal escape value \567 outside of ' r'range 0-0o377', 0) - self.checkPatternError(r"\911", 'invalid group reference', 0) + self.checkPatternError(r"\911", 'invalid group reference 91', 1) self.checkPatternError(r"\x1", r'incomplete escape \x1', 0) self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0) self.checkPatternError(r"\u123", r'incomplete escape \u123', 0) @@ -1061,7 +1062,7 @@ class ReTests(unittest.TestCase): self.checkPatternError(br"\567", r'octal escape value \567 outside of ' r'range 0-0o377', 0) - self.checkPatternError(br"\911", 'invalid group reference', 0) + self.checkPatternError(br"\911", 'invalid group reference 91', 1) self.checkPatternError(br"\x1", r'incomplete escape \x1', 0) self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0) diff --git a/Misc/NEWS b/Misc/NEWS index b29d0620a77..0021b19ac41 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -23,6 +23,12 @@ Core and Builtins Library ------- +- Issue #25953: re.sub() now raises an error for invalid numerical group + reference in replacement template even if the pattern is not found in + the string. Error message for invalid group reference now includes the + group index and the position of the reference. + Based on patch by SilentGhost. + - Issue #18219: Optimize csv.DictWriter for large number of columns. Patch by Mariatta Wijaya.