diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 3d38673c322..ab37fd3fe2f 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -395,7 +395,7 @@ def _escape(source, escape, state):
len(escape))
state.checklookbehindgroup(group, source)
return GROUPREF, group
- raise source.error("invalid group reference", len(escape))
+ raise source.error("invalid group reference %d" % group, len(escape) - 1)
if len(escape) == 2:
if c in ASCIILETTERS:
raise source.error("bad escape %s" % escape, len(escape))
@@ -725,8 +725,8 @@ def _parse(source, state, verbose):
raise source.error("bad group number",
len(condname) + 1)
if condgroup >= MAXGROUPS:
- raise source.error("invalid group reference",
- len(condname) + 1)
+ msg = "invalid group reference %d" % condgroup
+ raise source.error(msg, len(condname) + 1)
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
@@ -883,7 +883,9 @@ def parse_template(source, pattern):
literals = []
literal = []
lappend = literal.append
- def addgroup(index):
+ def addgroup(index, pos):
+ if index > pattern.groups:
+ raise s.error("invalid group reference %d" % index, pos)
if literal:
literals.append(''.join(literal))
del literal[:]
@@ -916,9 +918,9 @@ def parse_template(source, pattern):
raise s.error("bad character in group name %r" % name,
len(name) + 1) from None
if index >= MAXGROUPS:
- raise s.error("invalid group reference",
+ raise s.error("invalid group reference %d" % index,
len(name) + 1)
- addgroup(index)
+ addgroup(index, len(name) + 1)
elif c == "0":
if s.next in OCTDIGITS:
this += sget()
@@ -939,7 +941,7 @@ def parse_template(source, pattern):
'range 0-0o377' % this, len(this))
lappend(chr(c))
if not isoctal:
- addgroup(int(this[1:]))
+ addgroup(int(this[1:]), len(this) - 1)
else:
try:
this = chr(ESCAPES[this][1])
@@ -966,5 +968,5 @@ def expand_template(template, match):
for index, group in groups:
literals[index] = g(group) or empty
except IndexError:
- raise error("invalid group reference")
+ raise error("invalid group reference %d" % index)
return empty.join(literals)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 4cdc59127a1..3bd6d7b461c 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -5,7 +5,6 @@ import locale
import re
from re import Scanner
import sre_compile
-import sre_constants
import sys
import string
import traceback
@@ -186,18 +185,19 @@ class ReTests(unittest.TestCase):
r'octal escape value \777 outside of '
r'range 0-0o377', 0)
- self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
- self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
- self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
- self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
+ self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
+ self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
+ self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
+ self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
+ self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
+ self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
+ self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
+ self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
+ self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
+ self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
+ self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
+ self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
+ self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)
# in python2.3 (etc), these loop endlessly in sre_parser.py
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
@@ -271,9 +271,9 @@ class ReTests(unittest.TestCase):
self.checkTemplateError('(?Px)', r'\g<1a1>', 'xx',
"bad character in group name '1a1'", 3)
self.checkTemplateError('(?Px)', r'\g<2>', 'xx',
- 'invalid group reference')
+ 'invalid group reference 2', 3)
self.checkTemplateError('(?Px)', r'\2', 'xx',
- 'invalid group reference')
+ 'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
re.sub('(?Px)', r'\g', 'xx')
self.assertEqual(re.sub('(?Px)|(?Py)', r'\g', 'xx'), '')
@@ -558,10 +558,11 @@ class ReTests(unittest.TestCase):
'two branches', 10)
def test_re_groupref_overflow(self):
- self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx',
- 'invalid group reference', 3)
- self.checkPatternError(r'(?P)(?(%d))' % sre_constants.MAXGROUPS,
- 'invalid group reference', 10)
+ from sre_constants import MAXGROUPS
+ self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
+ 'invalid group reference %d' % MAXGROUPS, 3)
+ self.checkPatternError(r'(?P)(?(%d))' % MAXGROUPS,
+ 'invalid group reference %d' % MAXGROUPS, 10)
def test_re_groupref(self):
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
@@ -1007,7 +1008,7 @@ class ReTests(unittest.TestCase):
self.checkPatternError(r"\567",
r'octal escape value \567 outside of '
r'range 0-0o377', 0)
- self.checkPatternError(r"\911", 'invalid group reference', 0)
+ self.checkPatternError(r"\911", 'invalid group reference 91', 1)
self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
@@ -1061,7 +1062,7 @@ class ReTests(unittest.TestCase):
self.checkPatternError(br"\567",
r'octal escape value \567 outside of '
r'range 0-0o377', 0)
- self.checkPatternError(br"\911", 'invalid group reference', 0)
+ self.checkPatternError(br"\911", 'invalid group reference 91', 1)
self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
diff --git a/Misc/NEWS b/Misc/NEWS
index b29d0620a77..0021b19ac41 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,12 @@ Core and Builtins
Library
-------
+- Issue #25953: re.sub() now raises an error for an invalid numerical group
+ reference in the replacement template even if the pattern is not found in
+ the string. The error message for an invalid group reference now includes
+ the group index and the position of the reference.
+ Based on patch by SilentGhost.
+
- Issue #18219: Optimize csv.DictWriter for large number of columns.
Patch by Mariatta Wijaya.