Closes #14462: allow any valid Python identifier in sre group names, as documented.
This commit is contained in:
parent
991fc5736e
commit
1d472b74cb
|
@ -225,13 +225,25 @@ class Tokenizer:
|
|||
def seek(self, index):
    """Restore a saved position.

    *index* is unpacked into exactly two values; the first is stored
    on ``self.index`` and the second on ``self.next``.
    """
    # Unpack first so a malformed argument fails before any attribute
    # is modified, exactly as a single tuple assignment would.
    position, lookahead = index
    self.index = position
    self.next = lookahead
|
||||
|
||||
# The following three functions are not used in this module anymore, but we keep
|
||||
# them here (with DeprecationWarnings) for backwards compatibility.
|
||||
|
||||
def isident(char):
    """Return True if *char* is an ASCII letter or an underscore.

    Deprecated: every call emits a DeprecationWarning attributed to
    the caller (``stacklevel=2``).
    """
    import warnings
    warnings.warn('sre_parse.isident() will be removed in 3.5',
                  DeprecationWarning, stacklevel=2)
    # Checks run in the same order as a single chained ``or`` expression.
    if "a" <= char <= "z":
        return True
    if "A" <= char <= "Z":
        return True
    return char == "_"
|
||||
|
||||
def isdigit(char):
    """Return True if *char* lies between "0" and "9" inclusive.

    Deprecated: every call emits a DeprecationWarning attributed to
    the caller (``stacklevel=2``).
    """
    import warnings
    warnings.warn('sre_parse.isdigit() will be removed in 3.5',
                  DeprecationWarning, stacklevel=2)
    lowest, highest = "0", "9"
    return lowest <= char <= highest
|
||||
|
||||
def isname(name):
|
||||
import warnings
|
||||
warnings.warn('sre_parse.isname() will be removed in 3.5',
|
||||
DeprecationWarning, stacklevel=2)
|
||||
# check that group name is a valid string
|
||||
if not isident(name[0]):
|
||||
return False
|
||||
|
@ -587,7 +599,7 @@ def _parse(source, state):
|
|||
group = 1
|
||||
if not name:
|
||||
raise error("missing group name")
|
||||
if not isname(name):
|
||||
if not name.isidentifier():
|
||||
raise error("bad character in group name")
|
||||
elif sourcematch("="):
|
||||
# named backreference
|
||||
|
@ -601,7 +613,7 @@ def _parse(source, state):
|
|||
name = name + char
|
||||
if not name:
|
||||
raise error("missing group name")
|
||||
if not isname(name):
|
||||
if not name.isidentifier():
|
||||
raise error("bad character in group name")
|
||||
gid = state.groupdict.get(name)
|
||||
if gid is None:
|
||||
|
@ -655,7 +667,7 @@ def _parse(source, state):
|
|||
group = 2
|
||||
if not condname:
|
||||
raise error("missing group name")
|
||||
if isname(condname):
|
||||
if condname.isidentifier():
|
||||
condgroup = state.groupdict.get(condname)
|
||||
if condgroup is None:
|
||||
raise error("unknown group name")
|
||||
|
@ -792,7 +804,7 @@ def parse_template(source, pattern):
|
|||
if index < 0:
|
||||
raise error("negative group number")
|
||||
except ValueError:
|
||||
if not isname(name):
|
||||
if not name.isidentifier():
|
||||
raise error("bad character in group name")
|
||||
try:
|
||||
index = pattern.groupindex[name]
|
||||
|
|
|
@ -180,6 +180,10 @@ class ReTests(unittest.TestCase):
|
|||
self.assertRaises(re.error, re.compile, '(?(a))')
|
||||
self.assertRaises(re.error, re.compile, '(?(1a))')
|
||||
self.assertRaises(re.error, re.compile, '(?(a.))')
|
||||
# New valid/invalid identifiers in Python 3
|
||||
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
||||
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
||||
self.assertRaises(re.error, re.compile, '(?P<©>x)')
|
||||
|
||||
def test_symbolic_refs(self):
|
||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
|
||||
|
@ -192,6 +196,10 @@ class ReTests(unittest.TestCase):
|
|||
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
|
||||
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
|
||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
|
||||
# New valid/invalid identifiers in Python 3
|
||||
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
||||
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
|
||||
|
||||
def test_re_subn(self):
|
||||
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
|
||||
|
|
Loading…
Reference in New Issue