Closes #14462: allow any valid Python identifier in sre group names, as documented.
This commit is contained in:
parent
991fc5736e
commit
1d472b74cb
|
@@ -225,13 +225,25 @@ class Tokenizer:
|
||||||
def seek(self, index):
|
def seek(self, index):
|
||||||
self.index, self.next = index
|
self.index, self.next = index
|
||||||
|
|
||||||
|
# The following three functions are not used in this module anymore, but we keep
|
||||||
|
# them here (with DeprecationWarnings) for backwards compatibility.
|
||||||
|
|
||||||
def isident(char):
|
def isident(char):
|
||||||
|
import warnings
|
||||||
|
warnings.warn('sre_parse.isident() will be removed in 3.5',
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
|
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
|
||||||
|
|
||||||
def isdigit(char):
|
def isdigit(char):
|
||||||
|
import warnings
|
||||||
|
warnings.warn('sre_parse.isdigit() will be removed in 3.5',
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
return "0" <= char <= "9"
|
return "0" <= char <= "9"
|
||||||
|
|
||||||
def isname(name):
|
def isname(name):
|
||||||
|
import warnings
|
||||||
|
warnings.warn('sre_parse.isname() will be removed in 3.5',
|
||||||
|
DeprecationWarning, stacklevel=2)
|
||||||
# check that group name is a valid string
|
# check that group name is a valid string
|
||||||
if not isident(name[0]):
|
if not isident(name[0]):
|
||||||
return False
|
return False
|
||||||
|
@@ -587,7 +599,7 @@ def _parse(source, state):
|
||||||
group = 1
|
group = 1
|
||||||
if not name:
|
if not name:
|
||||||
raise error("missing group name")
|
raise error("missing group name")
|
||||||
if not isname(name):
|
if not name.isidentifier():
|
||||||
raise error("bad character in group name")
|
raise error("bad character in group name")
|
||||||
elif sourcematch("="):
|
elif sourcematch("="):
|
||||||
# named backreference
|
# named backreference
|
||||||
|
@@ -601,7 +613,7 @@ def _parse(source, state):
|
||||||
name = name + char
|
name = name + char
|
||||||
if not name:
|
if not name:
|
||||||
raise error("missing group name")
|
raise error("missing group name")
|
||||||
if not isname(name):
|
if not name.isidentifier():
|
||||||
raise error("bad character in group name")
|
raise error("bad character in group name")
|
||||||
gid = state.groupdict.get(name)
|
gid = state.groupdict.get(name)
|
||||||
if gid is None:
|
if gid is None:
|
||||||
|
@@ -655,7 +667,7 @@ def _parse(source, state):
|
||||||
group = 2
|
group = 2
|
||||||
if not condname:
|
if not condname:
|
||||||
raise error("missing group name")
|
raise error("missing group name")
|
||||||
if isname(condname):
|
if condname.isidentifier():
|
||||||
condgroup = state.groupdict.get(condname)
|
condgroup = state.groupdict.get(condname)
|
||||||
if condgroup is None:
|
if condgroup is None:
|
||||||
raise error("unknown group name")
|
raise error("unknown group name")
|
||||||
|
@@ -792,7 +804,7 @@ def parse_template(source, pattern):
|
||||||
if index < 0:
|
if index < 0:
|
||||||
raise error("negative group number")
|
raise error("negative group number")
|
||||||
except ValueError:
|
except ValueError:
|
||||||
if not isname(name):
|
if not name.isidentifier():
|
||||||
raise error("bad character in group name")
|
raise error("bad character in group name")
|
||||||
try:
|
try:
|
||||||
index = pattern.groupindex[name]
|
index = pattern.groupindex[name]
|
||||||
|
|
|
@@ -180,6 +180,10 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertRaises(re.error, re.compile, '(?(a))')
|
self.assertRaises(re.error, re.compile, '(?(a))')
|
||||||
self.assertRaises(re.error, re.compile, '(?(1a))')
|
self.assertRaises(re.error, re.compile, '(?(1a))')
|
||||||
self.assertRaises(re.error, re.compile, '(?(a.))')
|
self.assertRaises(re.error, re.compile, '(?(a.))')
|
||||||
|
# New valid/invalid identifiers in Python 3
|
||||||
|
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
||||||
|
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
||||||
|
self.assertRaises(re.error, re.compile, '(?P<©>x)')
|
||||||
|
|
||||||
def test_symbolic_refs(self):
|
def test_symbolic_refs(self):
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
|
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
|
||||||
|
@@ -192,6 +196,10 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
|
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
|
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
|
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
|
||||||
|
# New valid/invalid identifiers in Python 3
|
||||||
|
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
||||||
|
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
||||||
|
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
|
||||||
|
|
||||||
def test_re_subn(self):
|
def test_re_subn(self):
|
||||||
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
|
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
|
||||||
|
|
Loading…
Reference in New Issue