Closes #14462: allow any valid Python identifier in sre group names, as documented.

This commit is contained in:
Georg Brandl 2013-04-14 11:40:00 +02:00
parent 991fc5736e
commit 1d472b74cb
2 changed files with 24 additions and 4 deletions

View File

@@ -225,13 +225,25 @@ class Tokenizer:
def seek(self, index): def seek(self, index):
self.index, self.next = index self.index, self.next = index
# The following three functions are not used in this module anymore, but we keep
# them here (with DeprecationWarnings) for backwards compatibility.
def isident(char): def isident(char):
import warnings
warnings.warn('sre_parse.isident() will be removed in 3.5',
DeprecationWarning, stacklevel=2)
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
def isdigit(char): def isdigit(char):
import warnings
warnings.warn('sre_parse.isdigit() will be removed in 3.5',
DeprecationWarning, stacklevel=2)
return "0" <= char <= "9" return "0" <= char <= "9"
def isname(name): def isname(name):
import warnings
warnings.warn('sre_parse.isname() will be removed in 3.5',
DeprecationWarning, stacklevel=2)
# check that group name is a valid string # check that group name is a valid string
if not isident(name[0]): if not isident(name[0]):
return False return False
@@ -587,7 +599,7 @@ def _parse(source, state):
group = 1 group = 1
if not name: if not name:
raise error("missing group name") raise error("missing group name")
if not isname(name): if not name.isidentifier():
raise error("bad character in group name") raise error("bad character in group name")
elif sourcematch("="): elif sourcematch("="):
# named backreference # named backreference
@@ -601,7 +613,7 @@ def _parse(source, state):
name = name + char name = name + char
if not name: if not name:
raise error("missing group name") raise error("missing group name")
if not isname(name): if not name.isidentifier():
raise error("bad character in group name") raise error("bad character in group name")
gid = state.groupdict.get(name) gid = state.groupdict.get(name)
if gid is None: if gid is None:
@@ -655,7 +667,7 @@ def _parse(source, state):
group = 2 group = 2
if not condname: if not condname:
raise error("missing group name") raise error("missing group name")
if isname(condname): if condname.isidentifier():
condgroup = state.groupdict.get(condname) condgroup = state.groupdict.get(condname)
if condgroup is None: if condgroup is None:
raise error("unknown group name") raise error("unknown group name")
@@ -792,7 +804,7 @@ def parse_template(source, pattern):
if index < 0: if index < 0:
raise error("negative group number") raise error("negative group number")
except ValueError: except ValueError:
if not isname(name): if not name.isidentifier():
raise error("bad character in group name") raise error("bad character in group name")
try: try:
index = pattern.groupindex[name] index = pattern.groupindex[name]

View File

@@ -180,6 +180,10 @@ class ReTests(unittest.TestCase):
self.assertRaises(re.error, re.compile, '(?(a))') self.assertRaises(re.error, re.compile, '(?(a))')
self.assertRaises(re.error, re.compile, '(?(1a))') self.assertRaises(re.error, re.compile, '(?(1a))')
self.assertRaises(re.error, re.compile, '(?(a.))') self.assertRaises(re.error, re.compile, '(?(a.))')
# New valid/invalid identifiers in Python 3
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
self.assertRaises(re.error, re.compile, '(?P<©>x)')
def test_symbolic_refs(self): def test_symbolic_refs(self):
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
@@ -192,6 +196,10 @@ class ReTests(unittest.TestCase):
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
# New valid/invalid identifiers in Python 3
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
def test_re_subn(self): def test_re_subn(self):
self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))