gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665)

re.error is now raised instead of TypeError.
This commit is contained in:
Serhiy Storchaka 2022-04-22 18:35:28 +03:00 committed by GitHub
parent 2f233fceae
commit 6ccfa31421
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 9 additions and 2 deletions

View File

@ -333,7 +333,7 @@ def _class_escape(source, escape):
charname = source.getuntil('}', 'character name')
try:
c = ord(unicodedata.lookup(charname))
except KeyError:
except (KeyError, TypeError):
raise source.error("undefined character name %r" % charname,
len(charname) + len(r'\N{}')) from None
return LITERAL, c
@ -393,7 +393,7 @@ def _escape(source, escape, state):
charname = source.getuntil('}', 'character name')
try:
c = ord(unicodedata.lookup(charname))
except KeyError:
except (KeyError, TypeError):
raise source.error("undefined character name %r" % charname,
len(charname) + len(r'\N{}')) from None
return LITERAL, c

View File

@ -772,6 +772,10 @@ class ReTests(unittest.TestCase):
"undefined character name 'SPAM'", 0)
self.checkPatternError(r'[\N{SPAM}]',
"undefined character name 'SPAM'", 1)
self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
"undefined character name 'KEYCAP NUMBER SIGN'", 0)
self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
"undefined character name 'KEYCAP NUMBER SIGN'", 1)
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)

View File

@ -0,0 +1,3 @@
Parsing ``\N`` escapes of Unicode Named Character Sequences in a
:mod:`regular expression <re>` now raises :exc:`re.error` instead of
``TypeError``.