Issue #22364: Improved some re error messages using regex for hints.
This commit is contained in:
parent
7c316a181a
commit
632a77e6a3
|
@ -286,7 +286,7 @@ def _compile(pattern, flags):
|
||||||
if isinstance(pattern, _pattern_type):
|
if isinstance(pattern, _pattern_type):
|
||||||
if flags:
|
if flags:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Cannot process flags argument with a compiled pattern")
|
"cannot process flags argument with a compiled pattern")
|
||||||
return pattern
|
return pattern
|
||||||
if not sre_compile.isstring(pattern):
|
if not sre_compile.isstring(pattern):
|
||||||
raise TypeError("first argument must be string or compiled pattern")
|
raise TypeError("first argument must be string or compiled pattern")
|
||||||
|
|
|
@ -113,7 +113,7 @@ def _compile(code, pattern, flags):
|
||||||
emit(ANY)
|
emit(ANY)
|
||||||
elif op in REPEATING_CODES:
|
elif op in REPEATING_CODES:
|
||||||
if flags & SRE_FLAG_TEMPLATE:
|
if flags & SRE_FLAG_TEMPLATE:
|
||||||
raise error("internal: unsupported template operator")
|
raise error("internal: unsupported template operator %r" % (op,))
|
||||||
elif _simple(av) and op is not REPEAT:
|
elif _simple(av) and op is not REPEAT:
|
||||||
if op is MAX_REPEAT:
|
if op is MAX_REPEAT:
|
||||||
emit(REPEAT_ONE)
|
emit(REPEAT_ONE)
|
||||||
|
@ -216,7 +216,7 @@ def _compile(code, pattern, flags):
|
||||||
else:
|
else:
|
||||||
code[skipyes] = _len(code) - skipyes + 1
|
code[skipyes] = _len(code) - skipyes + 1
|
||||||
else:
|
else:
|
||||||
raise ValueError("unsupported operand type", op)
|
raise error("internal: unsupported operand type %r" % (op,))
|
||||||
|
|
||||||
def _compile_charset(charset, flags, code, fixup=None, fixes=None):
|
def _compile_charset(charset, flags, code, fixup=None, fixes=None):
|
||||||
# compile charset subprogram
|
# compile charset subprogram
|
||||||
|
@ -242,7 +242,7 @@ def _compile_charset(charset, flags, code, fixup=None, fixes=None):
|
||||||
else:
|
else:
|
||||||
emit(av)
|
emit(av)
|
||||||
else:
|
else:
|
||||||
raise error("internal: unsupported set operator")
|
raise error("internal: unsupported set operator %r" % (op,))
|
||||||
emit(FAILURE)
|
emit(FAILURE)
|
||||||
|
|
||||||
def _optimize_charset(charset, fixup, fixes):
|
def _optimize_charset(charset, fixup, fixes):
|
||||||
|
|
227
Lib/sre_parse.py
227
Lib/sre_parse.py
|
@ -79,7 +79,7 @@ class Pattern:
|
||||||
gid = self.groups
|
gid = self.groups
|
||||||
self.subpatterns.append(None)
|
self.subpatterns.append(None)
|
||||||
if self.groups > MAXGROUPS:
|
if self.groups > MAXGROUPS:
|
||||||
raise error("groups number is too large")
|
raise error("too many groups")
|
||||||
if name is not None:
|
if name is not None:
|
||||||
ogid = self.groupdict.get(name, None)
|
ogid = self.groupdict.get(name, None)
|
||||||
if ogid is not None:
|
if ogid is not None:
|
||||||
|
@ -235,7 +235,7 @@ class Tokenizer:
|
||||||
try:
|
try:
|
||||||
char += self.decoded_string[index]
|
char += self.decoded_string[index]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
raise error("bogus escape (end of line)",
|
raise error("bad escape (end of pattern)",
|
||||||
self.string, len(self.string) - 1) from None
|
self.string, len(self.string) - 1) from None
|
||||||
self.index = index + 1
|
self.index = index + 1
|
||||||
self.next = char
|
self.next = char
|
||||||
|
@ -263,8 +263,13 @@ class Tokenizer:
|
||||||
c = self.next
|
c = self.next
|
||||||
self.__next()
|
self.__next()
|
||||||
if c is None:
|
if c is None:
|
||||||
raise self.error("unterminated name")
|
if not result:
|
||||||
|
raise self.error("missing group name")
|
||||||
|
raise self.error("missing %s, unterminated name" % terminator,
|
||||||
|
len(result))
|
||||||
if c == terminator:
|
if c == terminator:
|
||||||
|
if not result:
|
||||||
|
raise self.error("missing group name", 1)
|
||||||
break
|
break
|
||||||
result += c
|
result += c
|
||||||
return result
|
return result
|
||||||
|
@ -318,19 +323,19 @@ def _class_escape(source, escape):
|
||||||
# hexadecimal escape (exactly two digits)
|
# hexadecimal escape (exactly two digits)
|
||||||
escape += source.getwhile(2, HEXDIGITS)
|
escape += source.getwhile(2, HEXDIGITS)
|
||||||
if len(escape) != 4:
|
if len(escape) != 4:
|
||||||
raise ValueError
|
raise source.error("incomplete escape %s" % escape, len(escape))
|
||||||
return LITERAL, int(escape[2:], 16)
|
return LITERAL, int(escape[2:], 16)
|
||||||
elif c == "u" and source.istext:
|
elif c == "u" and source.istext:
|
||||||
# unicode escape (exactly four digits)
|
# unicode escape (exactly four digits)
|
||||||
escape += source.getwhile(4, HEXDIGITS)
|
escape += source.getwhile(4, HEXDIGITS)
|
||||||
if len(escape) != 6:
|
if len(escape) != 6:
|
||||||
raise ValueError
|
raise source.error("incomplete escape %s" % escape, len(escape))
|
||||||
return LITERAL, int(escape[2:], 16)
|
return LITERAL, int(escape[2:], 16)
|
||||||
elif c == "U" and source.istext:
|
elif c == "U" and source.istext:
|
||||||
# unicode escape (exactly eight digits)
|
# unicode escape (exactly eight digits)
|
||||||
escape += source.getwhile(8, HEXDIGITS)
|
escape += source.getwhile(8, HEXDIGITS)
|
||||||
if len(escape) != 10:
|
if len(escape) != 10:
|
||||||
raise ValueError
|
raise source.error("incomplete escape %s" % escape, len(escape))
|
||||||
c = int(escape[2:], 16)
|
c = int(escape[2:], 16)
|
||||||
chr(c) # raise ValueError for invalid code
|
chr(c) # raise ValueError for invalid code
|
||||||
return LITERAL, c
|
return LITERAL, c
|
||||||
|
@ -339,7 +344,7 @@ def _class_escape(source, escape):
|
||||||
escape += source.getwhile(2, OCTDIGITS)
|
escape += source.getwhile(2, OCTDIGITS)
|
||||||
c = int(escape[1:], 8)
|
c = int(escape[1:], 8)
|
||||||
if c > 0o377:
|
if c > 0o377:
|
||||||
raise source.error('octal escape value %r outside of '
|
raise source.error('octal escape value %s outside of '
|
||||||
'range 0-0o377' % escape, len(escape))
|
'range 0-0o377' % escape, len(escape))
|
||||||
return LITERAL, c
|
return LITERAL, c
|
||||||
elif c in DIGITS:
|
elif c in DIGITS:
|
||||||
|
@ -352,7 +357,7 @@ def _class_escape(source, escape):
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
raise source.error("bogus escape: %r" % escape, len(escape))
|
raise source.error("bad escape %s" % escape, len(escape))
|
||||||
|
|
||||||
def _escape(source, escape, state):
|
def _escape(source, escape, state):
|
||||||
# handle escape code in expression
|
# handle escape code in expression
|
||||||
|
@ -368,19 +373,19 @@ def _escape(source, escape, state):
|
||||||
# hexadecimal escape
|
# hexadecimal escape
|
||||||
escape += source.getwhile(2, HEXDIGITS)
|
escape += source.getwhile(2, HEXDIGITS)
|
||||||
if len(escape) != 4:
|
if len(escape) != 4:
|
||||||
raise ValueError
|
raise source.error("incomplete escape %s" % escape, len(escape))
|
||||||
return LITERAL, int(escape[2:], 16)
|
return LITERAL, int(escape[2:], 16)
|
||||||
elif c == "u" and source.istext:
|
elif c == "u" and source.istext:
|
||||||
# unicode escape (exactly four digits)
|
# unicode escape (exactly four digits)
|
||||||
escape += source.getwhile(4, HEXDIGITS)
|
escape += source.getwhile(4, HEXDIGITS)
|
||||||
if len(escape) != 6:
|
if len(escape) != 6:
|
||||||
raise ValueError
|
raise source.error("incomplete escape %s" % escape, len(escape))
|
||||||
return LITERAL, int(escape[2:], 16)
|
return LITERAL, int(escape[2:], 16)
|
||||||
elif c == "U" and source.istext:
|
elif c == "U" and source.istext:
|
||||||
# unicode escape (exactly eight digits)
|
# unicode escape (exactly eight digits)
|
||||||
escape += source.getwhile(8, HEXDIGITS)
|
escape += source.getwhile(8, HEXDIGITS)
|
||||||
if len(escape) != 10:
|
if len(escape) != 10:
|
||||||
raise ValueError
|
raise source.error("incomplete escape %s" % escape, len(escape))
|
||||||
c = int(escape[2:], 16)
|
c = int(escape[2:], 16)
|
||||||
chr(c) # raise ValueError for invalid code
|
chr(c) # raise ValueError for invalid code
|
||||||
return LITERAL, c
|
return LITERAL, c
|
||||||
|
@ -398,7 +403,7 @@ def _escape(source, escape, state):
|
||||||
escape += source.get()
|
escape += source.get()
|
||||||
c = int(escape[1:], 8)
|
c = int(escape[1:], 8)
|
||||||
if c > 0o377:
|
if c > 0o377:
|
||||||
raise source.error('octal escape value %r outside of '
|
raise source.error('octal escape value %s outside of '
|
||||||
'range 0-0o377' % escape,
|
'range 0-0o377' % escape,
|
||||||
len(escape))
|
len(escape))
|
||||||
return LITERAL, c
|
return LITERAL, c
|
||||||
|
@ -406,11 +411,11 @@ def _escape(source, escape, state):
|
||||||
group = int(escape[1:])
|
group = int(escape[1:])
|
||||||
if group < state.groups:
|
if group < state.groups:
|
||||||
if not state.checkgroup(group):
|
if not state.checkgroup(group):
|
||||||
raise source.error("cannot refer to open group",
|
raise source.error("cannot refer to an open group",
|
||||||
len(escape))
|
len(escape))
|
||||||
state.checklookbehindgroup(group, source)
|
state.checklookbehindgroup(group, source)
|
||||||
return GROUPREF, group
|
return GROUPREF, group
|
||||||
raise ValueError
|
raise source.error("invalid group reference", len(escape))
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
if c in ASCIILETTERS:
|
if c in ASCIILETTERS:
|
||||||
import warnings
|
import warnings
|
||||||
|
@ -419,7 +424,7 @@ def _escape(source, escape, state):
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
raise source.error("bogus escape: %r" % escape, len(escape))
|
raise source.error("bad escape %s" % escape, len(escape))
|
||||||
|
|
||||||
def _parse_sub(source, state, nested=True):
|
def _parse_sub(source, state, nested=True):
|
||||||
# parse an alternation: a|b|c
|
# parse an alternation: a|b|c
|
||||||
|
@ -427,12 +432,11 @@ def _parse_sub(source, state, nested=True):
|
||||||
items = []
|
items = []
|
||||||
itemsappend = items.append
|
itemsappend = items.append
|
||||||
sourcematch = source.match
|
sourcematch = source.match
|
||||||
|
start = source.tell()
|
||||||
while True:
|
while True:
|
||||||
itemsappend(_parse(source, state))
|
itemsappend(_parse(source, state))
|
||||||
if not sourcematch("|"):
|
if not sourcematch("|"):
|
||||||
break
|
break
|
||||||
if nested and source.next is not None and source.next != ")":
|
|
||||||
raise source.error("pattern not properly closed")
|
|
||||||
|
|
||||||
if len(items) == 1:
|
if len(items) == 1:
|
||||||
return items[0]
|
return items[0]
|
||||||
|
@ -480,8 +484,6 @@ def _parse_sub_cond(source, state, condgroup):
|
||||||
raise source.error("conditional backref with more than two branches")
|
raise source.error("conditional backref with more than two branches")
|
||||||
else:
|
else:
|
||||||
item_no = None
|
item_no = None
|
||||||
if source.next is not None and source.next != ")":
|
|
||||||
raise source.error("pattern not properly closed")
|
|
||||||
subpattern = SubPattern(state)
|
subpattern = SubPattern(state)
|
||||||
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
|
subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
|
||||||
return subpattern
|
return subpattern
|
||||||
|
@ -526,6 +528,7 @@ def _parse(source, state):
|
||||||
subpatternappend((LITERAL, _ord(this)))
|
subpatternappend((LITERAL, _ord(this)))
|
||||||
|
|
||||||
elif this == "[":
|
elif this == "[":
|
||||||
|
here = source.tell() - 1
|
||||||
# character set
|
# character set
|
||||||
set = []
|
set = []
|
||||||
setappend = set.append
|
setappend = set.append
|
||||||
|
@ -538,7 +541,8 @@ def _parse(source, state):
|
||||||
while True:
|
while True:
|
||||||
this = sourceget()
|
this = sourceget()
|
||||||
if this is None:
|
if this is None:
|
||||||
raise source.error("unexpected end of regular expression")
|
raise source.error("unterminated character set",
|
||||||
|
source.tell() - here)
|
||||||
if this == "]" and set != start:
|
if this == "]" and set != start:
|
||||||
break
|
break
|
||||||
elif this[0] == "\\":
|
elif this[0] == "\\":
|
||||||
|
@ -547,25 +551,28 @@ def _parse(source, state):
|
||||||
code1 = LITERAL, _ord(this)
|
code1 = LITERAL, _ord(this)
|
||||||
if sourcematch("-"):
|
if sourcematch("-"):
|
||||||
# potential range
|
# potential range
|
||||||
this = sourceget()
|
that = sourceget()
|
||||||
if this is None:
|
if that is None:
|
||||||
raise source.error("unexpected end of regular expression")
|
raise source.error("unterminated character set",
|
||||||
if this == "]":
|
source.tell() - here)
|
||||||
|
if that == "]":
|
||||||
if code1[0] is IN:
|
if code1[0] is IN:
|
||||||
code1 = code1[1][0]
|
code1 = code1[1][0]
|
||||||
setappend(code1)
|
setappend(code1)
|
||||||
setappend((LITERAL, _ord("-")))
|
setappend((LITERAL, _ord("-")))
|
||||||
break
|
break
|
||||||
if this[0] == "\\":
|
if that[0] == "\\":
|
||||||
code2 = _class_escape(source, this)
|
code2 = _class_escape(source, that)
|
||||||
else:
|
else:
|
||||||
code2 = LITERAL, _ord(this)
|
code2 = LITERAL, _ord(that)
|
||||||
if code1[0] != LITERAL or code2[0] != LITERAL:
|
if code1[0] != LITERAL or code2[0] != LITERAL:
|
||||||
raise source.error("bad character range", len(this))
|
msg = "bad character range %s-%s" % (this, that)
|
||||||
|
raise source.error(msg, len(this) + 1 + len(that))
|
||||||
lo = code1[1]
|
lo = code1[1]
|
||||||
hi = code2[1]
|
hi = code2[1]
|
||||||
if hi < lo:
|
if hi < lo:
|
||||||
raise source.error("bad character range", len(this))
|
msg = "bad character range %s-%s" % (this, that)
|
||||||
|
raise source.error(msg, len(this) + 1 + len(that))
|
||||||
setappend((RANGE, (lo, hi)))
|
setappend((RANGE, (lo, hi)))
|
||||||
else:
|
else:
|
||||||
if code1[0] is IN:
|
if code1[0] is IN:
|
||||||
|
@ -617,10 +624,10 @@ def _parse(source, state):
|
||||||
if max >= MAXREPEAT:
|
if max >= MAXREPEAT:
|
||||||
raise OverflowError("the repetition number is too large")
|
raise OverflowError("the repetition number is too large")
|
||||||
if max < min:
|
if max < min:
|
||||||
raise source.error("bad repeat interval",
|
raise source.error("min repeat greater than max repeat",
|
||||||
source.tell() - here)
|
source.tell() - here)
|
||||||
else:
|
else:
|
||||||
raise source.error("not supported", len(this))
|
raise AssertionError("unsupported quantifier %r" % (char,))
|
||||||
# figure out which item to repeat
|
# figure out which item to repeat
|
||||||
if subpattern:
|
if subpattern:
|
||||||
item = subpattern[-1:]
|
item = subpattern[-1:]
|
||||||
|
@ -641,39 +648,32 @@ def _parse(source, state):
|
||||||
subpatternappend((ANY, None))
|
subpatternappend((ANY, None))
|
||||||
|
|
||||||
elif this == "(":
|
elif this == "(":
|
||||||
group = 1
|
start = source.tell() - 1
|
||||||
|
group = True
|
||||||
name = None
|
name = None
|
||||||
condgroup = None
|
condgroup = None
|
||||||
if sourcematch("?"):
|
if sourcematch("?"):
|
||||||
group = 0
|
|
||||||
# options
|
# options
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None:
|
if char is None:
|
||||||
raise self.error("unexpected end of pattern")
|
raise source.error("unexpected end of pattern")
|
||||||
if char == "P":
|
if char == "P":
|
||||||
# python extensions
|
# python extensions
|
||||||
if sourcematch("<"):
|
if sourcematch("<"):
|
||||||
# named group: skip forward to end of name
|
# named group: skip forward to end of name
|
||||||
name = source.getuntil(">")
|
name = source.getuntil(">")
|
||||||
group = 1
|
|
||||||
if not name:
|
|
||||||
raise source.error("missing group name", 1)
|
|
||||||
if not name.isidentifier():
|
if not name.isidentifier():
|
||||||
raise source.error("bad character in group name "
|
msg = "bad character in group name %r" % name
|
||||||
"%r" % name,
|
raise source.error(msg, len(name) + 1)
|
||||||
len(name) + 1)
|
|
||||||
elif sourcematch("="):
|
elif sourcematch("="):
|
||||||
# named backreference
|
# named backreference
|
||||||
name = source.getuntil(")")
|
name = source.getuntil(")")
|
||||||
if not name:
|
|
||||||
raise source.error("missing group name", 1)
|
|
||||||
if not name.isidentifier():
|
if not name.isidentifier():
|
||||||
raise source.error("bad character in backref "
|
msg = "bad character in group name %r" % name
|
||||||
"group name %r" % name,
|
raise source.error(msg, len(name) + 1)
|
||||||
len(name) + 1)
|
|
||||||
gid = state.groupdict.get(name)
|
gid = state.groupdict.get(name)
|
||||||
if gid is None:
|
if gid is None:
|
||||||
msg = "unknown group name: {0!r}".format(name)
|
msg = "unknown group name %r" % name
|
||||||
raise source.error(msg, len(name) + 1)
|
raise source.error(msg, len(name) + 1)
|
||||||
state.checklookbehindgroup(gid, source)
|
state.checklookbehindgroup(gid, source)
|
||||||
subpatternappend((GROUPREF, gid))
|
subpatternappend((GROUPREF, gid))
|
||||||
|
@ -682,16 +682,17 @@ def _parse(source, state):
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None:
|
if char is None:
|
||||||
raise source.error("unexpected end of pattern")
|
raise source.error("unexpected end of pattern")
|
||||||
raise source.error("unknown specifier: ?P%s" % char,
|
raise source.error("unknown extension ?P" + char,
|
||||||
len(char))
|
len(char) + 2)
|
||||||
elif char == ":":
|
elif char == ":":
|
||||||
# non-capturing group
|
# non-capturing group
|
||||||
group = 2
|
group = None
|
||||||
elif char == "#":
|
elif char == "#":
|
||||||
# comment
|
# comment
|
||||||
while True:
|
while True:
|
||||||
if source.next is None:
|
if source.next is None:
|
||||||
raise source.error("unbalanced parenthesis")
|
raise source.error("missing ), unterminated comment",
|
||||||
|
source.tell() - start)
|
||||||
if sourceget() == ")":
|
if sourceget() == ")":
|
||||||
break
|
break
|
||||||
continue
|
continue
|
||||||
|
@ -700,8 +701,11 @@ def _parse(source, state):
|
||||||
dir = 1
|
dir = 1
|
||||||
if char == "<":
|
if char == "<":
|
||||||
char = sourceget()
|
char = sourceget()
|
||||||
if char is None or char not in "=!":
|
if char is None:
|
||||||
raise source.error("syntax error")
|
raise source.error("unexpected end of pattern")
|
||||||
|
if char not in "=!":
|
||||||
|
raise source.error("unknown extension ?<" + char,
|
||||||
|
len(char) + 2)
|
||||||
dir = -1 # lookbehind
|
dir = -1 # lookbehind
|
||||||
lookbehindgroups = state.lookbehindgroups
|
lookbehindgroups = state.lookbehindgroups
|
||||||
if lookbehindgroups is None:
|
if lookbehindgroups is None:
|
||||||
|
@ -711,7 +715,8 @@ def _parse(source, state):
|
||||||
if lookbehindgroups is None:
|
if lookbehindgroups is None:
|
||||||
state.lookbehindgroups = None
|
state.lookbehindgroups = None
|
||||||
if not sourcematch(")"):
|
if not sourcematch(")"):
|
||||||
raise source.error("unbalanced parenthesis")
|
raise source.error("missing ), unterminated subpattern",
|
||||||
|
source.tell() - start)
|
||||||
if char == "=":
|
if char == "=":
|
||||||
subpatternappend((ASSERT, (dir, p)))
|
subpatternappend((ASSERT, (dir, p)))
|
||||||
else:
|
else:
|
||||||
|
@ -720,13 +725,11 @@ def _parse(source, state):
|
||||||
elif char == "(":
|
elif char == "(":
|
||||||
# conditional backreference group
|
# conditional backreference group
|
||||||
condname = source.getuntil(")")
|
condname = source.getuntil(")")
|
||||||
group = 2
|
group = None
|
||||||
if not condname:
|
|
||||||
raise source.error("missing group name", 1)
|
|
||||||
if condname.isidentifier():
|
if condname.isidentifier():
|
||||||
condgroup = state.groupdict.get(condname)
|
condgroup = state.groupdict.get(condname)
|
||||||
if condgroup is None:
|
if condgroup is None:
|
||||||
msg = "unknown group name: {0!r}".format(condname)
|
msg = "unknown group name %r" % condname
|
||||||
raise source.error(msg, len(condname) + 1)
|
raise source.error(msg, len(condname) + 1)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
|
@ -734,50 +737,48 @@ def _parse(source, state):
|
||||||
if condgroup < 0:
|
if condgroup < 0:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
except ValueError:
|
except ValueError:
|
||||||
raise source.error("bad character in group name",
|
msg = "bad character in group name %r" % condname
|
||||||
len(condname) + 1)
|
raise source.error(msg, len(condname) + 1) from None
|
||||||
if not condgroup:
|
if not condgroup:
|
||||||
raise source.error("bad group number",
|
raise source.error("bad group number",
|
||||||
len(condname) + 1)
|
len(condname) + 1)
|
||||||
if condgroup >= MAXGROUPS:
|
if condgroup >= MAXGROUPS:
|
||||||
raise source.error("the group number is too large",
|
raise source.error("invalid group reference",
|
||||||
len(condname) + 1)
|
len(condname) + 1)
|
||||||
state.checklookbehindgroup(condgroup, source)
|
state.checklookbehindgroup(condgroup, source)
|
||||||
elif char in FLAGS:
|
elif char in FLAGS:
|
||||||
# flags
|
# flags
|
||||||
state.flags |= FLAGS[char]
|
while True:
|
||||||
while source.next in FLAGS:
|
state.flags |= FLAGS[char]
|
||||||
state.flags |= FLAGS[sourceget()]
|
char = sourceget()
|
||||||
|
if char is None:
|
||||||
|
raise source.error("missing )")
|
||||||
|
if char == ")":
|
||||||
|
break
|
||||||
|
if char not in FLAGS:
|
||||||
|
raise source.error("unknown flag", len(char))
|
||||||
verbose = state.flags & SRE_FLAG_VERBOSE
|
verbose = state.flags & SRE_FLAG_VERBOSE
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
raise source.error("unexpected end of pattern")
|
raise source.error("unknown extension ?" + char,
|
||||||
if group:
|
len(char) + 1)
|
||||||
# parse group contents
|
|
||||||
if group == 2:
|
# parse group contents
|
||||||
# anonymous group
|
if group is not None:
|
||||||
group = None
|
try:
|
||||||
else:
|
group = state.opengroup(name)
|
||||||
try:
|
except error as err:
|
||||||
group = state.opengroup(name)
|
raise source.error(err.msg, len(name) + 1) from None
|
||||||
except error as err:
|
if condgroup:
|
||||||
raise source.error(err.msg, len(name) + 1)
|
p = _parse_sub_cond(source, state, condgroup)
|
||||||
if condgroup:
|
|
||||||
p = _parse_sub_cond(source, state, condgroup)
|
|
||||||
else:
|
|
||||||
p = _parse_sub(source, state)
|
|
||||||
if not sourcematch(")"):
|
|
||||||
raise source.error("unbalanced parenthesis")
|
|
||||||
if group is not None:
|
|
||||||
state.closegroup(group, p)
|
|
||||||
subpatternappend((SUBPATTERN, (group, p)))
|
|
||||||
else:
|
else:
|
||||||
while True:
|
p = _parse_sub(source, state)
|
||||||
char = sourceget()
|
if not source.match(")"):
|
||||||
if char is None:
|
raise source.error("missing ), unterminated subpattern",
|
||||||
raise source.error("unexpected end of pattern")
|
source.tell() - start)
|
||||||
if char == ")":
|
if group is not None:
|
||||||
break
|
state.closegroup(group, p)
|
||||||
raise source.error("unknown extension", len(char))
|
subpatternappend((SUBPATTERN, (group, p)))
|
||||||
|
|
||||||
elif this == "^":
|
elif this == "^":
|
||||||
subpatternappend((AT, AT_BEGINNING))
|
subpatternappend((AT, AT_BEGINNING))
|
||||||
|
@ -786,7 +787,7 @@ def _parse(source, state):
|
||||||
subpattern.append((AT, AT_END))
|
subpattern.append((AT, AT_END))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise source.error("parser error", len(this))
|
raise AssertionError("unsupported special character %r" % (char,))
|
||||||
|
|
||||||
return subpattern
|
return subpattern
|
||||||
|
|
||||||
|
@ -804,7 +805,7 @@ def fix_flags(src, flags):
|
||||||
raise ValueError("ASCII and UNICODE flags are incompatible")
|
raise ValueError("ASCII and UNICODE flags are incompatible")
|
||||||
else:
|
else:
|
||||||
if flags & SRE_FLAG_UNICODE:
|
if flags & SRE_FLAG_UNICODE:
|
||||||
raise ValueError("can't use UNICODE flag with a bytes pattern")
|
raise ValueError("cannot use UNICODE flag with a bytes pattern")
|
||||||
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
|
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
|
||||||
import warnings
|
import warnings
|
||||||
warnings.warn("ASCII and LOCALE flags are incompatible. "
|
warnings.warn("ASCII and LOCALE flags are incompatible. "
|
||||||
|
@ -826,11 +827,8 @@ def parse(str, flags=0, pattern=None):
|
||||||
p.pattern.flags = fix_flags(str, p.pattern.flags)
|
p.pattern.flags = fix_flags(str, p.pattern.flags)
|
||||||
|
|
||||||
if source.next is not None:
|
if source.next is not None:
|
||||||
if source.next == ")":
|
assert source.next == ")"
|
||||||
raise source.error("unbalanced parenthesis")
|
raise source.error("unbalanced parenthesis")
|
||||||
else:
|
|
||||||
raise source.error("bogus characters at end of regular expression",
|
|
||||||
len(tail))
|
|
||||||
|
|
||||||
if flags & SRE_FLAG_DEBUG:
|
if flags & SRE_FLAG_DEBUG:
|
||||||
p.dump()
|
p.dump()
|
||||||
|
@ -866,26 +864,25 @@ def parse_template(source, pattern):
|
||||||
c = this[1]
|
c = this[1]
|
||||||
if c == "g":
|
if c == "g":
|
||||||
name = ""
|
name = ""
|
||||||
if s.match("<"):
|
if not s.match("<"):
|
||||||
name = s.getuntil(">")
|
raise s.error("missing <")
|
||||||
if not name:
|
name = s.getuntil(">")
|
||||||
raise s.error("missing group name", 1)
|
if name.isidentifier():
|
||||||
try:
|
|
||||||
index = int(name)
|
|
||||||
if index < 0:
|
|
||||||
raise s.error("negative group number", len(name) + 1)
|
|
||||||
if index >= MAXGROUPS:
|
|
||||||
raise s.error("the group number is too large",
|
|
||||||
len(name) + 1)
|
|
||||||
except ValueError:
|
|
||||||
if not name.isidentifier():
|
|
||||||
raise s.error("bad character in group name",
|
|
||||||
len(name) + 1)
|
|
||||||
try:
|
try:
|
||||||
index = pattern.groupindex[name]
|
index = pattern.groupindex[name]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
msg = "unknown group name: {0!r}".format(name)
|
raise IndexError("unknown group name %r" % name)
|
||||||
raise IndexError(msg)
|
else:
|
||||||
|
try:
|
||||||
|
index = int(name)
|
||||||
|
if index < 0:
|
||||||
|
raise ValueError
|
||||||
|
except ValueError:
|
||||||
|
raise s.error("bad character in group name %r" % name,
|
||||||
|
len(name) + 1) from None
|
||||||
|
if index >= MAXGROUPS:
|
||||||
|
raise s.error("invalid group reference",
|
||||||
|
len(name) + 1)
|
||||||
addgroup(index)
|
addgroup(index)
|
||||||
elif c == "0":
|
elif c == "0":
|
||||||
if s.next in OCTDIGITS:
|
if s.next in OCTDIGITS:
|
||||||
|
@ -903,7 +900,7 @@ def parse_template(source, pattern):
|
||||||
isoctal = True
|
isoctal = True
|
||||||
c = int(this[1:], 8)
|
c = int(this[1:], 8)
|
||||||
if c > 0o377:
|
if c > 0o377:
|
||||||
raise s.error('octal escape value %r outside of '
|
raise s.error('octal escape value %s outside of '
|
||||||
'range 0-0o377' % this, len(this))
|
'range 0-0o377' % this, len(this))
|
||||||
lappend(chr(c))
|
lappend(chr(c))
|
||||||
if not isoctal:
|
if not isoctal:
|
||||||
|
|
|
@ -38,6 +38,24 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertIs(type(actual), type(expect), msg)
|
self.assertIs(type(actual), type(expect), msg)
|
||||||
recurse(actual, expect)
|
recurse(actual, expect)
|
||||||
|
|
||||||
|
def checkPatternError(self, pattern, errmsg, pos=None):
    """Check that compiling *pattern* fails with the expected re.error.

    pattern -- regular expression source passed to re.compile().
    errmsg  -- expected value of the raised error's ``msg`` attribute.
    pos     -- expected value of the error's ``pos`` attribute; the
               position is not checked when this is None.

    Every check, including the "an error is raised at all" check, runs
    inside a subTest keyed on the pattern, so each failure report names
    the offending pattern.
    """
    with self.subTest(pattern=pattern):
        # Keep assertRaises inside the subTest: a pattern that compiles
        # unexpectedly is then reported with its subTest parameters
        # instead of as an anonymous failure of the calling test.
        with self.assertRaises(re.error) as caught:
            re.compile(pattern)
        exc = caught.exception
        self.assertEqual(exc.msg, errmsg)
        if pos is not None:
            self.assertEqual(exc.pos, pos)
|
||||||
|
|
||||||
|
def checkTemplateError(self, pattern, repl, string, errmsg, pos=None):
    """Check that re.sub() rejects the template *repl* with re.error.

    pattern -- regular expression passed to re.sub().
    repl    -- replacement template expected to be rejected.
    string  -- subject string passed to re.sub().
    errmsg  -- expected value of the raised error's ``msg`` attribute.
    pos     -- expected value of the error's ``pos`` attribute; the
               position is not checked when this is None.

    Every check, including the "an error is raised at all" check, runs
    inside a subTest keyed on the pattern and the template, so each
    failure report names the offending inputs.
    """
    with self.subTest(pattern=pattern, repl=repl):
        # Keep assertRaises inside the subTest: a template that is
        # accepted unexpectedly is then reported with its subTest
        # parameters rather than as an anonymous failure.
        with self.assertRaises(re.error) as caught:
            re.sub(pattern, repl, string)
        exc = caught.exception
        self.assertEqual(exc.msg, errmsg)
        if pos is not None:
            self.assertEqual(exc.pos, pos)
|
||||||
|
|
||||||
def test_keep_buffer(self):
|
def test_keep_buffer(self):
|
||||||
# See bug 14212
|
# See bug 14212
|
||||||
b = bytearray(b'x')
|
b = bytearray(b'x')
|
||||||
|
@ -148,6 +166,7 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
|
self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
|
||||||
self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
|
self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
|
||||||
self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
|
self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
|
||||||
|
self.assertEqual(re.sub('x', r'\377', 'x'), '\377')
|
||||||
|
|
||||||
self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
|
self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
|
||||||
self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
|
self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
|
||||||
|
@ -158,21 +177,25 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
|
self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
|
||||||
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
|
self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
|
||||||
|
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\400', 'x')
|
self.checkTemplateError('x', r'\400', 'x',
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\777', 'x')
|
r'octal escape value \400 outside of '
|
||||||
|
r'range 0-0o377', 0)
|
||||||
|
self.checkTemplateError('x', r'\777', 'x',
|
||||||
|
r'octal escape value \777 outside of '
|
||||||
|
r'range 0-0o377', 0)
|
||||||
|
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
|
self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
|
self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
|
self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
|
self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
|
self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
|
self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
|
self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
|
self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
|
self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
|
self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
|
self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
|
||||||
self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
|
self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
|
||||||
|
|
||||||
# in python2.3 (etc), these loop endlessly in sre_parser.py
|
# in python2.3 (etc), these loop endlessly in sre_parser.py
|
||||||
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
|
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
|
||||||
|
@ -198,47 +221,65 @@ class ReTests(unittest.TestCase):
|
||||||
re.compile('(?P<a>x)(?P=a)(?(a)y)')
|
re.compile('(?P<a>x)(?P=a)(?(a)y)')
|
||||||
re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
|
re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
|
||||||
re.compile('(?P<a1>x)\1(?(1)y)')
|
re.compile('(?P<a1>x)\1(?(1)y)')
|
||||||
self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
|
self.checkPatternError('(?P<a>)(?P<a>)',
|
||||||
self.assertRaises(re.error, re.compile, '(?Px)')
|
"redefinition of group name 'a' as group 2; "
|
||||||
self.assertRaises(re.error, re.compile, '(?P=)')
|
"was group 1")
|
||||||
self.assertRaises(re.error, re.compile, '(?P=1)')
|
self.checkPatternError('(?Pxy)', 'unknown extension ?Px')
|
||||||
self.assertRaises(re.error, re.compile, '(?P=a)')
|
self.checkPatternError('(?P<a>)(?P=a', 'missing ), unterminated name', 11)
|
||||||
self.assertRaises(re.error, re.compile, '(?P=a1)')
|
self.checkPatternError('(?P=', 'missing group name', 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?P=a.)')
|
self.checkPatternError('(?P=)', 'missing group name', 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?P<)')
|
self.checkPatternError('(?P=1)', "bad character in group name '1'", 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?P<>)')
|
self.checkPatternError('(?P=a)', "unknown group name 'a'")
|
||||||
self.assertRaises(re.error, re.compile, '(?P<1>)')
|
self.checkPatternError('(?P=a1)', "unknown group name 'a1'")
|
||||||
self.assertRaises(re.error, re.compile, '(?P<a.>)')
|
self.checkPatternError('(?P=a.)', "bad character in group name 'a.'", 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?())')
|
self.checkPatternError('(?P<)', 'missing >, unterminated name', 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?(a))')
|
self.checkPatternError('(?P<a', 'missing >, unterminated name', 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?(1a))')
|
self.checkPatternError('(?P<', 'missing group name', 4)
|
||||||
self.assertRaises(re.error, re.compile, '(?(a.))')
|
self.checkPatternError('(?P<>)', 'missing group name', 4)
|
||||||
|
self.checkPatternError(r'(?P<1>)', "bad character in group name '1'", 4)
|
||||||
|
self.checkPatternError(r'(?P<a.>)', "bad character in group name 'a.'", 4)
|
||||||
|
self.checkPatternError(r'(?(', 'missing group name', 3)
|
||||||
|
self.checkPatternError(r'(?())', 'missing group name', 3)
|
||||||
|
self.checkPatternError(r'(?(a))', "unknown group name 'a'", 3)
|
||||||
|
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
|
||||||
|
self.checkPatternError(r'(?(1a))', "bad character in group name '1a'", 3)
|
||||||
|
self.checkPatternError(r'(?(a.))', "bad character in group name 'a.'", 3)
|
||||||
# New valid/invalid identifiers in Python 3
|
# New valid/invalid identifiers in Python 3
|
||||||
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
re.compile('(?P<µ>x)(?P=µ)(?(µ)y)')
|
||||||
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
re.compile('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)(?P=𝔘𝔫𝔦𝔠𝔬𝔡𝔢)(?(𝔘𝔫𝔦𝔠𝔬𝔡𝔢)y)')
|
||||||
self.assertRaises(re.error, re.compile, '(?P<©>x)')
|
self.checkPatternError('(?P<©>x)', "bad character in group name '©'", 4)
|
||||||
# Support > 100 groups.
|
# Support > 100 groups.
|
||||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||||
pat = '(?:%s)(?(200)z|t)' % pat
|
pat = '(?:%s)(?(200)z|t)' % pat
|
||||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||||
|
|
||||||
def test_symbolic_refs(self):
|
def test_symbolic_refs(self):
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g<a', 'xx',
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
|
'missing >, unterminated name', 3)
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g<', 'xx',
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
|
'missing group name', 3)
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g', 'xx', 'missing <', 2)
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g<a a>', 'xx',
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<2>', 'xx')
|
"bad character in group name 'a a'", 3)
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\2', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g<>', 'xx',
|
||||||
self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
|
'missing group name', 3)
|
||||||
|
self.checkTemplateError('(?P<a>x)', '\g<1a1>', 'xx',
|
||||||
|
"bad character in group name '1a1'", 3)
|
||||||
|
self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
|
||||||
|
'invalid group reference')
|
||||||
|
self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
|
||||||
|
'invalid group reference')
|
||||||
|
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
|
||||||
|
re.sub('(?P<a>x)', '\g<ab>', 'xx')
|
||||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
|
||||||
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
|
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\2', 'xx'), '')
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g<-1>', 'xx',
|
||||||
|
"bad character in group name '-1'", 3)
|
||||||
# New valid/invalid identifiers in Python 3
|
# New valid/invalid identifiers in Python 3
|
||||||
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
self.assertEqual(re.sub('(?P<µ>x)', r'\g<µ>', 'xx'), 'xx')
|
||||||
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
self.assertEqual(re.sub('(?P<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>x)', r'\g<𝔘𝔫𝔦𝔠𝔬𝔡𝔢>', 'xx'), 'xx')
|
||||||
self.assertRaises(re.error, re.sub, '(?P<a>x)', r'\g<©>', 'xx')
|
self.checkTemplateError('(?P<a>x)', '\g<©>', 'xx',
|
||||||
|
"bad character in group name '©'", 3)
|
||||||
# Support > 100 groups.
|
# Support > 100 groups.
|
||||||
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
pat = '|'.join('x(?P<a%d>%x)y' % (i, i) for i in range(1, 200 + 1))
|
||||||
self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
|
self.assertEqual(re.sub(pat, '\g<200>', 'xc8yzxc8y'), 'c8zc8')
|
||||||
|
@ -444,6 +485,19 @@ class ReTests(unittest.TestCase):
|
||||||
pat = '(?:%s)(?(200)z)' % pat
|
pat = '(?:%s)(?(200)z)' % pat
|
||||||
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
|
||||||
|
|
||||||
|
self.checkPatternError(r'(?P<a>)(?(0))', 'bad group number', 10)
|
||||||
|
self.checkPatternError(r'()(?(1)a|b',
|
||||||
|
'missing ), unterminated subpattern', 2)
|
||||||
|
self.checkPatternError(r'()(?(1)a|b|c)',
|
||||||
|
'conditional backref with more than '
|
||||||
|
'two branches', 10)
|
||||||
|
|
||||||
|
def test_re_groupref_overflow(self):
|
||||||
|
self.checkTemplateError('()', '\g<%s>' % sre_constants.MAXGROUPS, 'xx',
|
||||||
|
'invalid group reference', 3)
|
||||||
|
self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
|
||||||
|
'invalid group reference', 10)
|
||||||
|
|
||||||
def test_re_groupref(self):
|
def test_re_groupref(self):
|
||||||
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
|
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
|
||||||
('|', 'a'))
|
('|', 'a'))
|
||||||
|
@ -456,6 +510,8 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
|
self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
|
||||||
(None, None))
|
(None, None))
|
||||||
|
|
||||||
|
self.checkPatternError(r'(abc\1)', 'cannot refer to an open group', 4)
|
||||||
|
|
||||||
def test_groupdict(self):
|
def test_groupdict(self):
|
||||||
self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
|
self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
|
||||||
'first second').groupdict(),
|
'first second').groupdict(),
|
||||||
|
@ -493,6 +549,7 @@ class ReTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertTrue(re.match("^x{3}$", "xxx"))
|
self.assertTrue(re.match("^x{3}$", "xxx"))
|
||||||
self.assertTrue(re.match("^x{1,3}$", "xxx"))
|
self.assertTrue(re.match("^x{1,3}$", "xxx"))
|
||||||
|
self.assertTrue(re.match("^x{3,3}$", "xxx"))
|
||||||
self.assertTrue(re.match("^x{1,4}$", "xxx"))
|
self.assertTrue(re.match("^x{1,4}$", "xxx"))
|
||||||
self.assertTrue(re.match("^x{3,4}?$", "xxx"))
|
self.assertTrue(re.match("^x{3,4}?$", "xxx"))
|
||||||
self.assertTrue(re.match("^x{3}?$", "xxx"))
|
self.assertTrue(re.match("^x{3}?$", "xxx"))
|
||||||
|
@ -503,6 +560,9 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertIsNone(re.match("^x{}$", "xxx"))
|
self.assertIsNone(re.match("^x{}$", "xxx"))
|
||||||
self.assertTrue(re.match("^x{}$", "x{}"))
|
self.assertTrue(re.match("^x{}$", "x{}"))
|
||||||
|
|
||||||
|
self.checkPatternError(r'x{2,1}',
|
||||||
|
'min repeat greater than max repeat', 2)
|
||||||
|
|
||||||
def test_getattr(self):
|
def test_getattr(self):
|
||||||
self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
|
self.assertEqual(re.compile("(?i)(a)(b)").pattern, "(?i)(a)(b)")
|
||||||
self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
|
self.assertEqual(re.compile("(?i)(a)(b)").flags, re.I | re.U)
|
||||||
|
@ -550,7 +610,7 @@ class ReTests(unittest.TestCase):
|
||||||
b"1aa! a", re.LOCALE).group(0), b"1aa! a")
|
b"1aa! a", re.LOCALE).group(0), b"1aa! a")
|
||||||
|
|
||||||
def test_other_escapes(self):
|
def test_other_escapes(self):
|
||||||
self.assertRaises(re.error, re.compile, "\\")
|
self.checkPatternError("\\", 'bad escape (end of pattern)', 0)
|
||||||
self.assertEqual(re.match(r"\(", '(').group(), '(')
|
self.assertEqual(re.match(r"\(", '(').group(), '(')
|
||||||
self.assertIsNone(re.match(r"\(", ')'))
|
self.assertIsNone(re.match(r"\(", ')'))
|
||||||
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
|
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
|
||||||
|
@ -875,15 +935,17 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match(r"\08", "\0008"))
|
self.assertTrue(re.match(r"\08", "\0008"))
|
||||||
self.assertTrue(re.match(r"\01", "\001"))
|
self.assertTrue(re.match(r"\01", "\001"))
|
||||||
self.assertTrue(re.match(r"\018", "\0018"))
|
self.assertTrue(re.match(r"\018", "\0018"))
|
||||||
self.assertRaises(re.error, re.match, r"\567", "")
|
self.checkPatternError(r"\567",
|
||||||
self.assertRaises(re.error, re.match, r"\911", "")
|
r'octal escape value \567 outside of '
|
||||||
self.assertRaises(re.error, re.match, r"\x1", "")
|
r'range 0-0o377', 0)
|
||||||
self.assertRaises(re.error, re.match, r"\x1z", "")
|
self.checkPatternError(r"\911", 'invalid group reference', 0)
|
||||||
self.assertRaises(re.error, re.match, r"\u123", "")
|
self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
|
||||||
self.assertRaises(re.error, re.match, r"\u123z", "")
|
self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
|
||||||
self.assertRaises(re.error, re.match, r"\U0001234", "")
|
self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
|
||||||
self.assertRaises(re.error, re.match, r"\U0001234z", "")
|
self.checkPatternError(r"\u123z", r'incomplete escape \u123', 0)
|
||||||
self.assertRaises(re.error, re.match, r"\U00110000", "")
|
self.checkPatternError(r"\U0001234", r'incomplete escape \U0001234', 0)
|
||||||
|
self.checkPatternError(r"\U0001234z", r'incomplete escape \U0001234', 0)
|
||||||
|
self.checkPatternError(r"\U00110000", r'bad escape \U00110000', 0)
|
||||||
|
|
||||||
def test_sre_character_class_literals(self):
|
def test_sre_character_class_literals(self):
|
||||||
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
|
for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]:
|
||||||
|
@ -903,12 +965,14 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
|
self.assertTrue(re.match(r"[\U%08x]" % i, chr(i)))
|
||||||
self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
|
self.assertTrue(re.match(r"[\U%08x0]" % i, chr(i)+"0"))
|
||||||
self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
|
self.assertTrue(re.match(r"[\U%08xz]" % i, chr(i)+"z"))
|
||||||
self.assertRaises(re.error, re.match, r"[\567]", "")
|
self.checkPatternError(r"[\567]",
|
||||||
self.assertRaises(re.error, re.match, r"[\911]", "")
|
r'octal escape value \567 outside of '
|
||||||
self.assertRaises(re.error, re.match, r"[\x1z]", "")
|
r'range 0-0o377', 1)
|
||||||
self.assertRaises(re.error, re.match, r"[\u123z]", "")
|
self.checkPatternError(r"[\911]", r'bad escape \9', 1)
|
||||||
self.assertRaises(re.error, re.match, r"[\U0001234z]", "")
|
self.checkPatternError(r"[\x1z]", r'incomplete escape \x1', 1)
|
||||||
self.assertRaises(re.error, re.match, r"[\U00110000]", "")
|
self.checkPatternError(r"[\u123z]", r'incomplete escape \u123', 1)
|
||||||
|
self.checkPatternError(r"[\U0001234z]", r'incomplete escape \U0001234', 1)
|
||||||
|
self.checkPatternError(r"[\U00110000]", r'bad escape \U00110000', 1)
|
||||||
self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
|
self.assertTrue(re.match(r"[\U0001d49c-\U0001d4b5]", "\U0001d49e"))
|
||||||
|
|
||||||
def test_sre_byte_literals(self):
|
def test_sre_byte_literals(self):
|
||||||
|
@ -927,10 +991,12 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match(br"\08", b"\0008"))
|
self.assertTrue(re.match(br"\08", b"\0008"))
|
||||||
self.assertTrue(re.match(br"\01", b"\001"))
|
self.assertTrue(re.match(br"\01", b"\001"))
|
||||||
self.assertTrue(re.match(br"\018", b"\0018"))
|
self.assertTrue(re.match(br"\018", b"\0018"))
|
||||||
self.assertRaises(re.error, re.match, br"\567", b"")
|
self.checkPatternError(br"\567",
|
||||||
self.assertRaises(re.error, re.match, br"\911", b"")
|
r'octal escape value \567 outside of '
|
||||||
self.assertRaises(re.error, re.match, br"\x1", b"")
|
r'range 0-0o377', 0)
|
||||||
self.assertRaises(re.error, re.match, br"\x1z", b"")
|
self.checkPatternError(br"\911", 'invalid group reference', 0)
|
||||||
|
self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
|
||||||
|
self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)
|
||||||
|
|
||||||
def test_sre_byte_class_literals(self):
|
def test_sre_byte_class_literals(self):
|
||||||
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
for i in [0, 8, 16, 32, 64, 127, 128, 255]:
|
||||||
|
@ -946,9 +1012,22 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match(br"[\u1234]", b'u'))
|
self.assertTrue(re.match(br"[\u1234]", b'u'))
|
||||||
with self.assertWarns(DeprecationWarning):
|
with self.assertWarns(DeprecationWarning):
|
||||||
self.assertTrue(re.match(br"[\U00012345]", b'U'))
|
self.assertTrue(re.match(br"[\U00012345]", b'U'))
|
||||||
self.assertRaises(re.error, re.match, br"[\567]", b"")
|
self.checkPatternError(br"[\567]",
|
||||||
self.assertRaises(re.error, re.match, br"[\911]", b"")
|
r'octal escape value \567 outside of '
|
||||||
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
|
r'range 0-0o377', 1)
|
||||||
|
self.checkPatternError(br"[\911]", r'bad escape \9', 1)
|
||||||
|
self.checkPatternError(br"[\x1z]", r'incomplete escape \x1', 1)
|
||||||
|
|
||||||
|
def test_character_set_errors(self):
|
||||||
|
self.checkPatternError(r'[', 'unterminated character set', 0)
|
||||||
|
self.checkPatternError(r'[^', 'unterminated character set', 0)
|
||||||
|
self.checkPatternError(r'[a', 'unterminated character set', 0)
|
||||||
|
# bug 545855 -- This pattern failed to cause a compile error as it
|
||||||
|
# should, instead provoking a TypeError.
|
||||||
|
self.checkPatternError(r"[a-", 'unterminated character set', 0)
|
||||||
|
self.checkPatternError(r"[\w-b]", r'bad character range \w-b', 1)
|
||||||
|
self.checkPatternError(r"[a-\w]", r'bad character range a-\w', 1)
|
||||||
|
self.checkPatternError(r"[b-a]", 'bad character range b-a', 1)
|
||||||
|
|
||||||
def test_bug_113254(self):
|
def test_bug_113254(self):
|
||||||
self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
|
self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
|
||||||
|
@ -963,11 +1042,6 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
|
self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
|
||||||
self.assertEqual(re.match("((a))", "a").lastindex, 1)
|
self.assertEqual(re.match("((a))", "a").lastindex, 1)
|
||||||
|
|
||||||
def test_bug_545855(self):
|
|
||||||
# bug 545855 -- This pattern failed to cause a compile error as it
|
|
||||||
# should, instead provoking a TypeError.
|
|
||||||
self.assertRaises(re.error, re.compile, 'foo[a-')
|
|
||||||
|
|
||||||
def test_bug_418626(self):
|
def test_bug_418626(self):
|
||||||
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
|
# bugs 418626 et al. -- Testing Greg Chapman's addition of op code
|
||||||
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
|
# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
|
||||||
|
@ -991,6 +1065,24 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
|
self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
|
||||||
self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
|
self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
|
||||||
|
|
||||||
|
def test_nothing_to_repeat(self):
|
||||||
|
for reps in '*', '+', '?', '{1,2}':
|
||||||
|
for mod in '', '?':
|
||||||
|
self.checkPatternError('%s%s' % (reps, mod),
|
||||||
|
'nothing to repeat', 0)
|
||||||
|
self.checkPatternError('(?:%s%s)' % (reps, mod),
|
||||||
|
'nothing to repeat', 3)
|
||||||
|
|
||||||
|
def test_multiple_repeat(self):
|
||||||
|
for outer_reps in '*', '+', '{1,2}':
|
||||||
|
for outer_mod in '', '?':
|
||||||
|
outer_op = outer_reps + outer_mod
|
||||||
|
for inner_reps in '*', '+', '?', '{1,2}':
|
||||||
|
for inner_mod in '', '?':
|
||||||
|
inner_op = inner_reps + inner_mod
|
||||||
|
self.checkPatternError(r'x%s%s' % (inner_op, outer_op),
|
||||||
|
'multiple repeat', 1 + len(inner_op))
|
||||||
|
|
||||||
def test_unlimited_zero_width_repeat(self):
|
def test_unlimited_zero_width_repeat(self):
|
||||||
# Issue #9669
|
# Issue #9669
|
||||||
self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
|
self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
|
||||||
|
@ -1381,13 +1473,13 @@ class ReTests(unittest.TestCase):
|
||||||
|
|
||||||
def test_backref_group_name_in_exception(self):
|
def test_backref_group_name_in_exception(self):
|
||||||
# Issue 17341: Poor error message when compiling invalid regex
|
# Issue 17341: Poor error message when compiling invalid regex
|
||||||
with self.assertRaisesRegex(sre_constants.error, '<foo>'):
|
self.checkPatternError('(?P=<foo>)',
|
||||||
re.compile('(?P=<foo>)')
|
"bad character in group name '<foo>'", 4)
|
||||||
|
|
||||||
def test_group_name_in_exception(self):
|
def test_group_name_in_exception(self):
|
||||||
# Issue 17341: Poor error message when compiling invalid regex
|
# Issue 17341: Poor error message when compiling invalid regex
|
||||||
with self.assertRaisesRegex(sre_constants.error, '\?foo'):
|
self.checkPatternError('(?P<?foo>)',
|
||||||
re.compile('(?P<?foo>)')
|
"bad character in group name '?foo'", 4)
|
||||||
|
|
||||||
def test_issue17998(self):
|
def test_issue17998(self):
|
||||||
for reps in '*', '+', '?', '{1}':
|
for reps in '*', '+', '?', '{1}':
|
||||||
|
@ -1556,6 +1648,19 @@ SUBPATTERN None
|
||||||
self.assertIn(' at position 77', str(err))
|
self.assertIn(' at position 77', str(err))
|
||||||
self.assertIn('(line 5, column 17)', str(err))
|
self.assertIn('(line 5, column 17)', str(err))
|
||||||
|
|
||||||
|
def test_misc_errors(self):
|
||||||
|
self.checkPatternError(r'(', 'missing ), unterminated subpattern', 0)
|
||||||
|
self.checkPatternError(r'((a|b)', 'missing ), unterminated subpattern', 0)
|
||||||
|
self.checkPatternError(r'(a|b))', 'unbalanced parenthesis', 5)
|
||||||
|
self.checkPatternError(r'(?P', 'unexpected end of pattern', 3)
|
||||||
|
self.checkPatternError(r'(?z)', 'unknown extension ?z', 1)
|
||||||
|
self.checkPatternError(r'(?iz)', 'unknown flag', 3)
|
||||||
|
self.checkPatternError(r'(?i', 'missing )', 3)
|
||||||
|
self.checkPatternError(r'(?#abc', 'missing ), unterminated comment', 0)
|
||||||
|
self.checkPatternError(r'(?<', 'unexpected end of pattern', 3)
|
||||||
|
self.checkPatternError(r'(?<>)', 'unknown extension ?<>', 1)
|
||||||
|
self.checkPatternError(r'(?', 'unexpected end of pattern', 2)
|
||||||
|
|
||||||
|
|
||||||
class PatternReprTests(unittest.TestCase):
|
class PatternReprTests(unittest.TestCase):
|
||||||
def check(self, pattern, expected):
|
def check(self, pattern, expected):
|
||||||
|
|
|
@ -30,6 +30,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #22364: Improved some re error messages using regex for hints.
|
||||||
|
|
||||||
- Issue #23742: ntpath.expandvars() no longer loses unbalanced single quotes.
|
- Issue #23742: ntpath.expandvars() no longer loses unbalanced single quotes.
|
||||||
|
|
||||||
- Issue #21717: The zipfile.ZipFile.open function now supports 'x' (exclusive
|
- Issue #21717: The zipfile.ZipFile.open function now supports 'x' (exclusive
|
||||||
|
|
|
@ -315,7 +315,7 @@ getstring(PyObject* string, Py_ssize_t* p_length,
|
||||||
|
|
||||||
/* get pointer to byte string buffer */
|
/* get pointer to byte string buffer */
|
||||||
if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
|
if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) {
|
||||||
PyErr_SetString(PyExc_TypeError, "expected string or buffer");
|
PyErr_SetString(PyExc_TypeError, "expected string or bytes-like object");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -359,12 +359,12 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
||||||
|
|
||||||
if (isbytes && pattern->isbytes == 0) {
|
if (isbytes && pattern->isbytes == 0) {
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
"can't use a string pattern on a bytes-like object");
|
"cannot use a string pattern on a bytes-like object");
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
if (!isbytes && pattern->isbytes > 0) {
|
if (!isbytes && pattern->isbytes > 0) {
|
||||||
PyErr_SetString(PyExc_TypeError,
|
PyErr_SetString(PyExc_TypeError,
|
||||||
"can't use a bytes pattern on a string-like object");
|
"cannot use a bytes pattern on a string-like object");
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue