SRE: stricter pattern syntax checking (covers parts of bug #115900)

This commit is contained in:
Fredrik Lundh 2001-01-14 21:00:44 +00:00
parent 146965abf2
commit 470ea5ab94
1 changed file with 17 additions and 7 deletions

View File

@@ -8,6 +8,8 @@
# See the sre.py file for information on usage and redistribution. # See the sre.py file for information on usage and redistribution.
# #
# XXX: show string offset and offending character for all errors
import string, sys import string, sys
from sre_constants import * from sre_constants import *
@@ -410,11 +412,11 @@ def _parse(source, state):
else: else:
code2 = LITERAL, ord(this) code2 = LITERAL, ord(this)
if code1[0] != LITERAL or code2[0] != LITERAL: if code1[0] != LITERAL or code2[0] != LITERAL:
raise error, "illegal range" raise error, "bad character range"
lo = code1[1] lo = code1[1]
hi = code2[1] hi = code2[1]
if hi < lo: if hi < lo:
raise error, "illegal range" raise error, "bad character range"
set.append((RANGE, (lo, hi))) set.append((RANGE, (lo, hi)))
else: else:
if code1[0] is IN: if code1[0] is IN:
@@ -457,7 +459,8 @@ def _parse(source, state):
min = int(lo) min = int(lo)
if hi: if hi:
max = int(hi) max = int(hi)
# XXX: <fl> check that hi >= lo ??? if max < min:
raise error, "bad repeat interval"
else: else:
raise error, "not supported" raise error, "not supported"
# figure out which item to repeat # figure out which item to repeat
@@ -465,6 +468,8 @@ def _parse(source, state):
item = subpattern[-1:] item = subpattern[-1:]
else: else:
raise error, "nothing to repeat" raise error, "nothing to repeat"
if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
raise error, "multiple repeat"
if source.match("?"): if source.match("?"):
subpattern[-1] = (MIN_REPEAT, (min, max, item)) subpattern[-1] = (MIN_REPEAT, (min, max, item))
else: else:
@@ -493,7 +498,7 @@ def _parse(source, state):
name = name + char name = name + char
group = 1 group = 1
if not isname(name): if not isname(name):
raise error, "illegal character in group name" raise error, "bad character in group name"
elif source.match("="): elif source.match("="):
# named backreference # named backreference
name = "" name = ""
@@ -505,7 +510,7 @@ def _parse(source, state):
break break
name = name + char name = name + char
if not isname(name): if not isname(name):
raise error, "illegal character in group name" raise error, "bad character in group name"
gid = state.groupdict.get(name) gid = state.groupdict.get(name)
if gid is None: if gid is None:
raise error, "unknown group name" raise error, "unknown group name"
@@ -547,6 +552,8 @@ def _parse(source, state):
continue continue
else: else:
# flags # flags
if not FLAGS.has_key(source.next):
raise error, "unexpected end of pattern"
while FLAGS.has_key(source.next): while FLAGS.has_key(source.next):
state.flags = state.flags | FLAGS[source.get()] state.flags = state.flags | FLAGS[source.get()]
if group: if group:
@@ -565,7 +572,9 @@ def _parse(source, state):
else: else:
while 1: while 1:
char = source.get() char = source.get()
if char is None or char == ")": if char is None:
raise error, "unexpected end of pattern"
if char == ")":
break break
raise error, "unknown extension" raise error, "unknown extension"
@@ -592,6 +601,7 @@ def parse(str, flags=0, pattern=None):
if pattern is None: if pattern is None:
pattern = Pattern() pattern = Pattern()
pattern.flags = flags pattern.flags = flags
pattern.str = str
p = _parse_sub(source, pattern, 0) p = _parse_sub(source, pattern, 0)
@@ -639,7 +649,7 @@ def parse_template(source, pattern):
index = int(name) index = int(name)
except ValueError: except ValueError:
if not isname(name): if not isname(name):
raise error, "illegal character in group name" raise error, "bad character in group name"
try: try:
index = pattern.groupindex[name] index = pattern.groupindex[name]
except KeyError: except KeyError: