mirror of https://github.com/python/cpython
the mad patcher strikes again:
-- added pickling support (only works if sre is imported)
-- fixed wordsize problems in engine: instead of casting literals down to the character size, cast characters up to the literal size (same as the code word size). This prevents false hits when you're matching a unicode pattern against an 8-bit string. (Unfortunately, this broke another test, but I think the test should be changed in this case; more on that on python-dev.)
-- added sre.purge function (unofficial, clears the cache)
This commit is contained in:
parent
ae1b5b2e98
commit
0640e1161f
13
Lib/sre.py
13
Lib/sre.py
|
@ -89,6 +89,10 @@ def _compile(pattern, flags=0):
|
||||||
_cache[key] = p
|
_cache[key] = p
|
||||||
return p
|
return p
|
||||||
|
|
||||||
|
def purge():
|
||||||
|
# clear pattern cache
|
||||||
|
_cache.clear()
|
||||||
|
|
||||||
def _sub(pattern, template, string, count=0):
|
def _sub(pattern, template, string, count=0):
|
||||||
# internal: pattern.sub implementation hook
|
# internal: pattern.sub implementation hook
|
||||||
return _subn(pattern, template, string, count)[0]
|
return _subn(pattern, template, string, count)[0]
|
||||||
|
@ -142,3 +146,12 @@ def _split(pattern, string, maxsplit=0):
|
||||||
n = n + 1
|
n = n + 1
|
||||||
append(string[i:])
|
append(string[i:])
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
# register myself for pickling
|
||||||
|
|
||||||
|
import copy_reg
|
||||||
|
|
||||||
|
def _pickle(p):
|
||||||
|
return _compile, (p.pattern, p.flags)
|
||||||
|
|
||||||
|
copy_reg.pickle(type(_compile("")), _pickle, _compile)
|
||||||
|
|
|
@ -31,15 +31,15 @@ def _compile(code, pattern, flags):
|
||||||
emit(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
emit(ord(av))
|
emit(av)
|
||||||
elif op is IN:
|
elif op is IN:
|
||||||
if flags & SRE_FLAG_IGNORECASE:
|
if flags & SRE_FLAG_IGNORECASE:
|
||||||
emit(OPCODES[OP_IGNORE[op]])
|
emit(OPCODES[OP_IGNORE[op]])
|
||||||
def fixup(literal, flags=flags):
|
def fixup(literal, flags=flags):
|
||||||
return _sre.getlower(ord(literal), flags)
|
return _sre.getlower(literal, flags)
|
||||||
else:
|
else:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
fixup = ord
|
fixup = lambda x: x
|
||||||
skip = len(code); emit(0)
|
skip = len(code); emit(0)
|
||||||
for op, av in av:
|
for op, av in av:
|
||||||
emit(OPCODES[op])
|
emit(OPCODES[op])
|
||||||
|
@ -165,7 +165,7 @@ def _compile_info(code, pattern, flags):
|
||||||
if not (flags & SRE_FLAG_IGNORECASE):
|
if not (flags & SRE_FLAG_IGNORECASE):
|
||||||
for op, av in pattern.data:
|
for op, av in pattern.data:
|
||||||
if op is LITERAL:
|
if op is LITERAL:
|
||||||
prefix.append(ord(av))
|
prefix.append(av)
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
# add an info block
|
# add an info block
|
||||||
|
|
|
@ -19,6 +19,9 @@ from sre_constants import *
|
||||||
# FIXME: should be 65535, but the arraymodule is still broken
|
# FIXME: should be 65535, but the arraymodule is still broken
|
||||||
MAXREPEAT = 32767
|
MAXREPEAT = 32767
|
||||||
|
|
||||||
|
# FIXME: same here
|
||||||
|
CHARMASK = 0x7fff
|
||||||
|
|
||||||
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
SPECIAL_CHARS = ".\\[{()*+?^$|"
|
||||||
REPEAT_CHARS = "*+?{"
|
REPEAT_CHARS = "*+?{"
|
||||||
|
|
||||||
|
@ -30,14 +33,14 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
|
||||||
WHITESPACE = string.whitespace
|
WHITESPACE = string.whitespace
|
||||||
|
|
||||||
ESCAPES = {
|
ESCAPES = {
|
||||||
r"\a": (LITERAL, chr(7)),
|
r"\a": (LITERAL, 7),
|
||||||
r"\b": (LITERAL, chr(8)),
|
r"\b": (LITERAL, 8),
|
||||||
r"\f": (LITERAL, chr(12)),
|
r"\f": (LITERAL, 12),
|
||||||
r"\n": (LITERAL, chr(10)),
|
r"\n": (LITERAL, 10),
|
||||||
r"\r": (LITERAL, chr(13)),
|
r"\r": (LITERAL, 13),
|
||||||
r"\t": (LITERAL, chr(9)),
|
r"\t": (LITERAL, 9),
|
||||||
r"\v": (LITERAL, chr(11)),
|
r"\v": (LITERAL, 11),
|
||||||
r"\\": (LITERAL, "\\")
|
r"\\": (LITERAL, ord("\\"))
|
||||||
}
|
}
|
||||||
|
|
||||||
CATEGORIES = {
|
CATEGORIES = {
|
||||||
|
@ -176,9 +179,6 @@ def isdigit(char):
|
||||||
|
|
||||||
def isname(name):
|
def isname(name):
|
||||||
# check that group name is a valid string
|
# check that group name is a valid string
|
||||||
# FIXME: <fl> this code is really lame. should use a regular
|
|
||||||
# expression instead, but I seem to have certain bootstrapping
|
|
||||||
# problems here ;-)
|
|
||||||
if not isident(name[0]):
|
if not isident(name[0]):
|
||||||
return 0
|
return 0
|
||||||
for char in name:
|
for char in name:
|
||||||
|
@ -209,16 +209,14 @@ def _class_escape(source, escape):
|
||||||
while source.next in HEXDIGITS:
|
while source.next in HEXDIGITS:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
escape = escape[2:]
|
escape = escape[2:]
|
||||||
# FIXME: support unicode characters!
|
return LITERAL, int(escape[-4:], 16) & CHARMASK
|
||||||
return LITERAL, chr(int(escape[-4:], 16) & 0xff)
|
|
||||||
elif str(escape[1:2]) in OCTDIGITS:
|
elif str(escape[1:2]) in OCTDIGITS:
|
||||||
while source.next in OCTDIGITS:
|
while source.next in OCTDIGITS:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
escape = escape[1:]
|
escape = escape[1:]
|
||||||
# FIXME: support unicode characters!
|
return LITERAL, int(escape[-6:], 8) & CHARMASK
|
||||||
return LITERAL, chr(int(escape[-6:], 8) & 0xff)
|
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
return LITERAL, escape[1]
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
raise error, "bogus escape: %s" % repr(escape)
|
raise error, "bogus escape: %s" % repr(escape)
|
||||||
|
@ -236,8 +234,7 @@ def _escape(source, escape, state):
|
||||||
while source.next in HEXDIGITS:
|
while source.next in HEXDIGITS:
|
||||||
escape = escape + source.get()
|
escape = escape + source.get()
|
||||||
escape = escape[2:]
|
escape = escape[2:]
|
||||||
# FIXME: support unicode characters!
|
return LITERAL, int(escape[-4:], 16) & CHARMASK
|
||||||
return LITERAL, chr(int(escape[-4:], 16) & 0xff)
|
|
||||||
elif escape[1:2] in DIGITS:
|
elif escape[1:2] in DIGITS:
|
||||||
while 1:
|
while 1:
|
||||||
group = _group(escape, state.groups)
|
group = _group(escape, state.groups)
|
||||||
|
@ -251,17 +248,14 @@ def _escape(source, escape, state):
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
escape = escape[1:]
|
escape = escape[1:]
|
||||||
# FIXME: support unicode characters!
|
return LITERAL, int(escape[-6:], 8) & CHARMASK
|
||||||
return LITERAL, chr(int(escape[-6:], 8) & 0xff)
|
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
return LITERAL, escape[1]
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
raise error, "bogus escape: %s" % repr(escape)
|
raise error, "bogus escape: %s" % repr(escape)
|
||||||
|
|
||||||
|
|
||||||
def _branch(pattern, items):
|
def _branch(pattern, items):
|
||||||
|
|
||||||
# form a branch operator from a set of items
|
# form a branch operator from a set of items
|
||||||
|
|
||||||
subpattern = SubPattern(pattern)
|
subpattern = SubPattern(pattern)
|
||||||
|
@ -327,7 +321,7 @@ def _parse(source, state, flags=0):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if this and this[0] not in SPECIAL_CHARS:
|
if this and this[0] not in SPECIAL_CHARS:
|
||||||
subpattern.append((LITERAL, this))
|
subpattern.append((LITERAL, ord(this)))
|
||||||
|
|
||||||
elif this == "[":
|
elif this == "[":
|
||||||
# character set
|
# character set
|
||||||
|
@ -345,7 +339,7 @@ def _parse(source, state, flags=0):
|
||||||
elif this and this[0] == "\\":
|
elif this and this[0] == "\\":
|
||||||
code1 = _class_escape(source, this)
|
code1 = _class_escape(source, this)
|
||||||
elif this:
|
elif this:
|
||||||
code1 = LITERAL, this
|
code1 = LITERAL, ord(this)
|
||||||
else:
|
else:
|
||||||
raise error, "unexpected end of regular expression"
|
raise error, "unexpected end of regular expression"
|
||||||
if source.match("-"):
|
if source.match("-"):
|
||||||
|
@ -353,17 +347,15 @@ def _parse(source, state, flags=0):
|
||||||
this = source.get()
|
this = source.get()
|
||||||
if this == "]":
|
if this == "]":
|
||||||
set.append(code1)
|
set.append(code1)
|
||||||
set.append((LITERAL, "-"))
|
set.append((LITERAL, ord("-")))
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
if this[0] == "\\":
|
if this[0] == "\\":
|
||||||
code2 = _class_escape(source, this)
|
code2 = _class_escape(source, this)
|
||||||
else:
|
else:
|
||||||
code2 = LITERAL, this
|
code2 = LITERAL, ord(this)
|
||||||
if code1[0] != LITERAL or code2[0] != LITERAL:
|
if code1[0] != LITERAL or code2[0] != LITERAL:
|
||||||
raise error, "illegal range"
|
raise error, "illegal range"
|
||||||
if len(code1[1]) != 1 or len(code2[1]) != 1:
|
|
||||||
raise error, "illegal range"
|
|
||||||
set.append((RANGE, (code1[1], code2[1])))
|
set.append((RANGE, (code1[1], code2[1])))
|
||||||
else:
|
else:
|
||||||
if code1[0] is IN:
|
if code1[0] is IN:
|
||||||
|
@ -605,17 +597,16 @@ def parse_template(source, pattern):
|
||||||
break
|
break
|
||||||
if not code:
|
if not code:
|
||||||
this = this[1:]
|
this = this[1:]
|
||||||
# FIXME: support unicode characters!
|
code = LITERAL, int(this[-6:], 8) & CHARMASK
|
||||||
code = LITERAL, chr(int(this[-6:], 8) & 0xff)
|
|
||||||
a(code)
|
a(code)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
a(ESCAPES[this])
|
a(ESCAPES[this])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
for c in this:
|
for c in this:
|
||||||
a((LITERAL, c))
|
a((LITERAL, ord(c)))
|
||||||
else:
|
else:
|
||||||
a((LITERAL, this))
|
a((LITERAL, ord(this)))
|
||||||
return p
|
return p
|
||||||
|
|
||||||
def expand_template(template, match):
|
def expand_template(template, match):
|
||||||
|
@ -623,12 +614,17 @@ def expand_template(template, match):
|
||||||
# code instead
|
# code instead
|
||||||
p = []
|
p = []
|
||||||
a = p.append
|
a = p.append
|
||||||
|
sep = match.string[:0]
|
||||||
|
if type(sep) is type(""):
|
||||||
|
char = chr
|
||||||
|
else:
|
||||||
|
char = unichr
|
||||||
for c, s in template:
|
for c, s in template:
|
||||||
if c is LITERAL:
|
if c is LITERAL:
|
||||||
a(s)
|
a(char(s))
|
||||||
elif c is MARK:
|
elif c is MARK:
|
||||||
s = match.group(s)
|
s = match.group(s)
|
||||||
if s is None:
|
if s is None:
|
||||||
raise error, "empty group"
|
raise error, "empty group"
|
||||||
a(s)
|
a(s)
|
||||||
return match.string[:0].join(p)
|
return sep.join(p)
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
test_sre
|
test_sre
|
||||||
test_support -- test failed re module pickle
|
=== Failed incorrectly ('\\x00ffffffffffffff', '\377', 0, 'found', '\377')
|
||||||
test_support -- test failed re module cPickle
|
|
||||||
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
=== Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A')
|
||||||
=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
|
=== Failed incorrectly ('(a+)+\\1', 'aa', 0, 'found+"-"+g1', 'aa-a')
|
||||||
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
|
=== grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/'
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
* 00-06-28 fl fixed findall (0.9.1)
|
* 00-06-28 fl fixed findall (0.9.1)
|
||||||
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
|
* 00-06-29 fl fixed split, added more scanner features (0.9.2)
|
||||||
* 00-06-30 fl tuning, fast search (0.9.3)
|
* 00-06-30 fl tuning, fast search (0.9.3)
|
||||||
* 00-06-30 fl added assert (lookahead) primitives (0.9.4)
|
* 00-06-30 fl added assert (lookahead) primitives, etc (0.9.4)
|
||||||
*
|
*
|
||||||
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
* Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
|
||||||
*
|
*
|
||||||
|
@ -339,7 +339,7 @@ SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
|
||||||
}
|
}
|
||||||
|
|
||||||
LOCAL(int)
|
LOCAL(int)
|
||||||
SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch)
|
SRE_MEMBER(SRE_CODE* set, SRE_CODE ch)
|
||||||
{
|
{
|
||||||
/* check if character is a member of the given set */
|
/* check if character is a member of the given set */
|
||||||
|
|
||||||
|
@ -356,13 +356,13 @@ SRE_MEMBER(SRE_CODE* set, SRE_CHAR ch)
|
||||||
return !ok;
|
return !ok;
|
||||||
|
|
||||||
case SRE_OP_LITERAL:
|
case SRE_OP_LITERAL:
|
||||||
if (ch == (SRE_CHAR) set[0])
|
if (ch == set[0])
|
||||||
return ok;
|
return ok;
|
||||||
set++;
|
set++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SRE_OP_RANGE:
|
case SRE_OP_RANGE:
|
||||||
if ((SRE_CHAR) set[0] <= ch && ch <= (SRE_CHAR) set[1])
|
if (set[0] <= ch && ch <= set[1])
|
||||||
return ok;
|
return ok;
|
||||||
set += 2;
|
set += 2;
|
||||||
break;
|
break;
|
||||||
|
@ -455,8 +455,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
case SRE_OP_LITERAL:
|
case SRE_OP_LITERAL:
|
||||||
/* match literal string */
|
/* match literal string */
|
||||||
/* args: <code> */
|
/* args: <code> */
|
||||||
TRACE(("%8d: literal %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
|
TRACE(("%8d: literal %c\n", PTR(ptr), pattern[0]));
|
||||||
if (ptr >= end || *ptr != (SRE_CHAR) pattern[0])
|
if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0])
|
||||||
goto failure;
|
goto failure;
|
||||||
pattern++;
|
pattern++;
|
||||||
ptr++;
|
ptr++;
|
||||||
|
@ -465,8 +465,8 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
case SRE_OP_NOT_LITERAL:
|
case SRE_OP_NOT_LITERAL:
|
||||||
/* match anything that is not literal character */
|
/* match anything that is not literal character */
|
||||||
/* args: <code> */
|
/* args: <code> */
|
||||||
TRACE(("%8d: literal not %c\n", PTR(ptr), (SRE_CHAR) pattern[0]));
|
TRACE(("%8d: literal not %c\n", PTR(ptr), pattern[0]));
|
||||||
if (ptr >= end || *ptr == (SRE_CHAR) pattern[0])
|
if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0])
|
||||||
goto failure;
|
goto failure;
|
||||||
pattern++;
|
pattern++;
|
||||||
ptr++;
|
ptr++;
|
||||||
|
@ -528,7 +528,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SRE_OP_LITERAL_IGNORE:
|
case SRE_OP_LITERAL_IGNORE:
|
||||||
TRACE(("%8d: literal lower(%c)\n", PTR(ptr), (SRE_CHAR) *pattern));
|
TRACE(("%8d: literal lower(%c)\n", PTR(ptr), pattern[0]));
|
||||||
if (ptr >= end ||
|
if (ptr >= end ||
|
||||||
state->lower(*ptr) != state->lower(*pattern))
|
state->lower(*ptr) != state->lower(*pattern))
|
||||||
goto failure;
|
goto failure;
|
||||||
|
@ -537,8 +537,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SRE_OP_NOT_LITERAL_IGNORE:
|
case SRE_OP_NOT_LITERAL_IGNORE:
|
||||||
TRACE(("%8d: literal not lower(%c)\n", PTR(ptr),
|
TRACE(("%8d: literal not lower(%c)\n", PTR(ptr), pattern[0]));
|
||||||
(SRE_CHAR) *pattern));
|
|
||||||
if (ptr >= end ||
|
if (ptr >= end ||
|
||||||
state->lower(*ptr) == state->lower(*pattern))
|
state->lower(*ptr) == state->lower(*pattern))
|
||||||
goto failure;
|
goto failure;
|
||||||
|
@ -549,7 +548,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
case SRE_OP_IN_IGNORE:
|
case SRE_OP_IN_IGNORE:
|
||||||
TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
|
TRACE(("%8d: set lower(%c)\n", PTR(ptr), *ptr));
|
||||||
if (ptr >= end
|
if (ptr >= end
|
||||||
|| !SRE_MEMBER(pattern+1, (SRE_CHAR) state->lower(*ptr)))
|
|| !SRE_MEMBER(pattern+1, (SRE_CODE) state->lower(*ptr)))
|
||||||
goto failure;
|
goto failure;
|
||||||
pattern += pattern[0];
|
pattern += pattern[0];
|
||||||
ptr++;
|
ptr++;
|
||||||
|
@ -631,9 +630,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
} else if (pattern[3] == SRE_OP_LITERAL) {
|
} else if (pattern[3] == SRE_OP_LITERAL) {
|
||||||
/* repeated literal */
|
/* repeated literal */
|
||||||
SRE_CHAR chr = (SRE_CHAR) pattern[4];
|
SRE_CODE chr = pattern[4];
|
||||||
while (count < (int) pattern[2]) {
|
while (count < (int) pattern[2]) {
|
||||||
if (ptr >= end || *ptr != chr)
|
if (ptr >= end || (SRE_CODE) ptr[0] != chr)
|
||||||
break;
|
break;
|
||||||
ptr++;
|
ptr++;
|
||||||
count++;
|
count++;
|
||||||
|
@ -641,9 +640,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
} else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
|
} else if (pattern[3] == SRE_OP_LITERAL_IGNORE) {
|
||||||
/* repeated literal */
|
/* repeated literal */
|
||||||
SRE_CHAR chr = (SRE_CHAR) pattern[4];
|
SRE_CODE chr = pattern[4];
|
||||||
while (count < (int) pattern[2]) {
|
while (count < (int) pattern[2]) {
|
||||||
if (ptr >= end || (SRE_CHAR) state->lower(*ptr) != chr)
|
if (ptr >= end || (SRE_CODE) state->lower(*ptr) != chr)
|
||||||
break;
|
break;
|
||||||
ptr++;
|
ptr++;
|
||||||
count++;
|
count++;
|
||||||
|
@ -651,9 +650,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
} else if (pattern[3] == SRE_OP_NOT_LITERAL) {
|
} else if (pattern[3] == SRE_OP_NOT_LITERAL) {
|
||||||
/* repeated non-literal */
|
/* repeated non-literal */
|
||||||
SRE_CHAR chr = (SRE_CHAR) pattern[4];
|
SRE_CODE chr = pattern[4];
|
||||||
while (count < (int) pattern[2]) {
|
while (count < (int) pattern[2]) {
|
||||||
if (ptr >= end || *ptr == chr)
|
if (ptr >= end || (SRE_CODE) ptr[0] == chr)
|
||||||
break;
|
break;
|
||||||
ptr++;
|
ptr++;
|
||||||
count++;
|
count++;
|
||||||
|
@ -661,9 +660,9 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
|
|
||||||
} else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
|
} else if (pattern[3] == SRE_OP_NOT_LITERAL_IGNORE) {
|
||||||
/* repeated non-literal */
|
/* repeated non-literal */
|
||||||
SRE_CHAR chr = (SRE_CHAR) pattern[4];
|
SRE_CODE chr = pattern[4];
|
||||||
while (count < (int) pattern[2]) {
|
while (count < (int) pattern[2]) {
|
||||||
if (ptr >= end || (SRE_CHAR) state->lower(*ptr) == chr)
|
if (ptr >= end || (SRE_CODE) state->lower(ptr[0]) == chr)
|
||||||
break;
|
break;
|
||||||
ptr++;
|
ptr++;
|
||||||
count++;
|
count++;
|
||||||
|
@ -712,7 +711,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
} else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
|
} else if (pattern[pattern[0]] == SRE_OP_LITERAL) {
|
||||||
/* tail starts with a literal. skip positions where
|
/* tail starts with a literal. skip positions where
|
||||||
the rest of the pattern cannot possibly match */
|
the rest of the pattern cannot possibly match */
|
||||||
SRE_CHAR chr = (SRE_CHAR) pattern[pattern[0]+1];
|
SRE_CODE chr = pattern[pattern[0]+1];
|
||||||
TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
|
TRACE(("%8d: tail is literal %d\n", PTR(ptr), chr));
|
||||||
for (;;) {
|
for (;;) {
|
||||||
TRACE(("%8d: scan for tail match\n", PTR(ptr)));
|
TRACE(("%8d: scan for tail match\n", PTR(ptr)));
|
||||||
|
@ -868,7 +867,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
TRACE(("%8d: branch\n", PTR(ptr)));
|
TRACE(("%8d: branch\n", PTR(ptr)));
|
||||||
while (*pattern) {
|
while (*pattern) {
|
||||||
if (pattern[1] != SRE_OP_LITERAL ||
|
if (pattern[1] != SRE_OP_LITERAL ||
|
||||||
(ptr < end && *ptr == (SRE_CHAR) pattern[2])) {
|
(ptr < end && (SRE_CODE) ptr[0] == pattern[2])) {
|
||||||
TRACE(("%8d: branch check\n", PTR(ptr)));
|
TRACE(("%8d: branch check\n", PTR(ptr)));
|
||||||
state->ptr = ptr;
|
state->ptr = ptr;
|
||||||
i = SRE_MATCH(state, pattern + 1);
|
i = SRE_MATCH(state, pattern + 1);
|
||||||
|
@ -976,7 +975,7 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
end = state->end;
|
end = state->end;
|
||||||
while (ptr < end) {
|
while (ptr < end) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (*ptr != (SRE_CHAR) prefix[i]) {
|
if ((SRE_CODE) ptr[0] != prefix[i]) {
|
||||||
if (!i)
|
if (!i)
|
||||||
break;
|
break;
|
||||||
else
|
else
|
||||||
|
@ -1008,9 +1007,9 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
|
||||||
if (pattern[0] == SRE_OP_LITERAL) {
|
if (pattern[0] == SRE_OP_LITERAL) {
|
||||||
/* pattern starts with a literal character. this is used for
|
/* pattern starts with a literal character. this is used for
|
||||||
short prefixes, and if fast search is disabled*/
|
short prefixes, and if fast search is disabled*/
|
||||||
SRE_CHAR chr = (SRE_CHAR) pattern[1];
|
SRE_CODE chr = pattern[1];
|
||||||
for (;;) {
|
for (;;) {
|
||||||
while (ptr < end && *ptr != chr)
|
while (ptr < end && (SRE_CODE) ptr[0] != chr)
|
||||||
ptr++;
|
ptr++;
|
||||||
if (ptr == end)
|
if (ptr == end)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue