bpo-25054, bpo-1647489: Added support for splitting on zero-width patterns. (#4471)
Also fixed searching for patterns that could match an empty string.
This commit is contained in:
parent
e69fbb6a56
commit
70d56fb525
|
@ -708,37 +708,19 @@ form.
|
|||
That way, separator components are always found at the same relative
|
||||
indices within the result list.
|
||||
|
||||
.. note::
|
||||
The pattern can match empty strings. ::
|
||||
|
||||
:func:`split` doesn't currently split a string on an empty pattern match.
|
||||
For example::
|
||||
|
||||
>>> re.split('x*', 'axbc')
|
||||
['a', 'bc']
|
||||
|
||||
Even though ``'x*'`` also matches 0 'x' before 'a', between 'b' and 'c',
|
||||
and after 'c', currently these matches are ignored. The correct behavior
|
||||
(i.e. splitting on empty matches too and returning ``['', 'a', 'b', 'c',
|
||||
'']``) will be implemented in future versions of Python, but since this
|
||||
is a backward incompatible change, a :exc:`FutureWarning` will be raised
|
||||
in the meanwhile.
|
||||
|
||||
Patterns that can only match empty strings currently never split the
|
||||
string. Since this doesn't match the expected behavior, a
|
||||
:exc:`ValueError` will be raised starting from Python 3.5::
|
||||
|
||||
>>> re.split("^$", "foo\n\nbar\n", flags=re.M)
|
||||
Traceback (most recent call last):
|
||||
File "<stdin>", line 1, in <module>
|
||||
...
|
||||
ValueError: split() requires a non-empty pattern match.
|
||||
>>> re.split(r'\b', 'Words, words, words.')
|
||||
['', 'Words', ', ', 'words', ', ', 'words', '.']
|
||||
>>> re.split(r'(\W*)', '...words...')
|
||||
['', '...', 'w', '', 'o', '', 'r', '', 'd', '', 's', '...', '']
|
||||
|
||||
.. versionchanged:: 3.1
|
||||
Added the optional flags argument.
|
||||
|
||||
.. versionchanged:: 3.5
|
||||
Splitting on a pattern that could match an empty string now raises
|
||||
a warning. Patterns that can only match empty strings are now rejected.
|
||||
.. versionchanged:: 3.7
|
||||
Added support for splitting on a pattern that could match an empty string.
|
||||
|
||||
|
||||
.. function:: findall(pattern, string, flags=0)
|
||||
|
||||
|
@ -746,8 +728,10 @@ form.
|
|||
strings. The *string* is scanned left-to-right, and matches are returned in
|
||||
the order found. If one or more groups are present in the pattern, return a
|
||||
list of groups; this will be a list of tuples if the pattern has more than
|
||||
one group. Empty matches are included in the result unless they touch the
|
||||
beginning of another match.
|
||||
one group. Empty matches are included in the result.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
Non-empty matches can now start just after a previous empty match.
|
||||
|
||||
|
||||
.. function:: finditer(pattern, string, flags=0)
|
||||
|
@ -755,8 +739,10 @@ form.
|
|||
Return an :term:`iterator` yielding :ref:`match objects <match-objects>` over
|
||||
all non-overlapping matches for the RE *pattern* in *string*. The *string*
|
||||
is scanned left-to-right, and matches are returned in the order found. Empty
|
||||
matches are included in the result unless they touch the beginning of another
|
||||
match.
|
||||
matches are included in the result.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
Non-empty matches can now start just after a previous empty match.
|
||||
|
||||
|
||||
.. function:: sub(pattern, repl, string, count=0, flags=0)
|
||||
|
|
|
@ -364,6 +364,10 @@ The flags :const:`re.ASCII`, :const:`re.LOCALE` and :const:`re.UNICODE`
|
|||
can be set within the scope of a group.
|
||||
(Contributed by Serhiy Storchaka in :issue:`31690`.)
|
||||
|
||||
:func:`re.split` now supports splitting on a pattern like ``r'\b'``,
|
||||
``'^$'`` or ``(?=-)`` that matches an empty string.
|
||||
(Contributed by Serhiy Storchaka in :issue:`25054`.)
|
||||
|
||||
string
|
||||
------
|
||||
|
||||
|
@ -768,6 +772,23 @@ Changes in the Python API
|
|||
avoid a warning escape them with a backslash.
|
||||
(Contributed by Serhiy Storchaka in :issue:`30349`.)
|
||||
|
||||
* The result of splitting a string on a :mod:`regular expression <re>`
|
||||
that could match an empty string has been changed. For example
|
||||
splitting on ``r'\s*'`` will now split not only on whitespace as it
|
||||
did previously, but also between any pair of non-whitespace
|
||||
characters. The previous behavior can be restored by changing the pattern
|
||||
to ``r'\s+'``. A :exc:`FutureWarning` was emitted for such patterns since
|
||||
Python 3.5.
|
||||
|
||||
For patterns that match both empty and non-empty strings, the result of
|
||||
searching for all matches may also be changed in other cases. For example
|
||||
in the string ``'a\n\n'``, the pattern ``r'(?m)^\s*?$'`` will not only
|
||||
match empty strings at positions 2 and 3, but also the string ``'\n'`` at
|
||||
positions 2--3. To match only blank lines, the pattern should be rewritten
|
||||
as ``r'(?m)^[^\S\n]*$'``.
|
||||
|
||||
(Contributed by Serhiy Storchaka in :issue:`25054`.)
|
||||
|
||||
* :class:`tracemalloc.Traceback` frames are now sorted from oldest to most
|
||||
recent to be more consistent with :mod:`traceback`.
|
||||
(Contributed by Jesse Bakker in :issue:`32121`.)
|
||||
|
|
|
@ -1611,7 +1611,7 @@ class OutputChecker:
|
|||
'', want)
|
||||
# If a line in got contains only spaces, then remove the
|
||||
# spaces.
|
||||
got = re.sub(r'(?m)^\s*?$', '', got)
|
||||
got = re.sub(r'(?m)^[^\S\n]+$', '', got)
|
||||
if got == want:
|
||||
return True
|
||||
|
||||
|
|
|
@ -331,21 +331,21 @@ class ReTests(unittest.TestCase):
|
|||
['', 'a', '', '', 'c'])
|
||||
|
||||
for sep, expected in [
|
||||
(':*', ['', 'a', 'b', 'c']),
|
||||
('(?::*)', ['', 'a', 'b', 'c']),
|
||||
('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
|
||||
('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
|
||||
(':*', ['', 'a', 'b', 'c', '']),
|
||||
('(?::*)', ['', 'a', 'b', 'c', '']),
|
||||
('(:*)', ['', ':', 'a', ':', 'b', '::', 'c', '', '']),
|
||||
('(:)*', ['', ':', 'a', ':', 'b', ':', 'c', None, '']),
|
||||
]:
|
||||
with self.subTest(sep=sep), self.assertWarns(FutureWarning):
|
||||
with self.subTest(sep=sep):
|
||||
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
|
||||
|
||||
for sep, expected in [
|
||||
('', [':a:b::c']),
|
||||
(r'\b', [':a:b::c']),
|
||||
(r'(?=:)', [':a:b::c']),
|
||||
(r'(?<=:)', [':a:b::c']),
|
||||
('', ['', ':', 'a', ':', 'b', ':', ':', 'c', '']),
|
||||
(r'\b', [':', 'a', ':', 'b', '::', 'c', '']),
|
||||
(r'(?=:)', ['', ':a', ':b', ':', ':c']),
|
||||
(r'(?<=:)', [':', 'a:', 'b:', ':', 'c']),
|
||||
]:
|
||||
with self.subTest(sep=sep), self.assertRaises(ValueError):
|
||||
with self.subTest(sep=sep):
|
||||
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
|
||||
|
||||
def test_qualified_re_split(self):
|
||||
|
@ -356,7 +356,6 @@ class ReTests(unittest.TestCase):
|
|||
['', ':', 'a', ':', 'b::c'])
|
||||
self.assertEqual(re.split("(:+)", ":a:b::c", maxsplit=2),
|
||||
['', ':', 'a', ':', 'b::c'])
|
||||
with self.assertWarns(FutureWarning):
|
||||
self.assertEqual(re.split("(:*)", ":a:b::c", maxsplit=2),
|
||||
['', ':', 'a', ':', 'b::c'])
|
||||
|
||||
|
@ -1751,6 +1750,25 @@ class ReTests(unittest.TestCase):
|
|||
"span=(3, 5), match='bb'>" %
|
||||
(type(second).__module__, type(second).__qualname__))
|
||||
|
||||
def test_zerowidth(self):
|
||||
# Issues 852532, 1647489, 3262, 25054.
|
||||
self.assertEqual(re.split(r"\b", "a::bc"), ['', 'a', '::', 'bc', ''])
|
||||
self.assertEqual(re.split(r"\b|:+", "a::bc"), ['', 'a', '', 'bc', ''])
|
||||
self.assertEqual(re.split(r"(?<!\w)(?=\w)|:+", "a::bc"), ['', 'a', 'bc'])
|
||||
self.assertEqual(re.split(r"(?<=\w)(?!\w)|:+", "a::bc"), ['a', '', 'bc', ''])
|
||||
|
||||
self.assertEqual(re.sub(r"\b", "-", "a::bc"), '-a-::-bc-')
|
||||
self.assertEqual(re.sub(r"\b|:+", "-", "a::bc"), '-a--bc-')
|
||||
self.assertEqual(re.sub(r"(\b|:+)", r"[\1]", "a::bc"), '[]a[][::]bc[]')
|
||||
|
||||
self.assertEqual(re.findall(r"\b|:+", "a::bc"), ['', '', '::', '', ''])
|
||||
self.assertEqual(re.findall(r"\b|\w+", "a::bc"),
|
||||
['', 'a', '', '', 'bc', ''])
|
||||
|
||||
self.assertEqual([m.span() for m in re.finditer(r"\b|:+", "a::bc")],
|
||||
[(0, 0), (1, 1), (1, 3), (3, 3), (5, 5)])
|
||||
self.assertEqual([m.span() for m in re.finditer(r"\b|\w+", "a::bc")],
|
||||
[(0, 0), (0, 1), (1, 1), (3, 3), (3, 5), (5, 5)])
|
||||
|
||||
def test_bug_2537(self):
|
||||
# issue 2537: empty submatches
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Added support for splitting on a pattern that could match an empty string.
|
|
@ -0,0 +1,3 @@
|
|||
Fixed searching regular expression patterns that could match an empty
|
||||
string. Non-empty strings can now be correctly found after matching an empty
|
||||
string.
|
|
@ -446,6 +446,8 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
|
|||
|
||||
state->isbytes = isbytes;
|
||||
state->charsize = charsize;
|
||||
state->match_all = 0;
|
||||
state->must_advance = 0;
|
||||
|
||||
state->beginning = ptr;
|
||||
|
||||
|
@ -559,14 +561,14 @@ pattern_dealloc(PatternObject* self)
|
|||
}
|
||||
|
||||
LOCAL(Py_ssize_t)
|
||||
sre_match(SRE_STATE* state, SRE_CODE* pattern, int match_all)
|
||||
sre_match(SRE_STATE* state, SRE_CODE* pattern)
|
||||
{
|
||||
if (state->charsize == 1)
|
||||
return sre_ucs1_match(state, pattern, match_all);
|
||||
return sre_ucs1_match(state, pattern, 1);
|
||||
if (state->charsize == 2)
|
||||
return sre_ucs2_match(state, pattern, match_all);
|
||||
return sre_ucs2_match(state, pattern, 1);
|
||||
assert(state->charsize == 4);
|
||||
return sre_ucs4_match(state, pattern, match_all);
|
||||
return sre_ucs4_match(state, pattern, 1);
|
||||
}
|
||||
|
||||
LOCAL(Py_ssize_t)
|
||||
|
@ -606,7 +608,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyObject *string,
|
|||
|
||||
TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
||||
status = sre_match(&state, PatternObject_GetCode(self), 0);
|
||||
status = sre_match(&state, PatternObject_GetCode(self));
|
||||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
if (PyErr_Occurred()) {
|
||||
|
@ -645,7 +647,8 @@ _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyObject *string,
|
|||
|
||||
TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr));
|
||||
|
||||
status = sre_match(&state, PatternObject_GetCode(self), 1);
|
||||
state.match_all = 1;
|
||||
status = sre_match(&state, PatternObject_GetCode(self));
|
||||
|
||||
TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
|
||||
if (PyErr_Occurred()) {
|
||||
|
@ -808,11 +811,8 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
|
|||
if (status < 0)
|
||||
goto error;
|
||||
|
||||
if (state.ptr == state.start)
|
||||
state.start = (void*) ((char*) state.ptr + state.charsize);
|
||||
else
|
||||
state.must_advance = (state.ptr == state.start);
|
||||
state.start = state.ptr;
|
||||
|
||||
}
|
||||
|
||||
state_fini(&state);
|
||||
|
@ -901,17 +901,6 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
|
|||
void* last;
|
||||
|
||||
assert(self->codesize != 0);
|
||||
if (self->code[0] != SRE_OP_INFO || self->code[3] == 0) {
|
||||
if (self->code[0] == SRE_OP_INFO && self->code[4] == 0) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"split() requires a non-empty pattern match.");
|
||||
return NULL;
|
||||
}
|
||||
if (PyErr_WarnEx(PyExc_FutureWarning,
|
||||
"split() requires a non-empty pattern match.",
|
||||
1) < 0)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
|
||||
return NULL;
|
||||
|
@ -942,14 +931,6 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
|
|||
goto error;
|
||||
}
|
||||
|
||||
if (state.start == state.ptr) {
|
||||
if (last == state.end || state.ptr == state.end)
|
||||
break;
|
||||
/* skip one character */
|
||||
state.start = (void*) ((char*) state.ptr + state.charsize);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* get segment before this match */
|
||||
item = getslice(state.isbytes, state.beginning,
|
||||
string, STATE_OFFSET(&state, last),
|
||||
|
@ -974,7 +955,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
|
|||
}
|
||||
|
||||
n = n + 1;
|
||||
|
||||
state.must_advance = 1;
|
||||
last = state.start = state.ptr;
|
||||
|
||||
}
|
||||
|
@ -1101,9 +1082,7 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
|
|||
if (status < 0)
|
||||
goto error;
|
||||
|
||||
} else if (i == b && i == e && n > 0)
|
||||
/* ignore empty match on latest position */
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (filter_is_callable) {
|
||||
/* pass match object through filter */
|
||||
|
@ -1130,16 +1109,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
|
|||
|
||||
i = e;
|
||||
n = n + 1;
|
||||
|
||||
next:
|
||||
/* move on */
|
||||
if (state.ptr == state.end)
|
||||
break;
|
||||
if (state.ptr == state.start)
|
||||
state.start = (void*) ((char*) state.ptr + state.charsize);
|
||||
else
|
||||
state.must_advance = 1;
|
||||
state.start = state.ptr;
|
||||
|
||||
}
|
||||
|
||||
/* get segment following last match */
|
||||
|
@ -2450,7 +2421,7 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self)
|
|||
|
||||
state->ptr = state->start;
|
||||
|
||||
status = sre_match(state, PatternObject_GetCode(self->pattern), 0);
|
||||
status = sre_match(state, PatternObject_GetCode(self->pattern));
|
||||
if (PyErr_Occurred())
|
||||
return NULL;
|
||||
|
||||
|
@ -2459,12 +2430,10 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self)
|
|||
|
||||
if (status == 0)
|
||||
state->start = NULL;
|
||||
else if (state->ptr != state->start)
|
||||
else {
|
||||
state->must_advance = (state->ptr == state->start);
|
||||
state->start = state->ptr;
|
||||
else if (state->ptr != state->end)
|
||||
state->start = (void*) ((char*) state->ptr + state->charsize);
|
||||
else
|
||||
state->start = NULL;
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
@ -2499,12 +2468,10 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self)
|
|||
|
||||
if (status == 0)
|
||||
state->start = NULL;
|
||||
else if (state->ptr != state->start)
|
||||
else {
|
||||
state->must_advance = (state->ptr == state->start);
|
||||
state->start = state->ptr;
|
||||
else if (state->ptr != state->end)
|
||||
state->start = (void*) ((char*) state->ptr + state->charsize);
|
||||
else
|
||||
state->start = NULL;
|
||||
}
|
||||
|
||||
return match;
|
||||
}
|
||||
|
|
|
@ -67,6 +67,7 @@ typedef struct {
|
|||
void* end; /* end of original string */
|
||||
/* attributes for the match object */
|
||||
PyObject* string;
|
||||
Py_buffer buffer;
|
||||
Py_ssize_t pos, endpos;
|
||||
int isbytes;
|
||||
int charsize; /* character size */
|
||||
|
@ -74,11 +75,12 @@ typedef struct {
|
|||
Py_ssize_t lastindex;
|
||||
Py_ssize_t lastmark;
|
||||
void** mark;
|
||||
int match_all;
|
||||
int must_advance;
|
||||
/* dynamically allocated stuff */
|
||||
char* data_stack;
|
||||
size_t data_stack_size;
|
||||
size_t data_stack_base;
|
||||
Py_buffer buffer;
|
||||
/* current repeat context */
|
||||
SRE_REPEAT *repeat;
|
||||
} SRE_STATE;
|
||||
|
|
|
@ -199,7 +199,7 @@ SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
|
|||
return up != lo && SRE(charset)(state, set, up);
|
||||
}
|
||||
|
||||
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
|
||||
LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel);
|
||||
|
||||
LOCAL(Py_ssize_t)
|
||||
SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
|
||||
|
@ -510,12 +510,12 @@ do { \
|
|||
#define JUMP_ASSERT 12
|
||||
#define JUMP_ASSERT_NOT 13
|
||||
|
||||
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
|
||||
#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
|
||||
DATA_ALLOC(SRE(match_context), nextctx); \
|
||||
nextctx->last_ctx_pos = ctx_pos; \
|
||||
nextctx->jump = jumpvalue; \
|
||||
nextctx->pattern = nextpattern; \
|
||||
nextctx->match_all = matchall; \
|
||||
nextctx->toplevel = toplevel_; \
|
||||
ctx_pos = alloc_pos; \
|
||||
ctx = nextctx; \
|
||||
goto entrance; \
|
||||
|
@ -523,7 +523,7 @@ do { \
|
|||
while (0) /* gcc doesn't like labels at end of scopes */ \
|
||||
|
||||
#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
|
||||
DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)
|
||||
DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
|
||||
|
||||
#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
|
||||
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
|
||||
|
@ -540,13 +540,13 @@ typedef struct {
|
|||
SRE_CODE chr;
|
||||
SRE_REPEAT* rep;
|
||||
} u;
|
||||
int match_all;
|
||||
int toplevel;
|
||||
} SRE(match_context);
|
||||
|
||||
/* check if string matches the given pattern. returns <0 for
|
||||
error, 0 for failure, and 1 for success */
|
||||
LOCAL(Py_ssize_t)
|
||||
SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
|
||||
SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
|
||||
{
|
||||
SRE_CHAR* end = (SRE_CHAR *)state->end;
|
||||
Py_ssize_t alloc_pos, ctx_pos = -1;
|
||||
|
@ -563,7 +563,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
|
|||
ctx->last_ctx_pos = -1;
|
||||
ctx->jump = JUMP_NONE;
|
||||
ctx->pattern = pattern;
|
||||
ctx->match_all = match_all;
|
||||
ctx->toplevel = toplevel;
|
||||
ctx_pos = alloc_pos;
|
||||
|
||||
entrance:
|
||||
|
@ -636,11 +636,14 @@ entrance:
|
|||
case SRE_OP_SUCCESS:
|
||||
/* end of pattern */
|
||||
TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
|
||||
if (!ctx->match_all || ctx->ptr == state->end) {
|
||||
if (ctx->toplevel &&
|
||||
((state->match_all && ctx->ptr != state->end) ||
|
||||
(state->must_advance && ctx->ptr == state->start)))
|
||||
{
|
||||
RETURN_FAILURE;
|
||||
}
|
||||
state->ptr = ctx->ptr;
|
||||
RETURN_SUCCESS;
|
||||
}
|
||||
RETURN_FAILURE;
|
||||
|
||||
case SRE_OP_AT:
|
||||
/* match at given position */
|
||||
|
@ -856,7 +859,9 @@ entrance:
|
|||
RETURN_FAILURE;
|
||||
|
||||
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
|
||||
ctx->ptr == state->end) {
|
||||
ctx->ptr == state->end &&
|
||||
!(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
|
||||
{
|
||||
/* tail is empty. we're finished */
|
||||
state->ptr = ctx->ptr;
|
||||
RETURN_SUCCESS;
|
||||
|
@ -941,7 +946,10 @@ entrance:
|
|||
}
|
||||
|
||||
if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
|
||||
(!match_all || ctx->ptr == state->end)) {
|
||||
!(ctx->toplevel &&
|
||||
((state->match_all && ctx->ptr != state->end) ||
|
||||
(state->must_advance && ctx->ptr == state->start))))
|
||||
{
|
||||
/* tail is empty. we're finished */
|
||||
state->ptr = ctx->ptr;
|
||||
RETURN_SUCCESS;
|
||||
|
@ -1417,6 +1425,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
return 0; /* literal can't match: doesn't fit in char width */
|
||||
#endif
|
||||
end = (SRE_CHAR *)state->end;
|
||||
state->must_advance = 0;
|
||||
while (ptr < end) {
|
||||
while (*ptr != c) {
|
||||
if (++ptr >= end)
|
||||
|
@ -1458,6 +1467,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
return 0;
|
||||
|
||||
i = 1;
|
||||
state->must_advance = 0;
|
||||
do {
|
||||
if (*ptr == (SRE_CHAR) prefix[i]) {
|
||||
if (++i != prefix_len) {
|
||||
|
@ -1487,6 +1497,7 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
if (charset) {
|
||||
/* pattern starts with a character from a known set */
|
||||
end = (SRE_CHAR *)state->end;
|
||||
state->must_advance = 0;
|
||||
for (;;) {
|
||||
while (ptr < end && !SRE(charset)(state, charset, *ptr))
|
||||
ptr++;
|
||||
|
@ -1503,13 +1514,15 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
|
|||
} else {
|
||||
/* general case */
|
||||
assert(ptr <= end);
|
||||
while (1) {
|
||||
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
||||
state->start = state->ptr = ptr;
|
||||
status = SRE(match)(state, pattern, 1);
|
||||
state->must_advance = 0;
|
||||
while (status == 0 && ptr < end) {
|
||||
ptr++;
|
||||
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
|
||||
state->start = state->ptr = ptr;
|
||||
status = SRE(match)(state, pattern, 0);
|
||||
if (status != 0 || ptr >= end)
|
||||
break;
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue