fixed character set description in docstring (SRE uses Python strings, not C strings)

removed USE_PYTHON defines, and related sre.py helpers

skip calling the _subx helper if the template is callable.
interestingly enough, this means that

	def callback(m):
	    return literal
	result = pattern.sub(callback, string)

is much faster than

	result = pattern.sub(literal, string)
This commit is contained in:
Fredrik Lundh 2001-10-21 21:48:30 +00:00
parent 0402dd18cb
commit dac58492aa
2 changed files with 55 additions and 175 deletions

View File

@ -17,15 +17,13 @@
r"""Support for regular expressions (RE). r"""Support for regular expressions (RE).
This module provides regular expression matching operations similar to This module provides regular expression matching operations similar to
those found in Perl. It's 8-bit clean: the strings being processed may those found in Perl. It supports both 8-bit and Unicode strings; both
contain both null bytes and characters whose high bit is set. Regular the pattern and the strings being processed can contain null bytes and
expression pattern strings may not contain null bytes, but can specify characters outside the US ASCII range.
the null byte using the \\number notation. Characters with the high
bit set may be included.
Regular expressions can contain both special and ordinary Regular expressions can contain both special and ordinary characters.
characters. Most ordinary characters, like "A", "a", or "0", are the Most ordinary characters, like "A", "a", or "0", are the simplest
simplest regular expressions; they simply match themselves. You can regular expressions; they simply match themselves. You can
concatenate ordinary characters, so last matches the string 'last'. concatenate ordinary characters, so last matches the string 'last'.
The special characters are: The special characters are:
@ -45,7 +43,7 @@ The special characters are:
"|" A|B, creates an RE that will match either A or B. "|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses. (...) Matches the RE inside the parentheses.
The contents can be retrieved or matched later in the string. The contents can be retrieved or matched later in the string.
(?iLmsx) Set the I, L, M, S, or X flag for the RE (see below). (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
(?:...) Non-grouping version of regular parentheses. (?:...) Non-grouping version of regular parentheses.
(?P<name>...) The substring matched by the group is accessible by name. (?P<name>...) The substring matched by the group is accessible by name.
(?P=name) Matches the text matched earlier by the group named name. (?P=name) Matches the text matched earlier by the group named name.
@ -246,77 +244,14 @@ def _expand(pattern, match, template):
def _subx(pattern, template): def _subx(pattern, template):
# internal: pattern.sub/subn implementation helper # internal: pattern.sub/subn implementation helper
if callable(template):
filter = template
else:
template = _compile_repl(template, pattern) template = _compile_repl(template, pattern)
if not template[0] and len(template[1]) == 1: if not template[0] and len(template[1]) == 1:
# literal replacement # literal replacement
filter = template[1][0] return template[1][0]
else:
def filter(match, template=template): def filter(match, template=template):
return sre_parse.expand_template(template, match) return sre_parse.expand_template(template, match)
return filter return filter
def _sub(pattern, template, text, count=0):
# internal: pattern.sub implementation hook
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
return _subn(pattern, template, text, count)[0]
def _subn(pattern, template, text, count=0):
# internal: pattern.subn implementation hook
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
filter = _subx(pattern, template)
if not callable(filter):
# literal replacement
def filter(match, literal=filter):
return literal
n = i = 0
s = []
append = s.append
c = pattern.scanner(text)
while not count or n < count:
m = c.search()
if not m:
break
b, e = m.span()
if i < b:
append(text[i:b])
elif i == b == e and n:
append(text[i:b])
continue # ignore empty match at previous position
append(filter(m))
i = e
n = n + 1
append(text[i:])
return _join(s, text[:0]), n
def _split(pattern, text, maxsplit=0):
# internal: pattern.split implementation hook
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
n = i = 0
s = []
append = s.append
extend = s.extend
c = pattern.scanner(text)
g = pattern.groups
while not maxsplit or n < maxsplit:
m = c.search()
if not m:
break
b, e = m.span()
if b == e:
if i >= len(text):
break
continue
append(text[i:b])
if g and b != e:
extend(list(m.groups()))
i = e
n = n + 1
append(text[i:])
return s
# register myself for pickling # register myself for pickling
import copy_reg import copy_reg

View File

@ -76,10 +76,6 @@ static char copyright[] =
/* -------------------------------------------------------------------- */ /* -------------------------------------------------------------------- */
/* optional features */ /* optional features */
/* test: define to use sre.py helpers instead of C code */
#undef USE_PYTHON_SPLIT
#undef USE_PYTHON_SUB
/* prevent run-away recursion (bad patterns on long strings) */ /* prevent run-away recursion (bad patterns on long strings) */
#if !defined(USE_STACKCHECK) #if !defined(USE_STACKCHECK)
@ -1251,6 +1247,8 @@ SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
state->start = ptr; state->start = ptr;
state->ptr = ++ptr; state->ptr = ++ptr;
if (flags & SRE_INFO_LITERAL)
return 1; /* we got all of it */
status = SRE_MATCH(state, pattern + 2, 1); status = SRE_MATCH(state, pattern + 2, 1);
if (status != 0) if (status != 0)
break; break;
@ -1820,66 +1818,6 @@ join(PyObject* list, PyObject* pattern)
return result; return result;
} }
#ifdef USE_PYTHON_SUB
static PyObject*
pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
{
PyObject* template;
PyObject* string;
PyObject* count = Py_False; /* zero */
static char* kwlist[] = { "repl", "string", "count", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:sub", kwlist,
&template, &string, &count))
return NULL;
/* delegate to Python code */
return call(
SRE_MODULE, "_sub",
Py_BuildValue("OOOO", self, template, string, count)
);
}
#endif
#ifdef USE_PYTHON_SUB
static PyObject*
pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
{
PyObject* template;
PyObject* string;
PyObject* count = Py_False; /* zero */
static char* kwlist[] = { "repl", "string", "count", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|O:subn", kwlist,
&template, &string, &count))
return NULL;
/* delegate to Python code */
return call(
SRE_MODULE, "_subn",
Py_BuildValue("OOOO", self, template, string, count)
);
}
#endif
#if defined(USE_PYTHON_SPLIT)
static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{
PyObject* string;
PyObject* maxsplit = Py_False; /* zero */
static char* kwlist[] = { "source", "maxsplit", NULL };
if (!PyArg_ParseTupleAndKeywords(args, kw, "O|O:split", kwlist,
&string, &maxsplit))
return NULL;
/* delegate to Python code */
return call(
SRE_MODULE, "_split",
Py_BuildValue("OOO", self, string, maxsplit)
);
}
#endif
static PyObject* static PyObject*
pattern_findall(PatternObject* self, PyObject* args, PyObject* kw) pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
{ {
@ -1980,7 +1918,6 @@ error:
} }
#if !defined(USE_PYTHON_SPLIT)
static PyObject* static PyObject*
pattern_split(PatternObject* self, PyObject* args, PyObject* kw) pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
{ {
@ -2071,15 +2008,16 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
} }
/* get segment following last match */ /* get segment following last match */
item = PySequence_GetSlice( i = STATE_OFFSET(&state, last);
string, STATE_OFFSET(&state, last), state.endpos if (i < state.endpos) {
); item = PySequence_GetSlice(string, i, state.endpos);
if (!item) if (!item)
goto error; goto error;
status = PyList_Append(list, item); status = PyList_Append(list, item);
Py_DECREF(item); Py_DECREF(item);
if (status < 0) if (status < 0)
goto error; goto error;
}
state_fini(&state); state_fini(&state);
return list; return list;
@ -2090,9 +2028,7 @@ error:
return NULL; return NULL;
} }
#endif
#if !defined(USE_PYTHON_SUB)
static PyObject* static PyObject*
pattern_subx(PatternObject* self, PyObject* template, PyObject* string, pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
int count, int subn) int count, int subn)
@ -2108,15 +2044,22 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
int i, b, e; int i, b, e;
int filter_is_callable; int filter_is_callable;
/* call subx helper to get the filter */ if (PyCallable_Check(template)) {
/* sub/subn takes either a function or a template */
filter = template;
Py_INCREF(filter);
filter_is_callable = 1;
} else {
/* if not callable, call the template compiler. it may return
either a filter function or a literal string */
filter = call( filter = call(
SRE_MODULE, "_subx", SRE_MODULE, "_subx",
Py_BuildValue("OO", self, template) Py_BuildValue("OO", self, template)
); );
if (!filter) if (!filter)
return NULL; return NULL;
filter_is_callable = PyCallable_Check(filter); filter_is_callable = PyCallable_Check(filter);
}
string = state_init(&state, self, string, 0, INT_MAX); string = state_init(&state, self, string, 0, INT_MAX);
if (!string) if (!string)
@ -2169,7 +2112,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
goto next; goto next;
if (filter_is_callable) { if (filter_is_callable) {
/* filter match */ /* pass match object through filter */
match = pattern_new_match(self, &state, 1); match = pattern_new_match(self, &state, 1);
if (!match) if (!match)
goto error; goto error;
@ -2186,7 +2129,7 @@ pattern_subx(PatternObject* self, PyObject* template, PyObject* string,
} else { } else {
/* filter is literal string */ /* filter is literal string */
item = filter; item = filter;
Py_INCREF(filter); Py_INCREF(item);
} }
/* add to list */ /* add to list */
@ -2208,6 +2151,7 @@ next:
} }
/* get segment following last match */ /* get segment following last match */
if (i < state.endpos) {
item = PySequence_GetSlice(string, i, state.endpos); item = PySequence_GetSlice(string, i, state.endpos);
if (!item) if (!item)
goto error; goto error;
@ -2215,11 +2159,13 @@ next:
Py_DECREF(item); Py_DECREF(item);
if (status < 0) if (status < 0)
goto error; goto error;
}
state_fini(&state); state_fini(&state);
/* convert list to single string */ /* convert list to single string (also removes list) */
item = join(list, self->pattern); item = join(list, self->pattern);
if (!item) if (!item)
return NULL; return NULL;
@ -2262,7 +2208,6 @@ pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
return pattern_subx(self, template, string, count, 1); return pattern_subx(self, template, string, count, 1);
} }
#endif
static PyObject* static PyObject*
pattern_copy(PatternObject* self, PyObject* args) pattern_copy(PatternObject* self, PyObject* args)