an SRE bugfix a day keeps Guido away...

#462270: sub-tle difference between pre.sub and sre.sub.  PRE ignored
an empty match at the previous location, SRE didn't.

also synced with Secret Labs "sreopen" codebase.
This commit is contained in:
Fredrik Lundh 2001-09-18 18:47:09 +00:00
parent 18d8d5a708
commit 21009b9c6f
3 changed files with 25 additions and 13 deletions

View File

@ -45,7 +45,7 @@ The special characters are:
"|" A|B, creates an RE that will match either A or B. "|" A|B, creates an RE that will match either A or B.
(...) Matches the RE inside the parentheses. (...) Matches the RE inside the parentheses.
The contents can be retrieved or matched later in the string. The contents can be retrieved or matched later in the string.
(?iLmsx) Set the I, L, M, S, or X flag for the RE. (?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
(?:...) Non-grouping version of regular parentheses. (?:...) Non-grouping version of regular parentheses.
(?P<name>...) The substring matched by the group is accessible by name. (?P<name>...) The substring matched by the group is accessible by name.
(?P=name) Matches the text matched earlier by the group named name. (?P=name) Matches the text matched earlier by the group named name.
@ -80,7 +80,6 @@ This module exports the following functions:
findall Find all occurrences of a pattern in a string. findall Find all occurrences of a pattern in a string.
compile Compile a pattern into a RegexObject. compile Compile a pattern into a RegexObject.
purge Clear the regular expression cache. purge Clear the regular expression cache.
template Compile a template pattern, returning a pattern object.
escape Backslash all non-alphanumerics in a string. escape Backslash all non-alphanumerics in a string.
Some of the functions in this module take flags as optional parameters: Some of the functions in this module take flags as optional parameters:
@ -90,11 +89,12 @@ Some of the functions in this module takes flags as optional parameters:
"$" matches the end of lines as well as the string. "$" matches the end of lines as well as the string.
S DOTALL "." matches any character at all, including the newline. S DOTALL "." matches any character at all, including the newline.
X VERBOSE Ignore whitespace and comments for nicer looking RE's. X VERBOSE Ignore whitespace and comments for nicer looking RE's.
U UNICODE Use unicode locale. U UNICODE Make \w, \W, \b, \B dependent on the Unicode locale.
This module also defines an exception 'error'. This module also defines an exception 'error'.
""" """
import sre_compile import sre_compile
import sre_parse import sre_parse
@ -104,7 +104,7 @@ __all__ = [ "match", "search", "sub", "subn", "split", "findall",
"U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
"UNICODE", "error" ] "UNICODE", "error" ]
__version__ = "2.1b2" __version__ = "2.1.1"
# this module works under 1.5.2 and later. don't use string methods # this module works under 1.5.2 and later. don't use string methods
import string import string
@ -269,6 +269,9 @@ def _subn(pattern, template, text, count=0, sub=0):
b, e = m.span() b, e = m.span()
if i < b: if i < b:
append(text[i:b]) append(text[i:b])
elif i == b == e and n:
append(text[i:b])
continue # ignore empty match at previous position
append(filter(m)) append(filter(m))
i = e i = e
n = n + 1 n = n + 1

View File

@ -123,6 +123,10 @@ test(r"""sre.sub('\r\n', r'\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') test(r"""sre.sub(r'\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n') test(r"""sre.sub('\r\n', '\n', 'abc\r\ndef\r\n')""", 'abc\ndef\n')
# Test for empty sub() behaviour, see SF bug #462270
test(r"""sre.sub('x*', '-', 'abxd')""", '-a-b-d-')
test(r"""sre.sub('x+', '-', 'abxd')""", 'ab-d')
if verbose: if verbose:
print 'Running tests on symbolic references' print 'Running tests on symbolic references'

View File

@ -31,6 +31,7 @@
* 2001-04-28 fl added __copy__ methods (work in progress) * 2001-04-28 fl added __copy__ methods (work in progress)
* 2001-05-14 fl fixes for 1.5.2 * 2001-05-14 fl fixes for 1.5.2
* 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis) * 2001-07-01 fl added BIGCHARSET support (from Martin von Loewis)
* 2001-09-18 fl
* *
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
* *
@ -133,6 +134,8 @@ static char copyright[] =
#define SRE_ALNUM_MASK 8 #define SRE_ALNUM_MASK 8
#define SRE_WORD_MASK 16 #define SRE_WORD_MASK 16
/* FIXME: this assumes ASCII. create tables in init_sre() instead */
static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2, static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
@ -1141,6 +1144,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern, int level)
} }
/* can't end up here */ /* can't end up here */
/* return SRE_ERROR_ILLEGAL; -- see python-dev discussion */
} }
LOCAL(int) LOCAL(int)
@ -2624,16 +2628,17 @@ init_sre(void)
m = Py_InitModule("_" SRE_MODULE, _functions); m = Py_InitModule("_" SRE_MODULE, _functions);
d = PyModule_GetDict(m); d = PyModule_GetDict(m);
PyDict_SetItemString( x = PyInt_FromLong(SRE_MAGIC);
d, "MAGIC", (x = (PyObject*) PyInt_FromLong(SRE_MAGIC)) if (x) {
); PyDict_SetItemString(d, "MAGIC", x);
Py_XDECREF(x); Py_DECREF(x);
}
PyDict_SetItemString(
d, "copyright", (x = (PyObject*)PyString_FromString(copyright))
);
Py_XDECREF(x);
x = PyString_FromString(copyright);
if (x) {
PyDict_SetItemString(d, "copyright", x);
Py_DECREF(x);
}
} }
#endif /* !defined(SRE_RECURSIVE) */ #endif /* !defined(SRE_RECURSIVE) */