Issue #18685: Restore re performance to pre-PEP 393 levels.

This commit is contained in:
Serhiy Storchaka 2013-10-26 10:45:48 +03:00
parent ffb58e96f1
commit 9eabac68a3
4 changed files with 349 additions and 383 deletions

View File

@@ -77,6 +77,8 @@ class ReTests(unittest.TestCase):
self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz') self.assertTypedEqual(re.sub(b'y', B(b'a'), B(b'xyz')), b'xaz')
self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz') self.assertTypedEqual(re.sub(b'y', bytearray(b'a'), bytearray(b'xyz')), b'xaz')
self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz') self.assertTypedEqual(re.sub(b'y', memoryview(b'a'), memoryview(b'xyz')), b'xaz')
for y in ("\xe0", "\u0430", "\U0001d49c"):
self.assertEqual(re.sub(y, 'a', 'x%sz' % y), 'xaz')
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
@@ -250,6 +252,13 @@ class ReTests(unittest.TestCase):
[b'', b'a', b'b', b'c']) [b'', b'a', b'b', b'c'])
self.assertTypedEqual(re.split(b"(:*)", string), self.assertTypedEqual(re.split(b"(:*)", string),
[b'', b':', b'a', b':', b'b', b'::', b'c']) [b'', b':', b'a', b':', b'b', b'::', b'c'])
for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432",
"\U0001d49c\U0001d49e\U0001d4b5"):
string = ":%s:%s::%s" % (a, b, c)
self.assertEqual(re.split(":", string), ['', a, b, '', c])
self.assertEqual(re.split(":*", string), ['', a, b, c])
self.assertEqual(re.split("(:*)", string),
['', ':', a, ':', b, '::', c])
self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
self.assertEqual(re.split("(:)*", ":a:b::c"), self.assertEqual(re.split("(:)*", ":a:b::c"),
@@ -287,6 +296,14 @@ class ReTests(unittest.TestCase):
[b":", b"::", b":::"]) [b":", b"::", b":::"])
self.assertTypedEqual(re.findall(b"(:)(:*)", string), self.assertTypedEqual(re.findall(b"(:)(:*)", string),
[(b":", b""), (b":", b":"), (b":", b"::")]) [(b":", b""), (b":", b":"), (b":", b"::")])
for x in ("\xe0", "\u0430", "\U0001d49c"):
xx = x * 2
xxx = x * 3
string = "a%sb%sc%sd" % (x, xx, xxx)
self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
[(x, ""), (x, x), (x, xx)])
def test_bug_117612(self): def test_bug_117612(self):
self.assertEqual(re.findall(r"(a|(b))", "aba"), self.assertEqual(re.findall(r"(a|(b))", "aba"),
@@ -305,6 +322,12 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match(b'(a)', string).group(0), b'a') self.assertEqual(re.match(b'(a)', string).group(0), b'a')
self.assertEqual(re.match(b'(a)', string).group(1), b'a') self.assertEqual(re.match(b'(a)', string).group(1), b'a')
self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a')) self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a'))
for a in ("\xe0", "\u0430", "\U0001d49c"):
self.assertEqual(re.match(a, a).groups(), ())
self.assertEqual(re.match('(%s)' % a, a).groups(), (a,))
self.assertEqual(re.match('(%s)' % a, a).group(0), a)
self.assertEqual(re.match('(%s)' % a, a).group(1), a)
self.assertEqual(re.match('(%s)' % a, a).group(1, 1), (a, a))
pat = re.compile('((a)|(b))(c)?') pat = re.compile('((a)|(b))(c)?')
self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))

View File

@@ -21,6 +21,8 @@ Core and Builtins
Library Library
------- -------
- Issue #18685: Restore re performance to pre-PEP 393 levels.
- Issue #19339: telnetlib module is now using time.monotonic() when available - Issue #19339: telnetlib module is now using time.monotonic() when available
to compute timeout. to compute timeout.

File diff suppressed because it is too large Load Diff

View File

@@ -31,9 +31,7 @@ typedef struct {
PyObject* pattern; /* pattern source (or None) */ PyObject* pattern; /* pattern source (or None) */
int flags; /* flags used when compiling pattern source */ int flags; /* flags used when compiling pattern source */
PyObject *weakreflist; /* List of weak references */ PyObject *weakreflist; /* List of weak references */
int logical_charsize; /* pattern charsize (or -1) */ int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */
int charsize;
Py_buffer view;
/* pattern code */ /* pattern code */
Py_ssize_t codesize; Py_ssize_t codesize;
SRE_CODE code[1]; SRE_CODE code[1];
@@ -73,9 +71,8 @@ typedef struct {
/* attributes for the match object */ /* attributes for the match object */
PyObject* string; PyObject* string;
Py_ssize_t pos, endpos; Py_ssize_t pos, endpos;
/* character size */ int isbytes;
int logical_charsize; /* kind of thing: 1 - bytes, 2/4 - unicode */ int charsize; /* character size */
int charsize;
/* registers */ /* registers */
Py_ssize_t lastindex; Py_ssize_t lastindex;
Py_ssize_t lastmark; Py_ssize_t lastmark;