Implement rich comparison for _sre.SRE_Pattern

Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created by
re.compile(), become comparable (only the x==y and x!=y operators). This change
should fix issue #18383: don't duplicate warning filters when the warnings
module is reloaded (something usually only done in unit tests).
This commit is contained in:
Victor Stinner 2016-11-21 16:35:08 +01:00
parent a2f7ee8b26
commit b44fb128ae
3 changed files with 118 additions and 9 deletions

View File

@ -3,12 +3,13 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
import io
import locale
import re
from re import Scanner
import sre_compile
import sys
import string
import sys
import traceback
import unittest
import warnings
from re import Scanner
from weakref import proxy
# Misc tests from Tim Peters' re.doc
@ -1777,6 +1778,48 @@ SUBPATTERN None 0 0
self.assertIn('ASCII', str(re.A))
self.assertIn('DOTALL', str(re.S))
def test_pattern_compare(self):
pattern1 = re.compile('abc', re.IGNORECASE)
# equal
re.purge()
pattern2 = re.compile('abc', re.IGNORECASE)
self.assertEqual(hash(pattern2), hash(pattern1))
self.assertEqual(pattern2, pattern1)
# not equal: different pattern
re.purge()
pattern3 = re.compile('XYZ', re.IGNORECASE)
# Don't test hash(pattern3) != hash(pattern1) because there is no
# warranty that hash values are different
self.assertNotEqual(pattern3, pattern1)
# not equal: different flag (flags=0)
re.purge()
pattern4 = re.compile('abc')
self.assertNotEqual(pattern4, pattern1)
# only == and != comparison operators are supported
with self.assertRaises(TypeError):
pattern1 < pattern2
def test_pattern_compare_bytes(self):
pattern1 = re.compile(b'abc')
# equal: test bytes patterns
re.purge()
pattern2 = re.compile(b'abc')
self.assertEqual(hash(pattern2), hash(pattern1))
self.assertEqual(pattern2, pattern1)
# not equal: pattern of a different types (str vs bytes),
# comparison must not raise a BytesWarning
re.purge()
pattern3 = re.compile('abc')
with warnings.catch_warnings():
warnings.simplefilter('error', BytesWarning)
self.assertNotEqual(pattern3, pattern1)
class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected):

View File

@ -42,6 +42,11 @@ Core and Builtins
Library
-------
- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created
by re.compile(), become comparable (only the x==y and x!=y operators). This
change should fix issue #18383: don't duplicate warning filters when the
warnings module is reloaded (something usually only done in unit tests).
- Issue #20572: The subprocess.Popen.wait method's undocumented
endtime parameter now raises a DeprecationWarning.
@ -77,7 +82,7 @@ Library
- Issue #28703: Fix asyncio.iscoroutinefunction to handle Mock objects.
- Issue #28704: Fix create_unix_server to support Path-like objects
- Issue #28704: Fix create_unix_server to support Path-like objects
(PEP 519).
- Issue #28720: Add collections.abc.AsyncGenerator.

View File

@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
self->groups = groups;
Py_XINCREF(groupindex);
Py_INCREF(groupindex);
self->groupindex = groupindex;
Py_XINCREF(indexgroup);
Py_INCREF(indexgroup);
self->indexgroup = indexgroup;
self->weakreflist = NULL;
if (!_validate(self)) {
Py_DECREF(self);
return NULL;
@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_
return (PyObject*) scanner;
}
/* tp_hash implementation for compiled pattern objects.

   The result mixes the hash of the pattern object with a hash of the
   compiled code bytes, the flags, the isbytes marker and the code size,
   i.e. the same fields that pattern_richcompare() looks at.

   Returns -1 if hashing self->pattern failed.  A computed value of -1 is
   remapped to -2 so that it cannot be mistaken for that failure value. */
static Py_hash_t
pattern_hash(PatternObject *self)
{
    Py_hash_t source_hash, code_hash, result;

    source_hash = PyObject_Hash(self->pattern);
    if (source_hash == -1) {
        /* Hashing the pattern object failed: propagate the failure. */
        return -1;
    }

    /* Hash the raw bytes of the compiled code array. */
    code_hash = _Py_HashBytes(self->code,
                              sizeof(self->code[0]) * self->codesize);

    /* Fold every component together with XOR. */
    result = source_hash ^ code_hash;
    result = result ^ self->flags;
    result = result ^ self->isbytes;
    result = result ^ self->codesize;

    return (result == -1) ? -2 : result;
}
/* tp_richcompare implementation for compiled pattern objects.

   Only the == and != operators are implemented.  Any other operator, or an
   operand whose type is not exactly Pattern_Type, yields NotImplemented.

   Two patterns are equal when their flags, isbytes marker, code size,
   compiled code and pattern object are all equal.

   Returns a bool object, NotImplemented, or NULL if comparing the two
   pattern objects failed. */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    PatternObject *left, *right;
    int cmp;

    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    left = (PatternObject *)lefto;
    right = (PatternObject *)righto;

    /* Cheap scalar checks first.  The code sizes must be *equal*, not merely
       both non-zero: the memcmp() below reads left->codesize elements from
       both code arrays, so a size mismatch would read past the end of the
       shorter array, and patterns with empty code could never be equal. */
    cmp = (left->flags == right->flags
           && left->isbytes == right->isbytes
           && left->codesize == right->codesize);
    if (cmp) {
        /* Compare the code and the pattern because the same pattern can
           produce different codes depending on the locale used to compile the
           pattern when the re.LOCALE flag is used. Don't compare groups,
           indexgroup nor groupindex: they are derived from the pattern. */
        cmp = (memcmp(left->code, right->code,
                      sizeof(left->code[0]) * left->codesize) == 0);
    }
    if (cmp) {
        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
                                       Py_EQ);
        if (cmp < 0) {
            /* The comparison of the pattern objects failed. */
            return NULL;
        }
    }
    if (op == Py_NE) {
        cmp = !cmp;
    }
    return PyBool_FromLong(cmp);
}
#include "clinic/_sre.c.h"
static PyMethodDef pattern_methods[] = {
@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = {
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
(hashfunc)pattern_hash, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = {
pattern_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
pattern_richcompare, /* tp_richcompare */
offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */