Implement rich comparison for _sre.SRE_Pattern
Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created by re.compile(), become comparable (only x==y and x!=y operators). This change should fix issue #18383: don't duplicate warning filters when the warnings module is reloaded (something usually done only in unit tests).
This commit is contained in:
parent
a2f7ee8b26
commit
b44fb128ae
|
@ -3,12 +3,13 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
|
|||
import io
|
||||
import locale
|
||||
import re
|
||||
from re import Scanner
|
||||
import sre_compile
|
||||
import sys
|
||||
import string
|
||||
import sys
|
||||
import traceback
|
||||
import unittest
|
||||
import warnings
|
||||
from re import Scanner
|
||||
from weakref import proxy
|
||||
|
||||
# Misc tests from Tim Peters' re.doc
|
||||
|
@ -1777,6 +1778,48 @@ SUBPATTERN None 0 0
|
|||
self.assertIn('ASCII', str(re.A))
|
||||
self.assertIn('DOTALL', str(re.S))
|
||||
|
||||
def test_pattern_compare(self):
    """Compiled str patterns support ==, != and hash(), but not ordering."""
    reference = re.compile('abc', re.IGNORECASE)

    # Same pattern and same flags: must hash and compare equal.  The
    # cache is purged first so that re.compile() is not simply handing
    # back the cached `reference` object.
    re.purge()
    twin = re.compile('abc', re.IGNORECASE)
    self.assertEqual(hash(twin), hash(reference))
    self.assertEqual(twin, reference)

    # A different pattern string must compare unequal.  The hashes are
    # deliberately not compared: nothing guarantees that two unequal
    # patterns produce different hash values.
    re.purge()
    other_text = re.compile('XYZ', re.IGNORECASE)
    self.assertNotEqual(other_text, reference)

    # Same pattern string but different flags (flags=0) must compare
    # unequal as well.
    re.purge()
    other_flags = re.compile('abc')
    self.assertNotEqual(other_flags, reference)

    # Ordering operators are not supported; only == and != are.
    with self.assertRaises(TypeError):
        reference < twin
|
||||
|
||||
def test_pattern_compare_bytes(self):
    """Compiled bytes patterns compare by value and never equal str ones."""
    bytes_reference = re.compile(b'abc')

    # Two bytes patterns built from the same source must hash and
    # compare equal.  Purge the cache so a new object is compiled.
    re.purge()
    bytes_twin = re.compile(b'abc')
    self.assertEqual(hash(bytes_twin), hash(bytes_reference))
    self.assertEqual(bytes_twin, bytes_reference)

    # A str pattern is never equal to a bytes pattern, and performing
    # that comparison must not emit a BytesWarning: the warning is
    # promoted to an error here so it would fail the test.
    re.purge()
    str_pattern = re.compile('abc')
    with warnings.catch_warnings():
        warnings.simplefilter('error', BytesWarning)
        self.assertNotEqual(str_pattern, bytes_reference)
|
||||
|
||||
|
||||
class PatternReprTests(unittest.TestCase):
|
||||
def check(self, pattern, expected):
|
||||
|
|
|
@ -42,6 +42,11 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created
|
||||
by re.compile(), become comparable (only x==y and x!=y operators). This
|
||||
change should fix issue #18383: don't duplicate warning filters when the
|
||||
warnings module is reloaded (something usually done only in unit tests).
|
||||
|
||||
- Issue #20572: The subprocess.Popen.wait method's undocumented
|
||||
endtime parameter now raises a DeprecationWarning.
|
||||
|
||||
|
|
|
@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
|
|||
|
||||
self->groups = groups;
|
||||
|
||||
Py_XINCREF(groupindex);
|
||||
Py_INCREF(groupindex);
|
||||
self->groupindex = groupindex;
|
||||
|
||||
Py_XINCREF(indexgroup);
|
||||
Py_INCREF(indexgroup);
|
||||
self->indexgroup = indexgroup;
|
||||
|
||||
self->weakreflist = NULL;
|
||||
|
||||
if (!_validate(self)) {
|
||||
Py_DECREF(self);
|
||||
return NULL;
|
||||
|
@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_
|
|||
return (PyObject*) scanner;
|
||||
}
|
||||
|
||||
static Py_hash_t
|
||||
pattern_hash(PatternObject *self)
|
||||
{
|
||||
Py_hash_t hash, hash2;
|
||||
|
||||
hash = PyObject_Hash(self->pattern);
|
||||
if (hash == -1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
|
||||
hash ^= hash2;
|
||||
|
||||
hash ^= self->flags;
|
||||
hash ^= self->isbytes;
|
||||
hash ^= self->codesize;
|
||||
|
||||
if (hash == -1) {
|
||||
hash = -2;
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
/* Rich comparison for compiled pattern objects (tp_richcompare slot).

   Only Py_EQ and Py_NE are handled.  Any other operator, or any operand
   whose type is not exactly Pattern_Type, yields NotImplemented.

   Two patterns are equal when their flags, isbytes and codesize fields
   match, their compiled code arrays are byte-for-byte identical, and
   their pattern objects compare equal.

   Returns a new reference to True/False or NotImplemented, or NULL if
   PyObject_RichCompareBool() reports an error while comparing the pattern
   objects. */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    PatternObject *left, *right;
    int cmp;

    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    left = (PatternObject *)lefto;
    right = (PatternObject *)righto;

    /* Cheap scalar checks first.  The code sizes must be *equal* (not
       merely both non-zero): the memcmp() below reads left->codesize
       elements from both arrays, so a size mismatch would read past the
       end of the shorter one. */
    cmp = (left->flags == right->flags
           && left->isbytes == right->isbytes
           && left->codesize == right->codesize);
    if (cmp) {
        /* Compare the code and the pattern because the same pattern can
           produce different codes depending on the locale used to compile
           the pattern when the re.LOCALE flag is used. Don't compare
           groups, indexgroup nor groupindex: they are derived from the
           pattern. */
        cmp = (memcmp(left->code, right->code,
                      sizeof(left->code[0]) * left->codesize) == 0);
    }
    if (cmp) {
        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
                                       Py_EQ);
        if (cmp < 0) {
            /* Comparing the pattern objects failed. */
            return NULL;
        }
    }
    if (op == Py_NE) {
        cmp = !cmp;
    }
    return PyBool_FromLong(cmp);
}
|
||||
|
||||
#include "clinic/_sre.c.h"
|
||||
|
||||
static PyMethodDef pattern_methods[] = {
|
||||
|
@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = {
|
|||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
(hashfunc)pattern_hash, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
0, /* tp_getattro */
|
||||
|
@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = {
|
|||
pattern_doc, /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
pattern_richcompare, /* tp_richcompare */
|
||||
offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
|
|
Loading…
Reference in New Issue