diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index aac3a2cbab4..4fcd2d463dc 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -3,12 +3,13 @@ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \ import io import locale import re -from re import Scanner import sre_compile -import sys import string +import sys import traceback import unittest +import warnings +from re import Scanner from weakref import proxy # Misc tests from Tim Peters' re.doc @@ -1777,6 +1778,48 @@ SUBPATTERN None 0 0 self.assertIn('ASCII', str(re.A)) self.assertIn('DOTALL', str(re.S)) + def test_pattern_compare(self): + pattern1 = re.compile('abc', re.IGNORECASE) + + # equal + re.purge() + pattern2 = re.compile('abc', re.IGNORECASE) + self.assertEqual(hash(pattern2), hash(pattern1)) + self.assertEqual(pattern2, pattern1) + + # not equal: different pattern + re.purge() + pattern3 = re.compile('XYZ', re.IGNORECASE) + # Don't test hash(pattern3) != hash(pattern1) because there is no + # warranty that hash values are different + self.assertNotEqual(pattern3, pattern1) + + # not equal: different flag (flags=0) + re.purge() + pattern4 = re.compile('abc') + self.assertNotEqual(pattern4, pattern1) + + # only == and != comparison operators are supported + with self.assertRaises(TypeError): + pattern1 < pattern2 + + def test_pattern_compare_bytes(self): + pattern1 = re.compile(b'abc') + + # equal: test bytes patterns + re.purge() + pattern2 = re.compile(b'abc') + self.assertEqual(hash(pattern2), hash(pattern1)) + self.assertEqual(pattern2, pattern1) + + # not equal: pattern of a different types (str vs bytes), + # comparison must not raise a BytesWarning + re.purge() + pattern3 = re.compile('abc') + with warnings.catch_warnings(): + warnings.simplefilter('error', BytesWarning) + self.assertNotEqual(pattern3, pattern1) + class PatternReprTests(unittest.TestCase): def check(self, pattern, expected): diff --git a/Misc/NEWS b/Misc/NEWS index 7ba6b427700..ab846a6206f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -42,6 +42,11 @@ Core and Builtins Library ------- +- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created + by re.compile(), become comparable (only x==y and x!=y operators). This + change should fix the issue #18383: don't duplicate warning filters when the + warnings module is reloaded (thing usually only done in unit tests). + - Issue #20572: The subprocess.Popen.wait method's undocumented endtime parameter now raises a DeprecationWarning. @@ -77,7 +82,7 @@ Library - Issue #28703: Fix asyncio.iscoroutinefunction to handle Mock objects. -- Issue #28704: Fix create_unix_server to support Path-like objects +- Issue #28704: Fix create_unix_server to support Path-like objects (PEP 519). - Issue #28720: Add collections.abc.AsyncGenerator. diff --git a/Modules/_sre.c b/Modules/_sre.c index 69c7bc0de69..c1e9fa6e6bd 100644 --- a/Modules/_sre.c +++ b/Modules/_sre.c @@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, self->groups = groups; - Py_XINCREF(groupindex); + Py_INCREF(groupindex); self->groupindex = groupindex; - Py_XINCREF(indexgroup); + Py_INCREF(indexgroup); self->indexgroup = indexgroup; - self->weakreflist = NULL; - if (!_validate(self)) { Py_DECREF(self); return NULL; @@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_ return (PyObject*) scanner; } +static Py_hash_t +pattern_hash(PatternObject *self) +{ + Py_hash_t hash, hash2; + + hash = PyObject_Hash(self->pattern); + if (hash == -1) { + return -1; + } + + hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); + hash ^= hash2; + + hash ^= self->flags; + hash ^= self->isbytes; + hash ^= self->codesize; + + if (hash == -1) { + hash = -2; + } + return hash; +} + +static PyObject* +pattern_richcompare(PyObject *lefto, PyObject *righto, int op) +{ + PatternObject *left, *right; + int cmp; + + if (op != Py_EQ && op != Py_NE) { + Py_RETURN_NOTIMPLEMENTED; + } + + if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) { + Py_RETURN_NOTIMPLEMENTED; + } + left = (PatternObject *)lefto; + right = (PatternObject *)righto; + + cmp = (left->flags == right->flags + && left->isbytes == right->isbytes + && left->codesize && right->codesize); + if (cmp) { + /* Compare the code and the pattern because the same pattern can + produce different codes depending on the locale used to compile the + pattern when the re.LOCALE flag is used. Don't compare groups, + indexgroup nor groupindex: they are derivated from the pattern. */ + cmp = (memcmp(left->code, right->code, + sizeof(left->code[0]) * left->codesize) == 0); + } + if (cmp) { + cmp = PyObject_RichCompareBool(left->pattern, right->pattern, + Py_EQ); + if (cmp < 0) { + return NULL; + } + } + if (op == Py_NE) { + cmp = !cmp; + } + return PyBool_FromLong(cmp); +} + #include "clinic/_sre.c.h" static PyMethodDef pattern_methods[] = { @@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = { 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ - 0, /* tp_hash */ + (hashfunc)pattern_hash, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ 0, /* tp_getattro */ @@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = { pattern_doc, /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ - 0, /* tp_richcompare */ + pattern_richcompare, /* tp_richcompare */ offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */ 0, /* tp_iter */ 0, /* tp_iternext */