bpo-40755: Add rich comparisons to Counter (GH-20548)

2020-05-31 14:57:42 -07:00 · 2020-05-31 14:57:42 -07:00 · b7d79b4f36
parent 2b201369b4
commit b7d79b4f36
5 changed files with 71 additions and 190 deletions
--- a/Doc/library/collections.rst
+++ b/Doc/library/collections.rst
@ -290,47 +290,6 @@ For example::
            >>> sorted(c.elements())
            ['a', 'a', 'a', 'a', 'b', 'b']

-    .. method:: isdisjoint(other)
-
-        True if none of the elements in *self* overlap with those in *other*.
-        Negative or missing counts are ignored.
-        Logically equivalent to:  ``not (+self) & (+other)``
-
-        .. versionadded:: 3.10
-
-    .. method:: isequal(other)
-
-        Test whether counts agree exactly.
-        Negative or missing counts are treated as zero.
-
-        This method works differently than the inherited :meth:`__eq__` method
-        which treats negative or missing counts as distinct from zero::
-
-            >>> Counter(a=1, b=0).isequal(Counter(a=1))
-            True
-            >>> Counter(a=1, b=0) == Counter(a=1)
-            False
-
-        Logically equivalent to:  ``+self == +other``
-
-        .. versionadded:: 3.10
-
-    .. method:: issubset(other)
-
-        True if the counts in *self* are less than or equal to those in *other*.
-        Negative or missing counts are treated as zero.
-        Logically equivalent to:  ``not self - (+other)``
-
-        .. versionadded:: 3.10
-
-    .. method:: issuperset(other)
-
-        True if the counts in *self* are greater than or equal to those in *other*.
-        Negative or missing counts are treated as zero.
-        Logically equivalent to:  ``not other - (+self)``
-
-        .. versionadded:: 3.10
-
    .. method:: most_common([n])

        Return a list of the *n* most common elements and their counts from the
@ -369,6 +328,19 @@ For example::
        instead of replacing them.  Also, the *iterable* is expected to be a
        sequence of elements, not a sequence of ``(key, value)`` pairs.

+Counters support rich comparison operators for equality, subset, and
+superset relationships: ``==``, ``!=``, ``<``, ``<=``, ``>``, ``>=``.
+All of those tests treat missing elements as having zero counts so that
+``Counter(a=1) == Counter(a=1, b=0)`` returns true.
+
+.. versionadded:: 3.10
+   Rich comparison operations we were added
+
+.. versionchanged:: 3.10
+   In equality tests, missing elements are treated as having zero counts.
+   Formerly, ``Counter(a=3)`` and ``Counter(a=3, b=0)`` were considered
+   distinct.
+
 Common patterns for working with :class:`Counter` objects::

    sum(c.values())                 # total of all counts
--- a/Lib/collections/init.py
+++ b/Lib/collections/init.py
@ -691,6 +691,42 @@ class Counter(dict):
        if elem in self:
            super().__delitem__(elem)

+    def __eq__(self, other):
+        'True if all counts agree. Missing counts are treated as zero.'
+        if not isinstance(other, Counter):
+            return NotImplemented
+        return all(self[e] == other[e] for c in (self, other) for e in c)
+
+    def __ne__(self, other):
+        'True if any counts disagree. Missing counts are treated as zero.'
+        if not isinstance(other, Counter):
+            return NotImplemented
+        return not self == other
+
+    def __le__(self, other):
+        'True if all counts in self are a subset of those in other.'
+        if not isinstance(other, Counter):
+            return NotImplemented
+        return all(self[e] <= other[e] for c in (self, other) for e in c)
+
+    def __lt__(self, other):
+        'True if all counts in self are a proper subset of those in other.'
+        if not isinstance(other, Counter):
+            return NotImplemented
+        return self <= other and self != other
+
+    def __ge__(self, other):
+        'True if all counts in self are a superset of those in other.'
+        if not isinstance(other, Counter):
+            return NotImplemented
+        return all(self[e] >= other[e] for c in (self, other) for e in c)
+
+    def __gt__(self, other):
+        'True if all counts in self are a proper superset of those in other.'
+        if not isinstance(other, Counter):
+            return NotImplemented
+        return self >= other and self != other
+
    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
@ -886,92 +922,6 @@ class Counter(dict):
                self[elem] = other_count
        return self._keep_positive()

-    def isequal(self, other):
-        ''' Test whether counts agree exactly.
-
-        Negative or missing counts are treated as zero.
-
-        This is different than the inherited __eq__() method which
-        treats negative or missing counts as distinct from zero:
-
-            >>> Counter(a=1, b=0).isequal(Counter(a=1))
-            True
-            >>> Counter(a=1, b=0) == Counter(a=1)
-            False
-
-        Logically equivalent to:  +self == +other
-        '''
-        if not isinstance(other, Counter):
-            other = Counter(other)
-        for elem in set(self) | set(other):
-            left = self[elem]
-            right = other[elem]
-            if left == right:
-                continue
-            if left < 0:
-                left = 0
-            if right < 0:
-                right = 0
-            if left != right:
-                return False
-        return True
-
-    def issubset(self, other):
-        '''True if the counts in self are less than or equal to those in other.
-
-        Negative or missing counts are treated as zero.
-
-        Logically equivalent to:  not self - (+other)
-        '''
-        if not isinstance(other, Counter):
-            other = Counter(other)
-        for elem, count in self.items():
-            other_count = other[elem]
-            if other_count < 0:
-                other_count = 0
-            if count > other_count:
-                return False
-        return True
-
-    def issuperset(self, other):
-        '''True if the counts in self are greater than or equal to those in other.
-
-        Negative or missing counts are treated as zero.
-
-        Logically equivalent to:  not other - (+self)
-        '''
-        if not isinstance(other, Counter):
-            other = Counter(other)
-        return other.issubset(self)
-
-    def isdisjoint(self, other):
-        '''True if none of the elements in self overlap with those in other.
-
-        Negative or missing counts are ignored.
-
-        Logically equivalent to:  not (+self) & (+other)
-        '''
-        if not isinstance(other, Counter):
-            other = Counter(other)
-        for elem, count in self.items():
-            if count > 0 and other[elem] > 0:
-                return False
-        return True
-
-    # Rich comparison operators for multiset subset and superset tests
-    # have been deliberately omitted due to semantic conflicts with the
-    # existing inherited dict equality method.  Subset and superset
-    # semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q;
-    # however, that would not be the case for p=Counter(a=1, b=0)
-    # and q=Counter(a=1) where the dictionaries are not equal.
-
-    def _omitted(self, other):
-        raise TypeError(
-            'Rich comparison operators have been deliberately omitted. '
-            'Use the isequal(), issubset(), and issuperset() methods instead.')
-
-    __lt__ = __le__ = __gt__ = __ge__ = __lt__ = _omitted
-

 ########################################################################
 ###  ChainMap
--- a/Lib/test/test_collections.py
+++ b/Lib/test/test_collections.py
@ -2123,29 +2123,6 @@ class TestCounter(unittest.TestCase):
                set_result = setop(set(p.elements()), set(q.elements()))
                self.assertEqual(counter_result, dict.fromkeys(set_result, 1))

-    def test_subset_superset_not_implemented(self):
-        # Verify that multiset comparison operations are not implemented.
-
-        # These operations were intentionally omitted because multiset
-        # comparison semantics conflict with existing dict equality semantics.
-
-        # For multisets, we would expect that if p<=q and p>=q are both true,
-        # then p==q.  However, dict equality semantics require that p!=q when
-        # one of sets contains an element with a zero count and the other
-        # doesn't.
-
-        p = Counter(a=1, b=0)
-        q = Counter(a=1, c=0)
-        self.assertNotEqual(p, q)
-        with self.assertRaises(TypeError):
-            p < q
-        with self.assertRaises(TypeError):
-            p <= q
-        with self.assertRaises(TypeError):
-            p > q
-        with self.assertRaises(TypeError):
-            p >= q
-
    def test_inplace_operations(self):
        elements = 'abcd'
        for i in range(1000):
@ -2234,49 +2211,32 @@ class TestCounter(unittest.TestCase):
            self.assertEqual(set(cp - cq), sp - sq)
            self.assertEqual(set(cp | cq), sp | sq)
            self.assertEqual(set(cp & cq), sp & sq)
-            self.assertEqual(cp.isequal(cq), sp == sq)
-            self.assertEqual(cp.issubset(cq), sp.issubset(sq))
-            self.assertEqual(cp.issuperset(cq), sp.issuperset(sq))
-            self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq))
+            self.assertEqual(cp == cq, sp == sq)
+            self.assertEqual(cp != cq, sp != sq)
+            self.assertEqual(cp <= cq, sp <= sq)
+            self.assertEqual(cp >= cq, sp >= sq)
+            self.assertEqual(cp < cq, sp < sq)
+            self.assertEqual(cp > cq, sp > sq)

-    def test_multiset_equal(self):
-        self.assertTrue(Counter(a=3, b=2, c=0).isequal('ababa'))
-        self.assertFalse(Counter(a=3, b=2).isequal('babab'))
+    def test_eq(self):
+        self.assertEqual(Counter(a=3, b=2, c=0), Counter('ababa'))
+        self.assertNotEqual(Counter(a=3, b=2), Counter('babab'))

-    def test_multiset_subset(self):
-        self.assertTrue(Counter(a=3, b=2, c=0).issubset('ababa'))
-        self.assertFalse(Counter(a=3, b=2).issubset('babab'))
+    def test_le(self):
+        self.assertTrue(Counter(a=3, b=2, c=0) <= Counter('ababa'))
+        self.assertFalse(Counter(a=3, b=2) <= Counter('babab'))

-    def test_multiset_superset(self):
-        self.assertTrue(Counter(a=3, b=2, c=0).issuperset('aab'))
-        self.assertFalse(Counter(a=3, b=2, c=0).issuperset('aabd'))
+    def test_lt(self):
+        self.assertTrue(Counter(a=3, b=1, c=0) < Counter('ababa'))
+        self.assertFalse(Counter(a=3, b=2, c=0) < Counter('ababa'))

-    def test_multiset_disjoint(self):
-        self.assertTrue(Counter(a=3, b=2, c=0).isdisjoint('cde'))
-        self.assertFalse(Counter(a=3, b=2, c=0).isdisjoint('bcd'))
+    def test_ge(self):
+        self.assertTrue(Counter(a=2, b=1, c=0) >= Counter('aab'))
+        self.assertFalse(Counter(a=3, b=2, c=0) >= Counter('aabd'))

-    def test_multiset_predicates_with_negative_counts(self):
-        # Multiset predicates run on the output of the elements() method,
-        # meaning that zero counts and negative counts are ignored.
-        # The tests below confirm that we get that same results as the
-        # tests above, even after a negative count has been included
-        # in either *self* or *other*.
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa'))
-        self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab'))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa'))
-        self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab'))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab'))
-        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd'))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde'))
-        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd'))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1)))
-        self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1)))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1)))
-        self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1)))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1)))
-        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1)))
-        self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1)))
-        self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1)))
+    def test_gt(self):
+        self.assertTrue(Counter(a=3, b=2, c=0) > Counter('aab'))
+        self.assertFalse(Counter(a=2, b=1, c=0) > Counter('aab'))


 ################################################################################
--- a/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst
+++ b/Misc/NEWS.d/next/Library/2020-05-23-18-24-13.bpo-22533.k64XGo.rst
@ -1,2 +0,0 @@
-Add multiset comparison methods to collections.Counter(): isequal(),
-issubset(), issuperset(), and isdisjoint().
--- a/Misc/NEWS.d/next/Library/2020-05-30-18-48-58.bpo-40755.IyOe2J.rst
+++ b/Misc/NEWS.d/next/Library/2020-05-30-18-48-58.bpo-40755.IyOe2J.rst
@ -0,0 +1 @@
+Add rich comparisons to collections.Counter().
				`@ -0,0 +1 @@`
				`Add rich comparisons to collections.Counter().`