Issue 10534, difflib: tweak doc; test new SequenceMatcher instance attributes; avoid unneeded lists of SM.b2j keys and items in .__chain_b. Do not backport.
This commit is contained in:
parent
50ba19ee45
commit
17a59252e8
|
@ -359,11 +359,11 @@ The :class:`SequenceMatcher` class has this constructor:
|
||||||
The *autojunk* parameter.
|
The *autojunk* parameter.
|
||||||
|
|
||||||
SequenceMatcher objects get three data attributes: *bjunk* is the
|
SequenceMatcher objects get three data attributes: *bjunk* is the
|
||||||
set of elements of b for which *isjunk* is True; *bpopular* is the set of non-
|
set of elements of *b* for which *isjunk* is True; *bpopular* is the set of
|
||||||
junk elements considered popular by the heuristic (if it is not disabled);
|
non-junk elements considered popular by the heuristic (if it is not
|
||||||
*b2j* is a dict mapping the remaining elements of b to a list of positions where
|
disabled); *b2j* is a dict mapping the remaining elements of *b* to a list
|
||||||
they occur. All three are reset whenever *b* is reset with :meth:`set_seqs`
|
of positions where they occur. All three are reset whenever *b* is reset
|
||||||
or :meth:`set_seq2`.
|
with :meth:`set_seqs` or :meth:`set_seq2`.
|
||||||
|
|
||||||
.. versionadded:: 3.2
|
.. versionadded:: 3.2
|
||||||
The *bjunk* and *bpopular* attributes.
|
The *bjunk* and *bpopular* attributes.
|
||||||
|
|
|
@ -320,9 +320,10 @@ class SequenceMatcher:
|
||||||
self.bjunk = junk = set()
|
self.bjunk = junk = set()
|
||||||
isjunk = self.isjunk
|
isjunk = self.isjunk
|
||||||
if isjunk:
|
if isjunk:
|
||||||
for elt in list(b2j.keys()): # using list() since b2j is modified
|
for elt in b2j.keys():
|
||||||
if isjunk(elt):
|
if isjunk(elt):
|
||||||
junk.add(elt)
|
junk.add(elt)
|
||||||
|
for elt in junk: # separate loop avoids separate list of keys
|
||||||
del b2j[elt]
|
del b2j[elt]
|
||||||
|
|
||||||
# Purge popular elements that are not junk
|
# Purge popular elements that are not junk
|
||||||
|
@ -330,9 +331,10 @@ class SequenceMatcher:
|
||||||
n = len(b)
|
n = len(b)
|
||||||
if self.autojunk and n >= 200:
|
if self.autojunk and n >= 200:
|
||||||
ntest = n // 100 + 1
|
ntest = n // 100 + 1
|
||||||
for elt, idxs in list(b2j.items()):
|
for elt, idxs in b2j.items():
|
||||||
if len(idxs) > ntest:
|
if len(idxs) > ntest:
|
||||||
popular.add(elt)
|
popular.add(elt)
|
||||||
|
for elt in popular: # separate loop avoids separate list of items; as fast for 1% deletion
|
||||||
del b2j[elt]
|
del b2j[elt]
|
||||||
|
|
||||||
def isbjunk(self, item):
|
def isbjunk(self, item):
|
||||||
|
|
|
@ -12,12 +12,14 @@ class TestWithAscii(unittest.TestCase):
|
||||||
self.assertEqual(list(sm.get_opcodes()),
|
self.assertEqual(list(sm.get_opcodes()),
|
||||||
[ ('insert', 0, 0, 0, 1),
|
[ ('insert', 0, 0, 0, 1),
|
||||||
('equal', 0, 100, 1, 101)])
|
('equal', 0, 100, 1, 101)])
|
||||||
|
self.assertEqual(sm.bpopular, set())
|
||||||
sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
|
sm = difflib.SequenceMatcher(None, 'b' * 100, 'b' * 50 + 'a' + 'b' * 50)
|
||||||
self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
|
self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
|
||||||
self.assertEqual(list(sm.get_opcodes()),
|
self.assertEqual(list(sm.get_opcodes()),
|
||||||
[ ('equal', 0, 50, 0, 50),
|
[ ('equal', 0, 50, 0, 50),
|
||||||
('insert', 50, 50, 50, 51),
|
('insert', 50, 50, 50, 51),
|
||||||
('equal', 50, 100, 51, 101)])
|
('equal', 50, 100, 51, 101)])
|
||||||
|
self.assertEqual(sm.bpopular, set())
|
||||||
|
|
||||||
def test_one_delete(self):
|
def test_one_delete(self):
|
||||||
sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
|
sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
|
||||||
|
@ -27,6 +29,19 @@ class TestWithAscii(unittest.TestCase):
|
||||||
('delete', 40, 41, 40, 40),
|
('delete', 40, 41, 40, 40),
|
||||||
('equal', 41, 81, 40, 80)])
|
('equal', 41, 81, 40, 80)])
|
||||||
|
|
||||||
|
def test_bjunk(self):
|
||||||
|
sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
|
||||||
|
a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
|
||||||
|
self.assertEqual(sm.bjunk, set())
|
||||||
|
|
||||||
|
sm = difflib.SequenceMatcher(isjunk=lambda x: x == ' ',
|
||||||
|
a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
|
||||||
|
self.assertEqual(sm.bjunk, {' '})
|
||||||
|
|
||||||
|
sm = difflib.SequenceMatcher(isjunk=lambda x: x in [' ', 'b'],
|
||||||
|
a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
|
||||||
|
self.assertEqual(sm.bjunk, {' ', 'b'})
|
||||||
|
|
||||||
|
|
||||||
class TestAutojunk(unittest.TestCase):
|
class TestAutojunk(unittest.TestCase):
|
||||||
"""Tests for the autojunk parameter added in 2.7"""
|
"""Tests for the autojunk parameter added in 2.7"""
|
||||||
|
@ -38,10 +53,12 @@ class TestAutojunk(unittest.TestCase):
|
||||||
|
|
||||||
sm = difflib.SequenceMatcher(None, seq1, seq2)
|
sm = difflib.SequenceMatcher(None, seq1, seq2)
|
||||||
self.assertAlmostEqual(sm.ratio(), 0, places=3)
|
self.assertAlmostEqual(sm.ratio(), 0, places=3)
|
||||||
|
self.assertEqual(sm.bpopular, {'b'})
|
||||||
|
|
||||||
# Now turn the heuristic off
|
# Now turn the heuristic off
|
||||||
sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
|
sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
|
||||||
self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
|
self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
|
||||||
|
self.assertEqual(sm.bpopular, set())
|
||||||
|
|
||||||
|
|
||||||
class TestSFbugs(unittest.TestCase):
|
class TestSFbugs(unittest.TestCase):
|
||||||
|
|
Loading…
Reference in New Issue