bpo-40394 - difflib.SequenceMatcher.find_longest_match default args (GH-19742)
* bpo-40394 - difflib.SequenceMatcher.find_longest_match default args. Added default args to find_longest_match, as well as related tests.
This commit is contained in:
parent
6900f16d22
commit
3209cbd99b
|
@ -421,7 +421,7 @@ The :class:`SequenceMatcher` class has this constructor:
|
||||||
is not changed.
|
is not changed.
|
||||||
|
|
||||||
|
|
||||||
.. method:: find_longest_match(alo, ahi, blo, bhi)
|
.. method:: find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
|
||||||
|
|
||||||
Find longest matching block in ``a[alo:ahi]`` and ``b[blo:bhi]``.
|
Find longest matching block in ``a[alo:ahi]`` and ``b[blo:bhi]``.
|
||||||
|
|
||||||
|
@ -458,6 +458,9 @@ The :class:`SequenceMatcher` class has this constructor:
|
||||||
|
|
||||||
This method returns a :term:`named tuple` ``Match(a, b, size)``.
|
This method returns a :term:`named tuple` ``Match(a, b, size)``.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.9
|
||||||
|
Added default arguments.
|
||||||
|
|
||||||
|
|
||||||
.. method:: get_matching_blocks()
|
.. method:: get_matching_blocks()
|
||||||
|
|
||||||
|
|
|
@ -130,7 +130,7 @@ class SequenceMatcher:
|
||||||
set_seq2(b)
|
set_seq2(b)
|
||||||
Set the second sequence to be compared.
|
Set the second sequence to be compared.
|
||||||
|
|
||||||
find_longest_match(alo, ahi, blo, bhi)
|
find_longest_match(alo=0, ahi=None, blo=0, bhi=None)
|
||||||
Find longest matching block in a[alo:ahi] and b[blo:bhi].
|
Find longest matching block in a[alo:ahi] and b[blo:bhi].
|
||||||
|
|
||||||
get_matching_blocks()
|
get_matching_blocks()
|
||||||
|
@ -334,9 +334,11 @@ class SequenceMatcher:
|
||||||
for elt in popular: # ditto; as fast for 1% deletion
|
for elt in popular: # ditto; as fast for 1% deletion
|
||||||
del b2j[elt]
|
del b2j[elt]
|
||||||
|
|
||||||
def find_longest_match(self, alo, ahi, blo, bhi):
|
def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
|
||||||
"""Find longest matching block in a[alo:ahi] and b[blo:bhi].
|
"""Find longest matching block in a[alo:ahi] and b[blo:bhi].
|
||||||
|
|
||||||
|
By default it will find the longest match in the entirety of a and b.
|
||||||
|
|
||||||
If isjunk is not defined:
|
If isjunk is not defined:
|
||||||
|
|
||||||
Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
|
Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
|
||||||
|
@ -391,6 +393,10 @@ class SequenceMatcher:
|
||||||
# the unique 'b's and then matching the first two 'a's.
|
# the unique 'b's and then matching the first two 'a's.
|
||||||
|
|
||||||
a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
|
a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
|
||||||
|
if ahi is None:
|
||||||
|
ahi = len(a)
|
||||||
|
if bhi is None:
|
||||||
|
bhi = len(b)
|
||||||
besti, bestj, bestsize = alo, blo, 0
|
besti, bestj, bestsize = alo, blo, 0
|
||||||
# find longest junk-free match
|
# find longest junk-free match
|
||||||
# during an iteration of the loop, j2len[j] = length of longest
|
# during an iteration of the loop, j2len[j] = length of longest
|
||||||
|
|
|
@ -501,12 +501,58 @@ class TestJunkAPIs(unittest.TestCase):
|
||||||
for char in ['a', '#', '\n', '\f', '\r', '\v']:
|
for char in ['a', '#', '\n', '\f', '\r', '\v']:
|
||||||
self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
|
self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))
|
||||||
|
|
||||||
|
class TestFindLongest(unittest.TestCase):
|
||||||
|
def longer_match_exists(self, a, b, n):
    """Return True if a contiguous piece of b longer than n occurs in a.

    Only windows of exactly ``n + 1`` items need to be examined: any
    longer common block necessarily contains a common block of that
    length.  Intended for str arguments, where ``in`` is a substring test.

    a -- sequence searched for the windows
    b -- sequence the candidate windows are sliced from
    n -- size of the match already found (the length to beat)
    """
    width = n + 1
    # The last valid window starts at len(b) - width, so the range must
    # stop at len(b) - width + 1; stopping one earlier would skip a
    # longer match located at the very end of b.  When b is shorter than
    # width the range is empty and the result is False.
    return any(b[start:start + width] in a
               for start in range(len(b) - width + 1))
|
||||||
|
|
||||||
|
def test_default_args(self):
|
||||||
|
a = 'foo bar'
|
||||||
|
b = 'foo baz bar'
|
||||||
|
sm = difflib.SequenceMatcher(a=a, b=b)
|
||||||
|
match = sm.find_longest_match()
|
||||||
|
self.assertEqual(match.a, 0)
|
||||||
|
self.assertEqual(match.b, 0)
|
||||||
|
self.assertEqual(match.size, 6)
|
||||||
|
self.assertEqual(a[match.a: match.a + match.size],
|
||||||
|
b[match.b: match.b + match.size])
|
||||||
|
self.assertFalse(self.longer_match_exists(a, b, match.size))
|
||||||
|
|
||||||
|
match = sm.find_longest_match(alo=2, blo=4)
|
||||||
|
self.assertEqual(match.a, 3)
|
||||||
|
self.assertEqual(match.b, 7)
|
||||||
|
self.assertEqual(match.size, 4)
|
||||||
|
self.assertEqual(a[match.a: match.a + match.size],
|
||||||
|
b[match.b: match.b + match.size])
|
||||||
|
self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))
|
||||||
|
|
||||||
|
match = sm.find_longest_match(bhi=5, blo=1)
|
||||||
|
self.assertEqual(match.a, 1)
|
||||||
|
self.assertEqual(match.b, 1)
|
||||||
|
self.assertEqual(match.size, 4)
|
||||||
|
self.assertEqual(a[match.a: match.a + match.size],
|
||||||
|
b[match.b: match.b + match.size])
|
||||||
|
self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))
|
||||||
|
|
||||||
|
def test_longest_match_with_popular_chars(self):
|
||||||
|
a = 'dabcd'
|
||||||
|
b = 'd'*100 + 'abc' + 'd'*100 # length over 200 so popular used
|
||||||
|
sm = difflib.SequenceMatcher(a=a, b=b)
|
||||||
|
match = sm.find_longest_match(0, len(a), 0, len(b))
|
||||||
|
self.assertEqual(match.a, 0)
|
||||||
|
self.assertEqual(match.b, 99)
|
||||||
|
self.assertEqual(match.size, 5)
|
||||||
|
self.assertEqual(a[match.a: match.a + match.size],
|
||||||
|
b[match.b: match.b + match.size])
|
||||||
|
self.assertFalse(self.longer_match_exists(a, b, match.size))
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
difflib.HtmlDiff._default_prefix = 0
|
difflib.HtmlDiff._default_prefix = 0
|
||||||
Doctests = doctest.DocTestSuite(difflib)
|
Doctests = doctest.DocTestSuite(difflib)
|
||||||
run_unittest(
|
run_unittest(
|
||||||
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
|
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
|
||||||
TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)
|
TestOutputFormat, TestBytes, TestJunkAPIs, TestFindLongest, Doctests)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test_main()
|
test_main()
|
||||||
|
|
|
@ -88,6 +88,7 @@ Dwayne Bailey
|
||||||
Stig Bakken
|
Stig Bakken
|
||||||
Aleksandr Balezin
|
Aleksandr Balezin
|
||||||
Greg Ball
|
Greg Ball
|
||||||
|
Lewis Ball
|
||||||
Luigi Ballabio
|
Luigi Ballabio
|
||||||
Thomas Ballinger
|
Thomas Ballinger
|
||||||
Jeff Balogh
|
Jeff Balogh
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Added default arguments to :meth:`difflib.SequenceMatcher.find_longest_match()`.
|
Loading…
Reference in New Issue