Improve usability of the SequenceMatcher by returning named tuples describing match ranges.
This commit is contained in:
parent
e896acc98c
commit
0ff4dafee0
|
@@ -30,9 +30,12 @@ Class HtmlDiff:
|
||||||
|
|
||||||
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
|
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
|
||||||
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
|
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
|
||||||
'unified_diff', 'HtmlDiff']
|
'unified_diff', 'HtmlDiff', 'Match']
|
||||||
|
|
||||||
import heapq
|
import heapq
|
||||||
|
from collections import namedtuple as _namedtuple
|
||||||
|
|
||||||
|
Match = _namedtuple('Match', 'a b size')
|
||||||
|
|
||||||
def _calculate_ratio(matches, length):
|
def _calculate_ratio(matches, length):
|
||||||
if length:
|
if length:
|
||||||
|
@@ -363,7 +366,7 @@ class SequenceMatcher:
|
||||||
|
|
||||||
>>> s = SequenceMatcher(None, " abcd", "abcd abcd")
|
>>> s = SequenceMatcher(None, " abcd", "abcd abcd")
|
||||||
>>> s.find_longest_match(0, 5, 0, 9)
|
>>> s.find_longest_match(0, 5, 0, 9)
|
||||||
(0, 4, 5)
|
Match(a=0, b=4, size=5)
|
||||||
|
|
||||||
If isjunk is defined, first the longest matching block is
|
If isjunk is defined, first the longest matching block is
|
||||||
determined as above, but with the additional restriction that no
|
determined as above, but with the additional restriction that no
|
||||||
|
@@ -379,13 +382,13 @@ class SequenceMatcher:
|
||||||
|
|
||||||
>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
|
>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
|
||||||
>>> s.find_longest_match(0, 5, 0, 9)
|
>>> s.find_longest_match(0, 5, 0, 9)
|
||||||
(1, 0, 4)
|
Match(a=1, b=0, size=4)
|
||||||
|
|
||||||
If no blocks match, return (alo, blo, 0).
|
If no blocks match, return (alo, blo, 0).
|
||||||
|
|
||||||
>>> s = SequenceMatcher(None, "ab", "c")
|
>>> s = SequenceMatcher(None, "ab", "c")
|
||||||
>>> s.find_longest_match(0, 2, 0, 1)
|
>>> s.find_longest_match(0, 2, 0, 1)
|
||||||
(0, 0, 0)
|
Match(a=0, b=0, size=0)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# CAUTION: stripping common prefix or suffix would be incorrect.
|
# CAUTION: stripping common prefix or suffix would be incorrect.
|
||||||
|
@@ -452,7 +455,7 @@ class SequenceMatcher:
|
||||||
a[besti+bestsize] == b[bestj+bestsize]:
|
a[besti+bestsize] == b[bestj+bestsize]:
|
||||||
bestsize = bestsize + 1
|
bestsize = bestsize + 1
|
||||||
|
|
||||||
return besti, bestj, bestsize
|
return Match(besti, bestj, bestsize)
|
||||||
|
|
||||||
def get_matching_blocks(self):
|
def get_matching_blocks(self):
|
||||||
"""Return list of triples describing matching subsequences.
|
"""Return list of triples describing matching subsequences.
|
||||||
|
@@ -470,7 +473,7 @@ class SequenceMatcher:
|
||||||
|
|
||||||
>>> s = SequenceMatcher(None, "abxcd", "abcd")
|
>>> s = SequenceMatcher(None, "abxcd", "abcd")
|
||||||
>>> s.get_matching_blocks()
|
>>> s.get_matching_blocks()
|
||||||
[(0, 0, 2), (3, 2, 2), (5, 4, 0)]
|
[Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if self.matching_blocks is not None:
|
if self.matching_blocks is not None:
|
||||||
|
@@ -523,7 +526,7 @@ class SequenceMatcher:
|
||||||
|
|
||||||
non_adjacent.append( (la, lb, 0) )
|
non_adjacent.append( (la, lb, 0) )
|
||||||
self.matching_blocks = non_adjacent
|
self.matching_blocks = non_adjacent
|
||||||
return self.matching_blocks
|
return map(Match._make, self.matching_blocks)
|
||||||
|
|
||||||
def get_opcodes(self):
|
def get_opcodes(self):
|
||||||
"""Return list of 5-tuples describing how to turn a into b.
|
"""Return list of 5-tuples describing how to turn a into b.
|
||||||
|
|
Loading…
Reference in New Issue