Improve usability of the SequenceMatcher by returning named tuples describing match ranges.

This commit is contained in:
Raymond Hettinger 2008-01-11 03:20:54 +00:00
parent e896acc98c
commit 0ff4dafee0
1 changed file with 10 additions and 7 deletions

View File

@@ -30,9 +30,12 @@ Class HtmlDiff:
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher', __all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff', 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
'unified_diff', 'HtmlDiff'] 'unified_diff', 'HtmlDiff', 'Match']
import heapq import heapq
from collections import namedtuple as _namedtuple
Match = _namedtuple('Match', 'a b size')
def _calculate_ratio(matches, length): def _calculate_ratio(matches, length):
if length: if length:
@@ -363,7 +366,7 @@ class SequenceMatcher:
>>> s = SequenceMatcher(None, " abcd", "abcd abcd") >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
>>> s.find_longest_match(0, 5, 0, 9) >>> s.find_longest_match(0, 5, 0, 9)
(0, 4, 5) Match(a=0, b=4, size=5)
If isjunk is defined, first the longest matching block is If isjunk is defined, first the longest matching block is
determined as above, but with the additional restriction that no determined as above, but with the additional restriction that no
@@ -379,13 +382,13 @@ class SequenceMatcher:
>>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd") >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
>>> s.find_longest_match(0, 5, 0, 9) >>> s.find_longest_match(0, 5, 0, 9)
(1, 0, 4) Match(a=1, b=0, size=4)
If no blocks match, return (alo, blo, 0). If no blocks match, return (alo, blo, 0).
>>> s = SequenceMatcher(None, "ab", "c") >>> s = SequenceMatcher(None, "ab", "c")
>>> s.find_longest_match(0, 2, 0, 1) >>> s.find_longest_match(0, 2, 0, 1)
(0, 0, 0) Match(a=0, b=0, size=0)
""" """
# CAUTION: stripping common prefix or suffix would be incorrect. # CAUTION: stripping common prefix or suffix would be incorrect.
@@ -452,7 +455,7 @@ class SequenceMatcher:
a[besti+bestsize] == b[bestj+bestsize]: a[besti+bestsize] == b[bestj+bestsize]:
bestsize = bestsize + 1 bestsize = bestsize + 1
return besti, bestj, bestsize return Match(besti, bestj, bestsize)
def get_matching_blocks(self): def get_matching_blocks(self):
"""Return list of triples describing matching subsequences. """Return list of triples describing matching subsequences.
@@ -470,7 +473,7 @@ class SequenceMatcher:
>>> s = SequenceMatcher(None, "abxcd", "abcd") >>> s = SequenceMatcher(None, "abxcd", "abcd")
>>> s.get_matching_blocks() >>> s.get_matching_blocks()
[(0, 0, 2), (3, 2, 2), (5, 4, 0)] [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]
""" """
if self.matching_blocks is not None: if self.matching_blocks is not None:
@@ -523,7 +526,7 @@ class SequenceMatcher:
non_adjacent.append( (la, lb, 0) ) non_adjacent.append( (la, lb, 0) )
self.matching_blocks = non_adjacent self.matching_blocks = non_adjacent
return self.matching_blocks return map(Match._make, self.matching_blocks)
def get_opcodes(self): def get_opcodes(self):
"""Return list of 5-tuples describing how to turn a into b. """Return list of 5-tuples describing how to turn a into b.