Added functions for creating context diffs and unified diffs.
Documentation update and NEWS item are forthcoming.
This commit is contained in:
parent
f177bf22f9
commit
f0b1a1fca9
201
Lib/difflib.py
201
Lib/difflib.py
|
@ -6,12 +6,18 @@ Module difflib -- helpers for computing deltas between objects.
|
||||||
Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
|
Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
|
||||||
Use SequenceMatcher to return list of the best "good enough" matches.
|
Use SequenceMatcher to return list of the best "good enough" matches.
|
||||||
|
|
||||||
|
Function context_diff(a, b):
|
||||||
|
For two lists of strings, return a delta in context diff format.
|
||||||
|
|
||||||
Function ndiff(a, b):
|
Function ndiff(a, b):
|
||||||
Return a delta: the difference between `a` and `b` (lists of strings).
|
Return a delta: the difference between `a` and `b` (lists of strings).
|
||||||
|
|
||||||
Function restore(delta, which):
|
Function restore(delta, which):
|
||||||
Return one of the two sequences that generated an ndiff delta.
|
Return one of the two sequences that generated an ndiff delta.
|
||||||
|
|
||||||
|
Function unified_diff(a, b):
|
||||||
|
For two lists of strings, return a delta in unified diff format.
|
||||||
|
|
||||||
Class SequenceMatcher:
|
Class SequenceMatcher:
|
||||||
A flexible class for comparing pairs of sequences of any type.
|
A flexible class for comparing pairs of sequences of any type.
|
||||||
|
|
||||||
|
@ -20,7 +26,8 @@ Class Differ:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
|
__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
|
||||||
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK']
|
'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
|
||||||
|
'unified_diff']
|
||||||
|
|
||||||
class SequenceMatcher:
|
class SequenceMatcher:
|
||||||
|
|
||||||
|
@ -532,6 +539,54 @@ class SequenceMatcher:
|
||||||
answer.append( ('equal', ai, i, bj, j) )
|
answer.append( ('equal', ai, i, bj, j) )
|
||||||
return answer
|
return answer
|
||||||
|
|
||||||
|
def get_grouped_opcodes(self, n=3):
    """ Isolate change clusters by eliminating ranges with no changes.

    Return a generator of groups with up to n lines of context.
    Each group is in the same format as returned by get_opcodes().

    Arguments:
    n -- maximum number of unchanged lines kept on each side of a
         change (default 3).

    Two sequences that produce no opcodes at all (for example two empty
    sequences) yield no groups.

    >>> from pprint import pprint
    >>> a = list(map(str, range(1,40)))
    >>> b = a[:]
    >>> b[8:8] = ['i']     # Make an insertion
    >>> b[20] += 'x'       # Make a replacement
    >>> b[23:28] = []      # Make a deletion
    >>> b[30] += 'y'       # Make another replacement
    >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
    [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
     [('equal', 16, 19, 17, 20),
      ('replace', 19, 20, 20, 21),
      ('equal', 20, 22, 21, 23),
      ('delete', 22, 27, 23, 23),
      ('equal', 27, 30, 23, 26)],
     [('equal', 31, 34, 27, 30),
      ('replace', 34, 35, 30, 31),
      ('equal', 35, 38, 31, 34)]]
    """

    # Work on a private copy: the fixups below assign into the list, and
    # the list handed back by get_opcodes() may be one the matcher keeps
    # and returns again on later calls.
    codes = list(self.get_opcodes())
    if not codes:
        # Nothing to compare (e.g. both sequences empty): no groups.
        return

    # Fixup leading and trailing groups if they show no changes.
    if codes[0][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[0]
        codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
    if codes[-1][0] == 'equal':
        tag, i1, i2, j1, j2 = codes[-1]
        codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)

    # An unchanged run longer than 2*n cannot be covered by the trailing
    # context of one change plus the leading context of the next.
    nn = n + n
    group = []
    for tag, i1, i2, j1, j2 in codes:
        # End the current group and start a new one whenever
        # there is a large range with no changes.
        if tag == 'equal' and i2-i1 > nn:
            # First n lines close the current group ...
            group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
            yield group
            group = []
            # ... and the last n lines open the next one.
            i1, j1 = max(i1, i2-n), max(j1, j2-n)
        group.append((tag, i1, i2, j1 ,j2))
    # Skip a final group that consists of nothing but unchanged context.
    if group and not (len(group)==1 and group[0][0] == 'equal'):
        yield group
|
||||||
|
|
||||||
def ratio(self):
|
def ratio(self):
|
||||||
"""Return a measure of the sequences' similarity (float in [0,1]).
|
"""Return a measure of the sequences' similarity (float in [0,1]).
|
||||||
|
|
||||||
|
@ -1042,6 +1097,150 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"):
|
||||||
|
|
||||||
del re
|
del re
|
||||||
|
|
||||||
|
|
||||||
|
def _unified_hunk_range(start, stop):
    """Format the half-open line range [start, stop) for an '@@' header.

    Returns 'begin,length' with a 1-based begin.  An empty range has no
    first line of its own, so it is anchored to the line just before it
    (0 when the range sits at the very start of the file).
    """
    length = stop - start
    begin = start
    if length:
        begin = start + 1
    return '%d,%d' % (begin, length)


def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a unified diff.

    Unified diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with ---, +++, or @@) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The unidiff format normally has a header for filenames and modification
    times.  Any or all of these may be specified using strings for
    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    times are normally expressed in the format returned by time.ctime().

    Nothing at all is generated (not even the file header) when the two
    sequences do not differ.

    Example:

    >>> for line in unified_diff('one two three four'.split(),
    ...             'zero one tree four'.split(), 'Original', 'Current',
    ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
    ...             lineterm=''):
    ...     print(line)
    --- Original Sat Jan 26 23:30:50 1991
    +++ Current Fri Jun 06 10:20:52 2003
    @@ -1,4 +1,4 @@
    +zero
     one
    -two
    -three
    +tree
     four
    """

    started = False
    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, before the first hunk.
            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
            started = True
        # A hunk spans from the start of its first opcode to the end of
        # its last one, on each side.
        first, last = group[0], group[-1]
        yield "@@ -%s +%s @@%s" % (_unified_hunk_range(first[1], last[2]),
                                   _unified_hunk_range(first[3], last[4]),
                                   lineterm)
        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    yield ' ' + line
                continue
            # A replacement is shown as the removed lines followed by the
            # added lines, so both branches below may run for one opcode.
            if tag == 'replace' or tag == 'delete':
                for line in a[i1:i2]:
                    yield '-' + line
            if tag == 'replace' or tag == 'insert':
                for line in b[j1:j2]:
                    yield '+' + line
|
||||||
|
|
||||||
|
# See http://www.unix.org/single_unix_specification/
|
||||||
|
def context_diff(a, b, fromfile='', tofile='',
                 fromfiledate='', tofiledate='', n=3, lineterm='\n'):
    r"""
    Compare two sequences of lines; generate the delta as a context diff.

    Context diffs are a compact way of showing line changes and a few
    lines of context.  The number of context lines is set by 'n' which
    defaults to three.

    By default, the diff control lines (those with *** or ---) are
    created with a trailing newline.  This is helpful so that inputs
    created from file.readlines() result in diffs that are suitable for
    file.writelines() since both the inputs and outputs have trailing
    newlines.

    For inputs that do not have trailing newlines, set the lineterm
    argument to "" so that the output will be uniformly newline free.

    The context diff format normally has a header for filenames and
    modification times.  Any or all of these may be specified using
    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
    The modification times are normally expressed in the format returned
    by time.ctime().  If not specified, the strings default to blanks.

    Nothing at all is generated (not even the file header) when the two
    sequences do not differ.

    Example:

    >>> import sys
    >>> sys.stdout.writelines(context_diff(
    ...       'one\ntwo\nthree\nfour\n'.splitlines(True),
    ...       'zero\none\ntree\nfour\n'.splitlines(True),
    ...       'Original', 'Current',
    ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003'))
    *** Original Sat Jan 26 23:30:50 1991
    --- Current Fri Jun 06 10:22:46 2003
    ***************
    *** 1,4 ****
      one
    ! two
    ! three
      four
    --- 1,4 ----
    + zero
      one
    ! tree
      four
    """

    started = False
    # Every body line starts with a two-character marker column, so the
    # unchanged-line marker is two blanks to line up with '! ', '+ ', '- '.
    prefixmap = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
        if not started:
            # The file header is emitted once, before the first hunk.
            yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
            yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
            started = True

        tags = [opcode[0] for opcode in group]

        yield '***************%s' % (lineterm,)
        # "From" side: ranges of two or more lines print as 'first,last';
        # shorter ranges print a single line number.
        if group[-1][2] - group[0][1] >= 2:
            yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
        else:
            yield '*** %d ****%s' % (group[-1][2], lineterm)
        # The "from" lines are listed only when this side has a change
        # (a replacement or a deletion); otherwise just the range is shown.
        if 'replace' in tags or 'delete' in tags:
            for tag, i1, i2, j1, j2 in group:
                if tag != 'insert':
                    for line in a[i1:i2]:
                        yield prefixmap[tag] + line

        # "To" side, same rules with insertions in place of deletions.
        if group[-1][4] - group[0][3] >= 2:
            yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
        else:
            yield '--- %d ----%s' % (group[-1][4], lineterm)
        if 'replace' in tags or 'insert' in tags:
            for tag, i1, i2, j1, j2 in group:
                if tag != 'delete':
                    for line in b[j1:j2]:
                        yield prefixmap[tag] + line
|
||||||
|
|
||||||
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
|
def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
|
||||||
r"""
|
r"""
|
||||||
Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
|
Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
|
||||||
|
|
Loading…
Reference in New Issue