From 3780542039d109ae747816dae208208be93240d6 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 12 Apr 2011 15:14:12 -0700 Subject: [PATCH 1/2] Issue 11747: Fix output format for context diffs. --- Lib/difflib.py | 108 ++++++++++++++++++++++++++------------- Lib/test/test_difflib.py | 65 ++++++++++++++++++++++- 2 files changed, 136 insertions(+), 37 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 24a58a61c06..873fc027286 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1140,6 +1140,21 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"): return ch in ws +######################################################################## +### Unified Diff +######################################################################## + +def _format_range_unified(start, stop): + 'Convert range to the "ed" format' + # Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning = start + 1 # lines start numbering with one + length = stop - start + if length == 1: + return '{}'.format(beginning) + if not length: + beginning -= 1 # empty ranges begin at line just before the range + return '{},{}'.format(beginning, length) + def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n'): r""" @@ -1160,18 +1175,18 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', The unidiff format normally has a header for filenames and modification times. Any or all of these may be specified using strings for - 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification - times are normally expressed in the format returned by time.ctime(). + 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. + The modification times are normally expressed in the ISO 8601 format. Example: >>> for line in unified_diff('one two three four'.split(), ... 'zero one tree four'.split(), 'Original', 'Current', - ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003', + ... '2005-01-26 23:30:50', '2010-04-02 10:20:52', ... lineterm=''): - ... print(line) - --- Original Sat Jan 26 23:30:50 1991 - +++ Current Fri Jun 06 10:20:52 2003 + ... print(line) # doctest: +NORMALIZE_WHITESPACE + --- Original 2005-01-26 23:30:50 + +++ Current 2010-04-02 10:20:52 @@ -1,4 +1,4 @@ +zero one @@ -1184,23 +1199,45 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', started = False for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n): if not started: - yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm) - yield '+++ %s %s%s' % (tofile, tofiledate, lineterm) started = True - i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] - yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm) + fromdate = '\t{}'.format(fromfiledate) if fromfiledate else '' + todate = '\t{}'.format(tofiledate) if tofiledate else '' + yield '--- {}{}{}'.format(fromfile, fromdate, lineterm) + yield '+++ {}{}{}'.format(tofile, todate, lineterm) + + first, last = group[0], group[-1] + file1_range = _format_range_unified(first[1], last[2]) + file2_range = _format_range_unified(first[3], last[4]) + yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) + for tag, i1, i2, j1, j2 in group: if tag == 'equal': for line in a[i1:i2]: yield ' ' + line continue - if tag == 'replace' or tag == 'delete': + if tag in {'replace', 'delete'}: for line in a[i1:i2]: yield '-' + line - if tag == 'replace' or tag == 'insert': + if tag in {'replace', 'insert'}: for line in b[j1:j2]: yield '+' + line + +######################################################################## +### Context Diff +######################################################################## + +def _format_range_context(start, stop): + 'Convert range to the "ed" format' + # Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning = start + 1 # lines start numbering with one + length = stop - start + if not length: + beginning -= 1 # empty ranges begin at line just before the range + if length <= 1: + return '{}'.format(beginning) + return '{},{}'.format(beginning, beginning + length - 1) + # See http://www.unix.org/single_unix_specification/ def context_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n'): @@ -1223,17 +1260,16 @@ def context_diff(a, b, fromfile='', tofile='', The context diff format normally has a header for filenames and modification times. Any or all of these may be specified using strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. - The modification times are normally expressed in the format returned - by time.ctime(). If not specified, the strings default to blanks. + The modification times are normally expressed in the ISO 8601 format. + If not specified, the strings default to blanks. Example: >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1), - ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current', - ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')), + ... 'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')), ... end="") - *** Original Sat Jan 26 23:30:50 1991 - --- Current Fri Jun 06 10:22:46 2003 + *** Original + --- Current *************** *** 1,4 **** one @@ -1247,36 +1283,36 @@ def context_diff(a, b, fromfile='', tofile='', four """ + prefix = dict(insert='+ ', delete='- ', replace='! ', equal=' ') started = False - prefixmap = {'insert':'+ ', 'delete':'- ', 'replace':'! ', 'equal':' '} for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n): if not started: - yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm) - yield '--- %s %s%s' % (tofile, tofiledate, lineterm) started = True + fromdate = '\t{}'.format(fromfiledate) if fromfiledate else '' + todate = '\t{}'.format(tofiledate) if tofiledate else '' + yield '*** {}{}{}'.format(fromfile, fromdate, lineterm) + yield '--- {}{}{}'.format(tofile, todate, lineterm) - yield '***************%s' % (lineterm,) - if group[-1][2] - group[0][1] >= 2: - yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm) - else: - yield '*** %d ****%s' % (group[-1][2], lineterm) - visiblechanges = [e for e in group if e[0] in ('replace', 'delete')] - if visiblechanges: + first, last = group[0], group[-1] + yield '***************' + lineterm + + file1_range = _format_range_context(first[1], last[2]) + yield '*** {} ****{}'.format(file1_range, lineterm) + + if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group): for tag, i1, i2, _, _ in group: if tag != 'insert': for line in a[i1:i2]: - yield prefixmap[tag] + line + yield prefix[tag] + line - if group[-1][4] - group[0][3] >= 2: - yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm) - else: - yield '--- %d ----%s' % (group[-1][4], lineterm) - visiblechanges = [e for e in group if e[0] in ('replace', 'insert')] - if visiblechanges: + file2_range = _format_range_context(first[3], last[4]) + yield '--- {} ----{}'.format(file2_range, lineterm) + + if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group): for tag, _, _, j1, j2 in group: if tag != 'delete': for line in b[j1:j2]: - yield prefixmap[tag] + line + yield prefix[tag] + line def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK): r""" diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index 852aae9b31e..dc2abe874af 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -159,10 +159,73 @@ class TestSFpatches(unittest.TestCase): difflib.SequenceMatcher(None, old, new).get_opcodes() +class TestOutputFormat(unittest.TestCase): + def test_tab_delimiter(self): + args = ['one', 'two', 'Original', 'Current', + '2005-01-26 23:30:50', '2010-04-02 10:20:52'] + ud = difflib.unified_diff(*args, lineterm='') + self.assertEqual(list(ud)[0:2], [ + "--- Original\t2005-01-26 23:30:50", + "+++ Current\t2010-04-02 10:20:52"]) + cd = difflib.context_diff(*args, lineterm='') + self.assertEqual(list(cd)[0:2], [ + "*** Original\t2005-01-26 23:30:50", + "--- Current\t2010-04-02 10:20:52"]) + + def test_no_trailing_tab_on_empty_filedate(self): + args = ['one', 'two', 'Original', 'Current'] + ud = difflib.unified_diff(*args, lineterm='') + self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"]) + + cd = difflib.context_diff(*args, lineterm='') + self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"]) + + def test_range_format_unified(self): + # Per the diff spec at http://www.unix.org/single_unix_specification/ + spec = '''\ + Each field shall be of the form: + %1d", if the range contains exactly one line, + and: + "%1d,%1d", , otherwise. + If a range is empty, its beginning line number shall be the number of + the line just before the range, or 0 if the empty range starts the file. + ''' + fmt = difflib._format_range_unified + self.assertEqual(fmt(3,3), '3,0') + self.assertEqual(fmt(3,4), '4') + self.assertEqual(fmt(3,5), '4,2') + self.assertEqual(fmt(3,6), '4,3') + self.assertEqual(fmt(0,0), '0,0') + + def test_range_format_context(self): + # Per the diff spec at http://www.unix.org/single_unix_specification/ + spec = '''\ + The range of lines in file1 shall be written in the following format + if the range contains two or more lines: + "*** %d,%d ****\n", , + and the following format otherwise: + "*** %d ****\n", + The ending line number of an empty range shall be the number of the preceding line, + or 0 if the range is at the start of the file. + + Next, the range of lines in file2 shall be written in the following format + if the range contains two or more lines: + "--- %d,%d ----\n", , + and the following format otherwise: + "--- %d ----\n", + ''' + fmt = difflib._format_range_context + self.assertEqual(fmt(3,3), '3') + self.assertEqual(fmt(3,4), '4') + self.assertEqual(fmt(3,5), '4,5') + self.assertEqual(fmt(3,6), '4,6') + self.assertEqual(fmt(0,0), '0') + + def test_main(): difflib.HtmlDiff._default_prefix = 0 Doctests = doctest.DocTestSuite(difflib) - run_unittest(TestSFpatches, TestSFbugs, Doctests) + run_unittest(TestSFpatches, TestSFbugs, Doctests, TestOutputFormat) if __name__ == '__main__': test_main() From f03d3028e0e9ce1a9d4d108d8ecf210a917cd5f8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 12 Apr 2011 15:19:33 -0700 Subject: [PATCH 2/2] Issue 11747: Fix output format for context diffs. --- Lib/difflib.py | 30 +++++++++++++++++++++++++----- Lib/test/test_difflib.py | 29 +++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index c5005a104d5..e6cc6ee4425 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1144,7 +1144,11 @@ def IS_CHARACTER_JUNK(ch, ws=" \t"): return ch in ws -def _format_range(start, stop): +######################################################################## +### Unified Diff +######################################################################## + +def _format_range_unified(start, stop): 'Convert range to the "ed" format' # Per the diff spec at http://www.unix.org/single_unix_specification/ beginning = start + 1 # lines start numbering with one @@ -1206,8 +1210,8 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', yield '+++ {}{}{}'.format(tofile, todate, lineterm) first, last = group[0], group[-1] - file1_range = _format_range(first[1], last[2]) - file2_range = _format_range(first[3], last[4]) + file1_range = _format_range_unified(first[1], last[2]) + file2_range = _format_range_unified(first[3], last[4]) yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm) for tag, i1, i2, j1, j2 in group: @@ -1222,6 +1226,22 @@ def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', for line in b[j1:j2]: yield '+' + line + +######################################################################## +### Context Diff +######################################################################## + +def _format_range_context(start, stop): + 'Convert range to the "ed" format' + # Per the diff spec at http://www.unix.org/single_unix_specification/ + beginning = start + 1 # lines start numbering with one + length = stop - start + if not length: + beginning -= 1 # empty ranges begin at line just before the range + if length <= 1: + return '{}'.format(beginning) + return '{},{}'.format(beginning, beginning + length - 1) + # See http://www.unix.org/single_unix_specification/ def context_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n'): @@ -1280,7 +1300,7 @@ def context_diff(a, b, fromfile='', tofile='', first, last = group[0], group[-1] yield '***************' + lineterm - file1_range = _format_range(first[1], last[2]) + file1_range = _format_range_context(first[1], last[2]) yield '*** {} ****{}'.format(file1_range, lineterm) if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group): @@ -1289,7 +1309,7 @@ def context_diff(a, b, fromfile='', tofile='', for line in a[i1:i2]: yield prefix[tag] + line - file2_range = _format_range(first[3], last[4]) + file2_range = _format_range_context(first[3], last[4]) yield '--- {} ----{}'.format(file2_range, lineterm) if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group): diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py index b08be53dd97..325449aa557 100644 --- a/Lib/test/test_difflib.py +++ b/Lib/test/test_difflib.py @@ -236,7 +236,7 @@ class TestOutputFormat(unittest.TestCase): cd = difflib.context_diff(*args, lineterm='') self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"]) - def test_range_format(self): + def test_range_format_unified(self): # Per the diff spec at http://www.unix.org/single_unix_specification/ spec = '''\ Each field shall be of the form: @@ -246,13 +246,38 @@ class TestOutputFormat(unittest.TestCase): If a range is empty, its beginning line number shall be the number of the line just before the range, or 0 if the empty range starts the file. ''' - fmt = difflib._format_range + fmt = difflib._format_range_unified self.assertEqual(fmt(3,3), '3,0') self.assertEqual(fmt(3,4), '4') self.assertEqual(fmt(3,5), '4,2') self.assertEqual(fmt(3,6), '4,3') self.assertEqual(fmt(0,0), '0,0') + def test_range_format_context(self): + # Per the diff spec at http://www.unix.org/single_unix_specification/ + spec = '''\ + The range of lines in file1 shall be written in the following format + if the range contains two or more lines: + "*** %d,%d ****\n", , + and the following format otherwise: + "*** %d ****\n", + The ending line number of an empty range shall be the number of the preceding line, + or 0 if the range is at the start of the file. + + Next, the range of lines in file2 shall be written in the following format + if the range contains two or more lines: + "--- %d,%d ----\n", , + and the following format otherwise: + "--- %d ----\n", + ''' + fmt = difflib._format_range_context + self.assertEqual(fmt(3,3), '3') + self.assertEqual(fmt(3,4), '4') + self.assertEqual(fmt(3,5), '4,5') + self.assertEqual(fmt(3,6), '4,6') + self.assertEqual(fmt(0,0), '0') + + def test_main(): difflib.HtmlDiff._default_prefix = 0 Doctests = doctest.DocTestSuite(difflib)