Issue #2052: Add charset parameter to HtmlDiff.make_file().

This commit is contained in:
Berker Peksag 2015-03-15 01:18:47 +02:00
parent fbd011dd49
commit 102029dfd6
6 changed files with 63 additions and 10 deletions

View File

@ -104,7 +104,8 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
The following methods are public:
.. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
.. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, \
numlines=5, *, charset='utf-8')
Compares *fromlines* and *tolines* (lists of strings) and returns a string which
is a complete HTML file containing a table showing line by line differences with
@ -123,6 +124,10 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
the next difference highlight at the top of the browser without any leading
context).
.. versionchanged:: 3.5
The *charset* keyword-only argument was added. The default charset of the
HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
.. method:: make_table(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
Compares *fromlines* and *tolines* (lists of strings) and returns a string which

View File

@ -225,6 +225,14 @@ contextlib
don't provide any options to redirect it.
(Contributed by Berker Peksag in :issue:`22389`.)
difflib
-------
* The charset of the HTML document generated by :meth:`difflib.HtmlDiff.make_file`
can now be customized by using the *charset* keyword-only parameter. The default
charset of the HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
(Contributed by Berker Peksag in :issue:`2052`.)
distutils
---------

View File

@ -1598,7 +1598,7 @@ _file_template = """
<head>
<meta http-equiv="Content-Type"
content="text/html; charset=ISO-8859-1" />
content="text/html; charset=%(charset)s" />
<title></title>
<style type="text/css">%(styles)s
</style>
@ -1685,8 +1685,8 @@ class HtmlDiff(object):
self._linejunk = linejunk
self._charjunk = charjunk
def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
numlines=5):
def make_file(self, fromlines, tolines, fromdesc='', todesc='',
context=False, numlines=5, *, charset='utf-8'):
"""Returns HTML file of side by side comparison with change highlights
Arguments:
@ -1701,13 +1701,16 @@ class HtmlDiff(object):
When context is False, controls the number of lines to place
the "next" link anchors before the next change (so click of
"next" link jumps to just before the change).
charset -- charset of the HTML document
"""
return self._file_template % dict(
styles = self._styles,
legend = self._legend,
table = self.make_table(fromlines,tolines,fromdesc,todesc,
context=context,numlines=numlines))
return (self._file_template % dict(
styles=self._styles,
legend=self._legend,
table=self.make_table(fromlines, tolines, fromdesc, todesc,
context=context, numlines=numlines),
charset=charset
)).encode(charset, 'xmlcharrefreplace').decode(charset)
def _tab_newline_replace(self,fromlines,tolines):
"""Returns from/to line lists with tabs expanded and newlines removed.

View File

@ -107,6 +107,20 @@ patch914575_to1 = """
5. Flat is better than nested.
"""
patch914575_nonascii_from1 = """
1. Beautiful is beTTer than ugly.
2. Explicit is better than ımplıcıt.
3. Simple is better than complex.
4. Complex is better than complicated.
"""
patch914575_nonascii_to1 = """
1. Beautiful is better than ügly.
3. Sımple is better than complex.
4. Complicated is better than cömplex.
5. Flat is better than nested.
"""
patch914575_from2 = """
\t\tLine 1: preceeded by from:[tt] to:[ssss]
\t\tLine 2: preceeded by from:[sstt] to:[sssst]
@ -223,6 +237,27 @@ class TestSFpatches(unittest.TestCase):
new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
difflib.SequenceMatcher(None, old, new).get_opcodes()
def test_make_file_default_charset(self):
html_diff = difflib.HtmlDiff()
output = html_diff.make_file(patch914575_from1.splitlines(),
patch914575_to1.splitlines())
self.assertIn('content="text/html; charset=utf-8"', output)
def test_make_file_iso88591_charset(self):
html_diff = difflib.HtmlDiff()
output = html_diff.make_file(patch914575_from1.splitlines(),
patch914575_to1.splitlines(),
charset='iso-8859-1')
self.assertIn('content="text/html; charset=iso-8859-1"', output)
def test_make_file_usascii_charset_with_nonascii_input(self):
html_diff = difflib.HtmlDiff()
output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
patch914575_nonascii_to1.splitlines(),
charset='us-ascii')
self.assertIn('content="text/html; charset=us-ascii"', output)
self.assertIn('&#305;mpl&#305;c&#305;t', output)
class TestOutputFormat(unittest.TestCase):
def test_tab_delimiter(self):

View File

@ -6,7 +6,7 @@
<head>
<meta http-equiv="Content-Type"
content="text/html; charset=ISO-8859-1" />
content="text/html; charset=utf-8" />
<title></title>
<style type="text/css">
table.diff {font-family:Courier; border:medium;}

View File

@ -18,6 +18,8 @@ Core and Builtins
Library
-------
- Issue #2052: Add charset parameter to HtmlDiff.make_file().
- Issue #23138: Fixed parsing cookies with absent keys or values in cookiejar.
Patch by Demian Brecht.