Issue #25788: fileinput.hook_encoded() now supports an "errors" argument for passing to open.  Original patch by Joseph Hackman.
This commit is contained in:
Serhiy Storchaka 2016-04-27 23:13:46 +03:00
parent 258a5d4dcb
commit b275210a3b
6 changed files with 40 additions and 6 deletions

View File

@ -193,10 +193,14 @@ The two following opening hooks are provided by this module:
Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
.. function:: hook_encoded(encoding)
.. function:: hook_encoded(encoding, errors=None)
Returns a hook which opens each file with :func:`open`, using the given
*encoding* to read the file.
*encoding* and *errors* to read the file.
Usage example: ``fi =
fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))``
fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
"surrogateescape"))``
.. versionchanged:: 3.6
Added the optional *errors* parameter.

View File

@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments.
(Contributed by Aviv Palivoda in :issue:`26243`.)
fileinput
---------
:func:`~fileinput.hook_encoded` now supports the *errors* argument.
(Contributed by Joseph Hackman in :issue:`25788`.)
Optimizations
=============

View File

@ -400,9 +400,9 @@ def hook_compressed(filename, mode):
return open(filename, mode)
def hook_encoded(encoding):
def hook_encoded(encoding, errors=None):
def openhook(filename, mode):
return open(filename, mode, encoding=encoding)
return open(filename, mode, encoding=encoding, errors=errors)
return openhook

View File

@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase):
def test(self):
encoding = object()
result = fileinput.hook_encoded(encoding)
errors = object()
result = fileinput.hook_encoded(encoding, errors=errors)
fake_open = InvocationRecorder()
original_open = builtins.open
@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase):
self.assertIs(args[0], filename)
self.assertIs(args[1], mode)
self.assertIs(kwargs.pop('encoding'), encoding)
self.assertIs(kwargs.pop('errors'), errors)
self.assertFalse(kwargs)
def test_errors(self):
    """Check that hook_encoded() forwards *errors* to the decoder.

    A file starting with the byte 0x80 (which the 'strict' case below
    shows is not decodable as UTF-8) is read through FileInput with a
    hook_encoded('utf-8', errors=...) open hook, once per error handler.
    """
    with open(TESTFN, 'wb') as f:
        f.write(b'\x80abc')
    # Registered after the file exists; safe_unlink is defined elsewhere
    # in the test module -- presumably it removes TESTFN at teardown.
    self.addCleanup(safe_unlink, TESTFN)
    def check(errors, expected_lines):
        # Read every line of TESTFN using the given error handler and
        # compare the decoded lines with the expectation.
        with FileInput(files=TESTFN, mode='r',
                       openhook=hook_encoded('utf-8', errors=errors)) as fi:
            lines = list(fi)
        self.assertEqual(lines, expected_lines)
    # 'ignore' drops the undecodable byte.
    check('ignore', ['abc'])
    # 'strict' must raise while reading, so the expected-lines argument
    # is only a placeholder and is never compared.
    with self.assertRaises(UnicodeDecodeError):
        check('strict', ['abc'])
    # 'replace' substitutes U+FFFD for the bad byte.
    check('replace', ['\ufffdabc'])
    # 'backslashreplace' yields the literal four characters \x80.
    check('backslashreplace', ['\\x80abc'])
def test_modes(self):
with open(TESTFN, 'wb') as f:
# UTF-7 is a convenient, seldom used encoding

View File

@ -538,6 +538,7 @@ Michael Guravage
Lars Gustäbel
Thomas Güttler
Jonas H.
Joseph Hackman
Barry Haddow
Philipp Hagemeister
Paul ten Hagen

View File

@ -256,6 +256,9 @@ Core and Builtins
Library
-------
- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
for passing to open. Original patch by Joseph Hackman.
- Issue #26634: recursive_repr() now sets __qualname__ of wrapper. Patch by
Xiang Zhang.