Issue #25788: fileinput.hook_encoded() now accepts an optional "errors"
argument, which is passed through to open(). Original patch by Joseph Hackman.
This commit is contained in:
parent
258a5d4dcb
commit
b275210a3b
|
@ -193,10 +193,14 @@ The two following opening hooks are provided by this module:
|
|||
Usage example: ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
|
||||
|
||||
|
||||
.. function:: hook_encoded(encoding)
|
||||
.. function:: hook_encoded(encoding, errors=None)
|
||||
|
||||
Returns a hook which opens each file with :func:`open`, using the given
|
||||
*encoding* to read the file.
|
||||
*encoding* and *errors* to read the file.
|
||||
|
||||
Usage example: ``fi =
|
||||
fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))``
|
||||
fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
|
||||
"surrogateescape"))``
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Added the optional *errors* parameter.
|
||||
|
|
|
@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments.
|
|||
(Contributed by Aviv Palivoda in :issue:`26243`.)
|
||||
|
||||
|
||||
fileinput
|
||||
---------
|
||||
|
||||
:func:`~fileinput.hook_encoded` now accepts an optional *errors* argument, which is passed to :func:`open`.
|
||||
(Contributed by Joseph Hackman in :issue:`25788`.)
|
||||
|
||||
|
||||
Optimizations
|
||||
=============
|
||||
|
||||
|
|
|
@ -400,9 +400,9 @@ def hook_compressed(filename, mode):
|
|||
return open(filename, mode)
|
||||
|
||||
|
||||
def hook_encoded(encoding):
|
||||
def hook_encoded(encoding, errors=None):
|
||||
def openhook(filename, mode):
|
||||
return open(filename, mode, encoding=encoding)
|
||||
return open(filename, mode, encoding=encoding, errors=errors)
|
||||
return openhook
|
||||
|
||||
|
||||
|
|
|
@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase):
|
|||
|
||||
def test(self):
|
||||
encoding = object()
|
||||
result = fileinput.hook_encoded(encoding)
|
||||
errors = object()
|
||||
result = fileinput.hook_encoded(encoding, errors=errors)
|
||||
|
||||
fake_open = InvocationRecorder()
|
||||
original_open = builtins.open
|
||||
|
@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase):
|
|||
self.assertIs(args[0], filename)
|
||||
self.assertIs(args[1], mode)
|
||||
self.assertIs(kwargs.pop('encoding'), encoding)
|
||||
self.assertIs(kwargs.pop('errors'), errors)
|
||||
self.assertFalse(kwargs)
|
||||
|
||||
def test_errors(self):
|
||||
with open(TESTFN, 'wb') as f:
|
||||
f.write(b'\x80abc')
|
||||
self.addCleanup(safe_unlink, TESTFN)
|
||||
|
||||
def check(errors, expected_lines):
|
||||
with FileInput(files=TESTFN, mode='r',
|
||||
openhook=hook_encoded('utf-8', errors=errors)) as fi:
|
||||
lines = list(fi)
|
||||
self.assertEqual(lines, expected_lines)
|
||||
|
||||
check('ignore', ['abc'])
|
||||
with self.assertRaises(UnicodeDecodeError):
|
||||
check('strict', ['abc'])
|
||||
check('replace', ['\ufffdabc'])
|
||||
check('backslashreplace', ['\\x80abc'])
|
||||
|
||||
def test_modes(self):
|
||||
with open(TESTFN, 'wb') as f:
|
||||
# UTF-7 is a convenient, seldom used encoding
|
||||
|
|
|
@ -538,6 +538,7 @@ Michael Guravage
|
|||
Lars Gustäbel
|
||||
Thomas Güttler
|
||||
Jonas H.
|
||||
Joseph Hackman
|
||||
Barry Haddow
|
||||
Philipp Hagemeister
|
||||
Paul ten Hagen
|
||||
|
|
Loading…
Reference in New Issue