Issue #15068: Got rid of excessive buffering in fileinput.
The bufsize parameter is now deprecated and ignored.
This commit is contained in:
commit
674e2d0ea0
|
@ -71,6 +71,8 @@ The following function is the primary interface of this module:
|
|||
.. versionchanged:: 3.2
|
||||
Can be used as a context manager.
|
||||
|
||||
.. deprecated-removed:: 3.6 3.8
|
||||
The *bufsize* parameter.
|
||||
|
||||
The following functions use the global state created by :func:`fileinput.input`;
|
||||
if there is no active state, :exc:`RuntimeError` is raised.
|
||||
|
@ -161,7 +163,10 @@ available for subclassing as well:
|
|||
Can be used as a context manager.
|
||||
|
||||
.. deprecated:: 3.4
|
||||
The ``'rU'`` and ``'U'`` modes.
|
||||
The ``'rU'`` and ``'U'`` modes.
|
||||
|
||||
.. deprecated-removed:: 3.6 3.8
|
||||
The *bufsize* parameter.
|
||||
|
||||
|
||||
**Optional in-place filtering:** if the keyword argument ``inplace=True`` is
|
||||
|
|
168
Lib/fileinput.py
168
Lib/fileinput.py
|
@ -64,13 +64,6 @@ deleted when the output file is closed. In-place filtering is
|
|||
disabled when standard input is read. XXX The current implementation
|
||||
does not work for MS-DOS 8+3 filesystems.
|
||||
|
||||
Performance: this module is unfortunately one of the slower ways of
|
||||
processing large numbers of input lines. Nevertheless, a significant
|
||||
speed-up has been obtained by using readlines(bufsize) instead of
|
||||
readline(). A new keyword argument, bufsize=N, is present on the
|
||||
input() function and the FileInput() class to override the default
|
||||
buffer size.
|
||||
|
||||
XXX Possible additions:
|
||||
|
||||
- optional getopt argument processing
|
||||
|
@ -87,8 +80,6 @@ __all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
|
|||
|
||||
_state = None
|
||||
|
||||
DEFAULT_BUFSIZE = 8*1024
|
||||
|
||||
def input(files=None, inplace=False, backup="", bufsize=0,
|
||||
mode="r", openhook=None):
|
||||
"""Return an instance of the FileInput class, which can be iterated.
|
||||
|
@ -208,17 +199,19 @@ class FileInput:
|
|||
self._files = files
|
||||
self._inplace = inplace
|
||||
self._backup = backup
|
||||
self._bufsize = bufsize or DEFAULT_BUFSIZE
|
||||
if bufsize:
|
||||
import warnings
|
||||
warnings.warn('bufsize is deprecated and ignored',
|
||||
DeprecationWarning, stacklevel=2)
|
||||
self._savestdout = None
|
||||
self._output = None
|
||||
self._filename = None
|
||||
self._lineno = 0
|
||||
self._startlineno = 0
|
||||
self._filelineno = 0
|
||||
self._file = None
|
||||
self._readline = self._start_readline
|
||||
self._isstdin = False
|
||||
self._backupfilename = None
|
||||
self._buffer = []
|
||||
self._bufindex = 0
|
||||
# restrict mode argument to reading modes
|
||||
if mode not in ('r', 'rU', 'U', 'rb'):
|
||||
raise ValueError("FileInput opening mode must be one of "
|
||||
|
@ -254,22 +247,18 @@ class FileInput:
|
|||
return self
|
||||
|
||||
def __next__(self):
|
||||
try:
|
||||
line = self._buffer[self._bufindex]
|
||||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
self._bufindex += 1
|
||||
self._lineno += 1
|
||||
line = self._readline()
|
||||
if line:
|
||||
self._filelineno += 1
|
||||
return line
|
||||
line = self.readline()
|
||||
if not line:
|
||||
if not self._file:
|
||||
raise StopIteration
|
||||
return line
|
||||
self.nextfile()
|
||||
# Recursive call
|
||||
return self.__next__()
|
||||
|
||||
def __getitem__(self, i):
|
||||
if i != self._lineno:
|
||||
if i != self.lineno():
|
||||
raise RuntimeError("accessing lines out of order")
|
||||
try:
|
||||
return self.__next__()
|
||||
|
@ -290,6 +279,7 @@ class FileInput:
|
|||
finally:
|
||||
file = self._file
|
||||
self._file = None
|
||||
self._readline = self._start_readline
|
||||
try:
|
||||
if file and not self._isstdin:
|
||||
file.close()
|
||||
|
@ -301,85 +291,81 @@ class FileInput:
|
|||
except OSError: pass
|
||||
|
||||
self._isstdin = False
|
||||
self._buffer = []
|
||||
self._bufindex = 0
|
||||
|
||||
def readline(self):
|
||||
try:
|
||||
line = self._buffer[self._bufindex]
|
||||
except IndexError:
|
||||
pass
|
||||
else:
|
||||
self._bufindex += 1
|
||||
self._lineno += 1
|
||||
self._filelineno += 1
|
||||
return line
|
||||
if not self._file:
|
||||
if not self._files:
|
||||
if 'b' in self._mode:
|
||||
return b''
|
||||
else:
|
||||
return ''
|
||||
self._filename = self._files[0]
|
||||
self._files = self._files[1:]
|
||||
self._filelineno = 0
|
||||
self._file = None
|
||||
self._isstdin = False
|
||||
self._backupfilename = 0
|
||||
if self._filename == '-':
|
||||
self._filename = '<stdin>'
|
||||
if 'b' in self._mode:
|
||||
self._file = getattr(sys.stdin, 'buffer', sys.stdin)
|
||||
else:
|
||||
self._file = sys.stdin
|
||||
self._isstdin = True
|
||||
while True:
|
||||
line = self._readline()
|
||||
if line:
|
||||
self._filelineno += 1
|
||||
return line
|
||||
if not self._file:
|
||||
return line
|
||||
self.nextfile()
|
||||
# repeat with next file
|
||||
|
||||
def _start_readline(self):
|
||||
if not self._files:
|
||||
if 'b' in self._mode:
|
||||
return b''
|
||||
else:
|
||||
if self._inplace:
|
||||
self._backupfilename = (
|
||||
self._filename + (self._backup or ".bak"))
|
||||
return ''
|
||||
self._filename = self._files[0]
|
||||
self._files = self._files[1:]
|
||||
self._startlineno = self.lineno()
|
||||
self._filelineno = 0
|
||||
self._file = None
|
||||
self._isstdin = False
|
||||
self._backupfilename = 0
|
||||
if self._filename == '-':
|
||||
self._filename = '<stdin>'
|
||||
if 'b' in self._mode:
|
||||
self._file = getattr(sys.stdin, 'buffer', sys.stdin)
|
||||
else:
|
||||
self._file = sys.stdin
|
||||
self._isstdin = True
|
||||
else:
|
||||
if self._inplace:
|
||||
self._backupfilename = (
|
||||
self._filename + (self._backup or ".bak"))
|
||||
try:
|
||||
os.unlink(self._backupfilename)
|
||||
except OSError:
|
||||
pass
|
||||
# The next few lines may raise OSError
|
||||
os.rename(self._filename, self._backupfilename)
|
||||
self._file = open(self._backupfilename, self._mode)
|
||||
try:
|
||||
perm = os.fstat(self._file.fileno()).st_mode
|
||||
except OSError:
|
||||
self._output = open(self._filename, "w")
|
||||
else:
|
||||
mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
|
||||
if hasattr(os, 'O_BINARY'):
|
||||
mode |= os.O_BINARY
|
||||
|
||||
fd = os.open(self._filename, mode, perm)
|
||||
self._output = os.fdopen(fd, "w")
|
||||
try:
|
||||
os.unlink(self._backupfilename)
|
||||
if hasattr(os, 'chmod'):
|
||||
os.chmod(self._filename, perm)
|
||||
except OSError:
|
||||
pass
|
||||
# The next few lines may raise OSError
|
||||
os.rename(self._filename, self._backupfilename)
|
||||
self._file = open(self._backupfilename, self._mode)
|
||||
try:
|
||||
perm = os.fstat(self._file.fileno()).st_mode
|
||||
except OSError:
|
||||
self._output = open(self._filename, "w")
|
||||
else:
|
||||
mode = os.O_CREAT | os.O_WRONLY | os.O_TRUNC
|
||||
if hasattr(os, 'O_BINARY'):
|
||||
mode |= os.O_BINARY
|
||||
|
||||
fd = os.open(self._filename, mode, perm)
|
||||
self._output = os.fdopen(fd, "w")
|
||||
try:
|
||||
if hasattr(os, 'chmod'):
|
||||
os.chmod(self._filename, perm)
|
||||
except OSError:
|
||||
pass
|
||||
self._savestdout = sys.stdout
|
||||
sys.stdout = self._output
|
||||
self._savestdout = sys.stdout
|
||||
sys.stdout = self._output
|
||||
else:
|
||||
# This may raise OSError
|
||||
if self._openhook:
|
||||
self._file = self._openhook(self._filename, self._mode)
|
||||
else:
|
||||
# This may raise OSError
|
||||
if self._openhook:
|
||||
self._file = self._openhook(self._filename, self._mode)
|
||||
else:
|
||||
self._file = open(self._filename, self._mode)
|
||||
self._buffer = self._file.readlines(self._bufsize)
|
||||
self._bufindex = 0
|
||||
if not self._buffer:
|
||||
self.nextfile()
|
||||
# Recursive call
|
||||
return self.readline()
|
||||
self._file = open(self._filename, self._mode)
|
||||
self._readline = self._file.readline
|
||||
return self._readline()
|
||||
|
||||
def filename(self):
|
||||
return self._filename
|
||||
|
||||
def lineno(self):
|
||||
return self._lineno
|
||||
return self._startlineno + self._filelineno
|
||||
|
||||
def filelineno(self):
|
||||
return self._filelineno
|
||||
|
|
|
@ -47,6 +47,42 @@ def remove_tempfiles(*names):
|
|||
if name:
|
||||
safe_unlink(name)
|
||||
|
||||
class LineReader:
|
||||
|
||||
def __init__(self):
|
||||
self._linesread = []
|
||||
|
||||
@property
|
||||
def linesread(self):
|
||||
try:
|
||||
return self._linesread[:]
|
||||
finally:
|
||||
self._linesread = []
|
||||
|
||||
def openhook(self, filename, mode):
|
||||
self.it = iter(filename.splitlines(True))
|
||||
return self
|
||||
|
||||
def readline(self, size=None):
|
||||
line = next(self.it, '')
|
||||
self._linesread.append(line)
|
||||
return line
|
||||
|
||||
def readlines(self, hint=-1):
|
||||
lines = []
|
||||
size = 0
|
||||
while True:
|
||||
line = self.readline()
|
||||
if not line:
|
||||
return lines
|
||||
lines.append(line)
|
||||
size += len(line)
|
||||
if size >= hint:
|
||||
return lines
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
class BufferSizesTests(unittest.TestCase):
|
||||
def test_buffer_sizes(self):
|
||||
# First, run the tests with default and teeny buffer size.
|
||||
|
@ -57,7 +93,11 @@ class BufferSizesTests(unittest.TestCase):
|
|||
t2 = writeTmp(2, ["Line %s of file 2\n" % (i+1) for i in range(10)])
|
||||
t3 = writeTmp(3, ["Line %s of file 3\n" % (i+1) for i in range(5)])
|
||||
t4 = writeTmp(4, ["Line %s of file 4\n" % (i+1) for i in range(1)])
|
||||
self.buffer_size_test(t1, t2, t3, t4, bs, round)
|
||||
if bs:
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.buffer_size_test(t1, t2, t3, t4, bs, round)
|
||||
else:
|
||||
self.buffer_size_test(t1, t2, t3, t4, bs, round)
|
||||
finally:
|
||||
remove_tempfiles(t1, t2, t3, t4)
|
||||
|
||||
|
@ -290,7 +330,7 @@ class FileInputTests(unittest.TestCase):
|
|||
self.addCleanup(safe_unlink, TESTFN)
|
||||
|
||||
with FileInput(files=TESTFN,
|
||||
openhook=hook_encoded('ascii'), bufsize=8) as fi:
|
||||
openhook=hook_encoded('ascii')) as fi:
|
||||
try:
|
||||
self.assertEqual(fi.readline(), 'A\n')
|
||||
self.assertEqual(fi.readline(), 'B\n')
|
||||
|
@ -458,6 +498,38 @@ class FileInputTests(unittest.TestCase):
|
|||
|
||||
self.assertEqual(result, -1, "fileno() should return -1")
|
||||
|
||||
def test_readline_buffering(self):
|
||||
src = LineReader()
|
||||
with FileInput(files=['line1\nline2', 'line3\n'],
|
||||
openhook=src.openhook) as fi:
|
||||
self.assertEqual(src.linesread, [])
|
||||
self.assertEqual(fi.readline(), 'line1\n')
|
||||
self.assertEqual(src.linesread, ['line1\n'])
|
||||
self.assertEqual(fi.readline(), 'line2')
|
||||
self.assertEqual(src.linesread, ['line2'])
|
||||
self.assertEqual(fi.readline(), 'line3\n')
|
||||
self.assertEqual(src.linesread, ['', 'line3\n'])
|
||||
self.assertEqual(fi.readline(), '')
|
||||
self.assertEqual(src.linesread, [''])
|
||||
self.assertEqual(fi.readline(), '')
|
||||
self.assertEqual(src.linesread, [])
|
||||
|
||||
def test_iteration_buffering(self):
|
||||
src = LineReader()
|
||||
with FileInput(files=['line1\nline2', 'line3\n'],
|
||||
openhook=src.openhook) as fi:
|
||||
self.assertEqual(src.linesread, [])
|
||||
self.assertEqual(next(fi), 'line1\n')
|
||||
self.assertEqual(src.linesread, ['line1\n'])
|
||||
self.assertEqual(next(fi), 'line2')
|
||||
self.assertEqual(src.linesread, ['line2'])
|
||||
self.assertEqual(next(fi), 'line3\n')
|
||||
self.assertEqual(src.linesread, ['', 'line3\n'])
|
||||
self.assertRaises(StopIteration, next, fi)
|
||||
self.assertEqual(src.linesread, [''])
|
||||
self.assertRaises(StopIteration, next, fi)
|
||||
self.assertEqual(src.linesread, [])
|
||||
|
||||
class MockFileInput:
|
||||
"""A class that mocks out fileinput.FileInput for use during unit tests"""
|
||||
|
||||
|
@ -917,8 +989,7 @@ class Test_hook_encoded(unittest.TestCase):
|
|||
class MiscTest(unittest.TestCase):
|
||||
|
||||
def test_all(self):
|
||||
blacklist = {'DEFAULT_BUFSIZE'}
|
||||
support.check__all__(self, fileinput, blacklist=blacklist)
|
||||
support.check__all__(self, fileinput)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -201,6 +201,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #15068: Got rid of excessive buffering in fileinput.
|
||||
The bufsize parameter is now deprecated and ignored.
|
||||
|
||||
- Issue #19475: Added an optional argument timespec to the datetime
|
||||
isoformat() method to choose the precision of the time component.
|
||||
|
||||
|
|
Loading…
Reference in New Issue