1997-11-21 13:12:59 -04:00
|
|
|
"""Helper class to quickly write a loop over all standard input files.
|
|
|
|
|
|
|
|
Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)
|
|
|
|
|
|
|
|
This iterates over the lines of all files listed in sys.argv[1:],
|
|
|
|
defaulting to sys.stdin if the list is empty. If a filename is '-' it
|
|
|
|
is also replaced by sys.stdin. To specify an alternative list of
|
|
|
|
filenames, pass it as the argument to input(). A single file name is
|
|
|
|
also allowed.
|
|
|
|
|
|
|
|
Functions filename(), lineno() return the filename and cumulative line
|
|
|
|
number of the line that has just been read; filelineno() returns its
|
|
|
|
line number in the current file; isfirstline() returns true iff the
|
|
|
|
line just read is the first line of its file; isstdin() returns true
|
|
|
|
iff the line was read from sys.stdin. Function nextfile() closes the
|
|
|
|
current file so that the next iteration will read the first line from
|
|
|
|
the next file (if any); lines not read from the file will not count
|
|
|
|
towards the cumulative line count; the filename is not changed until
|
|
|
|
after the first line of the next file has been read. Function close()
|
|
|
|
closes the sequence.
|
|
|
|
|
|
|
|
Before any lines have been read, filename() returns None and both line
|
|
|
|
numbers are zero; nextfile() has no effect. After all lines have been
|
|
|
|
read, filename() and the line number functions return the values
|
|
|
|
pertaining to the last line read; nextfile() has no effect.
|
|
|
|
|
|
|
|
All files are opened in text mode. If an I/O error occurs during
|
|
|
|
opening or reading a file, the IOError exception is raised.
|
|
|
|
|
|
|
|
If sys.stdin is used more than once, the second and further use will
|
|
|
|
return no lines, except perhaps for interactive use, or if it has been
|
|
|
|
explicitly reset (e.g. using sys.stdin.seek(0)).
|
|
|
|
|
|
|
|
Empty files are opened and immediately closed; the only time their
|
|
|
|
presence in the list of filenames is noticeable at all is when the
|
|
|
|
last file opened is empty.
|
|
|
|
|
|
|
|
It is possible that the last line of a file doesn't end in a newline
|
|
|
|
character; otherwise lines are returned including the trailing
|
|
|
|
newline.
|
|
|
|
|
|
|
|
Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.
|
|
|
|
|
|
|
|
Optional in-place filtering: if the keyword argument inplace=1 is
|
|
|
|
passed to input() or to the FileInput constructor, the file is moved
|
|
|
|
to a backup file and standard output is directed to the input file.
|
|
|
|
This makes it possible to write a filter that rewrites its input file
|
|
|
|
in place. If the keyword argument backup=".<some extension>" is also
|
|
|
|
given, it specifies the extension for the backup file, and the backup
|
|
|
|
file remains around; by default, the extension is ".bak" and it is
|
|
|
|
deleted when the output file is closed. In-place filtering is
|
|
|
|
disabled when standard input is read. XXX The current implementation
|
|
|
|
does not work for MS-DOS 8+3 filesystems.
|
|
|
|
|
2001-01-05 10:44:39 -04:00
|
|
|
Performance: this module is unfortunately one of the slower ways of
|
|
|
|
processing large numbers of input lines. Nevertheless, a significant
|
|
|
|
speed-up has been obtained by using readlines(bufsize) instead of
|
|
|
|
readline(). A new keyword argument, bufsize=N, is present on the
|
|
|
|
input() function and the FileInput() class to override the default
|
|
|
|
buffer size.
|
|
|
|
|
1997-11-21 13:12:59 -04:00
|
|
|
XXX Possible additions:
|
|
|
|
|
|
|
|
- optional getopt argument processing
|
|
|
|
- specify open mode ('r' or 'rb')
|
|
|
|
- fileno()
|
|
|
|
- isatty()
|
|
|
|
- read(), read(size), even readlines()
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
1999-10-18 18:41:43 -03:00
|
|
|
import sys, os, stat
|
1997-11-21 13:12:59 -04:00
|
|
|
|
2001-01-20 19:34:12 -04:00
|
|
|
# Public names exported by "from fileinput import *".
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "isfirstline", "isstdin", "FileInput"]
|
|
|
|
|
1997-11-21 13:12:59 -04:00
|
|
|
# FileInput instance shared by the module-level convenience functions.
# It is None until input() is called and again after close().
_state = None

# Size hint handed to readlines() when the caller passes bufsize=0.
DEFAULT_BUFSIZE = 8*1024
|
|
|
|
|
|
|
|
def input(files=None, inplace=0, backup="", bufsize=0):
    """input([files[, inplace[, backup[, bufsize]]]])

    Create an instance of the FileInput class.  The instance will be used
    as global state for the functions of this module, and is also returned
    to use during iteration.  The parameters to this function will be passed
    along to the constructor of the FileInput class.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    # Refuse to replace a global instance that is still in the middle of
    # reading a file.
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state
|
|
|
|
|
|
|
|
def close():
    """Close the sequence."""
    global _state
    # Drop the global reference first so the module is back in the
    # "no active input()" state even if closing the instance raises.
    state = _state
    _state = None
    if state:
        state.close()
|
1997-11-21 13:12:59 -04:00
|
|
|
|
|
|
|
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count.  The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file.  After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
|
|
|
|
|
|
|
|
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
|
|
|
|
|
|
|
|
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0.  After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
|
|
|
|
|
|
|
|
def filelineno():
    """
    Return the line number in the current file.  Before the first line
    has been read, returns 0.  After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
|
|
|
|
|
|
|
|
def isfirstline():
    """
    Return true if the line just read is the first line of its file,
    otherwise return false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
|
|
|
|
|
|
|
|
def isstdin():
    """
    Return true if the last line was read from sys.stdin,
    otherwise return false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
|
|
|
|
|
|
|
|
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), nextfile()
    and close() correspond to the functions of the same name in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior.  The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        """Set up the list of files to read; nothing is opened yet.

        files   -- a single file name, a sequence of file names, or None
                   to use sys.argv[1:]; an empty list means ('-',), i.e.
                   standard input.
        inplace -- if true, each file is renamed to a backup and
                   sys.stdout is redirected to a fresh copy of the file.
        backup  -- extension of the backup file; when empty, ".bak" is
                   used and the backup is deleted when the file is closed.
        bufsize -- size hint passed to readlines(); 0 selects
                   DEFAULT_BUFSIZE.
        """
        # isinstance() also recognises str subclasses, which the former
        # exact type comparison would have split into single characters.
        if isinstance(files, str):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the files not yet read."""
        try:
            self.nextfile()
        finally:
            # Forget the remaining files even if closing the current one
            # failed, so the instance cannot be resumed half-closed.
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve the line straight from the readlines() buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Alias so the iterator protocol also works where it is spelled
    # __next__ instead of next.
    __next__ = next

    def __getitem__(self, i):
        """Return line number i, which must be the next line in sequence.

        Raises RuntimeError when i is not the next line number and
        IndexError when the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file, restoring sys.stdout if it was redirected.

        The cleanup steps are chained with try/finally so that a failure
        in one of them (for instance while flushing the output file) does
        not leak the remaining resources; the error still propagates.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                # sys.stdin is never closed here.
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is only kept when the caller supplied an
                # explicit backup extension.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" when all files are exhausted.

        Empty files are opened and closed again without producing a line.
        """
        # A loop rather than a recursive call, so that a long run of empty
        # files cannot exhaust the recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # End of the current file: close it and go on to the next.
                self.nextfile()

    def _open_next(self):
        """Open the next file of self._files (which must not be empty)."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = 0
        if self._filename == '-':
            # In-place filtering is not applied to standard input.
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep+"bak"))
            # Remove a stale backup left over from an earlier run.
            try:
                os.unlink(self._backupfilename)
            except os.error:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, "r")
            try:
                perm = os.fstat(self._file.fileno())[stat.ST_MODE]
            except (AttributeError, OSError):
                # The mode cannot be determined (os.fstat missing or
                # failing): create the output with default permissions.
                self._output = open(self._filename, "w")
            else:
                # Recreate the file with the permission bits of the
                # original.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    os.chmod(self._filename, perm)
                except (AttributeError, OSError):
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def filename(self):
        """Return the name of the file of the last line read (None at first)."""
        return self._filename

    def lineno(self):
        """Return the cumulative line number of the line just read."""
        return self._lineno

    def filelineno(self):
        """Return the line number within the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if the line just read is the first line of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
|
1997-11-21 13:12:59 -04:00
|
|
|
|
|
|
|
def _test():
    """Command-line self-test: print every input line with its numbers.

    Options: -i enables in-place filtering, -b EXT sets the backup
    extension.  Each line is printed as
    "<lineno>: <filename>[<filelineno>]<*> <line>", where "*" marks the
    first line of a file; a final summary line follows.
    """
    import getopt
    inplace = 0
    backup = ""   # same default as input()
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i': inplace = 1
        if o == '-b': backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip the line terminator ("\n" or "\r\n") before printing.
        if line[-1:] == '\n': line = line[:-1]
        if line[-1:] == '\r': line = line[:-1]
        if isfirstline():
            marker = "*"
        else:
            marker = ""
        # print(...) with a single argument behaves the same whether print
        # is a statement or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
|
|
|
|
|
|
|
|
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    _test()
|