2003-12-15 06:16:09 -04:00
|
|
|
r"""File-like objects that read from or write to a string buffer.
|
2000-02-02 11:10:15 -04:00
|
|
|
|
|
|
|
This implements (nearly) all stdio methods.
|
|
|
|
|
|
|
|
f = StringIO() # ready for writing
|
|
|
|
f = StringIO(buf) # ready for reading
|
|
|
|
f.close() # explicitly release resources held
|
|
|
|
flag = f.isatty() # always false
|
|
|
|
pos = f.tell() # get current position
|
|
|
|
f.seek(pos) # set current position
|
|
|
|
f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
|
|
|
|
buf = f.read() # read until EOF
|
|
|
|
buf = f.read(n) # read up to n bytes
|
|
|
|
buf = f.readline() # read until end of line ('\n') or EOF
|
|
|
|
list = f.readlines()# list of f.readline() results until EOF
|
2000-09-28 01:21:06 -03:00
|
|
|
f.truncate([size]) # truncate file to at most size (default: current pos)
|
2000-02-02 11:10:15 -04:00
|
|
|
f.write(buf) # write at current position
|
|
|
|
f.writelines(list) # for line in list: f.write(line)
|
|
|
|
f.getvalue() # return whole file's contents as a string
|
|
|
|
|
|
|
|
Notes:
|
|
|
|
- Using a real file is often faster (but less convenient).
|
2000-02-28 11:12:25 -04:00
|
|
|
- There's also a much faster implementation in C, called cStringIO, but
|
|
|
|
it's not subclassable.
|
2000-02-02 11:10:15 -04:00
|
|
|
- fileno() is left unimplemented so that code which uses it triggers
|
|
|
|
an exception early.
|
|
|
|
- Seeking far beyond EOF and then writing will insert real null
|
|
|
|
bytes that occupy space in the buffer.
|
|
|
|
- There's a simple test set (see end of this file).
|
|
|
|
"""
|
2000-12-12 19:12:23 -04:00
|
|
|
try:
|
2000-12-12 19:16:51 -04:00
|
|
|
from errno import EINVAL
|
2000-12-12 19:12:23 -04:00
|
|
|
except ImportError:
|
2000-12-12 19:16:51 -04:00
|
|
|
EINVAL = 22
|
2000-12-12 19:12:23 -04:00
|
|
|
|
2001-01-20 15:54:20 -04:00
|
|
|
# Names exported by "from <this module> import *".
__all__ = ["StringIO"]
|
|
|
|
|
2003-10-18 07:20:42 -03:00
|
|
|
def _complain_ifclosed(closed):
|
|
|
|
if closed:
|
|
|
|
raise ValueError, "I/O operation on closed file"
|
|
|
|
|
1994-06-23 08:53:27 -03:00
|
|
|
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.
    """
    def __init__(self, buf = ''):
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf          # consolidated contents
        self.len = len(buf)     # logical length of the file contents
        self.buflist = []       # written pieces not yet joined onto self.buf
        self.pos = 0            # current read/write position
        self.closed = False
        self.softspace = 0

    def _consolidate(self):
        """Join any pending pieces in self.buflist onto self.buf.

        write() queues pieces in a list instead of concatenating on every
        call; anything that needs to index into the contents calls this
        first.
        """
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Closing an already-closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Pending writes hold data too; drop them so the memory is
            # really released.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to zero.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If n is negative, None or omitted, read all data until EOF
        is reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        # None is accepted explicitly as "read everything" rather than
        # relying on how None compares with 0.
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned. A negative
        length is treated like no limit.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # Only a non-negative length limits the read; a negative one used
        # to move the position backwards and return ''.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError(EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Use the real length of what is left: if size was larger than the
        # contents, the slice kept everything and the length must not grow.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        Writing at a position beyond the current end first pads the gap
        with null bytes. Writing inside existing data overwrites it.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: appending at EOF just queues the piece.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the gap with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: rebuild the contents as
            # prefix + new data + whatever survives after it.
            self._consolidate()
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer

        There is nothing to flush; this only checks that the object is
        still open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object has been closed.
        """
        # close() deletes self.buf, so fail with the same clear error the
        # other methods give instead of an AttributeError.
        _complain_ifclosed(self.closed)
        self._consolidate()
        return self.buf
|
1994-06-23 08:53:27 -03:00
|
|
|
|
|
|
|
|
|
|
|
# A little test suite
|
|
|
|
|
|
|
|
def test():
    """Exercise StringIO against the contents of a real text file.

    The file is taken from the first command-line argument, defaulting to
    '/etc/passwd'. It needs at least two lines, because the second line
    is written over the start of the buffer's second line. Progress is
    printed to stdout; any mismatch raises RuntimeError.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file both line-wise and as one string through a single
    # handle that is always closed.
    fp = open(path, 'r')
    try:
        lines = fp.readlines()
        fp.seek(0)
        text = fp.read()
    finally:
        fp.close()
    f = StringIO()
    # Cover both write() and writelines().
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % (length,))
    # Overwrite in the middle of the buffer, then read back from the start.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % (repr(f.readline()),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %s' % (repr(line),))
    # A relative seek backwards must re-read the same line.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(remaining),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Explicit floor division keeps the size an integer.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % (f.tell(),))
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()
|
1994-06-23 08:53:27 -03:00
|
|
|
|
|
|
|
# Run the self-test when this file is executed as a script.
if __name__ == "__main__":
    test()
|