mirror of https://github.com/python/cpython
Issue 2918: Merge StringIO and cStringIO.
This commit is contained in:
parent
502d89ed15
commit
794652dd06
288
Lib/io.py
288
Lib/io.py
|
@ -1769,20 +1769,20 @@ class TextIOWrapper(TextIOBase):
|
||||||
def newlines(self):
|
def newlines(self):
|
||||||
return self._decoder.newlines if self._decoder else None
|
return self._decoder.newlines if self._decoder else None
|
||||||
|
|
||||||
class StringIO(TextIOWrapper):
|
class _StringIO(TextIOWrapper):
|
||||||
"""An in-memory stream for text. The initial_value argument sets the
|
"""Text I/O implementation using an in-memory buffer.
|
||||||
value of object. The other arguments are like those of TextIOWrapper's
|
|
||||||
constructor.
|
The initial_value argument sets the value of object. The newline
|
||||||
|
argument is like the one of TextIOWrapper's constructor.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# XXX This is really slow, but fully functional
|
# XXX This is really slow, but fully functional
|
||||||
|
|
||||||
def __init__(self, initial_value="", encoding="utf-8",
|
def __init__(self, initial_value="", newline="\n"):
|
||||||
errors="strict", newline="\n"):
|
super(_StringIO, self).__init__(BytesIO(),
|
||||||
super(StringIO, self).__init__(BytesIO(),
|
encoding="utf-8",
|
||||||
encoding=encoding,
|
errors="strict",
|
||||||
errors=errors,
|
newline=newline)
|
||||||
newline=newline)
|
|
||||||
if initial_value:
|
if initial_value:
|
||||||
if not isinstance(initial_value, str):
|
if not isinstance(initial_value, str):
|
||||||
initial_value = str(initial_value)
|
initial_value = str(initial_value)
|
||||||
|
@ -1792,3 +1792,271 @@ class StringIO(TextIOWrapper):
|
||||||
def getvalue(self):
|
def getvalue(self):
|
||||||
self.flush()
|
self.flush()
|
||||||
return self.buffer.getvalue().decode(self._encoding, self._errors)
|
return self.buffer.getvalue().decode(self._encoding, self._errors)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import _stringio
|
||||||
|
|
||||||
|
# This subclass is a reimplementation of the TextIOWrapper
|
||||||
|
# interface without any of its text decoding facilities. All the
|
||||||
|
# stored data is manipulated with the efficient
|
||||||
|
# _stringio._StringIO extension type. Also, the newline decoding
|
||||||
|
# mechanism of IncrementalNewlineDecoder is reimplemented here for
|
||||||
|
# efficiency. Doing otherwise would require us to implement a
|
||||||
|
# fake decoder which would add an additional and unnecessary layer
|
||||||
|
# on top of the _StringIO methods.
|
||||||
|
|
||||||
|
class StringIO(_stringio._StringIO, TextIOBase):
|
||||||
|
"""Text I/O implementation using an in-memory buffer.
|
||||||
|
|
||||||
|
The initial_value argument sets the value of object. The newline
|
||||||
|
argument is like the one of TextIOWrapper's constructor.
|
||||||
|
"""
|
||||||
|
|
||||||
|
_CHUNK_SIZE = 4096
|
||||||
|
|
||||||
|
def __init__(self, initial_value="", newline="\n"):
|
||||||
|
if newline not in (None, "", "\n", "\r", "\r\n"):
|
||||||
|
raise ValueError("illegal newline value: %r" % (newline,))
|
||||||
|
|
||||||
|
self._readuniversal = not newline
|
||||||
|
self._readtranslate = newline is None
|
||||||
|
self._readnl = newline
|
||||||
|
self._writetranslate = newline != ""
|
||||||
|
self._writenl = newline or os.linesep
|
||||||
|
self._pending = ""
|
||||||
|
self._seennl = 0
|
||||||
|
|
||||||
|
# Reset the buffer first, in case __init__ is called
|
||||||
|
# multiple times.
|
||||||
|
self.truncate(0)
|
||||||
|
if initial_value is None:
|
||||||
|
initial_value = ""
|
||||||
|
self.write(initial_value)
|
||||||
|
self.seek(0)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def buffer(self):
|
||||||
|
raise UnsupportedOperation("%s.buffer attribute is unsupported" %
|
||||||
|
self.__class__.__name__)
|
||||||
|
|
||||||
|
def _decode_newlines(self, input, final=False):
    """Record and optionally translate the newlines found in *input*.

    Text held in self._pending is prepended to *input* first.  Unless
    *final* is true, a trailing CR is held back in self._pending so that
    a CR LF pair split across two calls is still seen as one newline.
    The kinds of newline encountered are OR-ed into self._seennl using
    the _LF, _CR and _CRLF bit flags.

    Returns the text; when self._readtranslate is true every CR LF and
    every lone CR is converted to LF, otherwise the text is unchanged.
    """
    # decode input (with the eventual \r from a previous pass)
    if self._pending:
        input = self._pending + input

    # retain last \r even when not translating data:
    # then readline() is sure to get \r\n in one pass
    if input.endswith("\r") and not final:
        input = input[:-1]
        self._pending = "\r"
    else:
        self._pending = ""

    # Record which newlines are read
    crlf = input.count("\r\n")
    cr = input.count("\r") - crlf
    lf = input.count("\n") - crlf
    self._seennl |= ((lf and self._LF) | (cr and self._CR)
                     | (crlf and self._CRLF))

    # Start from the untranslated text so that `output` is always bound,
    # and chain the replacements: CR LF must be collapsed before lone CRs
    # are handled, otherwise each CR LF would turn into two newlines.
    output = input
    if self._readtranslate:
        if crlf:
            output = output.replace("\r\n", "\n")
        if cr:
            output = output.replace("\r", "\n")

    return output
|
||||||
|
|
||||||
|
def writable(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def readable(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
def seekable(self):
|
||||||
|
return True
|
||||||
|
|
||||||
|
_read = _stringio._StringIO.read
|
||||||
|
_write = _stringio._StringIO.write
|
||||||
|
_tell = _stringio._StringIO.tell
|
||||||
|
_seek = _stringio._StringIO.seek
|
||||||
|
_truncate = _stringio._StringIO.truncate
|
||||||
|
_getvalue = _stringio._StringIO.getvalue
|
||||||
|
|
||||||
|
def getvalue(self) -> str:
|
||||||
|
"""Retrieve the entire contents of the object."""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("read on closed file")
|
||||||
|
return self._getvalue()
|
||||||
|
|
||||||
|
def write(self, s: str) -> int:
|
||||||
|
"""Write string s to file.
|
||||||
|
|
||||||
|
Returns the number of characters written.
|
||||||
|
"""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("write to closed file")
|
||||||
|
if not isinstance(s, str):
|
||||||
|
raise TypeError("can't write %s to text stream" %
|
||||||
|
s.__class__.__name__)
|
||||||
|
length = len(s)
|
||||||
|
if self._writetranslate and self._writenl != "\n":
|
||||||
|
s = s.replace("\n", self._writenl)
|
||||||
|
self._pending = ""
|
||||||
|
self._write(s)
|
||||||
|
return length
|
||||||
|
|
||||||
|
def read(self, n: int = None) -> str:
    """Read at most n characters, returned as a string.

    If the argument is negative or omitted, read until EOF
    is reached. Return an empty string at EOF.

    Raises ValueError if the stream is closed.
    """
    if self.closed:
        raise ValueError("read to closed file")
    if n is None:
        n = -1

    if n < 0:
        # _decode_newlines() itself prepends self._pending to the new data,
        # so the pushed-back text must not be added a second time here.
        res = self._decode_newlines(self._read(), True)
        self._pending = ""
        return res

    # The decoded text is the old pushback plus up to n new characters:
    # hand out the first n and push the remainder back.
    res = self._decode_newlines(self._read(n), True)
    self._pending = res[n:]
    return res[:n]
|
||||||
|
|
||||||
|
def tell(self) -> int:
|
||||||
|
"""Tell the current file position."""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("tell from closed file")
|
||||||
|
if self._pending:
|
||||||
|
return self._tell() - len(self._pending)
|
||||||
|
else:
|
||||||
|
return self._tell()
|
||||||
|
|
||||||
|
def seek(self, pos: int = None, whence: int = 0) -> int:
|
||||||
|
"""Change stream position.
|
||||||
|
|
||||||
|
Seek to character offset pos relative to position indicated by whence:
|
||||||
|
0 Start of stream (the default). pos should be >= 0;
|
||||||
|
1 Current position - pos must be 0;
|
||||||
|
2 End of stream - pos must be 0.
|
||||||
|
Returns the new absolute position.
|
||||||
|
"""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("seek from closed file")
|
||||||
|
self._pending = ""
|
||||||
|
return self._seek(pos, whence)
|
||||||
|
|
||||||
|
def truncate(self, pos: int = None) -> int:
|
||||||
|
"""Truncate size to pos.
|
||||||
|
|
||||||
|
The pos argument defaults to the current file position, as
|
||||||
|
returned by tell(). Imply an absolute seek to pos.
|
||||||
|
Returns the new absolute position.
|
||||||
|
"""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("truncate from closed file")
|
||||||
|
self._pending = ""
|
||||||
|
return self._truncate(pos)
|
||||||
|
|
||||||
|
def readline(self, limit: int = None) -> str:
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("read from closed file")
|
||||||
|
if limit is None:
|
||||||
|
limit = -1
|
||||||
|
if limit >= 0:
|
||||||
|
# XXX: Hack to support limit argument, for backwards
|
||||||
|
# XXX compatibility
|
||||||
|
line = self.readline()
|
||||||
|
if len(line) <= limit:
|
||||||
|
return line
|
||||||
|
line, self._pending = line[:limit], line[limit:] + self._pending
|
||||||
|
return line
|
||||||
|
|
||||||
|
line = self._pending
|
||||||
|
self._pending = ""
|
||||||
|
|
||||||
|
start = 0
|
||||||
|
pos = endpos = None
|
||||||
|
while True:
|
||||||
|
if self._readtranslate:
|
||||||
|
# Newlines are already translated, only search for \n
|
||||||
|
pos = line.find('\n', start)
|
||||||
|
if pos >= 0:
|
||||||
|
endpos = pos + 1
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
start = len(line)
|
||||||
|
|
||||||
|
elif self._readuniversal:
|
||||||
|
# Universal newline search. Find any of \r, \r\n, \n
|
||||||
|
# The decoder ensures that \r\n are not split in two pieces
|
||||||
|
|
||||||
|
# In C we'd look for these in parallel of course.
|
||||||
|
nlpos = line.find("\n", start)
|
||||||
|
crpos = line.find("\r", start)
|
||||||
|
if crpos == -1:
|
||||||
|
if nlpos == -1:
|
||||||
|
# Nothing found
|
||||||
|
start = len(line)
|
||||||
|
else:
|
||||||
|
# Found \n
|
||||||
|
endpos = nlpos + 1
|
||||||
|
break
|
||||||
|
elif nlpos == -1:
|
||||||
|
# Found lone \r
|
||||||
|
endpos = crpos + 1
|
||||||
|
break
|
||||||
|
elif nlpos < crpos:
|
||||||
|
# Found \n
|
||||||
|
endpos = nlpos + 1
|
||||||
|
break
|
||||||
|
elif nlpos == crpos + 1:
|
||||||
|
# Found \r\n
|
||||||
|
endpos = crpos + 2
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Found \r
|
||||||
|
endpos = crpos + 1
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# non-universal
|
||||||
|
pos = line.find(self._readnl)
|
||||||
|
if pos >= 0:
|
||||||
|
endpos = pos + len(self._readnl)
|
||||||
|
break
|
||||||
|
|
||||||
|
# No line ending seen yet - get more data
|
||||||
|
more_line = self.read(self._CHUNK_SIZE)
|
||||||
|
if more_line:
|
||||||
|
line += more_line
|
||||||
|
else:
|
||||||
|
# end of file
|
||||||
|
return line
|
||||||
|
|
||||||
|
self._pending = line[endpos:]
|
||||||
|
return line[:endpos]
|
||||||
|
|
||||||
|
_LF = 1
|
||||||
|
_CR = 2
|
||||||
|
_CRLF = 4
|
||||||
|
|
||||||
|
@property
|
||||||
|
def newlines(self):
|
||||||
|
return (None,
|
||||||
|
"\n",
|
||||||
|
"\r",
|
||||||
|
("\r", "\n"),
|
||||||
|
"\r\n",
|
||||||
|
("\n", "\r\n"),
|
||||||
|
("\r", "\r\n"),
|
||||||
|
("\r", "\n", "\r\n")
|
||||||
|
)[self._seennl]
|
||||||
|
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
StringIO = _StringIO
|
||||||
|
|
|
@ -10,7 +10,7 @@ import io
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import _bytesio
|
import _bytesio, _stringio
|
||||||
has_c_implementation = True
|
has_c_implementation = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
has_c_implementation = False
|
has_c_implementation = False
|
||||||
|
@ -373,7 +373,7 @@ class PyBytesIOTest(MemoryTestMixin, unittest.TestCase):
|
||||||
|
|
||||||
class PyStringIOTest(MemoryTestMixin, unittest.TestCase):
|
class PyStringIOTest(MemoryTestMixin, unittest.TestCase):
|
||||||
buftype = str
|
buftype = str
|
||||||
ioclass = io.StringIO
|
ioclass = io._StringIO
|
||||||
EOF = ""
|
EOF = ""
|
||||||
|
|
||||||
def test_relative_seek(self):
|
def test_relative_seek(self):
|
||||||
|
@ -404,10 +404,14 @@ if has_c_implementation:
|
||||||
class CBytesIOTest(PyBytesIOTest):
|
class CBytesIOTest(PyBytesIOTest):
|
||||||
ioclass = io.BytesIO
|
ioclass = io.BytesIO
|
||||||
|
|
||||||
|
class CStringIOTest(PyStringIOTest):
|
||||||
|
ioclass = io.StringIO
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
tests = [PyBytesIOTest, PyStringIOTest]
|
tests = [PyBytesIOTest, PyStringIOTest]
|
||||||
if has_c_implementation:
|
if has_c_implementation:
|
||||||
tests.extend([CBytesIOTest])
|
tests.extend([CBytesIOTest, CStringIOTest])
|
||||||
support.run_unittest(*tests)
|
support.run_unittest(*tests)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import pickle
|
import pickle
|
||||||
from io import StringIO
|
|
||||||
from test.support import verbose, run_unittest, TestSkipped
|
from test.support import verbose, run_unittest, TestSkipped
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
@ -80,7 +79,7 @@ class MinidomTest(unittest.TestCase):
|
||||||
self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t)))
|
self.confirm(t == s, "looking for %s, found %s" % (repr(s), repr(t)))
|
||||||
|
|
||||||
def testParseFromFile(self):
|
def testParseFromFile(self):
|
||||||
dom = parse(StringIO(open(tstfile).read()))
|
dom = parse(open(tstfile))
|
||||||
dom.unlink()
|
dom.unlink()
|
||||||
self.confirm(isinstance(dom, Document))
|
self.confirm(isinstance(dom, Document))
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,32 @@ encodedtext = b"""\
|
||||||
M5&AE('-M;V]T:\"US8V%L960@<'ET:&]N(&-R97!T(&]V97(@=&AE('-L965P
|
M5&AE('-M;V]T:\"US8V%L960@<'ET:&]N(&-R97!T(&]V97(@=&AE('-L965P
|
||||||
(:6YG(&1O9PH """
|
(:6YG(&1O9PH """
|
||||||
|
|
||||||
|
# Stolen from io.py
|
||||||
|
class FakeIO(io.TextIOWrapper):
|
||||||
|
"""Text I/O implementation using an in-memory buffer.
|
||||||
|
|
||||||
|
Can be a used as a drop-in replacement for sys.stdin and sys.stdout.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# XXX This is really slow, but fully functional
|
||||||
|
|
||||||
|
def __init__(self, initial_value="", encoding="utf-8",
|
||||||
|
errors="strict", newline="\n"):
|
||||||
|
super(FakeIO, self).__init__(io.BytesIO(),
|
||||||
|
encoding=encoding,
|
||||||
|
errors=errors,
|
||||||
|
newline=newline)
|
||||||
|
if initial_value:
|
||||||
|
if not isinstance(initial_value, str):
|
||||||
|
initial_value = str(initial_value)
|
||||||
|
self.write(initial_value)
|
||||||
|
self.seek(0)
|
||||||
|
|
||||||
|
def getvalue(self):
|
||||||
|
self.flush()
|
||||||
|
return self.buffer.getvalue().decode(self._encoding, self._errors)
|
||||||
|
|
||||||
|
|
||||||
def encodedtextwrapped(mode, filename):
|
def encodedtextwrapped(mode, filename):
|
||||||
return (bytes("begin %03o %s\n" % (mode, filename), "ascii") +
|
return (bytes("begin %03o %s\n" % (mode, filename), "ascii") +
|
||||||
encodedtext + b"\n \nend\n")
|
encodedtext + b"\n \nend\n")
|
||||||
|
@ -76,15 +102,15 @@ class UUStdIOTest(unittest.TestCase):
|
||||||
sys.stdout = self.stdout
|
sys.stdout = self.stdout
|
||||||
|
|
||||||
def test_encode(self):
|
def test_encode(self):
|
||||||
sys.stdin = io.StringIO(plaintext.decode("ascii"))
|
sys.stdin = FakeIO(plaintext.decode("ascii"))
|
||||||
sys.stdout = io.StringIO()
|
sys.stdout = FakeIO()
|
||||||
uu.encode("-", "-", "t1", 0o666)
|
uu.encode("-", "-", "t1", 0o666)
|
||||||
self.assertEqual(sys.stdout.getvalue(),
|
self.assertEqual(sys.stdout.getvalue(),
|
||||||
encodedtextwrapped(0o666, "t1").decode("ascii"))
|
encodedtextwrapped(0o666, "t1").decode("ascii"))
|
||||||
|
|
||||||
def test_decode(self):
|
def test_decode(self):
|
||||||
sys.stdin = io.StringIO(encodedtextwrapped(0o666, "t1").decode("ascii"))
|
sys.stdin = FakeIO(encodedtextwrapped(0o666, "t1").decode("ascii"))
|
||||||
sys.stdout = io.StringIO()
|
sys.stdout = FakeIO()
|
||||||
uu.decode("-", "-")
|
uu.decode("-", "-")
|
||||||
stdout = sys.stdout
|
stdout = sys.stdout
|
||||||
sys.stdout = self.stdout
|
sys.stdout = self.stdout
|
||||||
|
|
|
@ -14,6 +14,7 @@ Todo:
|
||||||
* SAX 2 namespaces
|
* SAX 2 namespaces
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import codecs
|
||||||
import io
|
import io
|
||||||
import xml.dom
|
import xml.dom
|
||||||
|
|
||||||
|
@ -49,16 +50,16 @@ class Node(xml.dom.Node):
|
||||||
# indent = the indentation string to prepend, per level
|
# indent = the indentation string to prepend, per level
|
||||||
# newl = the newline string to append
|
# newl = the newline string to append
|
||||||
use_encoding = "utf-8" if encoding is None else encoding
|
use_encoding = "utf-8" if encoding is None else encoding
|
||||||
writer = io.StringIO(encoding=use_encoding)
|
writer = codecs.getwriter(use_encoding)(io.BytesIO())
|
||||||
if self.nodeType == Node.DOCUMENT_NODE:
|
if self.nodeType == Node.DOCUMENT_NODE:
|
||||||
# Can pass encoding only to document, to put it into XML header
|
# Can pass encoding only to document, to put it into XML header
|
||||||
self.writexml(writer, "", indent, newl, encoding)
|
self.writexml(writer, "", indent, newl, encoding)
|
||||||
else:
|
else:
|
||||||
self.writexml(writer, "", indent, newl)
|
self.writexml(writer, "", indent, newl)
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
return writer.getvalue()
|
return writer.stream.getvalue().decode(use_encoding)
|
||||||
else:
|
else:
|
||||||
return writer.buffer.getvalue()
|
return writer.stream.getvalue()
|
||||||
|
|
||||||
def hasChildNodes(self):
|
def hasChildNodes(self):
|
||||||
if self.childNodes:
|
if self.childNodes:
|
||||||
|
|
|
@ -78,6 +78,8 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Added C optimized implementation of io.StringIO.
|
||||||
|
|
||||||
- The ``pickle`` module now automatically uses an optimized C
|
- The ``pickle`` module now automatically uses an optimized C
|
||||||
implementation of Pickler and Unpickler when available. The
|
implementation of Pickler and Unpickler when available. The
|
||||||
``cPickle`` module is no longer needed.
|
``cPickle`` module is no longer needed.
|
||||||
|
|
|
@ -0,0 +1,379 @@
|
||||||
|
#include "Python.h"
|
||||||
|
|
||||||
|
/* This module is a stripped down version of _bytesio.c with a Py_UNICODE
|
||||||
|
buffer. Most of the functionality is provided by subclassing _StringIO. */
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
PyObject_HEAD
|
||||||
|
Py_UNICODE *buf;
|
||||||
|
Py_ssize_t pos;
|
||||||
|
Py_ssize_t string_size;
|
||||||
|
size_t buf_size;
|
||||||
|
} StringIOObject;
|
||||||
|
|
||||||
|
|
||||||
|
/* Internal routine for changing the size, in terms of characters, of the
|
||||||
|
buffer of StringIO objects. The caller should ensure that the 'size'
|
||||||
|
argument is non-negative. Returns 0 on success, -1 otherwise. */
|
||||||
|
static int
|
||||||
|
resize_buffer(StringIOObject *self, size_t size)
|
||||||
|
{
|
||||||
|
/* Here, unsigned types are used to avoid dealing with signed integer
|
||||||
|
overflow, which is undefined in C. */
|
||||||
|
size_t alloc = self->buf_size;
|
||||||
|
Py_UNICODE *new_buf = NULL;
|
||||||
|
|
||||||
|
assert(self->buf != NULL);
|
||||||
|
|
||||||
|
/* For simplicity, stay in the range of the signed type. Anyway, Python
|
||||||
|
doesn't allow strings to be longer than this. */
|
||||||
|
if (size > PY_SSIZE_T_MAX)
|
||||||
|
goto overflow;
|
||||||
|
|
||||||
|
if (size < alloc / 2) {
|
||||||
|
/* Major downsize; resize down to exact size. */
|
||||||
|
alloc = size + 1;
|
||||||
|
}
|
||||||
|
else if (size < alloc) {
|
||||||
|
/* Within allocated size; quick exit */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else if (size <= alloc * 1.125) {
|
||||||
|
/* Moderate upsize; overallocate similar to list_resize() */
|
||||||
|
alloc = size + (size >> 3) + (size < 9 ? 3 : 6);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Major upsize; resize up to exact size */
|
||||||
|
alloc = size + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alloc > ((size_t)-1) / sizeof(Py_UNICODE))
|
||||||
|
goto overflow;
|
||||||
|
new_buf = (Py_UNICODE *)PyMem_Realloc(self->buf,
|
||||||
|
alloc * sizeof(Py_UNICODE));
|
||||||
|
if (new_buf == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
self->buf_size = alloc;
|
||||||
|
self->buf = new_buf;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
overflow:
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"new buffer size too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Internal routine for writing a string of characters to the buffer of a
|
||||||
|
StringIO object. Returns the number of characters written, or -1 on error. */
|
||||||
|
static Py_ssize_t
|
||||||
|
write_str(StringIOObject *self, const Py_UNICODE *str, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
assert(self->buf != NULL);
|
||||||
|
assert(self->pos >= 0);
|
||||||
|
assert(len >= 0);
|
||||||
|
|
||||||
|
/* This overflow check is not strictly necessary. However, it avoids us to
|
||||||
|
deal with funky things like comparing an unsigned and a signed
|
||||||
|
integer. */
|
||||||
|
if (self->pos > PY_SSIZE_T_MAX - len) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"new position too large");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (self->pos + len > self->string_size) {
|
||||||
|
if (resize_buffer(self, self->pos + len) < 0)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (self->pos > self->string_size) {
|
||||||
|
/* In case of overseek, pad with null bytes the buffer region between
|
||||||
|
the end of stream and the current position.
|
||||||
|
|
||||||
|
0 lo string_size hi
|
||||||
|
| |<---used--->|<----------available----------->|
|
||||||
|
| | <--to pad-->|<---to write---> |
|
||||||
|
0 buf position
|
||||||
|
|
||||||
|
*/
|
||||||
|
memset(self->buf + self->string_size, '\0',
|
||||||
|
(self->pos - self->string_size) * sizeof(Py_UNICODE));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy the data to the internal buffer, overwriting some of the
|
||||||
|
existing data if self->pos < self->string_size. */
|
||||||
|
memcpy(self->buf + self->pos, str, len * sizeof(Py_UNICODE));
|
||||||
|
self->pos += len;
|
||||||
|
|
||||||
|
/* Set the new length of the internal string if it has changed */
|
||||||
|
if (self->string_size < self->pos) {
|
||||||
|
self->string_size = self->pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_getvalue(StringIOObject *self)
|
||||||
|
{
|
||||||
|
return PyUnicode_FromUnicode(self->buf, self->string_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_tell(StringIOObject *self)
|
||||||
|
{
|
||||||
|
return PyLong_FromSsize_t(self->pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_read(StringIOObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
Py_ssize_t size, n;
|
||||||
|
Py_UNICODE *output;
|
||||||
|
PyObject *arg = Py_None;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "|O:read", &arg))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (PyLong_Check(arg)) {
|
||||||
|
size = PyLong_AsSsize_t(arg);
|
||||||
|
}
|
||||||
|
else if (arg == Py_None) {
|
||||||
|
/* Read until EOF is reached, by default. */
|
||||||
|
size = -1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
|
||||||
|
Py_TYPE(arg)->tp_name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* adjust invalid sizes */
|
||||||
|
n = self->string_size - self->pos;
|
||||||
|
if (size < 0 || size > n) {
|
||||||
|
size = n;
|
||||||
|
if (size < 0)
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(self->buf != NULL);
|
||||||
|
output = self->buf + self->pos;
|
||||||
|
self->pos += size;
|
||||||
|
|
||||||
|
return PyUnicode_FromUnicode(output, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_truncate(StringIOObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
Py_ssize_t size;
|
||||||
|
PyObject *arg = Py_None;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (PyLong_Check(arg)) {
|
||||||
|
size = PyLong_AsSsize_t(arg);
|
||||||
|
}
|
||||||
|
else if (arg == Py_None) {
|
||||||
|
/* Truncate to current position if no argument is passed. */
|
||||||
|
size = self->pos;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
|
||||||
|
Py_TYPE(arg)->tp_name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size < 0) {
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"Negative size value %zd", size);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size < self->string_size) {
|
||||||
|
self->string_size = size;
|
||||||
|
if (resize_buffer(self, size) < 0)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
self->pos = size;
|
||||||
|
|
||||||
|
return PyLong_FromSsize_t(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_seek(StringIOObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
Py_ssize_t pos;
|
||||||
|
int mode = 0;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (mode != 0 && mode != 1 && mode != 2) {
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"Invalid whence (%i, should be 0, 1 or 2)", mode);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else if (pos < 0 && mode == 0) {
|
||||||
|
PyErr_Format(PyExc_ValueError,
|
||||||
|
"Negative seek position %zd", pos);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else if (mode != 0 && pos != 0) {
|
||||||
|
PyErr_SetString(PyExc_IOError,
|
||||||
|
"Can't do nonzero cur-relative seeks");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* mode 0: offset relative to beginning of the string.
|
||||||
|
mode 1: no change to current position.
|
||||||
|
mode 2: change position to end of file. */
|
||||||
|
if (mode == 1) {
|
||||||
|
pos = self->pos;
|
||||||
|
}
|
||||||
|
else if (mode == 2) {
|
||||||
|
pos = self->string_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
self->pos = pos;
|
||||||
|
|
||||||
|
return PyLong_FromSsize_t(self->pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_write(StringIOObject *self, PyObject *obj)
|
||||||
|
{
|
||||||
|
const Py_UNICODE *str;
|
||||||
|
Py_ssize_t size;
|
||||||
|
Py_ssize_t n = 0;
|
||||||
|
|
||||||
|
if (PyUnicode_Check(obj)) {
|
||||||
|
str = PyUnicode_AsUnicode(obj);
|
||||||
|
size = PyUnicode_GetSize(obj);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
|
||||||
|
Py_TYPE(obj)->tp_name);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size != 0) {
|
||||||
|
n = write_str(self, str, size);
|
||||||
|
if (n < 0)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return PyLong_FromSsize_t(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
stringio_dealloc(StringIOObject *self)
|
||||||
|
{
|
||||||
|
PyMem_Free(self->buf);
|
||||||
|
Py_TYPE(self)->tp_free(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||||
|
{
|
||||||
|
StringIOObject *self;
|
||||||
|
|
||||||
|
assert(type != NULL && type->tp_alloc != NULL);
|
||||||
|
self = (StringIOObject *)type->tp_alloc(type, 0);
|
||||||
|
if (self == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
self->string_size = 0;
|
||||||
|
self->pos = 0;
|
||||||
|
self->buf_size = 0;
|
||||||
|
self->buf = (Py_UNICODE *)PyMem_Malloc(0);
|
||||||
|
if (self->buf == NULL) {
|
||||||
|
Py_DECREF(self);
|
||||||
|
return PyErr_NoMemory();
|
||||||
|
}
|
||||||
|
|
||||||
|
return (PyObject *)self;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct PyMethodDef stringio_methods[] = {
|
||||||
|
{"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
|
||||||
|
{"read", (PyCFunction)stringio_read, METH_VARARGS, NULL},
|
||||||
|
{"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL},
|
||||||
|
{"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
|
||||||
|
{"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL},
|
||||||
|
{"write", (PyCFunction)stringio_write, METH_O, NULL},
|
||||||
|
{NULL, NULL} /* sentinel */
|
||||||
|
};
|
||||||
|
|
||||||
|
static PyTypeObject StringIO_Type = {
|
||||||
|
PyVarObject_HEAD_INIT(NULL, 0)
|
||||||
|
"_stringio._StringIO", /*tp_name*/
|
||||||
|
sizeof(StringIOObject), /*tp_basicsize*/
|
||||||
|
0, /*tp_itemsize*/
|
||||||
|
(destructor)stringio_dealloc, /*tp_dealloc*/
|
||||||
|
0, /*tp_print*/
|
||||||
|
0, /*tp_getattr*/
|
||||||
|
0, /*tp_setattr*/
|
||||||
|
0, /*tp_compare*/
|
||||||
|
0, /*tp_repr*/
|
||||||
|
0, /*tp_as_number*/
|
||||||
|
0, /*tp_as_sequence*/
|
||||||
|
0, /*tp_as_mapping*/
|
||||||
|
0, /*tp_hash*/
|
||||||
|
0, /*tp_call*/
|
||||||
|
0, /*tp_str*/
|
||||||
|
0, /*tp_getattro*/
|
||||||
|
0, /*tp_setattro*/
|
||||||
|
0, /*tp_as_buffer*/
|
||||||
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||||
|
0, /*tp_doc*/
|
||||||
|
0, /*tp_traverse*/
|
||||||
|
0, /*tp_clear*/
|
||||||
|
0, /*tp_richcompare*/
|
||||||
|
0, /*tp_weaklistoffset*/
|
||||||
|
0, /*tp_iter*/
|
||||||
|
0, /*tp_iternext*/
|
||||||
|
stringio_methods, /*tp_methods*/
|
||||||
|
0, /*tp_members*/
|
||||||
|
0, /*tp_getset*/
|
||||||
|
0, /*tp_base*/
|
||||||
|
0, /*tp_dict*/
|
||||||
|
0, /*tp_descr_get*/
|
||||||
|
0, /*tp_descr_set*/
|
||||||
|
0, /*tp_dictoffset*/
|
||||||
|
0, /*tp_init*/
|
||||||
|
0, /*tp_alloc*/
|
||||||
|
stringio_new, /*tp_new*/
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct PyModuleDef _stringiomodule = {
|
||||||
|
PyModuleDef_HEAD_INIT,
|
||||||
|
"_stringio",
|
||||||
|
NULL,
|
||||||
|
-1,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
|
NULL,
|
||||||
|
NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
PyMODINIT_FUNC
|
||||||
|
PyInit__stringio(void)
|
||||||
|
{
|
||||||
|
PyObject *m;
|
||||||
|
|
||||||
|
if (PyType_Ready(&StringIO_Type) < 0)
|
||||||
|
return NULL;
|
||||||
|
m = PyModule_Create(&_stringiomodule);
|
||||||
|
if (m == NULL)
|
||||||
|
return NULL;
|
||||||
|
Py_INCREF(&StringIO_Type);
|
||||||
|
if (PyModule_AddObject(m, "_StringIO", (PyObject *)&StringIO_Type) < 0)
|
||||||
|
return NULL;
|
||||||
|
return m;
|
||||||
|
}
|
1
setup.py
1
setup.py
|
@ -422,6 +422,7 @@ class PyBuildExt(build_ext):
|
||||||
exts.append( Extension("_functools", ["_functoolsmodule.c"]) )
|
exts.append( Extension("_functools", ["_functoolsmodule.c"]) )
|
||||||
# Memory-based IO accelerator modules
|
# Memory-based IO accelerator modules
|
||||||
exts.append( Extension("_bytesio", ["_bytesio.c"]) )
|
exts.append( Extension("_bytesio", ["_bytesio.c"]) )
|
||||||
|
exts.append( Extension("_stringio", ["_stringio.c"]) )
|
||||||
# C-optimized pickle replacement
|
# C-optimized pickle replacement
|
||||||
exts.append( Extension("_pickle", ["_pickle.c"]) )
|
exts.append( Extension("_pickle", ["_pickle.c"]) )
|
||||||
# atexit
|
# atexit
|
||||||
|
|
Loading…
Reference in New Issue