MS Win32 .readline() speedup, as discussed on Python-Dev. This is a tricky
variant that never needs to "search from the right". Also fixed unlikely memory leak in get_line, if string size overflows INTMAX. Also new std test test_bufio to make sure .readline() works.
This commit is contained in:
parent
4ddf0a01f7
commit
86821b2563
|
@ -0,0 +1 @@
|
||||||
|
test_bufio
|
|
@ -0,0 +1,60 @@
|
||||||
|
from test_support import TestFailed, TESTFN
|
||||||
|
|
||||||
|
# Simple test to ensure that optimizations in fileobject.c deliver
|
||||||
|
# the expected results. For best testing, run this under a debug-build
|
||||||
|
# Python too (to exercise asserts in the C code).
|
||||||
|
|
||||||
|
# Repeat string 'pattern' as often as needed to reach total length
|
||||||
|
# 'length'. Then call try_one with that string, a string one larger
|
||||||
|
# than that, and a string one smaller than that. The main driver
|
||||||
|
# feeds this all small sizes and various powers of 2, so we exercise
|
||||||
|
# all likely stdio buffer sizes, and "off by one" errors on both
|
||||||
|
# sides.
|
||||||
|
def drive_one(pattern, length):
|
||||||
|
q, r = divmod(length, len(pattern))
|
||||||
|
teststring = pattern * q + pattern[:r]
|
||||||
|
assert len(teststring) == length
|
||||||
|
try_one(teststring)
|
||||||
|
try_one(teststring + "x")
|
||||||
|
try_one(teststring[:-1])
|
||||||
|
|
||||||
|
# Write s + "\n" + s to file, then open it and ensure that successive
|
||||||
|
# .readline()s deliver what we wrote.
|
||||||
|
def try_one(s):
|
||||||
|
# Since C doesn't guarantee we can write/read arbitrary bytes in text
|
||||||
|
# files, use binary mode.
|
||||||
|
f = open(TESTFN, "wb")
|
||||||
|
# write once with \n and once without
|
||||||
|
f.write(s)
|
||||||
|
f.write("\n")
|
||||||
|
f.write(s)
|
||||||
|
f.close()
|
||||||
|
f = open(TESTFN, "rb")
|
||||||
|
line = f.readline()
|
||||||
|
if line != s + "\n":
|
||||||
|
raise TestFailed("Expected %r got %r" % (s + "\n", line))
|
||||||
|
line = f.readline()
|
||||||
|
if line != s:
|
||||||
|
raise TestFailed("Expected %r got %r" % (s, line))
|
||||||
|
line = f.readline()
|
||||||
|
if line:
|
||||||
|
raise TestFailed("Expected EOF but got %r" % line)
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
# A pattern with prime length, to avoid simple relationships with
|
||||||
|
# stdio buffer sizes.
|
||||||
|
primepat = "1234567890\00\01\02\03\04\05\06\07"
|
||||||
|
|
||||||
|
nullpat = "\0" * 1000
|
||||||
|
|
||||||
|
try:
|
||||||
|
for size in range(1, 257) + [512, 1000, 1024, 2048, 4096, 8192, 10000,
|
||||||
|
16384, 32768, 65536, 1000000]:
|
||||||
|
drive_one(primepat, size)
|
||||||
|
drive_one(nullpat, size)
|
||||||
|
finally:
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
os.unlink(TESTFN)
|
||||||
|
except:
|
||||||
|
pass
|
|
@ -244,7 +244,7 @@ file_close(PyFileObject *f, PyObject *args)
|
||||||
/* a portable fseek() function
|
/* a portable fseek() function
|
||||||
return 0 on success, non-zero on failure (with errno set) */
|
return 0 on success, non-zero on failure (with errno set) */
|
||||||
int
|
int
|
||||||
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
||||||
_portable_fseek(FILE *fp, fpos_t offset, int whence)
|
_portable_fseek(FILE *fp, fpos_t offset, int whence)
|
||||||
#else
|
#else
|
||||||
_portable_fseek(FILE *fp, off_t offset, int whence)
|
_portable_fseek(FILE *fp, off_t offset, int whence)
|
||||||
|
@ -256,7 +256,7 @@ _portable_fseek(FILE *fp, off_t offset, int whence)
|
||||||
return fseek64(fp, offset, whence);
|
return fseek64(fp, offset, whence);
|
||||||
#elif defined(__BEOS__)
|
#elif defined(__BEOS__)
|
||||||
return _fseek(fp, offset, whence);
|
return _fseek(fp, offset, whence);
|
||||||
#elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8
|
#elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8
|
||||||
/* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable
|
/* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable
|
||||||
fsetpos() and tell() to implement fseek()*/
|
fsetpos() and tell() to implement fseek()*/
|
||||||
fpos_t pos;
|
fpos_t pos;
|
||||||
|
@ -287,7 +287,7 @@ _portable_fseek(FILE *fp, off_t offset, int whence)
|
||||||
/* a portable ftell() function
|
/* a portable ftell() function
|
||||||
Return -1 on failure with errno set appropriately, current file
|
Return -1 on failure with errno set appropriately, current file
|
||||||
position on success */
|
position on success */
|
||||||
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
||||||
fpos_t
|
fpos_t
|
||||||
#else
|
#else
|
||||||
off_t
|
off_t
|
||||||
|
@ -314,13 +314,13 @@ file_seek(PyFileObject *f, PyObject *args)
|
||||||
{
|
{
|
||||||
int whence;
|
int whence;
|
||||||
int ret;
|
int ret;
|
||||||
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
||||||
fpos_t offset, pos;
|
fpos_t offset, pos;
|
||||||
#else
|
#else
|
||||||
off_t offset;
|
off_t offset;
|
||||||
#endif /* !MS_WIN64 */
|
#endif /* !MS_WIN64 */
|
||||||
PyObject *offobj;
|
PyObject *offobj;
|
||||||
|
|
||||||
if (f->f_fp == NULL)
|
if (f->f_fp == NULL)
|
||||||
return err_closed();
|
return err_closed();
|
||||||
whence = 0;
|
whence = 0;
|
||||||
|
@ -334,7 +334,7 @@ file_seek(PyFileObject *f, PyObject *args)
|
||||||
#endif
|
#endif
|
||||||
if (PyErr_Occurred())
|
if (PyErr_Occurred())
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
Py_BEGIN_ALLOW_THREADS
|
Py_BEGIN_ALLOW_THREADS
|
||||||
errno = 0;
|
errno = 0;
|
||||||
ret = _portable_fseek(f->f_fp, offset, whence);
|
ret = _portable_fseek(f->f_fp, offset, whence);
|
||||||
|
@ -355,13 +355,13 @@ static PyObject *
|
||||||
file_truncate(PyFileObject *f, PyObject *args)
|
file_truncate(PyFileObject *f, PyObject *args)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
||||||
fpos_t newsize;
|
fpos_t newsize;
|
||||||
#else
|
#else
|
||||||
off_t newsize;
|
off_t newsize;
|
||||||
#endif
|
#endif
|
||||||
PyObject *newsizeobj;
|
PyObject *newsizeobj;
|
||||||
|
|
||||||
if (f->f_fp == NULL)
|
if (f->f_fp == NULL)
|
||||||
return err_closed();
|
return err_closed();
|
||||||
newsizeobj = NULL;
|
newsizeobj = NULL;
|
||||||
|
@ -416,7 +416,7 @@ file_truncate(PyFileObject *f, PyObject *args)
|
||||||
Py_END_ALLOW_THREADS
|
Py_END_ALLOW_THREADS
|
||||||
if (ret != 0) goto onioerror;
|
if (ret != 0) goto onioerror;
|
||||||
#endif /* !MS_WIN32 */
|
#endif /* !MS_WIN32 */
|
||||||
|
|
||||||
Py_INCREF(Py_None);
|
Py_INCREF(Py_None);
|
||||||
return Py_None;
|
return Py_None;
|
||||||
|
|
||||||
|
@ -430,7 +430,7 @@ onioerror:
|
||||||
static PyObject *
|
static PyObject *
|
||||||
file_tell(PyFileObject *f, PyObject *args)
|
file_tell(PyFileObject *f, PyObject *args)
|
||||||
{
|
{
|
||||||
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
|
||||||
fpos_t pos;
|
fpos_t pos;
|
||||||
#else
|
#else
|
||||||
off_t pos;
|
off_t pos;
|
||||||
|
@ -470,7 +470,7 @@ static PyObject *
|
||||||
file_flush(PyFileObject *f, PyObject *args)
|
file_flush(PyFileObject *f, PyObject *args)
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
if (f->f_fp == NULL)
|
if (f->f_fp == NULL)
|
||||||
return err_closed();
|
return err_closed();
|
||||||
if (!PyArg_NoArgs(args))
|
if (!PyArg_NoArgs(args))
|
||||||
|
@ -559,7 +559,7 @@ file_read(PyFileObject *f, PyObject *args)
|
||||||
long bytesrequested = -1;
|
long bytesrequested = -1;
|
||||||
size_t bytesread, buffersize, chunksize;
|
size_t bytesread, buffersize, chunksize;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
if (f->f_fp == NULL)
|
if (f->f_fp == NULL)
|
||||||
return err_closed();
|
return err_closed();
|
||||||
if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
|
if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
|
||||||
|
@ -610,7 +610,7 @@ file_readinto(PyFileObject *f, PyObject *args)
|
||||||
{
|
{
|
||||||
char *ptr;
|
char *ptr;
|
||||||
size_t ntodo, ndone, nnow;
|
size_t ntodo, ndone, nnow;
|
||||||
|
|
||||||
if (f->f_fp == NULL)
|
if (f->f_fp == NULL)
|
||||||
return err_closed();
|
return err_closed();
|
||||||
if (!PyArg_Parse(args, "w#", &ptr, &ntodo))
|
if (!PyArg_Parse(args, "w#", &ptr, &ntodo))
|
||||||
|
@ -634,6 +634,170 @@ file_readinto(PyFileObject *f, PyObject *args)
|
||||||
return PyInt_FromLong((long)ndone);
|
return PyInt_FromLong((long)ndone);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**************************************************************************
|
||||||
|
Win32 MS routine to get next line.
|
||||||
|
|
||||||
|
Under MSVC 6:
|
||||||
|
|
||||||
|
+ MS threadsafe getc is very slow (multiple layers of function calls
|
||||||
|
before+after each character, to lock+unlock the stream).
|
||||||
|
+ The stream-locking functions are MS-internal -- can't access them
|
||||||
|
from user code.
|
||||||
|
+ There's nothing Tim could find in the MS C or platform SDK libraries
|
||||||
|
that can worm around this.
|
||||||
|
+ MS fgets locks/unlocks only once per line; it's the only hook we have.
|
||||||
|
|
||||||
|
So we use fgets for speed(!), despite that it's painful.
|
||||||
|
|
||||||
|
MS realloc is also slow.
|
||||||
|
|
||||||
|
In the usual case, we have one pleasantly small line already sitting in a
|
||||||
|
stdio buffer, and we optimize heavily for that case.
|
||||||
|
|
||||||
|
CAUTION: This routine cheats, relying on how MSVC 6 works internally.
|
||||||
|
They seem to be relatively safe cheats, but we should expect this code
|
||||||
|
to break someday.
|
||||||
|
**************************************************************************/
|
||||||
|
|
||||||
|
/* if Win32 and MS's compiler */
|
||||||
|
#if defined(MS_WIN32) && defined(_MSC_VER)
|
||||||
|
#define USE_MS_GETLINE_HACK
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef USE_MS_GETLINE_HACK
|
||||||
|
static PyObject*
|
||||||
|
ms_getline_hack(FILE *fp)
|
||||||
|
{
|
||||||
|
#define INITBUFSIZE 100
|
||||||
|
#define INCBUFSIZE 1000
|
||||||
|
PyObject* v; /* the string object result */
|
||||||
|
size_t total_v_size; /* total # chars in v's buffer */
|
||||||
|
char* pvfree; /* address of next free slot */
|
||||||
|
char* pvend; /* address one beyond last free slot */
|
||||||
|
char* p; /* temp */
|
||||||
|
|
||||||
|
if (fp->_cnt > 0) { /* HACK: "_cnt" isn't advertised */
|
||||||
|
/* optimize for normal case: something sitting in the
|
||||||
|
* buffer ready to go; avoid thread fiddling & realloc
|
||||||
|
* if possible
|
||||||
|
*/
|
||||||
|
char msbuf[INITBUFSIZE];
|
||||||
|
memset(msbuf, '\n', INITBUFSIZE);
|
||||||
|
p = fgets(msbuf, INITBUFSIZE, fp);
|
||||||
|
/* since we didn't lock the file, there's no guarantee
|
||||||
|
* anything was still in the buffer
|
||||||
|
*/
|
||||||
|
if (p == NULL) {
|
||||||
|
clearerr(fp);
|
||||||
|
if (PyErr_CheckSignals())
|
||||||
|
return NULL;
|
||||||
|
v = PyString_FromStringAndSize("", 0);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
/* fgets read *something* */
|
||||||
|
p = memchr(msbuf, '\n', INITBUFSIZE);
|
||||||
|
if (p != NULL) {
|
||||||
|
/* Did the \n come from fgets or from us?
|
||||||
|
* Since fgets stops at the first \n, and then
|
||||||
|
* writes \0, if it's from fgets a \0 must be next.
|
||||||
|
* But if that's so, it could not have come from us,
|
||||||
|
* since the \n's we filled the buffer with have only
|
||||||
|
* more \n's to the right.
|
||||||
|
*/
|
||||||
|
pvend = msbuf + INITBUFSIZE;
|
||||||
|
if (p+1 < pvend && *(p+1) == '\0') {
|
||||||
|
/* it's from fgets: we win! */
|
||||||
|
v = PyString_FromStringAndSize(msbuf,
|
||||||
|
p - msbuf + 1);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
/* Must be from us: fgets didn't fill the buffer
|
||||||
|
* and didn't find a newline, so it must be the
|
||||||
|
* last and newline-free line of the file.
|
||||||
|
*/
|
||||||
|
assert(p > msbuf && *(p-1) == '\0');
|
||||||
|
v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
/* yuck: fgets overwrote all the newlines, i.e. the entire
|
||||||
|
* buffer. So this line isn't over yet, or maybe it is but
|
||||||
|
* we're exactly at EOF; in either case, we're tired <wink>.
|
||||||
|
*/
|
||||||
|
assert(msbuf[INITBUFSIZE-1] == '\0');
|
||||||
|
total_v_size = INITBUFSIZE + INCBUFSIZE;
|
||||||
|
v = PyString_FromStringAndSize((char*)NULL,
|
||||||
|
(int)total_v_size);
|
||||||
|
if (v == NULL)
|
||||||
|
return v;
|
||||||
|
/* copy over everything except the last null byte */
|
||||||
|
memcpy(BUF(v), msbuf, INITBUFSIZE-1);
|
||||||
|
pvfree = BUF(v) + INITBUFSIZE - 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* The stream isn't ready or isn't buffered. */
|
||||||
|
v = PyString_FromStringAndSize((char*)NULL, INITBUFSIZE);
|
||||||
|
if (v == NULL)
|
||||||
|
return v;
|
||||||
|
total_v_size = INITBUFSIZE;
|
||||||
|
pvfree = BUF(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Keep reading stuff into v; if it ever ends successfully, break
|
||||||
|
* after setting p one beyond the end of the line.
|
||||||
|
*/
|
||||||
|
for (;;) {
|
||||||
|
size_t nfree;
|
||||||
|
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
pvend = BUF(v) + total_v_size;
|
||||||
|
nfree = pvend - pvfree;
|
||||||
|
memset(pvfree, '\n', nfree);
|
||||||
|
p = fgets(pvfree, nfree, fp);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
|
||||||
|
if (p == NULL) {
|
||||||
|
clearerr(fp);
|
||||||
|
if (PyErr_CheckSignals()) {
|
||||||
|
Py_DECREF(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
p = pvfree;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* See the "normal case" comments above for details. */
|
||||||
|
p = memchr(pvfree, '\n', nfree);
|
||||||
|
if (p != NULL) {
|
||||||
|
if (p+1 < pvend && *(p+1) == '\0') {
|
||||||
|
/* \n came from fgets */
|
||||||
|
++p;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* \n came from us; last line of file, no newline */
|
||||||
|
assert(p > pvfree && *(p-1) == '\0');
|
||||||
|
--p;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* expand buffer and try again */
|
||||||
|
assert(*(pvend-1) == '\0');
|
||||||
|
total_v_size += INCBUFSIZE;
|
||||||
|
if (total_v_size > INT_MAX) {
|
||||||
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
|
"line is longer than a Python string can hold");
|
||||||
|
Py_DECREF(v);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (_PyString_Resize(&v, (int)total_v_size) < 0)
|
||||||
|
return NULL;
|
||||||
|
/* overwrite the trailing null byte */
|
||||||
|
pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1);
|
||||||
|
}
|
||||||
|
if (BUF(v) + total_v_size != p)
|
||||||
|
_PyString_Resize(&v, p - BUF(v));
|
||||||
|
return v;
|
||||||
|
#undef INITBUFSIZE
|
||||||
|
#undef INCBUFSIZE
|
||||||
|
}
|
||||||
|
#endif /* ifdef USE_MS_GETLINE_HACK */
|
||||||
|
|
||||||
/* Internal routine to get a line.
|
/* Internal routine to get a line.
|
||||||
Size argument interpretation:
|
Size argument interpretation:
|
||||||
|
@ -661,6 +825,10 @@ get_line(PyFileObject *f, int n)
|
||||||
size_t n1, n2;
|
size_t n1, n2;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
|
#ifdef USE_MS_GETLINE_HACK
|
||||||
|
if (n == 0)
|
||||||
|
return ms_getline_hack(fp);
|
||||||
|
#endif
|
||||||
n2 = n > 0 ? n : 100;
|
n2 = n > 0 ? n : 100;
|
||||||
v = PyString_FromStringAndSize((char *)NULL, n2);
|
v = PyString_FromStringAndSize((char *)NULL, n2);
|
||||||
if (v == NULL)
|
if (v == NULL)
|
||||||
|
@ -695,6 +863,7 @@ get_line(PyFileObject *f, int n)
|
||||||
if (n2 > INT_MAX) {
|
if (n2 > INT_MAX) {
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
"line is longer than a Python string can hold");
|
"line is longer than a Python string can hold");
|
||||||
|
Py_DECREF(v);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (_PyString_Resize(&v, n2) < 0)
|
if (_PyString_Resize(&v, n2) < 0)
|
||||||
|
@ -999,7 +1168,7 @@ file_writelines(PyFileObject *f, PyObject *args)
|
||||||
if (!PyString_Check(v)) {
|
if (!PyString_Check(v)) {
|
||||||
const char *buffer;
|
const char *buffer;
|
||||||
int len;
|
int len;
|
||||||
if (((f->f_binary &&
|
if (((f->f_binary &&
|
||||||
PyObject_AsReadBuffer(v,
|
PyObject_AsReadBuffer(v,
|
||||||
(const void**)&buffer,
|
(const void**)&buffer,
|
||||||
&len)) ||
|
&len)) ||
|
||||||
|
@ -1255,7 +1424,7 @@ int PyObject_AsFileDescriptor(PyObject *o)
|
||||||
Py_DECREF(meth);
|
Py_DECREF(meth);
|
||||||
if (fno == NULL)
|
if (fno == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
if (PyInt_Check(fno)) {
|
if (PyInt_Check(fno)) {
|
||||||
fd = PyInt_AsLong(fno);
|
fd = PyInt_AsLong(fno);
|
||||||
Py_DECREF(fno);
|
Py_DECREF(fno);
|
||||||
|
|
Loading…
Reference in New Issue