diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index bde3202e8c0..69501dfe089 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -3,7 +3,7 @@ import os from array import array from weakref import proxy -from test.test_support import verify, TESTFN, TestFailed +from test.test_support import verify, TESTFN, TestFailed, findfile from UserList import UserList # verify weak references @@ -228,3 +228,113 @@ try: bug801631() finally: os.unlink(TESTFN) + +# Test the complex interaction when mixing file-iteration and the various +# read* methods. Ostensibly, the mixture could just be tested to work +# when it should work according to the Python language, instead of fail +# when it should fail according to the current CPython implementation. +# People don't always program Python the way they should, though, and the +# implemenation might change in subtle ways, so we explicitly test for +# errors, too; the test will just have to be updated when the +# implementation changes. +dataoffset = 16384 +filler = "ham\n" +assert not dataoffset % len(filler), \ + "dataoffset must be multiple of len(filler)" +nchunks = dataoffset // len(filler) +testlines = [ + "spam, spam and eggs\n", + "eggs, spam, ham and spam\n", + "saussages, spam, spam and eggs\n", + "spam, ham, spam and eggs\n", + "spam, spam, spam, spam, spam, ham, spam\n", + "wonderful spaaaaaam.\n" +] +methods = [("readline", ()), ("read", ()), ("readlines", ()), + ("readinto", (array("c", " "*100),))] + +try: + # Prepare the testfile + bag = open(TESTFN, "w") + bag.write(filler * nchunks) + bag.writelines(testlines) + bag.close() + # Test for appropriate errors mixing read* and iteration + for methodname, args in methods: + f = open(TESTFN) + if f.next() != filler: + raise TestFailed, "Broken testfile" + meth = getattr(f, methodname) + try: + meth(*args) + except ValueError: + pass + else: + raise TestFailed("%s%r after next() didn't raise ValueError" % + (methodname, args)) + f.close() + + # Test to see if harmless (by accident) mixing of read* and iteration + # still works. This depends on the size of the internal iteration + # buffer (currently 8192,) but we can test it in a flexible manner. + # Each line in the bag o' ham is 4 bytes ("h", "a", "m", "\n"), so + # 4096 lines of that should get us exactly on the buffer boundary for + # any power-of-2 buffersize between 4 and 16384 (inclusive). + f = open(TESTFN) + for i in range(nchunks): + f.next() + testline = testlines.pop(0) + try: + line = f.readline() + except ValueError: + raise TestFailed("readline() after next() with supposedly empty " + "iteration-buffer failed anyway") + if line != testline: + raise TestFailed("readline() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + testline = testlines.pop(0) + buf = array("c", "\x00" * len(testline)) + try: + f.readinto(buf) + except ValueError: + raise TestFailed("readinto() after next() with supposedly empty " + "iteration-buffer failed anyway") + line = buf.tostring() + if line != testline: + raise TestFailed("readinto() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + + testline = testlines.pop(0) + try: + line = f.read(len(testline)) + except ValueError: + raise TestFailed("read() after next() with supposedly empty " + "iteration-buffer failed anyway") + if line != testline: + raise TestFailed("read() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + try: + lines = f.readlines() + except ValueError: + raise TestFailed("readlines() after next() with supposedly empty " + "iteration-buffer failed anyway") + if lines != testlines: + raise TestFailed("readlines() after next() with empty buffer " + "failed. Got %r, expected %r" % (line, testline)) + # Reading after iteration hit EOF shouldn't hurt either + f = open(TESTFN) + for line in f: + pass + try: + f.readline() + f.readinto(buf) + f.read() + f.readlines() + except ValueError: + raise TestFailed("read* failed after next() consumed file") +finally: + # Bare 'except' so as not to mask errors in the test + try: + os.unlink(TESTFN) + except: + pass diff --git a/Objects/fileobject.c b/Objects/fileobject.c index b34dd525926..6faabc3b79c 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -344,6 +344,17 @@ err_closed(void) return NULL; } +/* Refuse regular file I/O if there's data in the iteration-buffer. + * Mixing them would cause data to arrive out of order, as the read* + * methods don't use the iteration buffer. */ +static PyObject * +err_iterbuffered(void) +{ + PyErr_SetString(PyExc_ValueError, + "Mixing iteration and read methods would lose data"); + return NULL; +} + static void drop_readahead(PyFileObject *); /* Methods */ @@ -795,6 +806,11 @@ file_read(PyFileObject *f, PyObject *args) if (f->f_fp == NULL) return err_closed(); + /* refuse to mix with f.next() */ + if (f->f_buf != NULL && + (f->f_bufend - f->f_bufptr) > 0 && + f->f_buf[0] != '\0') + return err_iterbuffered(); if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested)) return NULL; if (bytesrequested < 0) @@ -858,6 +874,11 @@ file_readinto(PyFileObject *f, PyObject *args) if (f->f_fp == NULL) return err_closed(); + /* refuse to mix with f.next() */ + if (f->f_buf != NULL && + (f->f_bufend - f->f_bufptr) > 0 && + f->f_buf[0] != '\0') + return err_iterbuffered(); if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo)) return NULL; ndone = 0; @@ -1211,9 +1232,15 @@ PyFile_GetLine(PyObject *f, int n) } if (PyFile_Check(f)) { - if (((PyFileObject*)f)->f_fp == NULL) + PyFileObject *fo = (PyFileObject *)f; + if (fo->f_fp == NULL) return err_closed(); - result = get_line((PyFileObject *)f, n); + /* refuse to mix with f.next() */ + if (fo->f_buf != NULL && + (fo->f_bufend - fo->f_bufptr) > 0 && + fo->f_buf[0] != '\0') + return err_iterbuffered(); + result = get_line(fo, n); } else { PyObject *reader; @@ -1296,6 +1323,11 @@ file_readline(PyFileObject *f, PyObject *args) if (f->f_fp == NULL) return err_closed(); + /* refuse to mix with f.next() */ + if (f->f_buf != NULL && + (f->f_bufend - f->f_bufptr) > 0 && + f->f_buf[0] != '\0') + return err_iterbuffered(); if (!PyArg_ParseTuple(args, "|i:readline", &n)) return NULL; if (n == 0) @@ -1324,6 +1356,11 @@ file_readlines(PyFileObject *f, PyObject *args) if (f->f_fp == NULL) return err_closed(); + /* refuse to mix with f.next() */ + if (f->f_buf != NULL && + (f->f_bufend - f->f_bufptr) > 0 && + f->f_buf[0] != '\0') + return err_iterbuffered(); if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint)) return NULL; if ((list = PyList_New(0)) == NULL)