From e099b37428ab72cf70c745e941b9767fcad8eaa3 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sat, 4 Apr 2009 17:09:35 +0000 Subject: [PATCH] #5391 make mmap work exclusively with bytes --- Doc/library/mmap.rst | 54 +++++++++++++++++++++---------------------- Lib/test/test_mmap.py | 46 ++++++++++++++++-------------------- Misc/NEWS | 3 +++ Modules/mmapmodule.c | 8 +++---- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst index ceca29a0102..c3dba43446d 100644 --- a/Doc/library/mmap.rst +++ b/Doc/library/mmap.rst @@ -6,13 +6,13 @@ :synopsis: Interface to memory-mapped files for Unix and Windows. -Memory-mapped file objects behave like both strings and like file objects. -Unlike normal string objects, however, these are mutable. You can use mmap -objects in most places where strings are expected; for example, you can use -the :mod:`re` module to search through a memory-mapped file. Since they're -mutable, you can change a single character by doing ``obj[index] = 'a'``, or -change a substring by assigning to a slice: ``obj[i1:i2] = '...'``. You can -also read and write data starting at the current file position, and +Memory-mapped file objects behave like both :class:`bytes` and like file +objects. Unlike normal :class:`bytes` objects, however, these are mutable. +You can use mmap objects in most places where :class:`bytes` are expected; for +example, you can use the :mod:`re` module to search through a memory-mapped file. +Since they're mutable, you can change a single byte by doing ``obj[index] = 97``, +or change a subsequence by assigning to a slice: ``obj[i1:i2] = b'...'``. +You can also read and write data starting at the current file position, and :meth:`seek` through the file to different positions. 
A memory-mapped file is created by the :class:`mmap` constructor, which is @@ -94,21 +94,21 @@ To map anonymous memory, -1 should be passed as the fileno along with the length # write a simple example file with open("hello.txt", "wb") as f: - f.write("Hello Python!\n") + f.write(b"Hello Python!\n") with open("hello.txt", "r+b") as f: # memory-map the file, size 0 means whole file map = mmap.mmap(f.fileno(), 0) # read content via standard file methods - print(map.readline()) # prints "Hello Python!" + print(map.readline()) # prints b"Hello Python!\n" # read content via slice notation - print(map[:5]) # prints "Hello" + print(map[:5]) # prints b"Hello" # update content using slice notation; # note that new content must have same size - map[6:] = " world!\n" + map[6:] = b" world!\n" # ... and read again using standard file methods map.seek(0) - print(map.readline()) # prints "Hello world!" + print(map.readline()) # prints b"Hello world!\n" # close the map map.close() @@ -120,7 +120,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length import os map = mmap.mmap(-1, 13) - map.write("Hello world!") + map.write(b"Hello world!") pid = os.fork() @@ -140,10 +140,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length result in an exception being raised. - .. method:: find(string[, start[, end]]) + .. method:: find(sub[, start[, end]]) - Returns the lowest index in the object where the substring *string* is - found, such that *string* is contained in the range [*start*, *end*]. + Returns the lowest index in the object where the subsequence *sub* is + found, such that *sub* is contained in the range [*start*, *end*]. Optional arguments *start* and *end* are interpreted as in slice notation. Returns ``-1`` on failure. @@ -172,15 +172,15 @@ To map anonymous memory, -1 should be passed as the fileno along with the length .. 
method:: read(num) - Return a string containing up to *num* bytes starting from the current - file position; the file position is updated to point after the bytes that - were returned. + Return a :class:`bytes` containing up to *num* bytes starting from the + current file position; the file position is updated to point after the + bytes that were returned. .. method:: read_byte() - Returns a string of length 1 containing the character at the current file - position, and advances the file position by 1. + Returns a byte at the current file position as an integer, and advances + the file position by 1. .. method:: readline() @@ -196,10 +196,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length throw a :exc:`TypeError` exception. - .. method:: rfind(string[, start[, end]]) + .. method:: rfind(sub[, start[, end]]) - Returns the highest index in the object where the substring *string* is - found, such that *string* is contained in the range [*start*, *end*]. + Returns the highest index in the object where the subsequence *sub* is + found, such that *sub* is contained in the range [*start*, *end*]. Optional arguments *start* and *end* are interpreted as in slice notation. Returns ``-1`` on failure. @@ -223,9 +223,9 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Returns the current position of the file pointer. - .. method:: write(string) + .. method:: write(bytes) - Write the bytes in *string* into memory at the current position of the + Write the bytes in *bytes* into memory at the current position of the file pointer; the file position is updated to point after the bytes that were written. If the mmap was created with :const:`ACCESS_READ`, then writing to it will throw a :exc:`TypeError` exception. @@ -233,7 +233,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length .. 
method:: write_byte(byte) - Write the single-character string *byte* into memory at the current + Write the integer *byte* into memory at the current position of the file pointer; the file position is advanced by ``1``. If the mmap was created with :const:`ACCESS_READ`, then writing to it will throw a :exc:`TypeError` exception. diff --git a/Lib/test/test_mmap.py b/Lib/test/test_mmap.py index 99cc63f077d..f3e28ccac9e 100644 --- a/Lib/test/test_mmap.py +++ b/Lib/test/test_mmap.py @@ -37,7 +37,7 @@ class MmapTests(unittest.TestCase): # Simple sanity checks tp = str(type(m)) # SF bug 128713: segfaulted on Linux - self.assertEqual(m.find('foo'), PAGESIZE) + self.assertEqual(m.find(b'foo'), PAGESIZE) self.assertEqual(len(m), 2*PAGESIZE) @@ -262,38 +262,38 @@ class MmapTests(unittest.TestCase): def test_find_end(self): # test the new 'end' parameter works as expected - f = open(TESTFN, 'w+') - data = 'one two ones' + f = open(TESTFN, 'wb+') + data = b'one two ones' n = len(data) f.write(data) f.flush() m = mmap.mmap(f.fileno(), n) f.close() - self.assertEqual(m.find('one'), 0) - self.assertEqual(m.find('ones'), 8) - self.assertEqual(m.find('one', 0, -1), 0) - self.assertEqual(m.find('one', 1), 8) - self.assertEqual(m.find('one', 1, -1), 8) - self.assertEqual(m.find('one', 1, -2), -1) + self.assertEqual(m.find(b'one'), 0) + self.assertEqual(m.find(b'ones'), 8) + self.assertEqual(m.find(b'one', 0, -1), 0) + self.assertEqual(m.find(b'one', 1), 8) + self.assertEqual(m.find(b'one', 1, -1), 8) + self.assertEqual(m.find(b'one', 1, -2), -1) def test_rfind(self): # test the new 'end' parameter works as expected - f = open(TESTFN, 'w+') - data = 'one two ones' + f = open(TESTFN, 'wb+') + data = b'one two ones' n = len(data) f.write(data) f.flush() m = mmap.mmap(f.fileno(), n) f.close() - self.assertEqual(m.rfind('one'), 8) - self.assertEqual(m.rfind('one '), 0) - self.assertEqual(m.rfind('one', 0, -1), 8) - self.assertEqual(m.rfind('one', 0, -2), 0) - 
self.assertEqual(m.rfind('one', 1, -1), 8) - self.assertEqual(m.rfind('one', 1, -2), -1) + self.assertEqual(m.rfind(b'one'), 8) + self.assertEqual(m.rfind(b'one '), 0) + self.assertEqual(m.rfind(b'one', 0, -1), 8) + self.assertEqual(m.rfind(b'one', 0, -2), 0) + self.assertEqual(m.rfind(b'one', 1, -1), 8) + self.assertEqual(m.rfind(b'one', 1, -2), -1) def test_double_close(self): @@ -506,21 +506,15 @@ class MmapTests(unittest.TestCase): # Test write_byte() for i in range(len(data)): self.assertEquals(m.tell(), i) - m.write_byte(data[i:i+1]) + m.write_byte(data[i]) self.assertEquals(m.tell(), i+1) - self.assertRaises(ValueError, m.write_byte, b"x") + self.assertRaises(ValueError, m.write_byte, b"x"[0]) self.assertEquals(m[:], data) # Test read_byte() m.seek(0) for i in range(len(data)): self.assertEquals(m.tell(), i) - # XXX: Disable this test for now because it's not clear - # which type of object m.read_byte returns. Currently, it - # returns 1-length str (unicode). - if 0: - self.assertEquals(m.read_byte(), data[i:i+1]) - else: - m.read_byte() + self.assertEquals(m.read_byte(), data[i]) self.assertEquals(m.tell(), i+1) self.assertRaises(ValueError, m.read_byte) # Test read() diff --git a/Misc/NEWS b/Misc/NEWS index 0774988c995..b567ad14d37 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -11,6 +11,7 @@ What's New in Python 3.1 alpha 2? Core and Builtins ----------------- + - Implement PEP 378, Format Specifier for Thousands Separator, for integers. @@ -128,6 +129,8 @@ Library Extension Modules ----------------- +- Issue #5391: mmap now deals exclusively with bytes. + - Issue #5463: In struct module, remove deprecated overflow wrapping when packing an integer: struct.pack('=L', -1) now raises struct.error instead of returning b'\xff\xff\xff\xff'. 
The diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c index b9c46cd919a..299eafdf234 100644 --- a/Modules/mmapmodule.c +++ b/Modules/mmapmodule.c @@ -204,7 +204,7 @@ mmap_read_byte_method(mmap_object *self, if (self->pos < self->size) { char value = self->data[self->pos]; self->pos += 1; - return Py_BuildValue("c", value); + return Py_BuildValue("b", value); } else { PyErr_SetString(PyExc_ValueError, "read byte out of range"); return NULL; @@ -264,7 +264,7 @@ mmap_gfind(mmap_object *self, Py_ssize_t len; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, reverse ? "s#|nn:rfind" : "s#|nn:find", + if (!PyArg_ParseTuple(args, reverse ? "y#|nn:rfind" : "y#|nn:find", &needle, &len, &start, &end)) { return NULL; } else { @@ -348,7 +348,7 @@ mmap_write_method(mmap_object *self, char *data; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "s#:write", &data, &length)) + if (!PyArg_ParseTuple(args, "y#:write", &data, &length)) return(NULL); if (!is_writable(self)) @@ -371,7 +371,7 @@ mmap_write_byte_method(mmap_object *self, char value; CHECK_VALID(NULL); - if (!PyArg_ParseTuple(args, "c:write_byte", &value)) + if (!PyArg_ParseTuple(args, "b:write_byte", &value)) return(NULL); if (!is_writable(self))