#5391 make mmap work exclusively with bytes

This commit is contained in:
Benjamin Peterson 2009-04-04 17:09:35 +00:00
parent f3abcc9d3c
commit e099b37428
4 changed files with 54 additions and 57 deletions

View File

@ -6,13 +6,13 @@
:synopsis: Interface to memory-mapped files for Unix and Windows.
Memory-mapped file objects behave like both strings and like file objects.
Unlike normal string objects, however, these are mutable. You can use mmap
objects in most places where strings are expected; for example, you can use
the :mod:`re` module to search through a memory-mapped file. Since they're
mutable, you can change a single character by doing ``obj[index] = 'a'``, or
change a substring by assigning to a slice: ``obj[i1:i2] = '...'``. You can
also read and write data starting at the current file position, and
Memory-mapped file objects behave like both :class:`bytes` and like file
objects. Unlike normal :class:`bytes` objects, however, these are mutable.
You can use mmap objects in most places where :class:`bytes` are expected; for
example, you can use the :mod:`re` module to search through a memory-mapped file.
Since they're mutable, you can change a single byte by doing ``obj[index] = 97``,
or change a subsequence by assigning to a slice: ``obj[i1:i2] = b'...'``.
You can also read and write data starting at the current file position, and
:meth:`seek` through the file to different positions.
A memory-mapped file is created by the :class:`mmap` constructor, which is
@ -94,21 +94,21 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
# write a simple example file
with open("hello.txt", "wb") as f:
f.write("Hello Python!\n")
f.write(b"Hello Python!\n")
with open("hello.txt", "r+b") as f:
# memory-map the file, size 0 means whole file
map = mmap.mmap(f.fileno(), 0)
# read content via standard file methods
print(map.readline()) # prints "Hello Python!"
print(map.readline()) # prints b"Hello Python!\n"
# read content via slice notation
print(map[:5]) # prints "Hello"
print(map[:5]) # prints b"Hello"
# update content using slice notation;
# note that new content must have same size
map[6:] = " world!\n"
map[6:] = b" world!\n"
# ... and read again using standard file methods
map.seek(0)
print(map.readline()) # prints "Hello world!"
print(map.readline()) # prints b"Hello world!\n"
# close the map
map.close()
@ -120,7 +120,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
import os
map = mmap.mmap(-1, 13)
map.write("Hello world!")
map.write(b"Hello world!")
pid = os.fork()
@ -140,10 +140,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
result in an exception being raised.
.. method:: find(string[, start[, end]])
.. method:: find(sub[, start[, end]])
Returns the lowest index in the object where the substring *string* is
found, such that *string* is contained in the range [*start*, *end*].
Returns the lowest index in the object where the subsequence *sub* is
found, such that *sub* is contained in the range [*start*, *end*].
Optional arguments *start* and *end* are interpreted as in slice notation.
Returns ``-1`` on failure.
@ -172,15 +172,15 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
.. method:: read(num)
Return a string containing up to *num* bytes starting from the current
file position; the file position is updated to point after the bytes that
were returned.
Return a :class:`bytes` containing up to *num* bytes starting from the
current file position; the file position is updated to point after the
bytes that were returned.
.. method:: read_byte()
Returns a string of length 1 containing the character at the current file
position, and advances the file position by 1.
Returns a byte at the current file position as an integer, and advances
the file position by 1.
.. method:: readline()
@ -196,10 +196,10 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
throw a :exc:`TypeError` exception.
.. method:: rfind(string[, start[, end]])
.. method:: rfind(sub[, start[, end]])
Returns the highest index in the object where the substring *string* is
found, such that *string* is contained in the range [*start*, *end*].
Returns the highest index in the object where the subsequence *sub* is
found, such that *sub* is contained in the range [*start*, *end*].
Optional arguments *start* and *end* are interpreted as in slice notation.
Returns ``-1`` on failure.
@ -223,9 +223,9 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
Returns the current position of the file pointer.
.. method:: write(string)
.. method:: write(bytes)
Write the bytes in *string* into memory at the current position of the
Write the bytes in *bytes* into memory at the current position of the
file pointer; the file position is updated to point after the bytes that
were written. If the mmap was created with :const:`ACCESS_READ`, then
writing to it will throw a :exc:`TypeError` exception.
@ -233,7 +233,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length
.. method:: write_byte(byte)
Write the single-character string *byte* into memory at the current
Write the integer *byte* into memory at the current
position of the file pointer; the file position is advanced by ``1``. If
the mmap was created with :const:`ACCESS_READ`, then writing to it will
throw a :exc:`TypeError` exception.

View File

@ -37,7 +37,7 @@ class MmapTests(unittest.TestCase):
# Simple sanity checks
tp = str(type(m)) # SF bug 128713: segfaulted on Linux
self.assertEqual(m.find('foo'), PAGESIZE)
self.assertEqual(m.find(b'foo'), PAGESIZE)
self.assertEqual(len(m), 2*PAGESIZE)
@ -262,38 +262,38 @@ class MmapTests(unittest.TestCase):
def test_find_end(self):
# test the new 'end' parameter works as expected
f = open(TESTFN, 'w+')
data = 'one two ones'
f = open(TESTFN, 'wb+')
data = b'one two ones'
n = len(data)
f.write(data)
f.flush()
m = mmap.mmap(f.fileno(), n)
f.close()
self.assertEqual(m.find('one'), 0)
self.assertEqual(m.find('ones'), 8)
self.assertEqual(m.find('one', 0, -1), 0)
self.assertEqual(m.find('one', 1), 8)
self.assertEqual(m.find('one', 1, -1), 8)
self.assertEqual(m.find('one', 1, -2), -1)
self.assertEqual(m.find(b'one'), 0)
self.assertEqual(m.find(b'ones'), 8)
self.assertEqual(m.find(b'one', 0, -1), 0)
self.assertEqual(m.find(b'one', 1), 8)
self.assertEqual(m.find(b'one', 1, -1), 8)
self.assertEqual(m.find(b'one', 1, -2), -1)
def test_rfind(self):
# test the new 'end' parameter works as expected
f = open(TESTFN, 'w+')
data = 'one two ones'
f = open(TESTFN, 'wb+')
data = b'one two ones'
n = len(data)
f.write(data)
f.flush()
m = mmap.mmap(f.fileno(), n)
f.close()
self.assertEqual(m.rfind('one'), 8)
self.assertEqual(m.rfind('one '), 0)
self.assertEqual(m.rfind('one', 0, -1), 8)
self.assertEqual(m.rfind('one', 0, -2), 0)
self.assertEqual(m.rfind('one', 1, -1), 8)
self.assertEqual(m.rfind('one', 1, -2), -1)
self.assertEqual(m.rfind(b'one'), 8)
self.assertEqual(m.rfind(b'one '), 0)
self.assertEqual(m.rfind(b'one', 0, -1), 8)
self.assertEqual(m.rfind(b'one', 0, -2), 0)
self.assertEqual(m.rfind(b'one', 1, -1), 8)
self.assertEqual(m.rfind(b'one', 1, -2), -1)
def test_double_close(self):
@ -506,21 +506,15 @@ class MmapTests(unittest.TestCase):
# Test write_byte()
for i in range(len(data)):
self.assertEquals(m.tell(), i)
m.write_byte(data[i:i+1])
m.write_byte(data[i])
self.assertEquals(m.tell(), i+1)
self.assertRaises(ValueError, m.write_byte, b"x")
self.assertRaises(ValueError, m.write_byte, b"x"[0])
self.assertEquals(m[:], data)
# Test read_byte()
m.seek(0)
for i in range(len(data)):
self.assertEquals(m.tell(), i)
# XXX: Disable this test for now because it's not clear
# which type of object m.read_byte returns. Currently, it
# returns 1-length str (unicode).
if 0:
self.assertEquals(m.read_byte(), data[i:i+1])
else:
m.read_byte()
self.assertEquals(m.read_byte(), data[i])
self.assertEquals(m.tell(), i+1)
self.assertRaises(ValueError, m.read_byte)
# Test read()

View File

@ -11,6 +11,7 @@ What's New in Python 3.1 alpha 2?
Core and Builtins
-----------------
- Implement PEP 378, Format Specifier for Thousands Separator, for
integers.
@ -128,6 +129,8 @@ Library
Extension Modules
-----------------
- Issue #5391: mmap now deals exclusively with bytes.
- Issue #5463: In struct module, remove deprecated overflow wrapping
when packing an integer: struct.pack('=L', -1) now raises
struct.error instead of returning b'\xff\xff\xff\xff'. The

View File

@ -204,7 +204,7 @@ mmap_read_byte_method(mmap_object *self,
if (self->pos < self->size) {
char value = self->data[self->pos];
self->pos += 1;
return Py_BuildValue("c", value);
return Py_BuildValue("b", value);
} else {
PyErr_SetString(PyExc_ValueError, "read byte out of range");
return NULL;
@ -264,7 +264,7 @@ mmap_gfind(mmap_object *self,
Py_ssize_t len;
CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, reverse ? "s#|nn:rfind" : "s#|nn:find",
if (!PyArg_ParseTuple(args, reverse ? "y#|nn:rfind" : "y#|nn:find",
&needle, &len, &start, &end)) {
return NULL;
} else {
@ -348,7 +348,7 @@ mmap_write_method(mmap_object *self,
char *data;
CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, "s#:write", &data, &length))
if (!PyArg_ParseTuple(args, "y#:write", &data, &length))
return(NULL);
if (!is_writable(self))
@ -371,7 +371,7 @@ mmap_write_byte_method(mmap_object *self,
char value;
CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, "c:write_byte", &value))
if (!PyArg_ParseTuple(args, "b:write_byte", &value))
return(NULL);
if (!is_writable(self))