gh-69093: Add indexing and slicing support to sqlite3.Blob (#91599)

Authored-by: Aviv Palivoda <palaviv@gmail.com>
Co-authored-by: Erlend E. Aasland <erlend.aasland@innova.no>
This commit is contained in:
Erlend Egeberg Aasland 2022-04-22 03:45:16 +02:00 committed by GitHub
parent 1317b70f89
commit 29afb7d2ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 349 additions and 16 deletions

View File

@ -2,15 +2,18 @@ import sqlite3
con = sqlite3.connect(":memory:")
con.execute("create table test(blob_col blob)")
con.execute("insert into test(blob_col) values (zeroblob(10))")
con.execute("insert into test(blob_col) values (zeroblob(13))")
# Write to our blob, using two write operations:
with con.blobopen("test", "blob_col", 1) as blob:
blob.write(b"Hello")
blob.write(b"World")
blob.write(b"hello, ")
blob.write(b"world.")
# Modify the first and last bytes of our blob
blob[0] = b"H"
blob[-1] = b"!"
# Read the contents of our blob
with con.blobopen("test", "blob_col", 1) as blob:
greeting = blob.read()
print(greeting) # outputs "b'HelloWorld'"
print(greeting) # outputs "b'Hello, world!'"

View File

@ -1051,9 +1051,10 @@ Blob Objects
.. class:: Blob
A :class:`Blob` instance is a :term:`file-like object` that can read and write
data in an SQLite :abbr:`BLOB (Binary Large OBject)`. Call ``len(blob)`` to
get the size (number of bytes) of the blob.
A :class:`Blob` instance is a :term:`file-like object`
that can read and write data in an SQLite :abbr:`BLOB (Binary Large OBject)`.
Call :func:`len(blob) <len>` to get the size (number of bytes) of the blob.
Use indices and :term:`slices <slice>` for direct access to the blob data.
Use the :class:`Blob` as a :term:`context manager` to ensure that the blob
handle is closed after use.

View File

@ -33,7 +33,7 @@ from test.support import (
check_disallow_instantiation,
threading_helper,
)
from _testcapi import INT_MAX
from _testcapi import INT_MAX, ULLONG_MAX
from os import SEEK_SET, SEEK_CUR, SEEK_END
from test.support.os_helper import TESTFN, unlink, temp_dir
@ -1138,6 +1138,13 @@ class BlobTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "data longer than blob"):
self.blob.write(b"a" * 1000)
self.blob.seek(0, SEEK_SET)
n = len(self.blob)
self.blob.write(b"a" * (n-1))
self.blob.write(b"a")
with self.assertRaisesRegex(ValueError, "data longer than blob"):
self.blob.write(b"a")
def test_blob_write_error_row_changed(self):
self.cx.execute("update test set b='aaaa' where rowid=1")
with self.assertRaises(sqlite.OperationalError):
@ -1162,12 +1169,127 @@ class BlobTests(unittest.TestCase):
with self.assertRaisesRegex(sqlite.OperationalError, regex):
self.cx.blobopen(*args, **kwds)
def test_blob_length(self):
# len() on the open blob handle must report the 50 bytes of the fixture row.
self.assertEqual(len(self.blob), 50)
def test_blob_get_item(self):
# An integer subscript returns a one-byte bytes object; -1 addresses the
# last byte of the blob.
self.assertEqual(self.blob[5], b"b")
self.assertEqual(self.blob[6], b"l")
self.assertEqual(self.blob[7], b"o")
self.assertEqual(self.blob[8], b"b")
self.assertEqual(self.blob[-1], b"!")
def test_blob_set_item(self):
# Assigning one byte through an index must be visible when the column is
# read back with a plain SELECT, not just through the blob handle.
self.blob[0] = b"b"
expected = b"b" + self.data[1:]
actual = self.cx.execute("select b from test").fetchone()[0]
self.assertEqual(actual, expected)
def test_blob_set_item_with_offset(self):
# Index assignment addresses absolute positions: it must hit the first and
# last byte even though the file-like offset was moved to the end first.
self.blob.seek(0, SEEK_END)
self.assertEqual(self.blob.read(), b"") # verify that we're at EOB
self.blob[0] = b"T"
self.blob[-1] = b"."
self.blob.seek(0, SEEK_SET)
expected = b"This blob data string is exactly fifty bytes long."
self.assertEqual(self.blob.read(), expected)
def test_blob_set_buffer_object(self):
# Objects exporting the buffer protocol (memoryview, bytearray, array)
# are accepted as the assigned value, first for single items ...
from array import array
self.blob[0] = memoryview(b"1")
self.assertEqual(self.blob[0], b"1")
self.blob[1] = bytearray(b"2")
self.assertEqual(self.blob[1], b"2")
self.blob[2] = array("b", [4])
self.assertEqual(self.blob[2], b"\x04")
# ... and then for five-byte slices.
self.blob[0:5] = memoryview(b"12345")
self.assertEqual(self.blob[0:5], b"12345")
self.blob[0:5] = bytearray(b"23456")
self.assertEqual(self.blob[0:5], b"23456")
self.blob[0:5] = array("b", [1, 2, 3, 4, 5])
self.assertEqual(self.blob[0:5], b"\x01\x02\x03\x04\x05")
def test_blob_set_item_negative_index(self):
# A negative index is valid on the assignment side as well as for reads.
self.blob[-1] = b"z"
self.assertEqual(self.blob[-1], b"z")
def test_blob_get_slice(self):
# A contiguous slice returns the covered bytes as a bytes object.
self.assertEqual(self.blob[5:14], b"blob data")
def test_blob_get_empty_slice(self):
# A slice whose start equals its stop yields empty bytes, not an error.
self.assertEqual(self.blob[5:5], b"")
def test_blob_get_slice_negative_index(self):
# A negative stop bound must select the same bytes as slicing the
# reference bytes object with the same bounds.
self.assertEqual(self.blob[5:-5], self.data[5:-5])
def test_blob_get_slice_with_skip(self):
# An extended slice with step 2 returns every other byte of the range.
self.assertEqual(self.blob[0:10:2], b"ti lb")
def test_blob_set_slice(self):
# Overwriting a contiguous slice must change exactly those bytes in the
# stored column and leave the remainder untouched.
self.blob[0:5] = b"12345"
expected = b"12345" + self.data[5:]
actual = self.cx.execute("select b from test").fetchone()[0]
self.assertEqual(actual, expected)
def test_blob_set_empty_slice(self):
# Assigning empty bytes to an empty slice is accepted and leaves the
# blob contents unchanged.
self.blob[0:0] = b""
self.assertEqual(self.blob[:], self.data)
def test_blob_set_slice_with_skip(self):
# With step 2 only every other byte is replaced; the bytes in between
# keep their original values.
self.blob[0:10:2] = b"12345"
actual = self.cx.execute("select b from test").fetchone()[0]
expected = b"1h2s3b4o5 " + self.data[10:]
self.assertEqual(actual, expected)
def test_blob_mapping_invalid_index_type(self):
# Float slice bounds, float indices and str keys are all rejected with
# a TypeError mentioning that indices must be integers.
msg = "indices must be integers"
with self.assertRaisesRegex(TypeError, msg):
self.blob[5:5.5]
with self.assertRaisesRegex(TypeError, msg):
self.blob[1.5]
with self.assertRaisesRegex(TypeError, msg):
self.blob["a"] = b"b"
def test_blob_get_item_error(self):
# len(blob) is the first index past the end; 105 and -105 lie far outside
# the 50-byte fixture in either direction.
dataset = [len(self.blob), 105, -105]
for idx in dataset:
with self.subTest(idx=idx):
with self.assertRaisesRegex(IndexError, "index out of range"):
self.blob[idx]
# An integer too large for the index conversion is reported as an
# IndexError as well, with a "cannot fit 'int'" message.
with self.assertRaisesRegex(IndexError, "cannot fit 'int'"):
self.blob[ULLONG_MAX]
def test_blob_set_item_error(self):
# Item assignment needs exactly one byte, deletion is unsupported, and
# the index must lie inside the blob.
with self.assertRaisesRegex(ValueError, "must be a single byte"):
self.blob[0] = b"multiple"
with self.assertRaisesRegex(TypeError, "doesn't support.*deletion"):
del self.blob[0]
with self.assertRaisesRegex(IndexError, "Blob index out of range"):
self.blob[1000] = b"a"
def test_blob_set_slice_error(self):
# The assigned value must be exactly as long as the slice: both a too
# short and a too long value are rejected.
with self.assertRaisesRegex(IndexError, "wrong size"):
self.blob[5:10] = b"a"
with self.assertRaisesRegex(IndexError, "wrong size"):
self.blob[5:10] = b"a" * 1000
with self.assertRaisesRegex(TypeError, "doesn't support.*deletion"):
del self.blob[5:10]
with self.assertRaisesRegex(ValueError, "step cannot be zero"):
self.blob[5:10:0] = b"12345"
# A strided memoryview is not a simple contiguous buffer, so requesting
# its buffer fails before any size check takes place.
with self.assertRaises(BufferError):
self.blob[5:10] = memoryview(b"abcde")[::2]
def test_blob_sequence_not_supported(self):
with self.assertRaises(TypeError):
with self.assertRaisesRegex(TypeError, "unsupported operand"):
self.blob + self.blob
with self.assertRaises(TypeError):
with self.assertRaisesRegex(TypeError, "unsupported operand"):
self.blob * 5
with self.assertRaises(TypeError):
with self.assertRaisesRegex(TypeError, "is not iterable"):
b"a" in self.blob
def test_blob_context_manager(self):
@ -1209,6 +1331,14 @@ class BlobTests(unittest.TestCase):
blob.__enter__()
with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
blob.__exit__(None, None, None)
with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
len(blob)
with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
blob[0]
with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
blob[0:1]
with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
blob[0] = b""
def test_blob_closed_db_read(self):
with memory_database() as cx:

View File

@ -0,0 +1,2 @@
Add indexing and slicing support to :class:`sqlite3.Blob`. Patch by Aviv Palivoda
and Erlend E. Aasland.

View File

@ -120,8 +120,11 @@ blob_seterror(pysqlite_Blob *self, int rc)
}
static PyObject *
inner_read(pysqlite_Blob *self, int length, int offset)
inner_read(pysqlite_Blob *self, Py_ssize_t length, Py_ssize_t offset)
{
assert(length <= sqlite3_blob_bytes(self->blob));
assert(offset <= sqlite3_blob_bytes(self->blob));
PyObject *buffer = PyBytes_FromStringAndSize(NULL, length);
if (buffer == NULL) {
return NULL;
@ -130,7 +133,7 @@ inner_read(pysqlite_Blob *self, int length, int offset)
char *raw_buffer = PyBytes_AS_STRING(buffer);
int rc;
Py_BEGIN_ALLOW_THREADS
rc = sqlite3_blob_read(self->blob, raw_buffer, length, offset);
rc = sqlite3_blob_read(self->blob, raw_buffer, (int)length, (int)offset);
Py_END_ALLOW_THREADS
if (rc != SQLITE_OK) {
@ -181,17 +184,20 @@ blob_read_impl(pysqlite_Blob *self, int length)
};
static int
inner_write(pysqlite_Blob *self, const void *buf, Py_ssize_t len, int offset)
inner_write(pysqlite_Blob *self, const void *buf, Py_ssize_t len,
Py_ssize_t offset)
{
int remaining_len = sqlite3_blob_bytes(self->blob) - self->offset;
Py_ssize_t blob_len = sqlite3_blob_bytes(self->blob);
Py_ssize_t remaining_len = blob_len - offset;
if (len > remaining_len) {
PyErr_SetString(PyExc_ValueError, "data longer than blob length");
return -1;
}
assert(offset <= blob_len);
int rc;
Py_BEGIN_ALLOW_THREADS
rc = sqlite3_blob_write(self->blob, buf, (int)len, offset);
rc = sqlite3_blob_write(self->blob, buf, (int)len, (int)offset);
Py_END_ALLOW_THREADS
if (rc != SQLITE_OK) {
@ -347,6 +353,192 @@ blob_exit_impl(pysqlite_Blob *self, PyObject *type, PyObject *val,
Py_RETURN_FALSE;
}
/* mp_length slot: return the size of the open blob in bytes.
 *
 * Returns -1 when check_blob() rejects the handle; check_blob() is expected
 * to have set the exception in that case (defined elsewhere in this file).
 */
static Py_ssize_t
blob_length(pysqlite_Blob *self)
{
    if (!check_blob(self)) {
        return -1;
    }
    return sqlite3_blob_bytes(self->blob);
}
/* Turn an index object into a valid, non-negative offset into the blob.
 *
 * Negative indices count from the end of the blob.  On failure an
 * IndexError is set (conversion overflow or position outside the blob)
 * and -1 is returned; valid offsets are never negative, so callers can
 * test the result with `< 0`.
 */
static Py_ssize_t
get_subscript_index(pysqlite_Blob *self, PyObject *item)
{
    Py_ssize_t idx = PyNumber_AsSsize_t(item, PyExc_IndexError);
    if (idx == -1 && PyErr_Occurred()) {
        return -1;
    }

    const int size = sqlite3_blob_bytes(self->blob);
    const Py_ssize_t pos = (idx < 0) ? idx + size : idx;
    if (pos >= 0 && pos < size) {
        return pos;
    }

    PyErr_SetString(PyExc_IndexError, "Blob index out of range");
    return -1;
}
/* Implement blob[i]: read the single byte at index `item`.
 *
 * Returns a new one-byte bytes object, or NULL with an exception set when
 * the index is invalid or the read fails.
 */
static PyObject *
subscript_index(pysqlite_Blob *self, PyObject *item)
{
    const Py_ssize_t offset = get_subscript_index(self, item);
    if (offset >= 0) {
        return inner_read(self, 1, offset);
    }
    return NULL;
}
/* Unpack a slice object and clip it against the current blob size.
 *
 * On success the adjusted bounds are stored in *start, *stop and *step,
 * the number of selected bytes in *slicelen, and 0 is returned.  Returns
 * -1 with an exception set if the slice cannot be unpacked.
 */
static int
get_slice_info(pysqlite_Blob *self, PyObject *item, Py_ssize_t *start,
               Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelen)
{
    if (PySlice_Unpack(item, start, stop, step) < 0) {
        return -1;
    }

    const Py_ssize_t blob_size = sqlite3_blob_bytes(self->blob);
    *slicelen = PySlice_AdjustIndices(blob_size, start, stop, *step);
    return 0;
}
/* Implement blob[start:stop:step].
 *
 * Returns a new bytes object holding the selected bytes, or NULL with an
 * exception set.  Contiguous slices (step == 1) are read directly.  For
 * any other step the smallest byte range covering every selected index
 * is read once and the wanted bytes are picked out of it.
 */
static PyObject *
subscript_slice(pysqlite_Blob *self, PyObject *item)
{
    Py_ssize_t start, stop, step, len;
    if (get_slice_info(self, item, &start, &stop, &step, &len) < 0) {
        return NULL;
    }

    if (step == 1) {
        return inner_read(self, len, start);
    }
    if (len == 0) {
        // Empty extended slice: 'stop - start' may be negative here, so
        // never derive a read length from it.
        return PyBytes_FromStringAndSize(NULL, 0);
    }

    // 'last' is the final selected index.  With a negative step it is the
    // lowest address, so the covering range is [low, high] rather than
    // [start, stop).
    const Py_ssize_t last = start + (len - 1) * step;
    const Py_ssize_t low = (step > 0) ? start : last;
    const Py_ssize_t high = (step > 0) ? last : start;
    const Py_ssize_t span = high - low + 1;

    PyObject *blob = inner_read(self, span, low);
    if (blob == NULL) {
        return NULL;
    }
    PyObject *result = PyBytes_FromStringAndSize(NULL, len);
    if (result != NULL) {
        const char *blob_buf = PyBytes_AS_STRING(blob);
        char *res_buf = PyBytes_AS_STRING(result);
        // 'j' indexes the temporary buffer, which begins at blob offset 'low'.
        for (Py_ssize_t i = 0, j = start - low; i < len; i++, j += step) {
            res_buf[i] = blob_buf[j];
        }
    }
    // Drop the temporary buffer on both the success and the failure path.
    Py_DECREF(blob);
    return result;
}
/* mp_subscript slot: dispatch blob[item] to the index or slice reader.
 *
 * Returns a new bytes object, or NULL with an exception set.  Keys that
 * are neither index-like nor slices raise TypeError.
 */
static PyObject *
blob_subscript(pysqlite_Blob *self, PyObject *item)
{
    PyObject *result = NULL;

    if (check_blob(self)) {
        if (PyIndex_Check(item)) {
            result = subscript_index(self, item);
        }
        else if (PySlice_Check(item)) {
            result = subscript_slice(self, item);
        }
        else {
            PyErr_SetString(PyExc_TypeError, "Blob indices must be integers");
        }
    }
    return result;
}
/* Implement blob[i] = value.
 *
 * `value` must export a simple buffer of exactly one byte.  A NULL value
 * means `del blob[i]`, which is rejected with TypeError.  Returns the
 * result of inner_write() on the write path, or -1 with an exception set
 * when validation fails.
 */
static int
ass_subscript_index(pysqlite_Blob *self, PyObject *item, PyObject *value)
{
    if (value == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "Blob doesn't support item deletion");
        return -1;
    }

    const Py_ssize_t offset = get_subscript_index(self, item);
    if (offset < 0) {
        return -1;
    }

    Py_buffer view;
    if (PyObject_GetBuffer(value, &view, PyBUF_SIMPLE) < 0) {
        return -1;
    }

    int status;
    if (view.len == 1) {
        status = inner_write(self, (const char *)view.buf, 1, offset);
    }
    else {
        PyErr_SetString(PyExc_ValueError, "Blob assignment must be a single byte");
        status = -1;
    }
    PyBuffer_Release(&view);
    return status;
}
/* Implement blob[start:stop:step] = value.
 *
 * `value` must export a simple buffer whose length equals the number of
 * selected bytes; the blob cannot be resized.  A NULL value means
 * `del blob[...]`, which is rejected with TypeError.  Assigning to an
 * empty slice succeeds without inspecting `value`.
 *
 * Contiguous slices are written directly.  For any other step the smallest
 * byte range covering every selected index is read, patched in memory and
 * written back in one piece.
 *
 * Returns the result of inner_write() on the write path, or -1 with an
 * exception set when validation or the intermediate read fails.
 */
static int
ass_subscript_slice(pysqlite_Blob *self, PyObject *item, PyObject *value)
{
    if (value == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "Blob doesn't support slice deletion");
        return -1;
    }

    Py_ssize_t start, stop, step, len;
    if (get_slice_info(self, item, &start, &stop, &step, &len) < 0) {
        return -1;
    }

    if (len == 0) {
        return 0;
    }

    Py_buffer vbuf;
    if (PyObject_GetBuffer(value, &vbuf, PyBUF_SIMPLE) < 0) {
        return -1;
    }

    int rc = -1;
    if (vbuf.len != len) {
        PyErr_SetString(PyExc_IndexError,
                        "Blob slice assignment is wrong size");
    }
    else if (step == 1) {
        rc = inner_write(self, vbuf.buf, len, start);
    }
    else {
        // 'last' is the final selected index.  With a negative step it is
        // the lowest address, so 'stop - start' would be negative; use the
        // covering range [low, high] instead.
        const Py_ssize_t last = start + (len - 1) * step;
        const Py_ssize_t low = (step > 0) ? start : last;
        const Py_ssize_t high = (step > 0) ? last : start;
        const Py_ssize_t span = high - low + 1;

        PyObject *blob_bytes = inner_read(self, span, low);
        if (blob_bytes != NULL) {
            // The bytes object was just created by inner_read() and is not
            // shared, so patching its buffer in place is safe.
            char *blob_buf = PyBytes_AS_STRING(blob_bytes);
            const char *src = (const char *)vbuf.buf;
            // 'j' indexes the temporary buffer, which begins at offset 'low'.
            for (Py_ssize_t i = 0, j = start - low; i < len; i++, j += step) {
                blob_buf[j] = src[i];
            }
            rc = inner_write(self, blob_buf, span, low);
            Py_DECREF(blob_bytes);
        }
    }
    PyBuffer_Release(&vbuf);
    return rc;
}
/* mp_ass_subscript slot: dispatch blob[item] = value (or del blob[item])
 * to the index or slice writer.
 *
 * Returns the selected writer's result, or -1 with an exception set when
 * the blob check fails or the key is neither index-like nor a slice
 * (TypeError).
 */
static int
blob_ass_subscript(pysqlite_Blob *self, PyObject *item, PyObject *value)
{
    int status = -1;

    if (check_blob(self)) {
        if (PyIndex_Check(item)) {
            status = ass_subscript_index(self, item, value);
        }
        else if (PySlice_Check(item)) {
            status = ass_subscript_slice(self, item, value);
        }
        else {
            PyErr_SetString(PyExc_TypeError, "Blob indices must be integers");
        }
    }
    return status;
}
static PyMethodDef blob_methods[] = {
BLOB_CLOSE_METHODDEF
@ -370,6 +562,11 @@ static PyType_Slot blob_slots[] = {
{Py_tp_clear, blob_clear},
{Py_tp_methods, blob_methods},
{Py_tp_members, blob_members},
// Mapping protocol
{Py_mp_length, blob_length},
{Py_mp_subscript, blob_subscript},
{Py_mp_ass_subscript, blob_ass_subscript},
{0, NULL},
};