From 6252e10ed906eb419a75b310f7c0d6696a4eeb46 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 23 May 2007 20:51:02 +0000 Subject: [PATCH] Make gdbm and dumbdbm use byte strings. Updated their tests. --- Lib/dumbdbm.py | 29 ++++++++++++++------------- Lib/test/test_anydbm.py | 16 +++++++-------- Lib/test/test_dumbdbm.py | 43 ++++++++++++++++++++-------------------- Lib/test/test_gdbm.py | 2 +- Lib/test/test_whichdb.py | 2 +- Lib/whichdb.py | 13 ++++++------ Modules/gdbmmodule.c | 24 ++++++++++++---------- 7 files changed, 68 insertions(+), 61 deletions(-) diff --git a/Lib/dumbdbm.py b/Lib/dumbdbm.py index 7724ac6693e..eb1c6133247 100644 --- a/Lib/dumbdbm.py +++ b/Lib/dumbdbm.py @@ -21,12 +21,11 @@ is read when the database is opened, and some updates rewrite the whole index) """ +import io as _io import os as _os import __builtin__ import UserDict -_open = __builtin__.open - _BLOCKSIZE = 512 error = IOError # For anydbm @@ -42,7 +41,7 @@ class _Database(UserDict.DictMixin): # _commit() finish successfully, we can't ignore shutdown races # here, and _commit() must not reference any globals. _os = _os # for _commit() - _open = _open # for _commit() + _io = _io # for _commit() def __init__(self, filebasename, mode): self._mode = mode @@ -66,9 +65,9 @@ class _Database(UserDict.DictMixin): # Mod by Jack: create data file if needed try: - f = _open(self._datfile, 'r') + f = _io.open(self._datfile, 'r') except IOError: - f = _open(self._datfile, 'w') + f = _io.open(self._datfile, 'w') self._chmod(self._datfile) f.close() self._update() @@ -77,7 +76,7 @@ class _Database(UserDict.DictMixin): def _update(self): self._index = {} try: - f = _open(self._dirfile) + f = _io.open(self._dirfile, 'r') except IOError: pass else: @@ -107,7 +106,7 @@ class _Database(UserDict.DictMixin): except self._os.error: pass - f = self._open(self._dirfile, 'w') + f = self._io.open(self._dirfile, 'w') self._chmod(self._dirfile) for key, pos_and_siz_pair in self._index.items(): f.write("%r, %r\n" % (key, pos_and_siz_pair)) @@ -117,7 +116,7 @@ class _Database(UserDict.DictMixin): def __getitem__(self, key): pos, siz = self._index[key] # may raise KeyError - f = _open(self._datfile, 'rb') + f = _io.open(self._datfile, 'rb') f.seek(pos) dat = f.read(siz) f.close() @@ -128,11 +127,11 @@ class _Database(UserDict.DictMixin): # to get to an aligned offset. Return pair # (starting offset of val, len(val)) def _addval(self, val): - f = _open(self._datfile, 'rb+') + f = _io.open(self._datfile, 'rb+') f.seek(0, 2) pos = int(f.tell()) npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE - f.write('\0'*(npos-pos)) + f.write(b'\0'*(npos-pos)) pos = npos f.write(val) f.close() @@ -143,7 +142,7 @@ class _Database(UserDict.DictMixin): # pos to hold val, without overwriting some other value. Return # pair (pos, len(val)). def _setval(self, pos, val): - f = _open(self._datfile, 'rb+') + f = _io.open(self._datfile, 'rb+') f.seek(pos) f.write(val) f.close() @@ -154,14 +153,16 @@ class _Database(UserDict.DictMixin): # the in-memory index dict, and append one to the directory file. def _addkey(self, key, pos_and_siz_pair): self._index[key] = pos_and_siz_pair - f = _open(self._dirfile, 'a') + f = _io.open(self._dirfile, 'a') self._chmod(self._dirfile) f.write("%r, %r\n" % (key, pos_and_siz_pair)) f.close() def __setitem__(self, key, val): - if not type(key) == type('') == type(val): - raise TypeError, "keys and values must be strings" + if not isinstance(key, basestring): + raise TypeError("keys must be strings") + if not isinstance(val, (str8, bytes)): + raise TypeError("values must be byte strings") if key not in self._index: self._addkey(key, self._addval(val)) else: diff --git a/Lib/test/test_anydbm.py b/Lib/test/test_anydbm.py index f9803542463..f5eae42f09b 100644 --- a/Lib/test/test_anydbm.py +++ b/Lib/test/test_anydbm.py @@ -21,13 +21,13 @@ def _delete_files(): pass class AnyDBMTestCase(unittest.TestCase): - _dict = {'0': '', - 'a': 'Python:', - 'b': 'Programming', - 'c': 'the', - 'd': 'way', - 'f': 'Guido', - 'g': 'intended' + _dict = {'0': b'', + 'a': b'Python:', + 'b': b'Programming', + 'c': b'the', + 'd': b'way', + 'f': b'Guido', + 'g': b'intended', } def __init__(self, *args): @@ -44,7 +44,7 @@ class AnyDBMTestCase(unittest.TestCase): def test_anydbm_modification(self): self.init_db() f = anydbm.open(_fname, 'c') - self._dict['g'] = f['g'] = "indented" + self._dict['g'] = f['g'] = b"indented" self.read_helper(f) f.close() diff --git a/Lib/test/test_dumbdbm.py b/Lib/test/test_dumbdbm.py index ba94d989012..c7d29ec008e 100644 --- a/Lib/test/test_dumbdbm.py +++ b/Lib/test/test_dumbdbm.py @@ -3,6 +3,7 @@ Original by Roger E. Masse """ +import io import os import unittest import dumbdbm @@ -18,13 +19,13 @@ def _delete_files(): pass class DumbDBMTestCase(unittest.TestCase): - _dict = {'0': '', - 'a': 'Python:', - 'b': 'Programming', - 'c': 'the', - 'd': 'way', - 'f': 'Guido', - 'g': 'intended' + _dict = {'0': b'', + 'a': b'Python:', + 'b': b'Programming', + 'c': b'the', + 'd': b'way', + 'f': b'Guido', + 'g': b'intended', } def __init__(self, *args): @@ -64,15 +65,15 @@ class DumbDBMTestCase(unittest.TestCase): def test_close_twice(self): f = dumbdbm.open(_fname) - f['a'] = 'b' - self.assertEqual(f['a'], 'b') + f['a'] = b'b' + self.assertEqual(f['a'], b'b') f.close() f.close() def test_dumbdbm_modification(self): self.init_db() f = dumbdbm.open(_fname, 'w') - self._dict['g'] = f['g'] = "indented" + self._dict['g'] = f['g'] = b"indented" self.read_helper(f) f.close() @@ -91,29 +92,29 @@ class DumbDBMTestCase(unittest.TestCase): def test_write_write_read(self): # test for bug #482460 f = dumbdbm.open(_fname) - f['1'] = 'hello' - f['1'] = 'hello2' + f['1'] = b'hello' + f['1'] = b'hello2' f.close() f = dumbdbm.open(_fname) - self.assertEqual(f['1'], 'hello2') + self.assertEqual(f['1'], b'hello2') f.close() def test_line_endings(self): # test for bug #1172763: dumbdbm would die if the line endings # weren't what was expected. f = dumbdbm.open(_fname) - f['1'] = 'hello' - f['2'] = 'hello2' + f['1'] = b'hello' + f['2'] = b'hello2' f.close() # Mangle the file by adding \r before each newline - data = open(_fname + '.dir').read() - data = data.replace('\n', '\r\n') - open(_fname + '.dir', 'wb').write(data) + data = io.open(_fname + '.dir', 'rb').read() + data = data.replace(b'\n', b'\r\n') + io.open(_fname + '.dir', 'wb').write(data) f = dumbdbm.open(_fname) - self.assertEqual(f['1'], 'hello') - self.assertEqual(f['2'], 'hello2') + self.assertEqual(f['1'], b'hello') + self.assertEqual(f['2'], b'hello2') def read_helper(self, f): @@ -147,7 +148,7 @@ class DumbDBMTestCase(unittest.TestCase): del d[k] del f[k] else: - v = random.choice('abc') * random.randrange(10000) + v = random.choice((b'a', b'b', b'c')) * random.randrange(10000) d[k] = v f[k] = v self.assertEqual(f[k], v) diff --git a/Lib/test/test_gdbm.py b/Lib/test/test_gdbm.py index ae76e39178f..4daca617121 100755 --- a/Lib/test/test_gdbm.py +++ b/Lib/test/test_gdbm.py @@ -12,7 +12,7 @@ filename = TESTFN g = gdbm.open(filename, 'c') verify(g.keys() == []) g['a'] = 'b' -g['12345678910'] = '019237410982340912840198242' +g['12345678910'] = b'019237410982340912840198242' a = g.keys() if verbose: print('Test gdbm file keys: ', a) diff --git a/Lib/test/test_whichdb.py b/Lib/test/test_whichdb.py index f2652c1679a..6cb5e4fd6a8 100644 --- a/Lib/test/test_whichdb.py +++ b/Lib/test/test_whichdb.py @@ -51,7 +51,7 @@ for name in anydbm._names: self.assertEqual(name, whichdb.whichdb(_fname)) # Now add a key f = mod.open(_fname, 'w') - f["1"] = "1" + f["1"] = b"1" f.close() self.assertEqual(name, whichdb.whichdb(_fname)) setattr(WhichDBTestCase,"test_whichdb_%s" % name, test_whichdb_name) diff --git a/Lib/whichdb.py b/Lib/whichdb.py index 4d7a5602b48..752bbb14c40 100644 --- a/Lib/whichdb.py +++ b/Lib/whichdb.py @@ -1,6 +1,7 @@ # !/usr/bin/env python """Guess which db package to use to open a db file.""" +import io import os import struct import sys @@ -29,18 +30,18 @@ def whichdb(filename): # Check for dbm first -- this has a .pag and a .dir file try: - f = open(filename + os.extsep + "pag", "rb") + f = io.open(filename + os.extsep + "pag", "rb") f.close() # dbm linked with gdbm on OS/2 doesn't have .dir file if not (dbm.library == "GNU gdbm" and sys.platform == "os2emx"): - f = open(filename + os.extsep + "dir", "rb") + f = io.open(filename + os.extsep + "dir", "rb") f.close() return "dbm" except IOError: # some dbm emulations based on Berkeley DB generate a .db file # some do not, but they should be caught by the dbhash checks try: - f = open(filename + os.extsep + "db", "rb") + f = io.open(filename + os.extsep + "db", "rb") f.close() # guarantee we can actually open the file using dbm # kind of overkill, but since we are dealing with emulations @@ -60,9 +61,9 @@ def whichdb(filename): # dumbdbm files with no keys are empty if size == 0: return "dumbdbm" - f = open(filename + os.extsep + "dir", "rb") + f = io.open(filename + os.extsep + "dir", "rb") try: - if f.read(1) in ("'", '"'): + if f.read(1) in (b"'", b'"'): return "dumbdbm" finally: f.close() @@ -71,7 +72,7 @@ def whichdb(filename): # See if the file exists, return None if not try: - f = open(filename, "rb") + f = io.open(filename, "rb") except IOError: return None diff --git a/Modules/gdbmmodule.c b/Modules/gdbmmodule.c index 0ffb4b4f169..f3fee4d1244 100644 --- a/Modules/gdbmmodule.c +++ b/Modules/gdbmmodule.c @@ -90,8 +90,8 @@ static Py_ssize_t dbm_length(dbmobject *dp) { if (dp->di_dbm == NULL) { - PyErr_SetString(DbmError, "GDBM object has already been closed"); - return -1; + PyErr_SetString(DbmError, "GDBM object has already been closed"); + return -1; } if (dp->di_size < 0) { datum key,okey; @@ -127,11 +127,10 @@ dbm_subscript(dbmobject *dp, register PyObject *key) } drec = gdbm_fetch(dp->di_dbm, krec); if (drec.dptr == 0) { - PyErr_SetString(PyExc_KeyError, - PyString_AS_STRING((PyStringObject *)key)); + PyErr_SetObject(PyExc_KeyError, key); return NULL; } - v = PyString_FromStringAndSize(drec.dptr, drec.dsize); + v = PyBytes_FromStringAndSize(drec.dptr, drec.dsize); free(drec.dptr); return v; } @@ -148,21 +147,20 @@ dbm_ass_sub(dbmobject *dp, PyObject *v, PyObject *w) } if (dp->di_dbm == NULL) { PyErr_SetString(DbmError, - "GDBM object has already been closed"); - return -1; + "GDBM object has already been closed"); + return -1; } dp->di_size = -1; if (w == NULL) { if (gdbm_delete(dp->di_dbm, krec) < 0) { - PyErr_SetString(PyExc_KeyError, - PyString_AS_STRING((PyStringObject *)v)); + PyErr_SetObject(PyExc_KeyError, v); return -1; } } else { if (!PyArg_Parse(w, "s#", &drec.dptr, &drec.dsize)) { PyErr_SetString(PyExc_TypeError, - "gdbm mappings have string elements only"); + "gdbm mappings have byte string elements only"); return -1; } errno = 0; @@ -198,6 +196,7 @@ dbm_close(register dbmobject *dp, PyObject *unused) return Py_None; } +/* XXX Should return a set or a set view */ PyDoc_STRVAR(dbm_keys__doc__, "keys() -> list_of_keys\n\ Get a list of all keys in the database."); @@ -252,6 +251,11 @@ dbm_contains(PyObject *self, PyObject *arg) "GDBM object has already been closed"); return -1; } + if (PyUnicode_Check(arg)) { + arg = _PyUnicode_AsDefaultEncodedString(arg, NULL); + if (arg == NULL) + return -1; + } if (!PyString_Check(arg)) { PyErr_Format(PyExc_TypeError, "gdbm key must be string, not %.100s",