From 1fc240e85150f5cb39502a87cc9a4a0a8cbe5ab0 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Fri, 26 Oct 2001 05:06:50 +0000 Subject: [PATCH] Generalize dictionary() to accept a sequence of 2-sequences. At the outer level, the iterator protocol is used for memory-efficiency (the outer sequence may be very large if fully materialized); at the inner level, PySequence_Fast() is used for time-efficiency (these should always be sequences of length 2). dictobject.c, new functions PyDict_{Merge,Update}FromSeq2. These are wholly analogous to PyDict_{Merge,Update}, but process a sequence-of-2-sequences argument instead of a mapping object. For now, I left these functions file static, so no corresponding doc changes. It's tempting to change dict.update() to allow a sequence-of-2-seqs argument too. Also changed the name of dictionary's keyword argument from "mapping" to "x". Got a better name? "mapping_or_sequence_of_pairs" isn't attractive, although more so than "mosop". abstract.h, abstract.tex: Added new PySequence_Fast_GET_SIZE function, much faster than going thru the all-purpose PySequence_Size. libfuncs.tex: - Document dictionary(). - Fiddle tuple() and list() to admit that their argument is optional. - The long-winded repetitions of "a sequence, a container that supports iteration, or an iterator object" are getting to be a PITA. Many months ago I suggested factoring this out into "iterable object", where the definition of that could include being explicit about generators too (as is, I'm not sure a reader outside of PythonLabs could guess that "an iterator object" includes a generator call). - Please check my curly braces -- I'm going blind <0.9 wink>. 
abstract.c, PySequence_Tuple(): When PyObject_GetIter() fails, leave its error msg alone now (the msg it produces has improved since PySequence_Tuple was generalized to accept iterable objects, and PySequence_Tuple was also stomping on the msg in cases it shouldn't have even before PyObject_GetIter grew a better msg). --- Doc/api/abstract.tex | 17 ++++-- Doc/lib/libfuncs.tex | 26 ++++++++- Include/abstract.h | 12 +++-- Lib/test/test_descr.py | 51 +++++++++++++++--- Misc/NEWS | 9 ++++ Objects/abstract.c | 2 +- Objects/dictobject.c | 118 +++++++++++++++++++++++++++++++++++------ 7 files changed, 199 insertions(+), 36 deletions(-) diff --git a/Doc/api/abstract.tex b/Doc/api/abstract.tex index 8d271df571f..fae8475e4f5 100644 --- a/Doc/api/abstract.tex +++ b/Doc/api/abstract.tex @@ -125,7 +125,7 @@ for which they do not apply, they will raise a Python exception. the Unicode string representation on success, \NULL{} on failure. This is the equivalent of the Python expression \samp{unistr(\var{o})}. Called by the - \function{unistr()}\bifuncindex{unistr} built-in function. + \function{unistr()}\bifuncindex{unistr} built-in function. \end{cfuncdesc} \begin{cfuncdesc}{int}{PyObject_IsInstance}{PyObject *inst, PyObject *cls} @@ -715,10 +715,17 @@ determination. \begin{cfuncdesc}{PyObject*}{PySequence_Fast_GET_ITEM}{PyObject *o, int i} Return the \var{i}th element of \var{o}, assuming that \var{o} was - returned by \cfunction{PySequence_Fast()}, and that \var{i} is - within bounds. The caller is expected to get the length of the - sequence by calling \cfunction{PySequence_Size()} on \var{o}, since - lists and tuples are guaranteed to always return their true length. + returned by \cfunction{PySequence_Fast()}, \var{o} is not \NULL{}, + and that \var{i} is within bounds. 
+\end{cfuncdesc} + +\begin{cfuncdesc}{int}{PySequence_Fast_GET_SIZE}{PyObject *o} + Returns the length of \var{o}, assuming that \var{o} was + returned by \cfunction{PySequence_Fast()} and that \var{o} is + not \NULL{}. The size can also be gotten by calling + \cfunction{PySequence_Size()} on \var{o}, but + \cfunction{PySequence_Fast_GET_SIZE()} is faster because it can + assume \var{o} is a list or tuple. \end{cfuncdesc} diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex index b19d4a643a2..e9baeb36c7a 100644 --- a/Doc/lib/libfuncs.tex +++ b/Doc/lib/libfuncs.tex @@ -175,6 +175,28 @@ def my_import(name): \code{del \var{x}.\var{foobar}}. \end{funcdesc} +\begin{funcdesc}{dictionary}{\optional{mapping-or-sequence}} + Return a new dictionary initialized from the optional argument. + If an argument is not specified, return a new empty dictionary. + If the argument is a mapping object, return a dictionary mapping the + same keys to the same values as does the mapping object. + Else the argument must be a sequence, a container that supports + iteration, or an iterator object. The elements of the argument must + each also be of one of those kinds, and each must in turn contain + exactly two objects. The first is used as a key in the new dictionary, + and the second as the key's value. If a given key is seen more than + once, the last value associated with it is retained in the new + dictionary. + For example, these all return a dictionary equal to + \code{\{1: 2, 2: 3\}}: + \code{dictionary(\{1: 2, 2: 3\})}, + \code{dictionary(\{1: 2, 2: 3\}.items())}, + \code{dictionary(\{1: 2, 2: 3\}.iteritems())}, + \code{dictionary(zip((1, 2), (2, 3)))}, + \code{dictionary([[2, 3], [1, 2]])}, and + \code{dictionary([(i-1, i) for i in (2, 3)])}. +\end{funcdesc} + \begin{funcdesc}{dir}{\optional{object}} Without arguments, return the list of names in the current local symbol table. 
With an argument, attempts to return a list of valid @@ -472,7 +494,7 @@ def my_import(name): may be a sequence (string, tuple or list) or a mapping (dictionary). \end{funcdesc} -\begin{funcdesc}{list}{sequence} +\begin{funcdesc}{list}{\optional{sequence}} Return a list whose items are the same and in the same order as \var{sequence}'s items. \var{sequence} may be either a sequence, a container that supports iteration, or an iterator object. If @@ -726,7 +748,7 @@ def my_import(name): printable string. \end{funcdesc} -\begin{funcdesc}{tuple}{sequence} +\begin{funcdesc}{tuple}{\optional{sequence}} Return a tuple whose items are the same and in the same order as \var{sequence}'s items. \var{sequence} may be a sequence, a container that supports iteration, or an iterator object. diff --git a/Include/abstract.h b/Include/abstract.h index d736efcb909..351149db6f2 100644 --- a/Include/abstract.h +++ b/Include/abstract.h @@ -951,26 +951,30 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ DL_IMPORT(PyObject *) PySequence_List(PyObject *o); - /* Returns the sequence, o, as a list on success, and NULL on failure. This is equivalent to the Python expression: list(o) */ DL_IMPORT(PyObject *) PySequence_Fast(PyObject *o, const char* m); - /* Returns the sequence, o, as a tuple, unless it's already a tuple or list. Use PySequence_Fast_GET_ITEM to access the - members of this list. + members of this list, and PySequence_Fast_GET_SIZE to get its length. Returns NULL on failure. If the object does not support iteration, raises a TypeError exception with m as the message text. */ +#define PySequence_Fast_GET_SIZE(o) \ + (PyList_Check(o) ? PyList_GET_SIZE(o) : PyTuple_GET_SIZE(o)) + /* + Return the size of o, assuming that o was returned by + PySequence_Fast and is not NULL. + */ + #define PySequence_Fast_GET_ITEM(o, i)\ (PyList_Check(o) ? 
PyList_GET_ITEM(o, i) : PyTuple_GET_ITEM(o, i)) - /* Return the ith element of o, assuming that o was returned by PySequence_Fast, and that i is within bounds. diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 87f4f0f6f47..230d6a16c8c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -178,15 +178,25 @@ def dict_constructor(): vereq(d, {}) d = dictionary({}) vereq(d, {}) - d = dictionary(mapping={}) + d = dictionary(x={}) vereq(d, {}) d = dictionary({1: 2, 'a': 'b'}) vereq(d, {1: 2, 'a': 'b'}) + vereq(d, dictionary(d.items())) + vereq(d, dictionary(x=d.iteritems())) for badarg in 0, 0L, 0j, "0", [0], (0,): try: dictionary(badarg) except TypeError: pass + except ValueError: + if badarg == "0": + # It's a sequence, and its elements are also sequences (gotta + # love strings ), but they aren't of length 2, so this + # one seemed better as a ValueError than a TypeError. + pass + else: + raise TestFailed("no TypeError from dictionary(%r)" % badarg) else: raise TestFailed("no TypeError from dictionary(%r)" % badarg) try: @@ -194,7 +204,7 @@ def dict_constructor(): except TypeError: pass else: - raise TestFailed("no TypeError from dictionary(senseless={}") + raise TestFailed("no TypeError from dictionary(senseless={})") try: dictionary({}, {}) @@ -204,11 +214,9 @@ def dict_constructor(): raise TestFailed("no TypeError from dictionary({}, {})") class Mapping: + # Lacks a .keys() method; will be added later. dict = {1:2, 3:4, 'a':1j} - def __getitem__(self, i): - return self.dict[i] - try: dictionary(Mapping()) except TypeError: @@ -217,9 +225,36 @@ def dict_constructor(): raise TestFailed("no TypeError from dictionary(incomplete mapping)") Mapping.keys = lambda self: self.dict.keys() - d = dictionary(mapping=Mapping()) + Mapping.__getitem__ = lambda self, i: self.dict[i] + d = dictionary(x=Mapping()) vereq(d, Mapping.dict) + # Init from sequence of iterable objects, each producing a 2-sequence. 
+ class AddressBookEntry: + def __init__(self, first, last): + self.first = first + self.last = last + def __iter__(self): + return iter([self.first, self.last]) + + d = dictionary([AddressBookEntry('Tim', 'Warsaw'), + AddressBookEntry('Barry', 'Peters'), + AddressBookEntry('Tim', 'Peters'), + AddressBookEntry('Barry', 'Warsaw')]) + vereq(d, {'Barry': 'Warsaw', 'Tim': 'Peters'}) + + d = dictionary(zip(range(4), range(1, 5))) + vereq(d, dictionary([(i, i+1) for i in range(4)])) + + # Bad sequence lengths. + for bad in ['tooshort'], ['too', 'long', 'by 1']: + try: + dictionary(bad) + except ValueError: + pass + else: + raise TestFailed("no ValueError from dictionary(%r)" % bad) + def test_dir(): if verbose: print "Testing dir() ..." @@ -1830,7 +1865,7 @@ def keywords(): vereq(unicode(string='abc', errors='strict'), u'abc') vereq(tuple(sequence=range(3)), (0, 1, 2)) vereq(list(sequence=(0, 1, 2)), range(3)) - vereq(dictionary(mapping={1: 2}), {1: 2}) + vereq(dictionary(x={1: 2}), {1: 2}) for constructor in (int, float, long, complex, str, unicode, tuple, list, dictionary, file): @@ -2371,7 +2406,7 @@ def kwdargs(): vereq(f.__call__(a=42), 42) a = [] list.__init__(a, sequence=[0, 1, 2]) - vereq(a, [0, 1, 2]) + vereq(a, [0, 1, 2]) def test_main(): class_docstrings() diff --git a/Misc/NEWS b/Misc/NEWS index aa0ab81607f..e55b9de40b9 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -4,6 +4,11 @@ XXX Planned XXX Release date: 14-Nov-2001 Type/class unification and new-style classes +- dictionary() now accepts an iterable object producing 2-sequences. + For example, dictionary(d.items()) == d for any dictionary d. The + argument, and the elements of the argument, can be any iterable + objects. + - Methods of built-in types now properly check for keyword arguments (formerly these were silently ignored). The only built-in methods that take keyword arguments are __call__, __init__ and __new__. 
@@ -31,6 +36,10 @@ Build C API +- New function PySequence_Fast_GET_SIZE() returns the size of a non- + NULL result from PySequence_Fast(), more quickly than calling + PySequence_Size(). + New platforms - Updated RISCOS port by Dietmar Schwertberger. diff --git a/Objects/abstract.c b/Objects/abstract.c index 8a715c8fa28..6b9201ba361 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1278,7 +1278,7 @@ PySequence_Tuple(PyObject *v) /* Get iterator. */ it = PyObject_GetIter(v); if (it == NULL) - return type_error("tuple() argument must support iteration"); + return NULL; /* Guess result size and allocate space. */ n = PySequence_Size(v); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 829f76d383c..f901499fe12 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -993,7 +993,89 @@ dict_update(PyObject *mp, PyObject *other) /* Update unconditionally replaces existing items. Merge has a 3rd argument 'override'; if set, it acts like Update, - otherwise it leaves existing items unchanged. */ + otherwise it leaves existing items unchanged. + + PyDict_{Update,Merge} update/merge from a mapping object. + + PyDict_{Update,Merge}FromSeq2 update/merge from any iterable object + producing iterable objects of length 2. +*/ + +static int +PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override) +{ + PyObject *it; /* iter(seq2) */ + int i; /* index into seq2 of current element */ + PyObject *item; /* seq2[i] */ + PyObject *fast; /* item as a 2-tuple or 2-list */ + + assert(d != NULL); + assert(PyDict_Check(d)); + assert(seq2 != NULL); + + it = PyObject_GetIter(seq2); + if (it == NULL) + return -1; + + for (i = 0; ; ++i) { + PyObject *key, *value; + int n; + + fast = NULL; + item = PyIter_Next(it); + if (item == NULL) { + if (PyErr_Occurred()) + goto Fail; + break; + } + + /* Convert item to sequence, and verify length 2. 
*/ + fast = PySequence_Fast(item, ""); + if (fast == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, + "cannot convert dictionary update " + "sequence element #%d to a sequence", + i); + goto Fail; + } + n = PySequence_Fast_GET_SIZE(fast); + if (n != 2) { + PyErr_Format(PyExc_ValueError, + "dictionary update sequence element #%d " + "has length %d; 2 is required", + i, n); + goto Fail; + } + + /* Update/merge with this (key, value) pair. */ + key = PySequence_Fast_GET_ITEM(fast, 0); + value = PySequence_Fast_GET_ITEM(fast, 1); + if (override || PyDict_GetItem(d, key) == NULL) { + int status = PyDict_SetItem(d, key, value); + if (status < 0) + goto Fail; + } + Py_DECREF(fast); + Py_DECREF(item); + } + + i = 0; + goto Return; +Fail: + Py_XDECREF(item); + Py_XDECREF(fast); + i = -1; +Return: + Py_DECREF(it); + return i; +} + +static int +PyDict_UpdateFromSeq2(PyObject *d, PyObject *seq2) +{ + return PyDict_MergeFromSeq2(d, seq2, 1); +} int PyDict_Update(PyObject *a, PyObject *b) @@ -1699,23 +1781,20 @@ static int dict_init(PyObject *self, PyObject *args, PyObject *kwds) { PyObject *arg = NULL; - static char *kwlist[] = {"mapping", 0}; + static char *kwlist[] = {"x", 0}; + int result = 0; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:dictionary", kwlist, &arg)) - return -1; - if (arg != NULL) { - if (PyDict_Merge(self, arg, 1) < 0) { - /* An error like "AttributeError: keys" is too - cryptic in this context. 
*/ - if (PyErr_ExceptionMatches(PyExc_AttributeError)) { - PyErr_SetString(PyExc_TypeError, - "argument must be of a mapping type"); - } - return -1; - } + result = -1; + + else if (arg != NULL) { + if (PyObject_HasAttrString(arg, "keys")) + result = PyDict_Merge(self, arg, 1); + else + result = PyDict_MergeFromSeq2(self, arg, 1); } - return 0; + return result; } static PyObject * @@ -1725,8 +1804,15 @@ dict_iter(dictobject *dict) } static char dictionary_doc[] = -"dictionary() -> new empty dictionary\n" -"dictionary(mapping) -> new dict initialized from mapping's key+value pairs"; +"dictionary() -> new empty dictionary.\n" +"dictionary(mapping) -> new dict initialized from a mapping object's\n" +" (key, value) pairs.\n" +"dictionary(seq) -> new dict initialized from the 2-element elements of\n" +" a sequence; for example, from mapping.items(). seq must be an\n" +" iterable object, producing iterable objects each producing exactly\n" +" two objects, the first of which is used as a key and the second as\n" +" its value. If a given key is seen more than once, the dict retains\n" +" the last value associated with it."; PyTypeObject PyDict_Type = { PyObject_HEAD_INIT(&PyType_Type)