From 1fc240e85150f5cb39502a87cc9a4a0a8cbe5ab0 Mon Sep 17 00:00:00 2001
From: Tim Peters <tim.peters@gmail.com>
Date: Fri, 26 Oct 2001 05:06:50 +0000
Subject: [PATCH] Generalize dictionary() to accept a sequence of 2-sequences. 
 At the outer level, the iterator protocol is used for memory-efficiency (the
 outer sequence may be very large if fully materialized); at the inner level,
 PySequence_Fast() is used for time-efficiency (these should always be
 sequences of length 2).

dictobject.c, new functions PyDict_{Merge,Update}FromSeq2.  These are
wholly analogous to PyDict_{Merge,Update}, but process a sequence-of-2-
sequences argument instead of a mapping object.  For now, I left these
functions file static, so no corresponding doc changes.  It's tempting
to change dict.update() to allow a sequence-of-2-seqs argument too.

Also changed the name of dictionary's keyword argument from "mapping"
to "x".  Got a better name?  "mapping_or_sequence_of_pairs" isn't
attractive, although more so than "mosop" <wink>.

abstract.h, abstract.tex:  Added new PySequence_Fast_GET_SIZE function,
much faster than going thru the all-purpose PySequence_Size.

libfuncs.tex:
- Document dictionary().
- Fiddle tuple() and list() to admit that their argument is optional.
- The long-winded repetition of "a sequence, a container that supports
  iteration, or an iterator object" is getting to be a PITA.  Many
  months ago I suggested factoring this out into "iterable object",
  where the definition of that could include being explicit about
  generators too (as is, I'm not sure a reader outside of PythonLabs
  could guess that "an iterator object" includes a generator call).
- Please check my curly braces -- I'm going blind <0.9 wink>.

abstract.c, PySequence_Tuple():  When PyObject_GetIter() fails, leave
its error msg alone now (the msg it produces has improved since
PySequence_Tuple was generalized to accept iterable objects, and
PySequence_Tuple was also stomping on the msg in cases it shouldn't
have even before PyObject_GetIter grew a better msg).
---
 Doc/api/abstract.tex   |  17 ++++--
 Doc/lib/libfuncs.tex   |  26 ++++++++-
 Include/abstract.h     |  12 +++--
 Lib/test/test_descr.py |  51 +++++++++++++++---
 Misc/NEWS              |   9 ++++
 Objects/abstract.c     |   2 +-
 Objects/dictobject.c   | 118 +++++++++++++++++++++++++++++++++++------
 7 files changed, 199 insertions(+), 36 deletions(-)
diff --git a/Doc/api/abstract.tex b/Doc/api/abstract.tex
index 8d271df571f..fae8475e4f5 100644
--- a/Doc/api/abstract.tex
+++ b/Doc/api/abstract.tex
@@ -125,7 +125,7 @@ for which they do not apply, they will raise a Python exception.
   the Unicode string representation on success, \NULL{} on failure.
   This is the equivalent of the Python expression
   \samp{unistr(\var{o})}.  Called by the
-  \function{unistr()}\bifuncindex{unistr} built-in function. 
+  \function{unistr()}\bifuncindex{unistr} built-in function.
 \end{cfuncdesc}
 
 \begin{cfuncdesc}{int}{PyObject_IsInstance}{PyObject *inst, PyObject *cls}
@@ -715,10 +715,17 @@ determination.
 
 \begin{cfuncdesc}{PyObject*}{PySequence_Fast_GET_ITEM}{PyObject *o, int i}
   Return the \var{i}th element of \var{o}, assuming that \var{o} was
-  returned by \cfunction{PySequence_Fast()}, and that \var{i} is
-  within bounds.  The caller is expected to get the length of the
-  sequence by calling \cfunction{PySequence_Size()} on \var{o}, since
-  lists and tuples are guaranteed to always return their true length.
+  returned by \cfunction{PySequence_Fast()}, that \var{o} is not \NULL{},
+  and that \var{i} is within bounds.
+\end{cfuncdesc}
+
+\begin{cfuncdesc}{int}{PySequence_Fast_GET_SIZE}{PyObject *o}
+  Return the length of \var{o}, assuming that \var{o} was
+  returned by \cfunction{PySequence_Fast()} and that \var{o} is
+  not \NULL{}.  The size can also be obtained by calling
+  \cfunction{PySequence_Size()} on \var{o}, but
+  \cfunction{PySequence_Fast_GET_SIZE()} is faster because it can
+  assume \var{o} is a list or tuple.
 \end{cfuncdesc}
 
 
diff --git a/Doc/lib/libfuncs.tex b/Doc/lib/libfuncs.tex
index b19d4a643a2..e9baeb36c7a 100644
--- a/Doc/lib/libfuncs.tex
+++ b/Doc/lib/libfuncs.tex
@@ -175,6 +175,28 @@ def my_import(name):
   \code{del \var{x}.\var{foobar}}.
 \end{funcdesc}
 
+\begin{funcdesc}{dictionary}{\optional{mapping-or-sequence}}
+  Return a new dictionary initialized from the optional argument.
+  If an argument is not specified, return a new empty dictionary.
+  If the argument is a mapping object, return a dictionary mapping the
+  same keys to the same values as does the mapping object.
+  Else the argument must be a sequence, a container that supports
+  iteration, or an iterator object.  The elements of the argument must
+  each also be of one of those kinds, and each must in turn contain
+  exactly two objects.  The first is used as a key in the new dictionary,
+  and the second as the key's value.  If a given key is seen more than
+  once, the last value associated with it is retained in the new
+  dictionary.
+  For example, these all return a dictionary equal to
+  \code{\{1: 2, 2: 3\}}:
+  \code{dictionary(\{1: 2, 2: 3\})},
+  \code{dictionary(\{1: 2, 2: 3\}.items())},
+  \code{dictionary(\{1: 2, 2: 3\}.iteritems())},
+  \code{dictionary(zip((1, 2), (2, 3)))},
+  \code{dictionary([[2, 3], [1, 2]])}, and
+  \code{dictionary([(i-1, i) for i in (2, 3)])}.
+\end{funcdesc}
+
 \begin{funcdesc}{dir}{\optional{object}}
   Without arguments, return the list of names in the current local
   symbol table.  With an argument, attempts to return a list of valid
@@ -472,7 +494,7 @@ def my_import(name):
   may be a sequence (string, tuple or list) or a mapping (dictionary).
 \end{funcdesc}
 
-\begin{funcdesc}{list}{sequence}
+\begin{funcdesc}{list}{\optional{sequence}}
   Return a list whose items are the same and in the same order as
   \var{sequence}'s items.  \var{sequence} may be either a sequence, a
   container that supports iteration, or an iterator object.  If
@@ -726,7 +748,7 @@ def my_import(name):
   printable string.
 \end{funcdesc}
 
-\begin{funcdesc}{tuple}{sequence}
+\begin{funcdesc}{tuple}{\optional{sequence}}
   Return a tuple whose items are the same and in the same order as
   \var{sequence}'s items.  \var{sequence} may be a sequence, a
   container that supports iteration, or an iterator object.
diff --git a/Include/abstract.h b/Include/abstract.h
index d736efcb909..351149db6f2 100644
--- a/Include/abstract.h
+++ b/Include/abstract.h
@@ -951,26 +951,30 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/
 
 
      DL_IMPORT(PyObject *) PySequence_List(PyObject *o);
-
        /*
 	 Returns the sequence, o, as a list on success, and NULL on failure.
 	 This is equivalent to the Python expression: list(o)
        */
 
      DL_IMPORT(PyObject *) PySequence_Fast(PyObject *o, const char* m);
-
        /*
          Returns the sequence, o, as a tuple, unless it's already a
          tuple or list.  Use PySequence_Fast_GET_ITEM to access the
-         members of this list.
+         members of this list, and PySequence_Fast_GET_SIZE to get its length.
 
          Returns NULL on failure.  If the object does not support iteration,
          raises a TypeError exception with m as the message text.
        */
 
+#define PySequence_Fast_GET_SIZE(o) \
+	(PyList_Check(o) ? PyList_GET_SIZE(o) : PyTuple_GET_SIZE(o))
+       /*
+	 Return the size of o, assuming that o was returned by
+         PySequence_Fast and is not NULL.
+       */
+
 #define PySequence_Fast_GET_ITEM(o, i)\
      (PyList_Check(o) ? PyList_GET_ITEM(o, i) : PyTuple_GET_ITEM(o, i))
-
        /*
 	 Return the ith element of o, assuming that o was returned by
          PySequence_Fast, and that i is within bounds.
diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py
index 87f4f0f6f47..230d6a16c8c 100644
--- a/Lib/test/test_descr.py
+++ b/Lib/test/test_descr.py
@@ -178,15 +178,25 @@ def dict_constructor():
     vereq(d, {})
     d = dictionary({})
     vereq(d, {})
-    d = dictionary(mapping={})
+    d = dictionary(x={})
     vereq(d, {})
     d = dictionary({1: 2, 'a': 'b'})
     vereq(d, {1: 2, 'a': 'b'})
+    vereq(d, dictionary(d.items()))
+    vereq(d, dictionary(x=d.iteritems()))
     for badarg in 0, 0L, 0j, "0", [0], (0,):
         try:
             dictionary(badarg)
         except TypeError:
             pass
+        except ValueError:
+            if badarg == "0":
+                # It's a sequence, and its elements are also sequences (gotta
+                # love strings <wink>), but they aren't of length 2, so this
+                # one seemed better as a ValueError than a TypeError.
+                pass
+            else:
+                raise TestFailed("no TypeError from dictionary(%r)" % badarg)
         else:
             raise TestFailed("no TypeError from dictionary(%r)" % badarg)
     try:
@@ -194,7 +204,7 @@ def dict_constructor():
     except TypeError:
         pass
     else:
-        raise TestFailed("no TypeError from dictionary(senseless={}")
+        raise TestFailed("no TypeError from dictionary(senseless={})")
 
     try:
         dictionary({}, {})
@@ -204,11 +214,9 @@ def dict_constructor():
         raise TestFailed("no TypeError from dictionary({}, {})")
 
     class Mapping:
+        # Lacks a .keys() method; will be added later.
         dict = {1:2, 3:4, 'a':1j}
 
-        def __getitem__(self, i):
-            return self.dict[i]
-
     try:
         dictionary(Mapping())
     except TypeError:
@@ -217,9 +225,36 @@ def dict_constructor():
         raise TestFailed("no TypeError from dictionary(incomplete mapping)")
 
     Mapping.keys = lambda self: self.dict.keys()
-    d = dictionary(mapping=Mapping())
+    Mapping.__getitem__ = lambda self, i: self.dict[i]
+    d = dictionary(x=Mapping())
     vereq(d, Mapping.dict)
 
+    # Init from sequence of iterable objects, each producing a 2-sequence.
+    class AddressBookEntry:
+        def __init__(self, first, last):
+            self.first = first
+            self.last = last
+        def __iter__(self):
+            return iter([self.first, self.last])
+
+    d = dictionary([AddressBookEntry('Tim', 'Warsaw'),
+                    AddressBookEntry('Barry', 'Peters'),
+                    AddressBookEntry('Tim', 'Peters'),
+                    AddressBookEntry('Barry', 'Warsaw')])
+    vereq(d, {'Barry': 'Warsaw', 'Tim': 'Peters'})
+
+    d = dictionary(zip(range(4), range(1, 5)))
+    vereq(d, dictionary([(i, i+1) for i in range(4)]))
+
+    # Bad sequence lengths.
+    for bad in ['tooshort'], ['too', 'long', 'by 1']:
+        try:
+            dictionary(bad)
+        except ValueError:
+            pass
+        else:
+            raise TestFailed("no ValueError from dictionary(%r)" % bad)
+
 def test_dir():
     if verbose:
         print "Testing dir() ..."
@@ -1830,7 +1865,7 @@ def keywords():
     vereq(unicode(string='abc', errors='strict'), u'abc')
     vereq(tuple(sequence=range(3)), (0, 1, 2))
     vereq(list(sequence=(0, 1, 2)), range(3))
-    vereq(dictionary(mapping={1: 2}), {1: 2})
+    vereq(dictionary(x={1: 2}), {1: 2})
 
     for constructor in (int, float, long, complex, str, unicode,
                         tuple, list, dictionary, file):
@@ -2371,7 +2406,7 @@ def kwdargs():
     vereq(f.__call__(a=42), 42)
     a = []
     list.__init__(a, sequence=[0, 1, 2])
-    vereq(a, [0, 1, 2]) 
+    vereq(a, [0, 1, 2])
 
 def test_main():
     class_docstrings()
diff --git a/Misc/NEWS b/Misc/NEWS
index aa0ab81607f..e55b9de40b9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -4,6 +4,11 @@ XXX Planned XXX Release date: 14-Nov-2001
 
 Type/class unification and new-style classes
 
+- dictionary() now accepts an iterable object producing 2-sequences.
+  For example, dictionary(d.items()) == d for any dictionary d.  The
+  argument, and the elements of the argument, can be any iterable
+  objects.
+
 - Methods of built-in types now properly check for keyword arguments
   (formerly these were silently ignored).  The only built-in methods
   that take keyword arguments are __call__, __init__ and __new__.
@@ -31,6 +36,10 @@ Build
 
 C API
 
+- New function PySequence_Fast_GET_SIZE() returns the size of a non-
+  NULL result from PySequence_Fast(), more quickly than calling
+  PySequence_Size().
+
 New platforms
 
 - Updated RISCOS port by Dietmar Schwertberger.
diff --git a/Objects/abstract.c b/Objects/abstract.c
index 8a715c8fa28..6b9201ba361 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -1278,7 +1278,7 @@ PySequence_Tuple(PyObject *v)
 	/* Get iterator. */
 	it = PyObject_GetIter(v);
 	if (it == NULL)
-		return type_error("tuple() argument must support iteration");
+		return NULL;
 
 	/* Guess result size and allocate space. */
 	n = PySequence_Size(v);
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 829f76d383c..f901499fe12 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -993,7 +993,89 @@ dict_update(PyObject *mp, PyObject *other)
 
 /* Update unconditionally replaces existing items.
    Merge has a 3rd argument 'override'; if set, it acts like Update,
-   otherwise it leaves existing items unchanged. */
+   otherwise it leaves existing items unchanged.
+
+   PyDict_{Update,Merge} update/merge from a mapping object.
+
+   PyDict_{Update,Merge}FromSeq2 update/merge from any iterable object
+   producing iterable objects of length 2.
+*/
+
+static int
+PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
+{
+	PyObject *it;	/* iter(seq2) */
+	int i;		/* index into seq2 of current element */
+	PyObject *item;	/* seq2[i] */
+	PyObject *fast;	/* item as a 2-tuple or 2-list */
+
+	assert(d != NULL);
+	assert(PyDict_Check(d));
+	assert(seq2 != NULL);
+
+	it = PyObject_GetIter(seq2);
+	if (it == NULL)
+		return -1;
+
+	for (i = 0; ; ++i) {
+		PyObject *key, *value;
+		int n;
+
+		fast = NULL;
+		item = PyIter_Next(it);
+		if (item == NULL) {
+			if (PyErr_Occurred())
+				goto Fail;
+			break;
+		}
+
+		/* Convert item to sequence, and verify length 2. */
+		fast = PySequence_Fast(item, "");
+		if (fast == NULL) {
+			if (PyErr_ExceptionMatches(PyExc_TypeError))
+				PyErr_Format(PyExc_TypeError,
+					"cannot convert dictionary update "
+					"sequence element #%d to a sequence",
+					i);
+			goto Fail;
+		}
+		n = PySequence_Fast_GET_SIZE(fast);
+		if (n != 2) {
+			PyErr_Format(PyExc_ValueError,
+				     "dictionary update sequence element #%d "
+				     "has length %d; 2 is required",
+				     i, n);
+			goto Fail;
+		}
+
+		/* Update/merge with this (key, value) pair. */
+		key = PySequence_Fast_GET_ITEM(fast, 0);
+		value = PySequence_Fast_GET_ITEM(fast, 1);
+		if (override || PyDict_GetItem(d, key) == NULL) {
+			int status = PyDict_SetItem(d, key, value);
+			if (status < 0)
+				goto Fail;
+		}
+		Py_DECREF(fast);
+		Py_DECREF(item);
+	}
+
+	i = 0;
+	goto Return;
+Fail:
+	Py_XDECREF(item);
+	Py_XDECREF(fast);
+	i = -1;
+Return:
+	Py_DECREF(it);
+	return i;
+}
+
+static int
+PyDict_UpdateFromSeq2(PyObject *d, PyObject *seq2)
+{
+	return PyDict_MergeFromSeq2(d, seq2, 1);
+}
 
 int
 PyDict_Update(PyObject *a, PyObject *b)
@@ -1699,23 +1781,20 @@ static int
 dict_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
 	PyObject *arg = NULL;
-	static char *kwlist[] = {"mapping", 0};
+	static char *kwlist[] = {"x", 0};
+	int result = 0;
 
 	if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:dictionary",
 					 kwlist, &arg))
-		return -1;
-	if (arg != NULL) {
-		if (PyDict_Merge(self, arg, 1) < 0) {
-			/* An error like "AttributeError: keys" is too
-			   cryptic in this context. */
-			if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
-				PyErr_SetString(PyExc_TypeError,
-					"argument must be of a mapping type");
-			}
-			return -1;
-		}
+		result = -1;
+
+	else if (arg != NULL) {
+		if (PyObject_HasAttrString(arg, "keys"))
+			result = PyDict_Merge(self, arg, 1);
+		else
+			result = PyDict_MergeFromSeq2(self, arg, 1);
 	}
-	return 0;
+	return result;
 }
 
 static PyObject *
@@ -1725,8 +1804,15 @@ dict_iter(dictobject *dict)
 }
 
 static char dictionary_doc[] =
-"dictionary() -> new empty dictionary\n"
-"dictionary(mapping) -> new dict initialized from mapping's key+value pairs";
+"dictionary() -> new empty dictionary.\n"
+"dictionary(mapping) -> new dict initialized from a mapping object's\n"
+"    (key, value) pairs.\n"
+"dictionary(seq) -> new dict initialized from the 2-element elements of\n"
+"    a sequence; for example, from mapping.items().  seq must be an\n"
+"    iterable object, producing iterable objects each producing exactly\n"
+"    two objects, the first of which is used as a key and the second as\n"
+"    its value.  If a given key is seen more than once, the dict retains\n"
+"    the last value associated with it.";
 
 PyTypeObject PyDict_Type = {
 	PyObject_HEAD_INIT(&PyType_Type)