Another crack at bug #1460340: make random.sample(dict) work, this time by ugly brute force.
This commit is contained in:
Tim Peters 2006-04-01 00:26:53 +00:00
parent 2a9a6b0e86
commit c17976e983
3 changed files with 37 additions and 5 deletions

View File

@ -285,6 +285,15 @@ class Random(_random.Random):
large population: sample(xrange(10000000), 60) large population: sample(xrange(10000000), 60)
""" """
# XXX Although the documentation says `population` is "a sequence",
# XXX attempts are made to cater to any iterable with a __len__
# XXX method. This has had mixed success. Examples from both
# XXX sides: sets work fine, and should become officially supported;
# XXX dicts are much harder, and have failed in various subtle
# XXX ways across attempts. Support for mapping types should probably
# XXX be dropped (and users should pass mapping.keys() or .values()
# XXX explicitly).
# Sampling without replacement entails tracking either potential # Sampling without replacement entails tracking either potential
# selections (the pool) in a list or previous selections in a set. # selections (the pool) in a list or previous selections in a set.
@ -304,7 +313,9 @@ class Random(_random.Random):
setsize = 21 # size of a small set minus size of an empty list setsize = 21 # size of a small set minus size of an empty list
if k > 5: if k > 5:
setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
if n <= setsize: # is an n-length list smaller than a k-length set if n <= setsize or hasattr(population, "keys"):
# An n-length list is smaller than a k-length set, or this is a
# mapping type so the other algorithm wouldn't work.
pool = list(population) pool = list(population)
for i in xrange(k): # invariant: non-selected at [0,n-i) for i in xrange(k): # invariant: non-selected at [0,n-i)
j = _int(random() * (n-i)) j = _int(random() * (n-i))
@ -320,10 +331,10 @@ class Random(_random.Random):
j = _int(random() * n) j = _int(random() * n)
selected_add(j) selected_add(j)
result[i] = population[j] result[i] = population[j]
except (TypeError, KeyError): # handle sets and dictionaries except (TypeError, KeyError): # handle (at least) sets
if isinstance(population, list): if isinstance(population, list):
raise raise
return self.sample(list(population), k) return self.sample(tuple(population), k)
return result return result
## -------------------- real-valued distributions ------------------- ## -------------------- real-valued distributions -------------------

View File

@ -93,12 +93,28 @@ class TestBasicOps(unittest.TestCase):
self.gen.sample(set(range(20)), 2) self.gen.sample(set(range(20)), 2)
self.gen.sample(range(20), 2) self.gen.sample(range(20), 2)
self.gen.sample(xrange(20), 2) self.gen.sample(xrange(20), 2)
self.gen.sample(dict.fromkeys('abcdefghijklmnopqrst'), 2)
self.gen.sample(str('abcdefghijklmnopqrst'), 2) self.gen.sample(str('abcdefghijklmnopqrst'), 2)
self.gen.sample(tuple('abcdefghijklmnopqrst'), 2) self.gen.sample(tuple('abcdefghijklmnopqrst'), 2)
def test_sample_on_dicts(self):
self.gen.sample(dict.fromkeys('abcdefghijklmnopqrst'), 2)
# SF bug #1460340 -- random.sample can raise KeyError # SF bug #1460340 -- random.sample can raise KeyError
a = dict.fromkeys(range(10)+range(10,100,2)+range(100,110)) a = dict.fromkeys(range(10)+range(10,100,2)+range(100,110))
self.gen.sample(a,3) self.gen.sample(a, 3)
# A followup to bug #1460340: sampling from a dict could return
# a subset of its keys or of its values, depending on the size of
# the subset requested.
N = 30
d = dict((i, complex(i, i)) for i in xrange(N))
for k in xrange(N+1):
samp = self.gen.sample(d, k)
# Verify that we got ints back (keys); the values are complex.
for x in samp:
self.assert_(type(x) is int)
samp.sort()
self.assertEqual(samp, range(N))
def test_gauss(self): def test_gauss(self):
# Ensure that the seed() method initializes all the hidden state. In # Ensure that the seed() method initializes all the hidden state. In

View File

@ -489,6 +489,11 @@ Extension Modules
Library Library
------- -------
- Bug #1460340: ``random.sample(dict)`` failed in various ways. Dicts
aren't officially supported here, and trying to use them will probably
raise an exception some day. But dicts have been allowed, and "mostly
worked", so support for them won't go away without warning.
- Bug #1445068: getpass.getpass() can now be given an explicit stream - Bug #1445068: getpass.getpass() can now be given an explicit stream
argument to specify where to write the prompt. argument to specify where to write the prompt.