From 1c3a121bb04b716b9387f6eed7ee6120ed140f9d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 12 Oct 2016 01:42:10 -0400 Subject: [PATCH] Issue #18844: Fix-up examples for random.choices(). Remove over-specified test. --- Doc/library/random.rst | 58 +++++++++++++++++++---------------------- Lib/test/test_random.py | 4 --- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index 9cb145e36c1..a47ed9ce3dd 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -131,11 +131,12 @@ Functions for sequences: If a *weights* sequence is specified, selections are made according to the relative weights. Alternatively, if a *cum_weights* sequence is given, the - selections are made according to the cumulative weights. For example, the - relative weights ``[10, 5, 30, 5]`` are equivalent to the cumulative - weights ``[10, 15, 45, 50]``. Internally, the relative weights are - converted to cumulative weights before making selections, so supplying the - cumulative weights saves work. + selections are made according to the cumulative weights (perhaps computed + using :func:`itertools.accumulate`). For example, the relative weights + ``[10, 5, 30, 5]`` are equivalent to the cumulative weights + ``[10, 15, 45, 50]``. Internally, the relative weights are converted to + cumulative weights before making selections, so supplying the cumulative + weights saves work. If neither *weights* nor *cum_weights* are specified, selections are made with equal probability. If a weights sequence is supplied, it must be @@ -146,6 +147,9 @@ Functions for sequences: with the :class:`float` values returned by :func:`random` (that includes integers, floats, and fractions but excludes decimals). + .. versionadded:: 3.6 + + .. function:: shuffle(x[, random]) Shuffle the sequence *x* in place. 
The optional argument *random* is a @@ -335,36 +339,28 @@ Basic usage:: >>> random.choice('abcdefghij') # Single random element 'c' - >>> items = [1, 2, 3, 4, 5, 6, 7] - >>> random.shuffle(items) - >>> items - [7, 3, 2, 5, 6, 4, 1] + >>> deck = ['jack', 'queen', 'king', 'ace'] + >>> shuffle(deck) + >>> deck + ['king', 'queen', 'ace', 'jack'] >>> random.sample([1, 2, 3, 4, 5], 3) # Three samples without replacement [4, 1, 5] -A common task is to make a :func:`random.choice` with weighted probabilities. + >>> # Six weighted samples with replacement + >>> choices(['red', 'black', 'green'], [18, 18, 2], k=6) + ['red', 'green', 'black', 'black', 'red', 'black'] -If the weights are small integer ratios, a simple technique is to build a sample -population with repeats:: +Example of `statistical bootstrapping +<https://en.wikipedia.org/wiki/Bootstrapping_(statistics)>`_ using resampling +with replacement to estimate a confidence interval for the mean of a small +sample of size five:: - >>> weighted_choices = [('Red', 3), ('Blue', 2), ('Yellow', 1), ('Green', 4)] - >>> population = [val for val, cnt in weighted_choices for i in range(cnt)] - >>> population - ['Red', 'Red', 'Red', 'Blue', 'Blue', 'Yellow', 'Green', 'Green', 'Green', 'Green'] + # http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm + from statistics import mean + from random import choices - >>> random.choice(population) - 'Green' - -A more general approach is to arrange the weights in a cumulative distribution -with :func:`itertools.accumulate`, and then locate the random value with -:func:`bisect.bisect`:: - - >>> choices, weights = zip(*weighted_choices) - >>> cumdist = list(itertools.accumulate(weights)) - >>> cumdist # [3, 3+2, 3+2+1, 3+2+1+4] - [3, 5, 6, 10] - - >>> x = random.random() * cumdist[-1] - >>> choices[bisect.bisect(cumdist, x)] - 'Blue' + data = 1, 2, 4, 4, 10 + means = sorted(mean(choices(data, k=5)) for i in range(20)) + print('The sample mean of {:.1f} has a 90% confidence interval ' + 'from {:.1f} to 
{:.1f}'.format(mean(data), means[1], means[-2])) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 0dfc290824a..840f3e7ce81 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -178,8 +178,6 @@ class TestBasicOps: self.assertTrue(set(choices(data, weights=None, k=5)) <= set(data)) with self.assertRaises(ValueError): choices(data, [1,2], k=5) # len(weights) != len(population) - with self.assertRaises(IndexError): - choices(data, [0]*4, k=5) # weights sum to zero with self.assertRaises(TypeError): choices(data, 10, k=5) # non-iterable weights with self.assertRaises(TypeError): @@ -194,8 +192,6 @@ class TestBasicOps: with self.assertRaises(ValueError): choices(data, cum_weights=[1,2], k=5) # len(weights) != len(population) - with self.assertRaises(IndexError): - choices(data, cum_weights=[0]*4, k=5) # cum_weights sum to zero with self.assertRaises(TypeError): choices(data, cum_weights=10, k=5) # non-iterable cum_weights with self.assertRaises(TypeError):