Issue #4615. Document how to use itertools for de-duping.

This commit is contained in:
Raymond Hettinger 2009-01-02 21:20:38 +00:00
parent ad18a49532
commit d84616535c
2 changed files with 60 additions and 0 deletions

View File

@ -687,3 +687,27 @@ which incur interpreter overhead.
return
indices[i:] = [indices[i] + 1] * (r - i)
yield tuple(pool[i] for i in indices)
def unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in iterable:
if element not in seen:
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element
def unique_justseen(iterable, key=None):
"List unique elements, preserving order. Remember only the element just seen."
# unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
# unique_justseen('ABBCcAD', str.lower) --> A B C A D
return imap(next, imap(itemgetter(1), groupby(iterable, key)))

View File

@ -1277,6 +1277,30 @@ Samuele
... indices[i:] = [indices[i] + 1] * (r - i)
... yield tuple(pool[i] for i in indices)
>>> def unique_everseen(iterable, key=None):
... "List unique elements, preserving order. Remember all elements ever seen."
... # unique_everseen('AAAABBBCCDAABBB') --> A B C D
... # unique_everseen('ABBCcAD', str.lower) --> A B C D
... seen = set()
... seen_add = seen.add
... if key is None:
... for element in iterable:
... if element not in seen:
... seen_add(element)
... yield element
... else:
... for element in iterable:
... k = key(element)
... if k not in seen:
... seen_add(k)
... yield element
>>> def unique_justseen(iterable, key=None):
... "List unique elements, preserving order. Remember only the element just seen."
... # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
... # unique_justseen('ABBCcAD', str.lower) --> A B C A D
... return imap(next, imap(itemgetter(1), groupby(iterable, key)))
This is not part of the examples but it tests to make sure the definitions
perform as purported.
@ -1339,6 +1363,18 @@ perform as purported.
>>> list(combinations_with_replacement('abc', 2))
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]
>>> list(unique_everseen('AAAABBBCCDAABBB'))
['A', 'B', 'C', 'D']
>>> list(unique_everseen('ABBCcAD', str.lower))
['A', 'B', 'C', 'D']
>>> list(unique_justseen('AAAABBBCCDAABBB'))
['A', 'B', 'C', 'D', 'A', 'B']
>>> list(unique_justseen('ABBCcAD', str.lower))
['A', 'B', 'C', 'A', 'D']
"""
__test__ = {'libreftest' : libreftest}