Issue #4615. Document how to use itertools for de-duping.

This commit is contained in:
Raymond Hettinger 2009-01-02 21:20:38 +00:00
parent ad18a49532
commit d84616535c
2 changed files with 60 additions and 0 deletions

View File

@ -687,3 +687,27 @@ which incur interpreter overhead.
return return
indices[i:] = [indices[i] + 1] * (r - i) indices[i:] = [indices[i] + 1] * (r - i)
yield tuple(pool[i] for i in indices) yield tuple(pool[i] for i in indices)
def unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in iterable:
if element not in seen:
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element
def unique_justseen(iterable, key=None):
"List unique elements, preserving order. Remember only the element just seen."
# unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
# unique_justseen('ABBCcAD', str.lower) --> A B C A D
return imap(next, imap(itemgetter(1), groupby(iterable, key)))

View File

@ -1277,6 +1277,30 @@ Samuele
... indices[i:] = [indices[i] + 1] * (r - i) ... indices[i:] = [indices[i] + 1] * (r - i)
... yield tuple(pool[i] for i in indices) ... yield tuple(pool[i] for i in indices)
>>> def unique_everseen(iterable, key=None):
... "List unique elements, preserving order. Remember all elements ever seen."
... # unique_everseen('AAAABBBCCDAABBB') --> A B C D
... # unique_everseen('ABBCcAD', str.lower) --> A B C D
... seen = set()
... seen_add = seen.add
... if key is None:
... for element in iterable:
... if element not in seen:
... seen_add(element)
... yield element
... else:
... for element in iterable:
... k = key(element)
... if k not in seen:
... seen_add(k)
... yield element
>>> def unique_justseen(iterable, key=None):
... "List unique elements, preserving order. Remember only the element just seen."
... # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
... # unique_justseen('ABBCcAD', str.lower) --> A B C A D
... return imap(next, imap(itemgetter(1), groupby(iterable, key)))
This is not part of the examples but it tests to make sure the definitions This is not part of the examples but it tests to make sure the definitions
perform as purported. perform as purported.
@ -1339,6 +1363,18 @@ perform as purported.
>>> list(combinations_with_replacement('abc', 2)) >>> list(combinations_with_replacement('abc', 2))
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')] [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'b'), ('b', 'c'), ('c', 'c')]
>>> list(unique_everseen('AAAABBBCCDAABBB'))
['A', 'B', 'C', 'D']
>>> list(unique_everseen('ABBCcAD', str.lower))
['A', 'B', 'C', 'D']
>>> list(unique_justseen('AAAABBBCCDAABBB'))
['A', 'B', 'C', 'D', 'A', 'B']
>>> list(unique_justseen('ABBCcAD', str.lower))
['A', 'B', 'C', 'A', 'D']
""" """
__test__ = {'libreftest' : libreftest} __test__ = {'libreftest' : libreftest}