1 """Classes to represent arbitrary sets (including sets of sets).
3 This module implements sets using dictionaries whose values are
4 ignored. The usual operations (union, intersection, deletion, etc.)
5 are provided as both methods and operators.
7 Important: sets are not sequences! While they support 'x in s',
8 'len(s)', and 'for x in s', none of those operations are unique for
9 sequences; for example, mappings support all three as well. The
10 characteristic operation for sequences is subscripting with small
11 integers: s[i], for i in range(len(s)). Sets don't support
12 subscripting at all. Also, sequences allow multiple occurrences and
13 their elements have a definite order; sets on the other hand don't
14 record multiple occurrences and don't remember the order of element
15 insertion (which is why they don't support s[i]).
17 The following classes are provided:
19 BaseSet -- All the operations common to both mutable and immutable
20 sets. This is an abstract class, not meant to be directly
23 Set -- Mutable sets, subclass of BaseSet; not hashable.
25 ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26 An iterable argument is mandatory to create an ImmutableSet.
28 _TemporarilyImmutableSet -- A wrapper around a Set, hashable,
29 giving the same hash value as the immutable set equivalent
30 would have. Do not use this class directly.
32 Only hashable objects can be added to a Set. In particular, you cannot
33 really add a Set as an element to another Set; if you try, what is
34 actually added is an ImmutableSet built from it (it compares equal to
35 the one you tried adding).
37 When you ask if `x in y' where x is a Set and y is a Set or
38 ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
39 what's tested is actually `z in y'.
45 # - Greg V. Wilson wrote the first version, using a different approach
46 # to the mutable/immutable problem, and inheriting from dict.
48 # - Alex Martelli modified Greg's version to implement the current
49 # Set/ImmutableSet approach, and make the data an attribute.
51 # - Guido van Rossum rewrote much of the code, made some API changes,
52 # and cleaned up the docstrings.
54 # - Raymond Hettinger added a number of speedups and other
57 from __future__
import generators
59 from itertools
import ifilter
, ifilterfalse
61 # Code to make the module run under Py2.2
62 def ifilter(predicate
, iterable
):
69 def ifilterfalse(predicate
, iterable
):
79 True, False = (0==0, 0!=0)
81 __all__
= ['BaseSet', 'Set', 'ImmutableSet']
83 class BaseSet(object):
84 """Common base class for mutable and immutable sets."""
91 """This is an abstract class."""
92 # Don't call this from a concrete subclass!
93 if self
.__class
__ is BaseSet
:
94 raise TypeError, ("BaseSet is an abstract class. "
95 "Use Set or ImmutableSet.")
97 # Standard protocols: __len__, __repr__, __str__, __iter__
100 """Return the number of elements of a set."""
101 return len(self
._data
)
104 """Return string representation of a set.
106 This looks like 'Set([<list of elements>])'.
110 # __str__ is the same as __repr__
113 def _repr(self
, sorted=False):
114 elements
= self
._data
.keys()
117 return '%s(%r)' % (self
.__class
__.__name
__, elements
)
120 """Return an iterator over the elements or a set.
122 This is the keys iterator for the underlying dict.
124 return self
._data
.iterkeys()
126 # Three-way comparison is not supported. However, because __eq__ is
127 # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
128 # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
131 def __cmp__(self
, other
):
132 raise TypeError, "can't compare sets using cmp()"
134 # Equality comparisons using the underlying dicts. Mixed-type comparisons
135 # are allowed here, where Set == z for non-Set z always returns False,
136 # and Set != z always True. This allows expressions like "x in y" to
137 # give the expected result when y is a sequence of mixed types, not
138 # raising a pointless TypeError just because y contains a Set, or x is
139 # a Set and y contain's a non-set ("in" invokes only __eq__).
140 # Subtle: it would be nicer if __eq__ and __ne__ could return
141 # NotImplemented instead of True or False. Then the other comparand
142 # would get a chance to determine the result, and if the other comparand
143 # also returned NotImplemented then it would fall back to object address
144 # comparison (which would always return False for __eq__ and always
145 # True for __ne__). However, that doesn't work, because this type
146 # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented,
147 # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
149 def __eq__(self
, other
):
150 if isinstance(other
, BaseSet
):
151 return self
._data
== other
._data
155 def __ne__(self
, other
):
156 if isinstance(other
, BaseSet
):
157 return self
._data
!= other
._data
164 """Return a shallow copy of a set."""
165 result
= self
.__class
__()
166 result
._data
.update(self
._data
)
169 __copy__
= copy
# For the copy module
171 def __deepcopy__(self
, memo
):
172 """Return a deep copy of a set; used by copy module."""
173 # This pre-creates the result and inserts it in the memo
174 # early, in case the deep copy recurses into another reference
175 # to this same set. A set can't be an element of itself, but
176 # it can certainly contain an object that has a reference to
178 from copy
import deepcopy
179 result
= self
.__class
__()
180 memo
[id(self
)] = result
184 data
[deepcopy(elt
, memo
)] = value
187 # Standard set operations: union, intersection, both differences.
188 # Each has an operator version (e.g. __or__, invoked with |) and a
189 # method version (e.g. union).
190 # Subtle: Each pair requires distinct code so that the outcome is
191 # correct when the type of other isn't suitable. For example, if
192 # we did "union = __or__" instead, then Set().union(3) would return
193 # NotImplemented instead of raising TypeError (albeit that *why* it
194 # raises TypeError as-is is also a bit subtle).
196 def __or__(self
, other
):
197 """Return the union of two sets as a new set.
199 (I.e. all elements that are in either set.)
201 if not isinstance(other
, BaseSet
):
202 return NotImplemented
203 return self
.union(other
)
205 def union(self
, other
):
206 """Return the union of two sets as a new set.
208 (I.e. all elements that are in either set.)
210 result
= self
.__class
__(self
)
211 result
._update
(other
)
214 def __and__(self
, other
):
215 """Return the intersection of two sets as a new set.
217 (I.e. all elements that are in both sets.)
219 if not isinstance(other
, BaseSet
):
220 return NotImplemented
221 return self
.intersection(other
)
223 def intersection(self
, other
):
224 """Return the intersection of two sets as a new set.
226 (I.e. all elements that are in both sets.)
228 if not isinstance(other
, BaseSet
):
230 if len(self
) <= len(other
):
231 little
, big
= self
, other
233 little
, big
= other
, self
234 common
= ifilter(big
._data
.has_key
, little
)
235 return self
.__class
__(common
)
237 def __xor__(self
, other
):
238 """Return the symmetric difference of two sets as a new set.
240 (I.e. all elements that are in exactly one of the sets.)
242 if not isinstance(other
, BaseSet
):
243 return NotImplemented
244 return self
.symmetric_difference(other
)
246 def symmetric_difference(self
, other
):
247 """Return the symmetric difference of two sets as a new set.
249 (I.e. all elements that are in exactly one of the sets.)
251 result
= self
.__class
__()
254 selfdata
= self
._data
256 otherdata
= other
._data
257 except AttributeError:
258 otherdata
= Set(other
)._data
259 for elt
in ifilterfalse(otherdata
.has_key
, selfdata
):
261 for elt
in ifilterfalse(selfdata
.has_key
, otherdata
):
265 def __sub__(self
, other
):
266 """Return the difference of two sets as a new Set.
268 (I.e. all elements that are in this set and not in the other.)
270 if not isinstance(other
, BaseSet
):
271 return NotImplemented
272 return self
.difference(other
)
274 def difference(self
, other
):
275 """Return the difference of two sets as a new Set.
277 (I.e. all elements that are in this set and not in the other.)
279 result
= self
.__class
__()
282 otherdata
= other
._data
283 except AttributeError:
284 otherdata
= Set(other
)._data
286 for elt
in ifilterfalse(otherdata
.has_key
, self
):
292 def __contains__(self
, element
):
293 """Report whether an element is a member of a set.
295 (Called in response to the expression `element in self'.)
298 return element
in self
._data
300 transform
= getattr(element
, "__as_temporarily_immutable__", None)
301 if transform
is None:
302 raise # re-raise the TypeError exception we caught
303 return transform() in self
._data
305 # Subset and superset test
307 def issubset(self
, other
):
308 """Report whether another set contains this set."""
309 self
._binary
_sanity
_check
(other
)
310 if len(self
) > len(other
): # Fast check for obvious cases
312 for elt
in ifilterfalse(other
._data
.has_key
, self
):
316 def issuperset(self
, other
):
317 """Report whether this set contains another set."""
318 self
._binary
_sanity
_check
(other
)
319 if len(self
) < len(other
): # Fast check for obvious cases
321 for elt
in ifilterfalse(self
._data
.has_key
, other
):
325 # Inequality comparisons using the is-subset relation.
329 def __lt__(self
, other
):
330 self
._binary
_sanity
_check
(other
)
331 return len(self
) < len(other
) and self
.issubset(other
)
333 def __gt__(self
, other
):
334 self
._binary
_sanity
_check
(other
)
335 return len(self
) > len(other
) and self
.issuperset(other
)
339 def _binary_sanity_check(self
, other
):
340 # Check that the other argument to a binary operation is also
341 # a set, raising a TypeError otherwise.
342 if not isinstance(other
, BaseSet
):
343 raise TypeError, "Binary operation only permitted between sets"
345 def _compute_hash(self
):
346 # Calculate hash code for a set by xor'ing the hash codes of
347 # the elements. This ensures that the hash code does not depend
348 # on the order in which elements are added to the set. This is
349 # not called __hash__ because a BaseSet should not be hashable;
350 # only an ImmutableSet is hashable.
356 def _update(self
, iterable
):
357 # The main loop for update() and the subclass __init__() methods.
360 # Use the fast update() method when a dictionary is available.
361 if isinstance(iterable
, BaseSet
):
362 data
.update(iterable
._data
)
367 if type(iterable
) in (list, tuple, xrange):
368 # Optimized: we know that __iter__() and next() can't
369 # raise TypeError, so we can move 'try:' out of the loop.
374 data
[element
] = value
377 transform
= getattr(element
, "__as_immutable__", None)
378 if transform
is None:
379 raise # re-raise the TypeError exception we caught
380 data
[transform()] = value
382 # Safe: only catch TypeError where intended
383 for element
in iterable
:
385 data
[element
] = value
387 transform
= getattr(element
, "__as_immutable__", None)
388 if transform
is None:
389 raise # re-raise the TypeError exception we caught
390 data
[transform()] = value
393 class ImmutableSet(BaseSet
):
394 """Immutable set class."""
396 __slots__
= ['_hashcode']
400 def __init__(self
, iterable
=None):
401 """Construct an immutable set from an optional iterable."""
402 self
._hashcode
= None
404 if iterable
is not None:
405 self
._update
(iterable
)
408 if self
._hashcode
is None:
409 self
._hashcode
= self
._compute
_hash
()
410 return self
._hashcode
412 def __getstate__(self
):
413 return self
._data
, self
._hashcode
415 def __setstate__(self
, state
):
416 self
._data
, self
._hashcode
= state
419 """ Mutable set class."""
423 # BaseSet + operations requiring mutability; no hashing
425 def __init__(self
, iterable
=None):
426 """Construct a set from an optional iterable."""
428 if iterable
is not None:
429 self
._update
(iterable
)
431 def __getstate__(self
):
432 # getstate's results are ignored if it is not
435 def __setstate__(self
, data
):
439 """A Set cannot be hashed."""
440 # We inherit object.__hash__, so we must deny this explicitly
441 raise TypeError, "Can't hash a Set, only an ImmutableSet."
443 # In-place union, intersection, differences.
444 # Subtle: The xyz_update() functions deliberately return None,
445 # as do all mutating operations on built-in container types.
446 # The __xyz__ spellings have to return self, though.
448 def __ior__(self
, other
):
449 """Update a set with the union of itself and another."""
450 self
._binary
_sanity
_check
(other
)
451 self
._data
.update(other
._data
)
454 def union_update(self
, other
):
455 """Update a set with the union of itself and another."""
458 def __iand__(self
, other
):
459 """Update a set with the intersection of itself and another."""
460 self
._binary
_sanity
_check
(other
)
461 self
._data
= (self
& other
)._data
464 def intersection_update(self
, other
):
465 """Update a set with the intersection of itself and another."""
466 if isinstance(other
, BaseSet
):
469 self
._data
= (self
.intersection(other
))._data
471 def __ixor__(self
, other
):
472 """Update a set with the symmetric difference of itself and another."""
473 self
._binary
_sanity
_check
(other
)
474 self
.symmetric_difference_update(other
)
477 def symmetric_difference_update(self
, other
):
478 """Update a set with the symmetric difference of itself and another."""
481 if not isinstance(other
, BaseSet
):
491 def __isub__(self
, other
):
492 """Remove all elements of another set from this set."""
493 self
._binary
_sanity
_check
(other
)
494 self
.difference_update(other
)
497 def difference_update(self
, other
):
498 """Remove all elements of another set from this set."""
500 if not isinstance(other
, BaseSet
):
504 for elt
in ifilter(data
.has_key
, other
):
507 # Python dict-like mass mutations: update, clear
509 def update(self
, iterable
):
510 """Add all values from an iterable (such as a list or file)."""
511 self
._update
(iterable
)
514 """Remove all elements from this set."""
517 # Single-element mutations: add, remove, discard
519 def add(self
, element
):
520 """Add an element to a set.
522 This has no effect if the element is already present.
525 self
._data
[element
] = True
527 transform
= getattr(element
, "__as_immutable__", None)
528 if transform
is None:
529 raise # re-raise the TypeError exception we caught
530 self
._data
[transform()] = True
532 def remove(self
, element
):
533 """Remove an element from a set; it must be a member.
535 If the element is not a member, raise a KeyError.
538 del self
._data
[element
]
540 transform
= getattr(element
, "__as_temporarily_immutable__", None)
541 if transform
is None:
542 raise # re-raise the TypeError exception we caught
543 del self
._data
[transform()]
545 def discard(self
, element
):
546 """Remove an element from a set if it is a member.
548 If the element is not a member, do nothing.
556 """Remove and return an arbitrary set element."""
557 return self
._data
.popitem()[0]
559 def __as_immutable__(self
):
560 # Return a copy of self as an immutable set
561 return ImmutableSet(self
)
563 def __as_temporarily_immutable__(self
):
564 # Return self wrapped in a temporarily immutable set
565 return _TemporarilyImmutableSet(self
)
568 class _TemporarilyImmutableSet(BaseSet
):
569 # Wrap a mutable set as if it was temporarily immutable.
570 # This only supplies hashing and equality comparisons.
572 def __init__(self
, set):
574 self
._data
= set._data
# Needed by ImmutableSet.__eq__()
577 return self
._set
._compute
_hash
()