Change to flush and close logic to fix #1760556.
[python.git] / Objects / setobject.c
blob025a79b6dcba8e6a81ccc1628f3ba95cc491a810
2 /* set object implementation
3 Written and maintained by Raymond D. Hettinger <python@rcn.com>
4 Derived from Lib/sets.py and Objects/dictobject.c.
6 Copyright (c) 2003-2007 Python Software Foundation.
7 All rights reserved.
8 */
10 #include "Python.h"
11 #include "structmember.h"
13 /* Set a key error with the specified argument, wrapping it in a
14 * tuple automatically so that tuple keys are not unpacked as the
15 * exception arguments. */
16 static void
17 set_key_error(PyObject *arg)
19 PyObject *tup;
20 tup = PyTuple_Pack(1, arg);
21 if (!tup)
22 return; /* caller will expect error to be set anyway */
23 PyErr_SetObject(PyExc_KeyError, tup);
24 Py_DECREF(tup);
27 /* This must be >= 1. */
28 #define PERTURB_SHIFT 5
30 /* Object used as dummy key to fill deleted entries */
31 static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
33 #ifdef Py_REF_DEBUG
34 PyObject *
35 _PySet_Dummy(void)
37 return dummy;
39 #endif
41 #define INIT_NONZERO_SET_SLOTS(so) do { \
42 (so)->table = (so)->smalltable; \
43 (so)->mask = PySet_MINSIZE - 1; \
44 (so)->hash = -1; \
45 } while(0)
47 #define EMPTY_TO_MINSIZE(so) do { \
48 memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
49 (so)->used = (so)->fill = 0; \
50 INIT_NONZERO_SET_SLOTS(so); \
51 } while(0)
53 /* Reuse scheme to save calls to malloc, free, and memset */
54 #define MAXFREESETS 80
55 static PySetObject *free_sets[MAXFREESETS];
56 static int num_free_sets = 0;
59 The basic lookup function used by all operations.
60 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
61 Open addressing is preferred over chaining since the link overhead for
62 chaining would be substantial (100% with typical malloc overhead).
64 The initial probe index is computed as hash mod the table size. Subsequent
65 probe indices are computed as explained in Objects/dictobject.c.
67 All arithmetic on hash should ignore overflow.
69 Unlike the dictionary implementation, the lookkey functions can return
70 NULL if the rich comparison returns an error.
73 static setentry *
74 set_lookkey(PySetObject *so, PyObject *key, register long hash)
76 register Py_ssize_t i;
77 register size_t perturb;
78 register setentry *freeslot;
79 register size_t mask = so->mask;
80 setentry *table = so->table;
81 register setentry *entry;
82 register int cmp;
83 PyObject *startkey;
85 i = hash & mask;
86 entry = &table[i];
87 if (entry->key == NULL || entry->key == key)
88 return entry;
90 if (entry->key == dummy)
91 freeslot = entry;
92 else {
93 if (entry->hash == hash) {
94 startkey = entry->key;
95 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
96 if (cmp < 0)
97 return NULL;
98 if (table == so->table && entry->key == startkey) {
99 if (cmp > 0)
100 return entry;
102 else {
103 /* The compare did major nasty stuff to the
104 * set: start over.
106 return set_lookkey(so, key, hash);
109 freeslot = NULL;
112 /* In the loop, key == dummy is by far (factor of 100s) the
113 least likely outcome, so test for that last. */
114 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
115 i = (i << 2) + i + perturb + 1;
116 entry = &table[i & mask];
117 if (entry->key == NULL) {
118 if (freeslot != NULL)
119 entry = freeslot;
120 break;
122 if (entry->key == key)
123 break;
124 if (entry->hash == hash && entry->key != dummy) {
125 startkey = entry->key;
126 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
127 if (cmp < 0)
128 return NULL;
129 if (table == so->table && entry->key == startkey) {
130 if (cmp > 0)
131 break;
133 else {
134 /* The compare did major nasty stuff to the
135 * set: start over.
137 return set_lookkey(so, key, hash);
140 else if (entry->key == dummy && freeslot == NULL)
141 freeslot = entry;
143 return entry;
147 * Hacked up version of set_lookkey which can assume keys are always strings;
148 * This means we can always use _PyString_Eq directly and not have to check to
149 * see if the comparison altered the table.
151 static setentry *
152 set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
154 register Py_ssize_t i;
155 register size_t perturb;
156 register setentry *freeslot;
157 register size_t mask = so->mask;
158 setentry *table = so->table;
159 register setentry *entry;
161 /* Make sure this function doesn't have to handle non-string keys,
162 including subclasses of str; e.g., one reason to subclass
163 strings is to override __eq__, and for speed we don't cater to
164 that here. */
165 if (!PyString_CheckExact(key)) {
166 so->lookup = set_lookkey;
167 return set_lookkey(so, key, hash);
169 i = hash & mask;
170 entry = &table[i];
171 if (entry->key == NULL || entry->key == key)
172 return entry;
173 if (entry->key == dummy)
174 freeslot = entry;
175 else {
176 if (entry->hash == hash && _PyString_Eq(entry->key, key))
177 return entry;
178 freeslot = NULL;
181 /* In the loop, key == dummy is by far (factor of 100s) the
182 least likely outcome, so test for that last. */
183 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
184 i = (i << 2) + i + perturb + 1;
185 entry = &table[i & mask];
186 if (entry->key == NULL)
187 return freeslot == NULL ? entry : freeslot;
188 if (entry->key == key
189 || (entry->hash == hash
190 && entry->key != dummy
191 && _PyString_Eq(entry->key, key)))
192 return entry;
193 if (entry->key == dummy && freeslot == NULL)
194 freeslot = entry;
196 assert(0); /* NOT REACHED */
197 return 0;
201 Internal routine to insert a new key into the table.
202 Used by the public insert routine.
203 Eats a reference to key.
205 static int
206 set_insert_key(register PySetObject *so, PyObject *key, long hash)
208 register setentry *entry;
209 typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long);
211 assert(so->lookup != NULL);
212 entry = so->lookup(so, key, hash);
213 if (entry == NULL)
214 return -1;
215 if (entry->key == NULL) {
216 /* UNUSED */
217 so->fill++;
218 entry->key = key;
219 entry->hash = hash;
220 so->used++;
221 } else if (entry->key == dummy) {
222 /* DUMMY */
223 entry->key = key;
224 entry->hash = hash;
225 so->used++;
226 Py_DECREF(dummy);
227 } else {
228 /* ACTIVE */
229 Py_DECREF(key);
231 return 0;
235 Internal routine used by set_table_resize() to insert an item which is
236 known to be absent from the set. This routine also assumes that
237 the set contains no deleted entries. Besides the performance benefit,
238 using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
239 Note that no refcounts are changed by this routine; if needed, the caller
240 is responsible for incref'ing `key`.
242 static void
243 set_insert_clean(register PySetObject *so, PyObject *key, long hash)
245 register size_t i;
246 register size_t perturb;
247 register size_t mask = (size_t)so->mask;
248 setentry *table = so->table;
249 register setentry *entry;
251 i = hash & mask;
252 entry = &table[i];
253 for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
254 i = (i << 2) + i + perturb + 1;
255 entry = &table[i & mask];
257 so->fill++;
258 entry->key = key;
259 entry->hash = hash;
260 so->used++;
264 Restructure the table by allocating a new table and reinserting all
265 keys again. When entries have been deleted, the new table may
266 actually be smaller than the old one.
268 static int
269 set_table_resize(PySetObject *so, Py_ssize_t minused)
271 Py_ssize_t newsize;
272 setentry *oldtable, *newtable, *entry;
273 Py_ssize_t i;
274 int is_oldtable_malloced;
275 setentry small_copy[PySet_MINSIZE];
277 assert(minused >= 0);
279 /* Find the smallest table size > minused. */
280 for (newsize = PySet_MINSIZE;
281 newsize <= minused && newsize > 0;
282 newsize <<= 1)
284 if (newsize <= 0) {
285 PyErr_NoMemory();
286 return -1;
289 /* Get space for a new table. */
290 oldtable = so->table;
291 assert(oldtable != NULL);
292 is_oldtable_malloced = oldtable != so->smalltable;
294 if (newsize == PySet_MINSIZE) {
295 /* A large table is shrinking, or we can't get any smaller. */
296 newtable = so->smalltable;
297 if (newtable == oldtable) {
298 if (so->fill == so->used) {
299 /* No dummies, so no point doing anything. */
300 return 0;
302 /* We're not going to resize it, but rebuild the
303 table anyway to purge old dummy entries.
304 Subtle: This is *necessary* if fill==size,
305 as set_lookkey needs at least one virgin slot to
306 terminate failing searches. If fill < size, it's
307 merely desirable, as dummies slow searches. */
308 assert(so->fill > so->used);
309 memcpy(small_copy, oldtable, sizeof(small_copy));
310 oldtable = small_copy;
313 else {
314 newtable = PyMem_NEW(setentry, newsize);
315 if (newtable == NULL) {
316 PyErr_NoMemory();
317 return -1;
321 /* Make the set empty, using the new table. */
322 assert(newtable != oldtable);
323 so->table = newtable;
324 so->mask = newsize - 1;
325 memset(newtable, 0, sizeof(setentry) * newsize);
326 so->used = 0;
327 i = so->fill;
328 so->fill = 0;
330 /* Copy the data over; this is refcount-neutral for active entries;
331 dummy entries aren't copied over, of course */
332 for (entry = oldtable; i > 0; entry++) {
333 if (entry->key == NULL) {
334 /* UNUSED */
336 } else if (entry->key == dummy) {
337 /* DUMMY */
338 --i;
339 assert(entry->key == dummy);
340 Py_DECREF(entry->key);
341 } else {
342 /* ACTIVE */
343 --i;
344 set_insert_clean(so, entry->key, entry->hash);
348 if (is_oldtable_malloced)
349 PyMem_DEL(oldtable);
350 return 0;
353 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
355 static int
356 set_add_entry(register PySetObject *so, setentry *entry)
358 register Py_ssize_t n_used;
360 assert(so->fill <= so->mask); /* at least one empty slot */
361 n_used = so->used;
362 Py_INCREF(entry->key);
363 if (set_insert_key(so, entry->key, entry->hash) == -1) {
364 Py_DECREF(entry->key);
365 return -1;
367 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
368 return 0;
369 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
372 static int
373 set_add_key(register PySetObject *so, PyObject *key)
375 register long hash;
376 register Py_ssize_t n_used;
378 if (!PyString_CheckExact(key) ||
379 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
380 hash = PyObject_Hash(key);
381 if (hash == -1)
382 return -1;
384 assert(so->fill <= so->mask); /* at least one empty slot */
385 n_used = so->used;
386 Py_INCREF(key);
387 if (set_insert_key(so, key, hash) == -1) {
388 Py_DECREF(key);
389 return -1;
391 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
392 return 0;
393 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
396 #define DISCARD_NOTFOUND 0
397 #define DISCARD_FOUND 1
399 static int
400 set_discard_entry(PySetObject *so, setentry *oldentry)
401 { register setentry *entry;
402 PyObject *old_key;
404 entry = (so->lookup)(so, oldentry->key, oldentry->hash);
405 if (entry == NULL)
406 return -1;
407 if (entry->key == NULL || entry->key == dummy)
408 return DISCARD_NOTFOUND;
409 old_key = entry->key;
410 Py_INCREF(dummy);
411 entry->key = dummy;
412 so->used--;
413 Py_DECREF(old_key);
414 return DISCARD_FOUND;
417 static int
418 set_discard_key(PySetObject *so, PyObject *key)
420 register long hash;
421 register setentry *entry;
422 PyObject *old_key;
424 assert (PyAnySet_Check(so));
425 if (!PyString_CheckExact(key) ||
426 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
427 hash = PyObject_Hash(key);
428 if (hash == -1)
429 return -1;
431 entry = (so->lookup)(so, key, hash);
432 if (entry == NULL)
433 return -1;
434 if (entry->key == NULL || entry->key == dummy)
435 return DISCARD_NOTFOUND;
436 old_key = entry->key;
437 Py_INCREF(dummy);
438 entry->key = dummy;
439 so->used--;
440 Py_DECREF(old_key);
441 return DISCARD_FOUND;
444 static int
445 set_clear_internal(PySetObject *so)
447 setentry *entry, *table;
448 int table_is_malloced;
449 Py_ssize_t fill;
450 setentry small_copy[PySet_MINSIZE];
451 #ifdef Py_DEBUG
452 Py_ssize_t i, n;
453 assert (PyAnySet_Check(so));
455 n = so->mask + 1;
456 i = 0;
457 #endif
459 table = so->table;
460 assert(table != NULL);
461 table_is_malloced = table != so->smalltable;
463 /* This is delicate. During the process of clearing the set,
464 * decrefs can cause the set to mutate. To avoid fatal confusion
465 * (voice of experience), we have to make the set empty before
466 * clearing the slots, and never refer to anything via so->ref while
467 * clearing.
469 fill = so->fill;
470 if (table_is_malloced)
471 EMPTY_TO_MINSIZE(so);
473 else if (fill > 0) {
474 /* It's a small table with something that needs to be cleared.
475 * Afraid the only safe way is to copy the set entries into
476 * another small table first.
478 memcpy(small_copy, table, sizeof(small_copy));
479 table = small_copy;
480 EMPTY_TO_MINSIZE(so);
482 /* else it's a small table that's already empty */
484 /* Now we can finally clear things. If C had refcounts, we could
485 * assert that the refcount on table is 1 now, i.e. that this function
486 * has unique access to it, so decref side-effects can't alter it.
488 for (entry = table; fill > 0; ++entry) {
489 #ifdef Py_DEBUG
490 assert(i < n);
491 ++i;
492 #endif
493 if (entry->key) {
494 --fill;
495 Py_DECREF(entry->key);
497 #ifdef Py_DEBUG
498 else
499 assert(entry->key == NULL);
500 #endif
503 if (table_is_malloced)
504 PyMem_DEL(table);
505 return 0;
509 * Iterate over a set table. Use like so:
511 * Py_ssize_t pos;
512 * setentry *entry;
513 * pos = 0; # important! pos should not otherwise be changed by you
514 * while (set_next(yourset, &pos, &entry)) {
515 * Refer to borrowed reference in entry->key.
518 * CAUTION: In general, it isn't safe to use set_next in a loop that
519 * mutates the table.
521 static int
522 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
524 Py_ssize_t i;
525 Py_ssize_t mask;
526 register setentry *table;
528 assert (PyAnySet_Check(so));
529 i = *pos_ptr;
530 assert(i >= 0);
531 table = so->table;
532 mask = so->mask;
533 while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
534 i++;
535 *pos_ptr = i+1;
536 if (i > mask)
537 return 0;
538 assert(table[i].key != NULL);
539 *entry_ptr = &table[i];
540 return 1;
543 static void
544 set_dealloc(PySetObject *so)
546 register setentry *entry;
547 Py_ssize_t fill = so->fill;
548 PyObject_GC_UnTrack(so);
549 Py_TRASHCAN_SAFE_BEGIN(so)
550 if (so->weakreflist != NULL)
551 PyObject_ClearWeakRefs((PyObject *) so);
553 for (entry = so->table; fill > 0; entry++) {
554 if (entry->key) {
555 --fill;
556 Py_DECREF(entry->key);
559 if (so->table != so->smalltable)
560 PyMem_DEL(so->table);
561 if (num_free_sets < MAXFREESETS && PyAnySet_CheckExact(so))
562 free_sets[num_free_sets++] = so;
563 else
564 Py_Type(so)->tp_free(so);
565 Py_TRASHCAN_SAFE_END(so)
568 static int
569 set_tp_print(PySetObject *so, FILE *fp, int flags)
571 setentry *entry;
572 Py_ssize_t pos=0;
573 char *emit = ""; /* No separator emitted on first pass */
574 char *separator = ", ";
575 int status = Py_ReprEnter((PyObject*)so);
577 if (status != 0) {
578 if (status < 0)
579 return status;
580 Py_BEGIN_ALLOW_THREADS
581 fprintf(fp, "%s(...)", so->ob_type->tp_name);
582 Py_END_ALLOW_THREADS
583 return 0;
586 Py_BEGIN_ALLOW_THREADS
587 fprintf(fp, "%s([", so->ob_type->tp_name);
588 Py_END_ALLOW_THREADS
589 while (set_next(so, &pos, &entry)) {
590 Py_BEGIN_ALLOW_THREADS
591 fputs(emit, fp);
592 Py_END_ALLOW_THREADS
593 emit = separator;
594 if (PyObject_Print(entry->key, fp, 0) != 0) {
595 Py_ReprLeave((PyObject*)so);
596 return -1;
599 Py_BEGIN_ALLOW_THREADS
600 fputs("])", fp);
601 Py_END_ALLOW_THREADS
602 Py_ReprLeave((PyObject*)so);
603 return 0;
606 static PyObject *
607 set_repr(PySetObject *so)
609 PyObject *keys, *result=NULL, *listrepr;
610 int status = Py_ReprEnter((PyObject*)so);
612 if (status != 0) {
613 if (status < 0)
614 return NULL;
615 return PyString_FromFormat("%s(...)", so->ob_type->tp_name);
618 keys = PySequence_List((PyObject *)so);
619 if (keys == NULL)
620 goto done;
621 listrepr = PyObject_Repr(keys);
622 Py_DECREF(keys);
623 if (listrepr == NULL)
624 goto done;
626 result = PyString_FromFormat("%s(%s)", so->ob_type->tp_name,
627 PyString_AS_STRING(listrepr));
628 Py_DECREF(listrepr);
629 done:
630 Py_ReprLeave((PyObject*)so);
631 return result;
634 static Py_ssize_t
635 set_len(PyObject *so)
637 return ((PySetObject *)so)->used;
640 static int
641 set_merge(PySetObject *so, PyObject *otherset)
643 PySetObject *other;
644 register Py_ssize_t i;
645 register setentry *entry;
647 assert (PyAnySet_Check(so));
648 assert (PyAnySet_Check(otherset));
650 other = (PySetObject*)otherset;
651 if (other == so || other->used == 0)
652 /* a.update(a) or a.update({}); nothing to do */
653 return 0;
654 /* Do one big resize at the start, rather than
655 * incrementally resizing as we insert new keys. Expect
656 * that there will be no (or few) overlapping keys.
658 if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
659 if (set_table_resize(so, (so->used + other->used)*2) != 0)
660 return -1;
662 for (i = 0; i <= other->mask; i++) {
663 entry = &other->table[i];
664 if (entry->key != NULL &&
665 entry->key != dummy) {
666 Py_INCREF(entry->key);
667 if (set_insert_key(so, entry->key, entry->hash) == -1) {
668 Py_DECREF(entry->key);
669 return -1;
673 return 0;
676 static int
677 set_contains_key(PySetObject *so, PyObject *key)
679 long hash;
680 setentry *entry;
682 if (!PyString_CheckExact(key) ||
683 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
684 hash = PyObject_Hash(key);
685 if (hash == -1)
686 return -1;
688 entry = (so->lookup)(so, key, hash);
689 if (entry == NULL)
690 return -1;
691 key = entry->key;
692 return key != NULL && key != dummy;
695 static int
696 set_contains_entry(PySetObject *so, setentry *entry)
698 PyObject *key;
699 setentry *lu_entry;
701 lu_entry = (so->lookup)(so, entry->key, entry->hash);
702 if (lu_entry == NULL)
703 return -1;
704 key = lu_entry->key;
705 return key != NULL && key != dummy;
708 static PyObject *
709 set_pop(PySetObject *so)
711 register Py_ssize_t i = 0;
712 register setentry *entry;
713 PyObject *key;
715 assert (PyAnySet_Check(so));
716 if (so->used == 0) {
717 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
718 return NULL;
721 /* Set entry to "the first" unused or dummy set entry. We abuse
722 * the hash field of slot 0 to hold a search finger:
723 * If slot 0 has a value, use slot 0.
724 * Else slot 0 is being used to hold a search finger,
725 * and we use its hash value as the first index to look.
727 entry = &so->table[0];
728 if (entry->key == NULL || entry->key == dummy) {
729 i = entry->hash;
730 /* The hash field may be a real hash value, or it may be a
731 * legit search finger, or it may be a once-legit search
732 * finger that's out of bounds now because it wrapped around
733 * or the table shrunk -- simply make sure it's in bounds now.
735 if (i > so->mask || i < 1)
736 i = 1; /* skip slot 0 */
737 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
738 i++;
739 if (i > so->mask)
740 i = 1;
743 key = entry->key;
744 Py_INCREF(dummy);
745 entry->key = dummy;
746 so->used--;
747 so->table[0].hash = i + 1; /* next place to start */
748 return key;
751 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.");
753 static int
754 set_traverse(PySetObject *so, visitproc visit, void *arg)
756 Py_ssize_t pos = 0;
757 setentry *entry;
759 while (set_next(so, &pos, &entry))
760 Py_VISIT(entry->key);
761 return 0;
764 static long
765 frozenset_hash(PyObject *self)
767 PySetObject *so = (PySetObject *)self;
768 long h, hash = 1927868237L;
769 setentry *entry;
770 Py_ssize_t pos = 0;
772 if (so->hash != -1)
773 return so->hash;
775 hash *= PySet_GET_SIZE(self) + 1;
776 while (set_next(so, &pos, &entry)) {
777 /* Work to increase the bit dispersion for closely spaced hash
778 values. The is important because some use cases have many
779 combinations of a small number of elements with nearby
780 hashes so that many distinct combinations collapse to only
781 a handful of distinct hash values. */
782 h = entry->hash;
783 hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u;
785 hash = hash * 69069L + 907133923L;
786 if (hash == -1)
787 hash = 590923713L;
788 so->hash = hash;
789 return hash;
792 static long
793 set_nohash(PyObject *self)
795 PyErr_SetString(PyExc_TypeError, "set objects are unhashable");
796 return -1;
799 /***** Set iterator type ***********************************************/
801 typedef struct {
802 PyObject_HEAD
803 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
804 Py_ssize_t si_used;
805 Py_ssize_t si_pos;
806 Py_ssize_t len;
807 } setiterobject;
809 static void
810 setiter_dealloc(setiterobject *si)
812 Py_XDECREF(si->si_set);
813 PyObject_Del(si);
816 static PyObject *
817 setiter_len(setiterobject *si)
819 Py_ssize_t len = 0;
820 if (si->si_set != NULL && si->si_used == si->si_set->used)
821 len = si->len;
822 return PyInt_FromLong(len);
825 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
827 static PyMethodDef setiter_methods[] = {
828 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
829 {NULL, NULL} /* sentinel */
832 static PyObject *setiter_iternext(setiterobject *si)
834 PyObject *key;
835 register Py_ssize_t i, mask;
836 register setentry *entry;
837 PySetObject *so = si->si_set;
839 if (so == NULL)
840 return NULL;
841 assert (PyAnySet_Check(so));
843 if (si->si_used != so->used) {
844 PyErr_SetString(PyExc_RuntimeError,
845 "Set changed size during iteration");
846 si->si_used = -1; /* Make this state sticky */
847 return NULL;
850 i = si->si_pos;
851 assert(i>=0);
852 entry = so->table;
853 mask = so->mask;
854 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
855 i++;
856 si->si_pos = i+1;
857 if (i > mask)
858 goto fail;
859 si->len--;
860 key = entry[i].key;
861 Py_INCREF(key);
862 return key;
864 fail:
865 Py_DECREF(so);
866 si->si_set = NULL;
867 return NULL;
870 static PyTypeObject PySetIter_Type = {
871 PyVarObject_HEAD_INIT(&PyType_Type, 0)
872 "setiterator", /* tp_name */
873 sizeof(setiterobject), /* tp_basicsize */
874 0, /* tp_itemsize */
875 /* methods */
876 (destructor)setiter_dealloc, /* tp_dealloc */
877 0, /* tp_print */
878 0, /* tp_getattr */
879 0, /* tp_setattr */
880 0, /* tp_compare */
881 0, /* tp_repr */
882 0, /* tp_as_number */
883 0, /* tp_as_sequence */
884 0, /* tp_as_mapping */
885 0, /* tp_hash */
886 0, /* tp_call */
887 0, /* tp_str */
888 PyObject_GenericGetAttr, /* tp_getattro */
889 0, /* tp_setattro */
890 0, /* tp_as_buffer */
891 Py_TPFLAGS_DEFAULT, /* tp_flags */
892 0, /* tp_doc */
893 0, /* tp_traverse */
894 0, /* tp_clear */
895 0, /* tp_richcompare */
896 0, /* tp_weaklistoffset */
897 PyObject_SelfIter, /* tp_iter */
898 (iternextfunc)setiter_iternext, /* tp_iternext */
899 setiter_methods, /* tp_methods */
903 static PyObject *
904 set_iter(PySetObject *so)
906 setiterobject *si = PyObject_New(setiterobject, &PySetIter_Type);
907 if (si == NULL)
908 return NULL;
909 Py_INCREF(so);
910 si->si_set = so;
911 si->si_used = so->used;
912 si->si_pos = 0;
913 si->len = so->used;
914 return (PyObject *)si;
917 static int
918 set_update_internal(PySetObject *so, PyObject *other)
920 PyObject *key, *it;
922 if (PyAnySet_CheckExact(other))
923 return set_merge(so, other);
925 if (PyDict_CheckExact(other)) {
926 PyObject *value;
927 Py_ssize_t pos = 0;
928 long hash;
929 Py_ssize_t dictsize = PyDict_Size(other);
931 /* Do one big resize at the start, rather than
932 * incrementally resizing as we insert new keys. Expect
933 * that there will be no (or few) overlapping keys.
935 if (dictsize == -1)
936 return -1;
937 if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
938 if (set_table_resize(so, (so->used + dictsize)*2) != 0)
939 return -1;
941 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
942 setentry an_entry;
944 an_entry.hash = hash;
945 an_entry.key = key;
946 if (set_add_entry(so, &an_entry) == -1)
947 return -1;
949 return 0;
952 it = PyObject_GetIter(other);
953 if (it == NULL)
954 return -1;
956 while ((key = PyIter_Next(it)) != NULL) {
957 if (set_add_key(so, key) == -1) {
958 Py_DECREF(it);
959 Py_DECREF(key);
960 return -1;
962 Py_DECREF(key);
964 Py_DECREF(it);
965 if (PyErr_Occurred())
966 return -1;
967 return 0;
970 static PyObject *
971 set_update(PySetObject *so, PyObject *other)
973 if (set_update_internal(so, other) == -1)
974 return NULL;
975 Py_RETURN_NONE;
978 PyDoc_STRVAR(update_doc,
979 "Update a set with the union of itself and another.");
981 static PyObject *
982 make_new_set(PyTypeObject *type, PyObject *iterable)
984 register PySetObject *so = NULL;
986 if (dummy == NULL) { /* Auto-initialize dummy */
987 dummy = PyString_FromString("<dummy key>");
988 if (dummy == NULL)
989 return NULL;
992 /* create PySetObject structure */
993 if (num_free_sets &&
994 (type == &PySet_Type || type == &PyFrozenSet_Type)) {
995 so = free_sets[--num_free_sets];
996 assert (so != NULL && PyAnySet_CheckExact(so));
997 Py_Type(so) = type;
998 _Py_NewReference((PyObject *)so);
999 EMPTY_TO_MINSIZE(so);
1000 PyObject_GC_Track(so);
1001 } else {
1002 so = (PySetObject *)type->tp_alloc(type, 0);
1003 if (so == NULL)
1004 return NULL;
1005 /* tp_alloc has already zeroed the structure */
1006 assert(so->table == NULL && so->fill == 0 && so->used == 0);
1007 INIT_NONZERO_SET_SLOTS(so);
1010 so->lookup = set_lookkey_string;
1011 so->weakreflist = NULL;
1013 if (iterable != NULL) {
1014 if (set_update_internal(so, iterable) == -1) {
1015 Py_DECREF(so);
1016 return NULL;
1020 return (PyObject *)so;
1023 /* The empty frozenset is a singleton */
1024 static PyObject *emptyfrozenset = NULL;
1026 static PyObject *
1027 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1029 PyObject *iterable = NULL, *result;
1031 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
1032 return NULL;
1034 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1035 return NULL;
1037 if (type != &PyFrozenSet_Type)
1038 return make_new_set(type, iterable);
1040 if (iterable != NULL) {
1041 /* frozenset(f) is idempotent */
1042 if (PyFrozenSet_CheckExact(iterable)) {
1043 Py_INCREF(iterable);
1044 return iterable;
1046 result = make_new_set(type, iterable);
1047 if (result == NULL || PySet_GET_SIZE(result))
1048 return result;
1049 Py_DECREF(result);
1051 /* The empty frozenset is a singleton */
1052 if (emptyfrozenset == NULL)
1053 emptyfrozenset = make_new_set(type, NULL);
1054 Py_XINCREF(emptyfrozenset);
1055 return emptyfrozenset;
1058 void
1059 PySet_Fini(void)
1061 PySetObject *so;
1063 while (num_free_sets) {
1064 num_free_sets--;
1065 so = free_sets[num_free_sets];
1066 PyObject_GC_Del(so);
1068 Py_CLEAR(dummy);
1069 Py_CLEAR(emptyfrozenset);
1072 static PyObject *
1073 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1075 if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
1076 return NULL;
1078 return make_new_set(type, NULL);
1081 /* set_swap_bodies() switches the contents of any two sets by moving their
1082 internal data pointers and, if needed, copying the internal smalltables.
1083 Semantically equivalent to:
1085 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1087 The function always succeeds and it leaves both objects in a stable state.
1088 Useful for creating temporary frozensets from sets for membership testing
1089 in __contains__(), discard(), and remove(). Also useful for operations
1090 that update in-place (by allowing an intermediate result to be swapped
1091 into one of the original inputs).
1094 static void
1095 set_swap_bodies(PySetObject *a, PySetObject *b)
1097 Py_ssize_t t;
1098 setentry *u;
1099 setentry *(*f)(PySetObject *so, PyObject *key, long hash);
1100 setentry tab[PySet_MINSIZE];
1101 long h;
1103 t = a->fill; a->fill = b->fill; b->fill = t;
1104 t = a->used; a->used = b->used; b->used = t;
1105 t = a->mask; a->mask = b->mask; b->mask = t;
1107 u = a->table;
1108 if (a->table == a->smalltable)
1109 u = b->smalltable;
1110 a->table = b->table;
1111 if (b->table == b->smalltable)
1112 a->table = a->smalltable;
1113 b->table = u;
1115 f = a->lookup; a->lookup = b->lookup; b->lookup = f;
1117 if (a->table == a->smalltable || b->table == b->smalltable) {
1118 memcpy(tab, a->smalltable, sizeof(tab));
1119 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1120 memcpy(b->smalltable, tab, sizeof(tab));
1123 if (PyType_IsSubtype(Py_Type(a), &PyFrozenSet_Type) &&
1124 PyType_IsSubtype(Py_Type(b), &PyFrozenSet_Type)) {
1125 h = a->hash; a->hash = b->hash; b->hash = h;
1126 } else {
1127 a->hash = -1;
1128 b->hash = -1;
1132 static PyObject *
1133 set_copy(PySetObject *so)
1135 return make_new_set(Py_Type(so), (PyObject *)so);
1138 static PyObject *
1139 frozenset_copy(PySetObject *so)
1141 if (PyFrozenSet_CheckExact(so)) {
1142 Py_INCREF(so);
1143 return (PyObject *)so;
1145 return set_copy(so);
1148 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1150 static PyObject *
1151 set_clear(PySetObject *so)
1153 set_clear_internal(so);
1154 Py_RETURN_NONE;
1157 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1159 static PyObject *
1160 set_union(PySetObject *so, PyObject *other)
1162 PySetObject *result;
1164 result = (PySetObject *)set_copy(so);
1165 if (result == NULL)
1166 return NULL;
1167 if ((PyObject *)so == other)
1168 return (PyObject *)result;
1169 if (set_update_internal(result, other) == -1) {
1170 Py_DECREF(result);
1171 return NULL;
1173 return (PyObject *)result;
1176 PyDoc_STRVAR(union_doc,
1177 "Return the union of two sets as a new set.\n\
1179 (i.e. all elements that are in either set.)");
1181 static PyObject *
1182 set_or(PySetObject *so, PyObject *other)
1184 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1185 Py_INCREF(Py_NotImplemented);
1186 return Py_NotImplemented;
1188 return set_union(so, other);
1191 static PyObject *
1192 set_ior(PySetObject *so, PyObject *other)
1194 if (!PyAnySet_Check(other)) {
1195 Py_INCREF(Py_NotImplemented);
1196 return Py_NotImplemented;
1198 if (set_update_internal(so, other) == -1)
1199 return NULL;
1200 Py_INCREF(so);
1201 return (PyObject *)so;
1204 static PyObject *
1205 set_intersection(PySetObject *so, PyObject *other)
1207 PySetObject *result;
1208 PyObject *key, *it, *tmp;
1210 if ((PyObject *)so == other)
1211 return set_copy(so);
1213 result = (PySetObject *)make_new_set(Py_Type(so), NULL);
1214 if (result == NULL)
1215 return NULL;
1217 if (PyAnySet_CheckExact(other)) {
1218 Py_ssize_t pos = 0;
1219 setentry *entry;
1221 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1222 tmp = (PyObject *)so;
1223 so = (PySetObject *)other;
1224 other = tmp;
1227 while (set_next((PySetObject *)other, &pos, &entry)) {
1228 int rv = set_contains_entry(so, entry);
1229 if (rv == -1) {
1230 Py_DECREF(result);
1231 return NULL;
1233 if (rv) {
1234 if (set_add_entry(result, entry) == -1) {
1235 Py_DECREF(result);
1236 return NULL;
1240 return (PyObject *)result;
1243 it = PyObject_GetIter(other);
1244 if (it == NULL) {
1245 Py_DECREF(result);
1246 return NULL;
1249 while ((key = PyIter_Next(it)) != NULL) {
1250 int rv;
1251 setentry entry;
1252 long hash = PyObject_Hash(key);
1254 if (hash == -1) {
1255 Py_DECREF(it);
1256 Py_DECREF(result);
1257 Py_DECREF(key);
1258 return NULL;
1260 entry.hash = hash;
1261 entry.key = key;
1262 rv = set_contains_entry(so, &entry);
1263 if (rv == -1) {
1264 Py_DECREF(it);
1265 Py_DECREF(result);
1266 Py_DECREF(key);
1267 return NULL;
1269 if (rv) {
1270 if (set_add_entry(result, &entry) == -1) {
1271 Py_DECREF(it);
1272 Py_DECREF(result);
1273 Py_DECREF(key);
1274 return NULL;
1277 Py_DECREF(key);
1279 Py_DECREF(it);
1280 if (PyErr_Occurred()) {
1281 Py_DECREF(result);
1282 return NULL;
1284 return (PyObject *)result;
1287 PyDoc_STRVAR(intersection_doc,
1288 "Return the intersection of two sets as a new set.\n\
1290 (i.e. all elements that are in both sets.)");
1292 static PyObject *
1293 set_intersection_update(PySetObject *so, PyObject *other)
1295 PyObject *tmp;
1297 tmp = set_intersection(so, other);
1298 if (tmp == NULL)
1299 return NULL;
1300 set_swap_bodies(so, (PySetObject *)tmp);
1301 Py_DECREF(tmp);
1302 Py_RETURN_NONE;
1305 PyDoc_STRVAR(intersection_update_doc,
1306 "Update a set with the intersection of itself and another.");
1308 static PyObject *
1309 set_and(PySetObject *so, PyObject *other)
1311 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1312 Py_INCREF(Py_NotImplemented);
1313 return Py_NotImplemented;
1315 return set_intersection(so, other);
1318 static PyObject *
1319 set_iand(PySetObject *so, PyObject *other)
1321 PyObject *result;
1323 if (!PyAnySet_Check(other)) {
1324 Py_INCREF(Py_NotImplemented);
1325 return Py_NotImplemented;
1327 result = set_intersection_update(so, other);
1328 if (result == NULL)
1329 return NULL;
1330 Py_DECREF(result);
1331 Py_INCREF(so);
1332 return (PyObject *)so;
1335 static int
1336 set_difference_update_internal(PySetObject *so, PyObject *other)
1338 if ((PyObject *)so == other)
1339 return set_clear_internal(so);
1341 if (PyAnySet_CheckExact(other)) {
1342 setentry *entry;
1343 Py_ssize_t pos = 0;
1345 while (set_next((PySetObject *)other, &pos, &entry))
1346 if (set_discard_entry(so, entry) == -1)
1347 return -1;
1348 } else {
1349 PyObject *key, *it;
1350 it = PyObject_GetIter(other);
1351 if (it == NULL)
1352 return -1;
1354 while ((key = PyIter_Next(it)) != NULL) {
1355 if (set_discard_key(so, key) == -1) {
1356 Py_DECREF(it);
1357 Py_DECREF(key);
1358 return -1;
1360 Py_DECREF(key);
1362 Py_DECREF(it);
1363 if (PyErr_Occurred())
1364 return -1;
1366 /* If more than 1/5 are dummies, then resize them away. */
1367 if ((so->fill - so->used) * 5 < so->mask)
1368 return 0;
1369 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1372 static PyObject *
1373 set_difference_update(PySetObject *so, PyObject *other)
1375 if (set_difference_update_internal(so, other) != -1)
1376 Py_RETURN_NONE;
1377 return NULL;
1380 PyDoc_STRVAR(difference_update_doc,
1381 "Remove all elements of another set from this set.");
1383 static PyObject *
1384 set_difference(PySetObject *so, PyObject *other)
1386 PyObject *result;
1387 setentry *entry;
1388 Py_ssize_t pos = 0;
1390 if (!PyAnySet_CheckExact(other) && !PyDict_CheckExact(other)) {
1391 result = set_copy(so);
1392 if (result == NULL)
1393 return NULL;
1394 if (set_difference_update_internal((PySetObject *)result, other) != -1)
1395 return result;
1396 Py_DECREF(result);
1397 return NULL;
1400 result = make_new_set(Py_Type(so), NULL);
1401 if (result == NULL)
1402 return NULL;
1404 if (PyDict_CheckExact(other)) {
1405 while (set_next(so, &pos, &entry)) {
1406 setentry entrycopy;
1407 entrycopy.hash = entry->hash;
1408 entrycopy.key = entry->key;
1409 if (!_PyDict_Contains(other, entry->key, entry->hash)) {
1410 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
1411 Py_DECREF(result);
1412 return NULL;
1416 return result;
1419 while (set_next(so, &pos, &entry)) {
1420 int rv = set_contains_entry((PySetObject *)other, entry);
1421 if (rv == -1) {
1422 Py_DECREF(result);
1423 return NULL;
1425 if (!rv) {
1426 if (set_add_entry((PySetObject *)result, entry) == -1) {
1427 Py_DECREF(result);
1428 return NULL;
1432 return result;
1435 PyDoc_STRVAR(difference_doc,
1436 "Return the difference of two sets as a new set.\n\
1438 (i.e. all elements that are in this set but not the other.)");
1439 static PyObject *
1440 set_sub(PySetObject *so, PyObject *other)
1442 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1443 Py_INCREF(Py_NotImplemented);
1444 return Py_NotImplemented;
1446 return set_difference(so, other);
1449 static PyObject *
1450 set_isub(PySetObject *so, PyObject *other)
1452 PyObject *result;
1454 if (!PyAnySet_Check(other)) {
1455 Py_INCREF(Py_NotImplemented);
1456 return Py_NotImplemented;
1458 result = set_difference_update(so, other);
1459 if (result == NULL)
1460 return NULL;
1461 Py_DECREF(result);
1462 Py_INCREF(so);
1463 return (PyObject *)so;
1466 static PyObject *
1467 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1469 PySetObject *otherset;
1470 PyObject *key;
1471 Py_ssize_t pos = 0;
1472 setentry *entry;
1474 if ((PyObject *)so == other)
1475 return set_clear(so);
1477 if (PyDict_CheckExact(other)) {
1478 PyObject *value;
1479 int rv;
1480 long hash;
1481 while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
1482 setentry an_entry;
1484 an_entry.hash = hash;
1485 an_entry.key = key;
1486 rv = set_discard_entry(so, &an_entry);
1487 if (rv == -1)
1488 return NULL;
1489 if (rv == DISCARD_NOTFOUND) {
1490 if (set_add_entry(so, &an_entry) == -1)
1491 return NULL;
1494 Py_RETURN_NONE;
1497 if (PyAnySet_CheckExact(other)) {
1498 Py_INCREF(other);
1499 otherset = (PySetObject *)other;
1500 } else {
1501 otherset = (PySetObject *)make_new_set(Py_Type(so), other);
1502 if (otherset == NULL)
1503 return NULL;
1506 while (set_next(otherset, &pos, &entry)) {
1507 int rv = set_discard_entry(so, entry);
1508 if (rv == -1) {
1509 Py_DECREF(otherset);
1510 return NULL;
1512 if (rv == DISCARD_NOTFOUND) {
1513 if (set_add_entry(so, entry) == -1) {
1514 Py_DECREF(otherset);
1515 return NULL;
1519 Py_DECREF(otherset);
1520 Py_RETURN_NONE;
1523 PyDoc_STRVAR(symmetric_difference_update_doc,
1524 "Update a set with the symmetric difference of itself and another.");
1526 static PyObject *
1527 set_symmetric_difference(PySetObject *so, PyObject *other)
1529 PyObject *rv;
1530 PySetObject *otherset;
1532 otherset = (PySetObject *)make_new_set(Py_Type(so), other);
1533 if (otherset == NULL)
1534 return NULL;
1535 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1536 if (rv == NULL)
1537 return NULL;
1538 Py_DECREF(rv);
1539 return (PyObject *)otherset;
1542 PyDoc_STRVAR(symmetric_difference_doc,
1543 "Return the symmetric difference of two sets as a new set.\n\
1545 (i.e. all elements that are in exactly one of the sets.)");
1547 static PyObject *
1548 set_xor(PySetObject *so, PyObject *other)
1550 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1551 Py_INCREF(Py_NotImplemented);
1552 return Py_NotImplemented;
1554 return set_symmetric_difference(so, other);
1557 static PyObject *
1558 set_ixor(PySetObject *so, PyObject *other)
1560 PyObject *result;
1562 if (!PyAnySet_Check(other)) {
1563 Py_INCREF(Py_NotImplemented);
1564 return Py_NotImplemented;
1566 result = set_symmetric_difference_update(so, other);
1567 if (result == NULL)
1568 return NULL;
1569 Py_DECREF(result);
1570 Py_INCREF(so);
1571 return (PyObject *)so;
1574 static PyObject *
1575 set_issubset(PySetObject *so, PyObject *other)
1577 setentry *entry;
1578 Py_ssize_t pos = 0;
1580 if (!PyAnySet_CheckExact(other)) {
1581 PyObject *tmp, *result;
1582 tmp = make_new_set(&PySet_Type, other);
1583 if (tmp == NULL)
1584 return NULL;
1585 result = set_issubset(so, tmp);
1586 Py_DECREF(tmp);
1587 return result;
1589 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1590 Py_RETURN_FALSE;
1592 while (set_next(so, &pos, &entry)) {
1593 int rv = set_contains_entry((PySetObject *)other, entry);
1594 if (rv == -1)
1595 return NULL;
1596 if (!rv)
1597 Py_RETURN_FALSE;
1599 Py_RETURN_TRUE;
1602 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1604 static PyObject *
1605 set_issuperset(PySetObject *so, PyObject *other)
1607 PyObject *tmp, *result;
1609 if (!PyAnySet_CheckExact(other)) {
1610 tmp = make_new_set(&PySet_Type, other);
1611 if (tmp == NULL)
1612 return NULL;
1613 result = set_issuperset(so, tmp);
1614 Py_DECREF(tmp);
1615 return result;
1617 return set_issubset((PySetObject *)other, (PyObject *)so);
1620 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1622 static PyObject *
1623 set_richcompare(PySetObject *v, PyObject *w, int op)
1625 PyObject *r1, *r2;
1627 if(!PyAnySet_Check(w)) {
1628 if (op == Py_EQ)
1629 Py_RETURN_FALSE;
1630 if (op == Py_NE)
1631 Py_RETURN_TRUE;
1632 PyErr_SetString(PyExc_TypeError, "can only compare to a set");
1633 return NULL;
1635 switch (op) {
1636 case Py_EQ:
1637 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1638 Py_RETURN_FALSE;
1639 if (v->hash != -1 &&
1640 ((PySetObject *)w)->hash != -1 &&
1641 v->hash != ((PySetObject *)w)->hash)
1642 Py_RETURN_FALSE;
1643 return set_issubset(v, w);
1644 case Py_NE:
1645 r1 = set_richcompare(v, w, Py_EQ);
1646 if (r1 == NULL)
1647 return NULL;
1648 r2 = PyBool_FromLong(PyObject_Not(r1));
1649 Py_DECREF(r1);
1650 return r2;
1651 case Py_LE:
1652 return set_issubset(v, w);
1653 case Py_GE:
1654 return set_issuperset(v, w);
1655 case Py_LT:
1656 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1657 Py_RETURN_FALSE;
1658 return set_issubset(v, w);
1659 case Py_GT:
1660 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1661 Py_RETURN_FALSE;
1662 return set_issuperset(v, w);
1664 Py_INCREF(Py_NotImplemented);
1665 return Py_NotImplemented;
1668 static int
1669 set_nocmp(PyObject *self, PyObject *other)
1671 PyErr_SetString(PyExc_TypeError, "cannot compare sets using cmp()");
1672 return -1;
1675 static PyObject *
1676 set_add(PySetObject *so, PyObject *key)
1678 if (set_add_key(so, key) == -1)
1679 return NULL;
1680 Py_RETURN_NONE;
1683 PyDoc_STRVAR(add_doc,
1684 "Add an element to a set.\n\
1686 This has no effect if the element is already present.");
1688 static int
1689 set_contains(PySetObject *so, PyObject *key)
1691 PyObject *tmpkey;
1692 int rv;
1694 rv = set_contains_key(so, key);
1695 if (rv == -1) {
1696 if (!PyAnySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1697 return -1;
1698 PyErr_Clear();
1699 tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
1700 if (tmpkey == NULL)
1701 return -1;
1702 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1703 rv = set_contains(so, tmpkey);
1704 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1705 Py_DECREF(tmpkey);
1707 return rv;
1710 static PyObject *
1711 set_direct_contains(PySetObject *so, PyObject *key)
1713 long result;
1715 result = set_contains(so, key);
1716 if (result == -1)
1717 return NULL;
1718 return PyBool_FromLong(result);
1721 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1723 static PyObject *
1724 set_remove(PySetObject *so, PyObject *key)
1726 PyObject *tmpkey, *result;
1727 int rv;
1729 rv = set_discard_key(so, key);
1730 if (rv == -1) {
1731 if (!PyAnySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1732 return NULL;
1733 PyErr_Clear();
1734 tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
1735 if (tmpkey == NULL)
1736 return NULL;
1737 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1738 result = set_remove(so, tmpkey);
1739 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1740 Py_DECREF(tmpkey);
1741 return result;
1742 } else if (rv == DISCARD_NOTFOUND) {
1743 set_key_error(key);
1744 return NULL;
1746 Py_RETURN_NONE;
1749 PyDoc_STRVAR(remove_doc,
1750 "Remove an element from a set; it must be a member.\n\
1752 If the element is not a member, raise a KeyError.");
1754 static PyObject *
1755 set_discard(PySetObject *so, PyObject *key)
1757 PyObject *tmpkey, *result;
1758 int rv;
1760 rv = set_discard_key(so, key);
1761 if (rv == -1) {
1762 if (!PyAnySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1763 return NULL;
1764 PyErr_Clear();
1765 tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
1766 if (tmpkey == NULL)
1767 return NULL;
1768 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1769 result = set_discard(so, tmpkey);
1770 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1771 Py_DECREF(tmpkey);
1772 return result;
1774 Py_RETURN_NONE;
1777 PyDoc_STRVAR(discard_doc,
1778 "Remove an element from a set if it is a member.\n\
1780 If the element is not a member, do nothing.");
1782 static PyObject *
1783 set_reduce(PySetObject *so)
1785 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1787 keys = PySequence_List((PyObject *)so);
1788 if (keys == NULL)
1789 goto done;
1790 args = PyTuple_Pack(1, keys);
1791 if (args == NULL)
1792 goto done;
1793 dict = PyObject_GetAttrString((PyObject *)so, "__dict__");
1794 if (dict == NULL) {
1795 PyErr_Clear();
1796 dict = Py_None;
1797 Py_INCREF(dict);
1799 result = PyTuple_Pack(3, Py_Type(so), args, dict);
1800 done:
1801 Py_XDECREF(args);
1802 Py_XDECREF(keys);
1803 Py_XDECREF(dict);
1804 return result;
1807 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
1809 static int
1810 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1812 PyObject *iterable = NULL;
1814 if (!PyAnySet_Check(self))
1815 return -1;
1816 if (!PyArg_UnpackTuple(args, Py_Type(self)->tp_name, 0, 1, &iterable))
1817 return -1;
1818 set_clear_internal(self);
1819 self->hash = -1;
1820 if (iterable == NULL)
1821 return 0;
1822 return set_update_internal(self, iterable);
1825 static PySequenceMethods set_as_sequence = {
1826 set_len, /* sq_length */
1827 0, /* sq_concat */
1828 0, /* sq_repeat */
1829 0, /* sq_item */
1830 0, /* sq_slice */
1831 0, /* sq_ass_item */
1832 0, /* sq_ass_slice */
1833 (objobjproc)set_contains, /* sq_contains */
1836 /* set object ********************************************************/
1838 #ifdef Py_DEBUG
1839 static PyObject *test_c_api(PySetObject *so);
1841 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
1842 All is well if assertions don't fail.");
1843 #endif
1845 static PyMethodDef set_methods[] = {
1846 {"add", (PyCFunction)set_add, METH_O,
1847 add_doc},
1848 {"clear", (PyCFunction)set_clear, METH_NOARGS,
1849 clear_doc},
1850 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
1851 contains_doc},
1852 {"copy", (PyCFunction)set_copy, METH_NOARGS,
1853 copy_doc},
1854 {"discard", (PyCFunction)set_discard, METH_O,
1855 discard_doc},
1856 {"difference", (PyCFunction)set_difference, METH_O,
1857 difference_doc},
1858 {"difference_update", (PyCFunction)set_difference_update, METH_O,
1859 difference_update_doc},
1860 {"intersection",(PyCFunction)set_intersection, METH_O,
1861 intersection_doc},
1862 {"intersection_update",(PyCFunction)set_intersection_update, METH_O,
1863 intersection_update_doc},
1864 {"issubset", (PyCFunction)set_issubset, METH_O,
1865 issubset_doc},
1866 {"issuperset", (PyCFunction)set_issuperset, METH_O,
1867 issuperset_doc},
1868 {"pop", (PyCFunction)set_pop, METH_NOARGS,
1869 pop_doc},
1870 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
1871 reduce_doc},
1872 {"remove", (PyCFunction)set_remove, METH_O,
1873 remove_doc},
1874 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
1875 symmetric_difference_doc},
1876 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
1877 symmetric_difference_update_doc},
1878 #ifdef Py_DEBUG
1879 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
1880 test_c_api_doc},
1881 #endif
1882 {"union", (PyCFunction)set_union, METH_O,
1883 union_doc},
1884 {"update", (PyCFunction)set_update, METH_O,
1885 update_doc},
1886 {NULL, NULL} /* sentinel */
1889 static PyNumberMethods set_as_number = {
1890 0, /*nb_add*/
1891 (binaryfunc)set_sub, /*nb_subtract*/
1892 0, /*nb_multiply*/
1893 0, /*nb_divide*/
1894 0, /*nb_remainder*/
1895 0, /*nb_divmod*/
1896 0, /*nb_power*/
1897 0, /*nb_negative*/
1898 0, /*nb_positive*/
1899 0, /*nb_absolute*/
1900 0, /*nb_nonzero*/
1901 0, /*nb_invert*/
1902 0, /*nb_lshift*/
1903 0, /*nb_rshift*/
1904 (binaryfunc)set_and, /*nb_and*/
1905 (binaryfunc)set_xor, /*nb_xor*/
1906 (binaryfunc)set_or, /*nb_or*/
1907 0, /*nb_coerce*/
1908 0, /*nb_int*/
1909 0, /*nb_long*/
1910 0, /*nb_float*/
1911 0, /*nb_oct*/
1912 0, /*nb_hex*/
1913 0, /*nb_inplace_add*/
1914 (binaryfunc)set_isub, /*nb_inplace_subtract*/
1915 0, /*nb_inplace_multiply*/
1916 0, /*nb_inplace_divide*/
1917 0, /*nb_inplace_remainder*/
1918 0, /*nb_inplace_power*/
1919 0, /*nb_inplace_lshift*/
1920 0, /*nb_inplace_rshift*/
1921 (binaryfunc)set_iand, /*nb_inplace_and*/
1922 (binaryfunc)set_ixor, /*nb_inplace_xor*/
1923 (binaryfunc)set_ior, /*nb_inplace_or*/
1926 PyDoc_STRVAR(set_doc,
1927 "set(iterable) --> set object\n\
1929 Build an unordered collection of unique elements.");
1931 PyTypeObject PySet_Type = {
1932 PyVarObject_HEAD_INIT(&PyType_Type, 0)
1933 "set", /* tp_name */
1934 sizeof(PySetObject), /* tp_basicsize */
1935 0, /* tp_itemsize */
1936 /* methods */
1937 (destructor)set_dealloc, /* tp_dealloc */
1938 (printfunc)set_tp_print, /* tp_print */
1939 0, /* tp_getattr */
1940 0, /* tp_setattr */
1941 set_nocmp, /* tp_compare */
1942 (reprfunc)set_repr, /* tp_repr */
1943 &set_as_number, /* tp_as_number */
1944 &set_as_sequence, /* tp_as_sequence */
1945 0, /* tp_as_mapping */
1946 set_nohash, /* tp_hash */
1947 0, /* tp_call */
1948 0, /* tp_str */
1949 PyObject_GenericGetAttr, /* tp_getattro */
1950 0, /* tp_setattro */
1951 0, /* tp_as_buffer */
1952 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
1953 Py_TPFLAGS_BASETYPE, /* tp_flags */
1954 set_doc, /* tp_doc */
1955 (traverseproc)set_traverse, /* tp_traverse */
1956 (inquiry)set_clear_internal, /* tp_clear */
1957 (richcmpfunc)set_richcompare, /* tp_richcompare */
1958 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
1959 (getiterfunc)set_iter, /* tp_iter */
1960 0, /* tp_iternext */
1961 set_methods, /* tp_methods */
1962 0, /* tp_members */
1963 0, /* tp_getset */
1964 0, /* tp_base */
1965 0, /* tp_dict */
1966 0, /* tp_descr_get */
1967 0, /* tp_descr_set */
1968 0, /* tp_dictoffset */
1969 (initproc)set_init, /* tp_init */
1970 PyType_GenericAlloc, /* tp_alloc */
1971 set_new, /* tp_new */
1972 PyObject_GC_Del, /* tp_free */
1975 /* frozenset object ********************************************************/
1978 static PyMethodDef frozenset_methods[] = {
1979 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
1980 contains_doc},
1981 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
1982 copy_doc},
1983 {"difference", (PyCFunction)set_difference, METH_O,
1984 difference_doc},
1985 {"intersection",(PyCFunction)set_intersection, METH_O,
1986 intersection_doc},
1987 {"issubset", (PyCFunction)set_issubset, METH_O,
1988 issubset_doc},
1989 {"issuperset", (PyCFunction)set_issuperset, METH_O,
1990 issuperset_doc},
1991 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
1992 reduce_doc},
1993 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
1994 symmetric_difference_doc},
1995 {"union", (PyCFunction)set_union, METH_O,
1996 union_doc},
1997 {NULL, NULL} /* sentinel */
2000 static PyNumberMethods frozenset_as_number = {
2001 0, /*nb_add*/
2002 (binaryfunc)set_sub, /*nb_subtract*/
2003 0, /*nb_multiply*/
2004 0, /*nb_divide*/
2005 0, /*nb_remainder*/
2006 0, /*nb_divmod*/
2007 0, /*nb_power*/
2008 0, /*nb_negative*/
2009 0, /*nb_positive*/
2010 0, /*nb_absolute*/
2011 0, /*nb_nonzero*/
2012 0, /*nb_invert*/
2013 0, /*nb_lshift*/
2014 0, /*nb_rshift*/
2015 (binaryfunc)set_and, /*nb_and*/
2016 (binaryfunc)set_xor, /*nb_xor*/
2017 (binaryfunc)set_or, /*nb_or*/
2020 PyDoc_STRVAR(frozenset_doc,
2021 "frozenset(iterable) --> frozenset object\n\
2023 Build an immutable unordered collection of unique elements.");
2025 PyTypeObject PyFrozenSet_Type = {
2026 PyVarObject_HEAD_INIT(&PyType_Type, 0)
2027 "frozenset", /* tp_name */
2028 sizeof(PySetObject), /* tp_basicsize */
2029 0, /* tp_itemsize */
2030 /* methods */
2031 (destructor)set_dealloc, /* tp_dealloc */
2032 (printfunc)set_tp_print, /* tp_print */
2033 0, /* tp_getattr */
2034 0, /* tp_setattr */
2035 set_nocmp, /* tp_compare */
2036 (reprfunc)set_repr, /* tp_repr */
2037 &frozenset_as_number, /* tp_as_number */
2038 &set_as_sequence, /* tp_as_sequence */
2039 0, /* tp_as_mapping */
2040 frozenset_hash, /* tp_hash */
2041 0, /* tp_call */
2042 0, /* tp_str */
2043 PyObject_GenericGetAttr, /* tp_getattro */
2044 0, /* tp_setattro */
2045 0, /* tp_as_buffer */
2046 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2047 Py_TPFLAGS_BASETYPE, /* tp_flags */
2048 frozenset_doc, /* tp_doc */
2049 (traverseproc)set_traverse, /* tp_traverse */
2050 (inquiry)set_clear_internal, /* tp_clear */
2051 (richcmpfunc)set_richcompare, /* tp_richcompare */
2052 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2053 (getiterfunc)set_iter, /* tp_iter */
2054 0, /* tp_iternext */
2055 frozenset_methods, /* tp_methods */
2056 0, /* tp_members */
2057 0, /* tp_getset */
2058 0, /* tp_base */
2059 0, /* tp_dict */
2060 0, /* tp_descr_get */
2061 0, /* tp_descr_set */
2062 0, /* tp_dictoffset */
2063 0, /* tp_init */
2064 PyType_GenericAlloc, /* tp_alloc */
2065 frozenset_new, /* tp_new */
2066 PyObject_GC_Del, /* tp_free */
2070 /***** C API functions *************************************************/
2072 PyObject *
2073 PySet_New(PyObject *iterable)
2075 return make_new_set(&PySet_Type, iterable);
2078 PyObject *
2079 PyFrozenSet_New(PyObject *iterable)
2081 PyObject *args, *result;
2083 if (iterable == NULL)
2084 args = PyTuple_New(0);
2085 else
2086 args = PyTuple_Pack(1, iterable);
2087 if (args == NULL)
2088 return NULL;
2089 result = frozenset_new(&PyFrozenSet_Type, args, NULL);
2090 Py_DECREF(args);
2091 return result;
2094 Py_ssize_t
2095 PySet_Size(PyObject *anyset)
2097 if (!PyAnySet_Check(anyset)) {
2098 PyErr_BadInternalCall();
2099 return -1;
2101 return PySet_GET_SIZE(anyset);
2105 PySet_Clear(PyObject *set)
2107 if (!PyType_IsSubtype(Py_Type(set), &PySet_Type)) {
2108 PyErr_BadInternalCall();
2109 return -1;
2111 return set_clear_internal((PySetObject *)set);
2115 PySet_Contains(PyObject *anyset, PyObject *key)
2117 if (!PyAnySet_Check(anyset)) {
2118 PyErr_BadInternalCall();
2119 return -1;
2121 return set_contains_key((PySetObject *)anyset, key);
2125 PySet_Discard(PyObject *set, PyObject *key)
2127 if (!PyType_IsSubtype(Py_Type(set), &PySet_Type)) {
2128 PyErr_BadInternalCall();
2129 return -1;
2131 return set_discard_key((PySetObject *)set, key);
2135 PySet_Add(PyObject *set, PyObject *key)
2137 if (!PyType_IsSubtype(Py_Type(set), &PySet_Type)) {
2138 PyErr_BadInternalCall();
2139 return -1;
2141 return set_add_key((PySetObject *)set, key);
2145 _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key)
2147 setentry *entry_ptr;
2149 if (!PyAnySet_Check(set)) {
2150 PyErr_BadInternalCall();
2151 return -1;
2153 if (set_next((PySetObject *)set, pos, &entry_ptr) == 0)
2154 return 0;
2155 *key = entry_ptr->key;
2156 return 1;
2160 _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash)
2162 setentry *entry;
2164 if (!PyAnySet_Check(set)) {
2165 PyErr_BadInternalCall();
2166 return -1;
2168 if (set_next((PySetObject *)set, pos, &entry) == 0)
2169 return 0;
2170 *key = entry->key;
2171 *hash = entry->hash;
2172 return 1;
2175 PyObject *
2176 PySet_Pop(PyObject *set)
2178 if (!PyType_IsSubtype(Py_Type(set), &PySet_Type)) {
2179 PyErr_BadInternalCall();
2180 return NULL;
2182 return set_pop((PySetObject *)set);
2186 _PySet_Update(PyObject *set, PyObject *iterable)
2188 if (!PyType_IsSubtype(Py_Type(set), &PySet_Type)) {
2189 PyErr_BadInternalCall();
2190 return -1;
2192 return set_update_internal((PySetObject *)set, iterable);
2195 #ifdef Py_DEBUG
2197 /* Test code to be called with any three element set.
2198 Returns True and original set is restored. */
2200 #define assertRaises(call_return_value, exception) \
2201 do { \
2202 assert(call_return_value); \
2203 assert(PyErr_ExceptionMatches(exception)); \
2204 PyErr_Clear(); \
2205 } while(0)
2207 static PyObject *
2208 test_c_api(PySetObject *so)
2210 Py_ssize_t count;
2211 char *s;
2212 Py_ssize_t i;
2213 PyObject *elem=NULL, *dup=NULL, *t, *f, *dup2, *x;
2214 PyObject *ob = (PyObject *)so;
2216 /* Verify preconditions and exercise type/size checks */
2217 assert(PyAnySet_Check(ob));
2218 assert(PyAnySet_CheckExact(ob));
2219 assert(!PyFrozenSet_CheckExact(ob));
2220 assert(PySet_Size(ob) == 3);
2221 assert(PySet_GET_SIZE(ob) == 3);
2223 /* Raise TypeError for non-iterable constructor arguments */
2224 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2225 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2227 /* Raise TypeError for unhashable key */
2228 dup = PySet_New(ob);
2229 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2230 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2231 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2233 /* Exercise successful pop, contains, add, and discard */
2234 elem = PySet_Pop(ob);
2235 assert(PySet_Contains(ob, elem) == 0);
2236 assert(PySet_GET_SIZE(ob) == 2);
2237 assert(PySet_Add(ob, elem) == 0);
2238 assert(PySet_Contains(ob, elem) == 1);
2239 assert(PySet_GET_SIZE(ob) == 3);
2240 assert(PySet_Discard(ob, elem) == 1);
2241 assert(PySet_GET_SIZE(ob) == 2);
2242 assert(PySet_Discard(ob, elem) == 0);
2243 assert(PySet_GET_SIZE(ob) == 2);
2245 /* Exercise clear */
2246 dup2 = PySet_New(dup);
2247 assert(PySet_Clear(dup2) == 0);
2248 assert(PySet_Size(dup2) == 0);
2249 Py_DECREF(dup2);
2251 /* Raise SystemError on clear or update of frozen set */
2252 f = PyFrozenSet_New(dup);
2253 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2254 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2255 Py_DECREF(f);
2257 /* Exercise direct iteration */
2258 i = 0, count = 0;
2259 while (_PySet_Next((PyObject *)dup, &i, &x)) {
2260 s = PyString_AsString(x);
2261 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2262 count++;
2264 assert(count == 3);
2266 /* Exercise updates */
2267 dup2 = PySet_New(NULL);
2268 assert(_PySet_Update(dup2, dup) == 0);
2269 assert(PySet_Size(dup2) == 3);
2270 assert(_PySet_Update(dup2, dup) == 0);
2271 assert(PySet_Size(dup2) == 3);
2272 Py_DECREF(dup2);
2274 /* Raise SystemError when self argument is not a set or frozenset. */
2275 t = PyTuple_New(0);
2276 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2277 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2278 Py_DECREF(t);
2280 /* Raise SystemError when self argument is not a set. */
2281 f = PyFrozenSet_New(dup);
2282 assert(PySet_Size(f) == 3);
2283 assert(PyFrozenSet_CheckExact(f));
2284 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2285 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2286 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2287 Py_DECREF(f);
2289 /* Raise KeyError when popping from an empty set */
2290 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2291 Py_DECREF(ob);
2292 assert(PySet_GET_SIZE(ob) == 0);
2293 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2295 /* Restore the set from the copy using the PyNumber API */
2296 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2297 Py_DECREF(ob);
2299 /* Verify constructors accept NULL arguments */
2300 f = PySet_New(NULL);
2301 assert(f != NULL);
2302 assert(PySet_GET_SIZE(f) == 0);
2303 Py_DECREF(f);
2304 f = PyFrozenSet_New(NULL);
2305 assert(f != NULL);
2306 assert(PyFrozenSet_CheckExact(f));
2307 assert(PySet_GET_SIZE(f) == 0);
2308 Py_DECREF(f);
2310 Py_DECREF(elem);
2311 Py_DECREF(dup);
2312 Py_RETURN_TRUE;
2315 #undef assertRaises
2317 #endif