Patch #1635058 by Mark Roberts: ensure that htonl and friends never accept or
[python.git] / Objects / setobject.c
blobf038ee3fdeb7dd6d07f58bf32422f503a206bcab
2 /* set object implementation
3 Written and maintained by Raymond D. Hettinger <python@rcn.com>
4 Derived from Lib/sets.py and Objects/dictobject.c.
6 Copyright (c) 2003-6 Python Software Foundation.
7 All rights reserved.
8 */
10 #include "Python.h"
11 #include "structmember.h"
13 /* Set a key error with the specified argument, wrapping it in a
14 * tuple automatically so that tuple keys are not unpacked as the
15 * exception arguments. */
16 static void
17 set_key_error(PyObject *arg)
19 PyObject *tup;
20 tup = PyTuple_Pack(1, arg);
21 if (!tup)
22 return; /* caller will expect error to be set anyway */
23 PyErr_SetObject(PyExc_KeyError, tup);
24 Py_DECREF(tup);
27 /* This must be >= 1. */
28 #define PERTURB_SHIFT 5
30 /* Object used as dummy key to fill deleted entries */
31 static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
33 #ifdef Py_REF_DEBUG
34 PyObject *
35 _PySet_Dummy(void)
37 return dummy;
39 #endif
41 #define INIT_NONZERO_SET_SLOTS(so) do { \
42 (so)->table = (so)->smalltable; \
43 (so)->mask = PySet_MINSIZE - 1; \
44 (so)->hash = -1; \
45 } while(0)
47 #define EMPTY_TO_MINSIZE(so) do { \
48 memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
49 (so)->used = (so)->fill = 0; \
50 INIT_NONZERO_SET_SLOTS(so); \
51 } while(0)
53 /* Reuse scheme to save calls to malloc, free, and memset */
54 #define MAXFREESETS 80
55 static PySetObject *free_sets[MAXFREESETS];
56 static int num_free_sets = 0;
59 The basic lookup function used by all operations.
60 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
61 Open addressing is preferred over chaining since the link overhead for
62 chaining would be substantial (100% with typical malloc overhead).
64 The initial probe index is computed as hash mod the table size. Subsequent
65 probe indices are computed as explained in Objects/dictobject.c.
67 All arithmetic on hash should ignore overflow.
69 Unlike the dictionary implementation, the lookkey functions can return
70 NULL if the rich comparison returns an error.
73 static setentry *
74 set_lookkey(PySetObject *so, PyObject *key, register long hash)
76 register Py_ssize_t i;
77 register size_t perturb;
78 register setentry *freeslot;
79 register size_t mask = so->mask;
80 setentry *table = so->table;
81 register setentry *entry;
82 register int cmp;
83 PyObject *startkey;
85 i = hash & mask;
86 entry = &table[i];
87 if (entry->key == NULL || entry->key == key)
88 return entry;
90 if (entry->key == dummy)
91 freeslot = entry;
92 else {
93 if (entry->hash == hash) {
94 startkey = entry->key;
95 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
96 if (cmp < 0)
97 return NULL;
98 if (table == so->table && entry->key == startkey) {
99 if (cmp > 0)
100 return entry;
102 else {
103 /* The compare did major nasty stuff to the
104 * set: start over.
106 return set_lookkey(so, key, hash);
109 freeslot = NULL;
112 /* In the loop, key == dummy is by far (factor of 100s) the
113 least likely outcome, so test for that last. */
114 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
115 i = (i << 2) + i + perturb + 1;
116 entry = &table[i & mask];
117 if (entry->key == NULL) {
118 if (freeslot != NULL)
119 entry = freeslot;
120 break;
122 if (entry->key == key)
123 break;
124 if (entry->hash == hash && entry->key != dummy) {
125 startkey = entry->key;
126 cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
127 if (cmp < 0)
128 return NULL;
129 if (table == so->table && entry->key == startkey) {
130 if (cmp > 0)
131 break;
133 else {
134 /* The compare did major nasty stuff to the
135 * set: start over.
137 return set_lookkey(so, key, hash);
140 else if (entry->key == dummy && freeslot == NULL)
141 freeslot = entry;
143 return entry;
147 * Hacked up version of set_lookkey which can assume keys are always strings;
148 * This means we can always use _PyString_Eq directly and not have to check to
149 * see if the comparison altered the table.
151 static setentry *
152 set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
154 register Py_ssize_t i;
155 register size_t perturb;
156 register setentry *freeslot;
157 register size_t mask = so->mask;
158 setentry *table = so->table;
159 register setentry *entry;
161 /* Make sure this function doesn't have to handle non-string keys,
162 including subclasses of str; e.g., one reason to subclass
163 strings is to override __eq__, and for speed we don't cater to
164 that here. */
165 if (!PyString_CheckExact(key)) {
166 so->lookup = set_lookkey;
167 return set_lookkey(so, key, hash);
169 i = hash & mask;
170 entry = &table[i];
171 if (entry->key == NULL || entry->key == key)
172 return entry;
173 if (entry->key == dummy)
174 freeslot = entry;
175 else {
176 if (entry->hash == hash && _PyString_Eq(entry->key, key))
177 return entry;
178 freeslot = NULL;
181 /* In the loop, key == dummy is by far (factor of 100s) the
182 least likely outcome, so test for that last. */
183 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
184 i = (i << 2) + i + perturb + 1;
185 entry = &table[i & mask];
186 if (entry->key == NULL)
187 return freeslot == NULL ? entry : freeslot;
188 if (entry->key == key
189 || (entry->hash == hash
190 && entry->key != dummy
191 && _PyString_Eq(entry->key, key)))
192 return entry;
193 if (entry->key == dummy && freeslot == NULL)
194 freeslot = entry;
196 assert(0); /* NOT REACHED */
197 return 0;
201 Internal routine to insert a new key into the table.
202 Used by the public insert routine.
203 Eats a reference to key.
205 static int
206 set_insert_key(register PySetObject *so, PyObject *key, long hash)
208 register setentry *entry;
209 typedef setentry *(*lookupfunc)(PySetObject *, PyObject *, long);
211 assert(so->lookup != NULL);
212 entry = so->lookup(so, key, hash);
213 if (entry == NULL)
214 return -1;
215 if (entry->key == NULL) {
216 /* UNUSED */
217 so->fill++;
218 entry->key = key;
219 entry->hash = hash;
220 so->used++;
221 } else if (entry->key == dummy) {
222 /* DUMMY */
223 entry->key = key;
224 entry->hash = hash;
225 so->used++;
226 Py_DECREF(dummy);
227 } else {
228 /* ACTIVE */
229 Py_DECREF(key);
231 return 0;
235 Internal routine used by set_table_resize() to insert an item which is
236 known to be absent from the set. This routine also assumes that
237 the set contains no deleted entries. Besides the performance benefit,
238 using set_insert_clean() in set_table_resize() is dangerous (SF bug #1456209).
239 Note that no refcounts are changed by this routine; if needed, the caller
240 is responsible for incref'ing `key`.
242 static void
243 set_insert_clean(register PySetObject *so, PyObject *key, long hash)
245 register size_t i;
246 register size_t perturb;
247 register size_t mask = (size_t)so->mask;
248 setentry *table = so->table;
249 register setentry *entry;
251 i = hash & mask;
252 entry = &table[i];
253 for (perturb = hash; entry->key != NULL; perturb >>= PERTURB_SHIFT) {
254 i = (i << 2) + i + perturb + 1;
255 entry = &table[i & mask];
257 so->fill++;
258 entry->key = key;
259 entry->hash = hash;
260 so->used++;
264 Restructure the table by allocating a new table and reinserting all
265 keys again. When entries have been deleted, the new table may
266 actually be smaller than the old one.
268 static int
269 set_table_resize(PySetObject *so, Py_ssize_t minused)
271 Py_ssize_t newsize;
272 setentry *oldtable, *newtable, *entry;
273 Py_ssize_t i;
274 int is_oldtable_malloced;
275 setentry small_copy[PySet_MINSIZE];
277 assert(minused >= 0);
279 /* Find the smallest table size > minused. */
280 for (newsize = PySet_MINSIZE;
281 newsize <= minused && newsize > 0;
282 newsize <<= 1)
284 if (newsize <= 0) {
285 PyErr_NoMemory();
286 return -1;
289 /* Get space for a new table. */
290 oldtable = so->table;
291 assert(oldtable != NULL);
292 is_oldtable_malloced = oldtable != so->smalltable;
294 if (newsize == PySet_MINSIZE) {
295 /* A large table is shrinking, or we can't get any smaller. */
296 newtable = so->smalltable;
297 if (newtable == oldtable) {
298 if (so->fill == so->used) {
299 /* No dummies, so no point doing anything. */
300 return 0;
302 /* We're not going to resize it, but rebuild the
303 table anyway to purge old dummy entries.
304 Subtle: This is *necessary* if fill==size,
305 as set_lookkey needs at least one virgin slot to
306 terminate failing searches. If fill < size, it's
307 merely desirable, as dummies slow searches. */
308 assert(so->fill > so->used);
309 memcpy(small_copy, oldtable, sizeof(small_copy));
310 oldtable = small_copy;
313 else {
314 newtable = PyMem_NEW(setentry, newsize);
315 if (newtable == NULL) {
316 PyErr_NoMemory();
317 return -1;
321 /* Make the set empty, using the new table. */
322 assert(newtable != oldtable);
323 so->table = newtable;
324 so->mask = newsize - 1;
325 memset(newtable, 0, sizeof(setentry) * newsize);
326 so->used = 0;
327 i = so->fill;
328 so->fill = 0;
330 /* Copy the data over; this is refcount-neutral for active entries;
331 dummy entries aren't copied over, of course */
332 for (entry = oldtable; i > 0; entry++) {
333 if (entry->key == NULL) {
334 /* UNUSED */
336 } else if (entry->key == dummy) {
337 /* DUMMY */
338 --i;
339 assert(entry->key == dummy);
340 Py_DECREF(entry->key);
341 } else {
342 /* ACTIVE */
343 --i;
344 set_insert_clean(so, entry->key, entry->hash);
348 if (is_oldtable_malloced)
349 PyMem_DEL(oldtable);
350 return 0;
353 /* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
355 static int
356 set_add_entry(register PySetObject *so, setentry *entry)
358 register Py_ssize_t n_used;
360 assert(so->fill <= so->mask); /* at least one empty slot */
361 n_used = so->used;
362 Py_INCREF(entry->key);
363 if (set_insert_key(so, entry->key, entry->hash) == -1) {
364 Py_DECREF(entry->key);
365 return -1;
367 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
368 return 0;
369 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
372 static int
373 set_add_key(register PySetObject *so, PyObject *key)
375 register long hash;
376 register Py_ssize_t n_used;
378 if (!PyString_CheckExact(key) ||
379 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
380 hash = PyObject_Hash(key);
381 if (hash == -1)
382 return -1;
384 assert(so->fill <= so->mask); /* at least one empty slot */
385 n_used = so->used;
386 Py_INCREF(key);
387 if (set_insert_key(so, key, hash) == -1) {
388 Py_DECREF(key);
389 return -1;
391 if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
392 return 0;
393 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
396 #define DISCARD_NOTFOUND 0
397 #define DISCARD_FOUND 1
399 static int
400 set_discard_entry(PySetObject *so, setentry *oldentry)
401 { register setentry *entry;
402 PyObject *old_key;
404 entry = (so->lookup)(so, oldentry->key, oldentry->hash);
405 if (entry == NULL)
406 return -1;
407 if (entry->key == NULL || entry->key == dummy)
408 return DISCARD_NOTFOUND;
409 old_key = entry->key;
410 Py_INCREF(dummy);
411 entry->key = dummy;
412 so->used--;
413 Py_DECREF(old_key);
414 return DISCARD_FOUND;
417 static int
418 set_discard_key(PySetObject *so, PyObject *key)
420 register long hash;
421 register setentry *entry;
422 PyObject *old_key;
424 assert (PyAnySet_Check(so));
425 if (!PyString_CheckExact(key) ||
426 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
427 hash = PyObject_Hash(key);
428 if (hash == -1)
429 return -1;
431 entry = (so->lookup)(so, key, hash);
432 if (entry == NULL)
433 return -1;
434 if (entry->key == NULL || entry->key == dummy)
435 return DISCARD_NOTFOUND;
436 old_key = entry->key;
437 Py_INCREF(dummy);
438 entry->key = dummy;
439 so->used--;
440 Py_DECREF(old_key);
441 return DISCARD_FOUND;
444 static int
445 set_clear_internal(PySetObject *so)
447 setentry *entry, *table;
448 int table_is_malloced;
449 Py_ssize_t fill;
450 setentry small_copy[PySet_MINSIZE];
451 #ifdef Py_DEBUG
452 Py_ssize_t i, n;
453 assert (PyAnySet_Check(so));
455 n = so->mask + 1;
456 i = 0;
457 #endif
459 table = so->table;
460 assert(table != NULL);
461 table_is_malloced = table != so->smalltable;
463 /* This is delicate. During the process of clearing the set,
464 * decrefs can cause the set to mutate. To avoid fatal confusion
465 * (voice of experience), we have to make the set empty before
466 * clearing the slots, and never refer to anything via so->ref while
467 * clearing.
469 fill = so->fill;
470 if (table_is_malloced)
471 EMPTY_TO_MINSIZE(so);
473 else if (fill > 0) {
474 /* It's a small table with something that needs to be cleared.
475 * Afraid the only safe way is to copy the set entries into
476 * another small table first.
478 memcpy(small_copy, table, sizeof(small_copy));
479 table = small_copy;
480 EMPTY_TO_MINSIZE(so);
482 /* else it's a small table that's already empty */
484 /* Now we can finally clear things. If C had refcounts, we could
485 * assert that the refcount on table is 1 now, i.e. that this function
486 * has unique access to it, so decref side-effects can't alter it.
488 for (entry = table; fill > 0; ++entry) {
489 #ifdef Py_DEBUG
490 assert(i < n);
491 ++i;
492 #endif
493 if (entry->key) {
494 --fill;
495 Py_DECREF(entry->key);
497 #ifdef Py_DEBUG
498 else
499 assert(entry->key == NULL);
500 #endif
503 if (table_is_malloced)
504 PyMem_DEL(table);
505 return 0;
509 * Iterate over a set table. Use like so:
511 * Py_ssize_t pos;
512 * setentry *entry;
513 * pos = 0; # important! pos should not otherwise be changed by you
514 * while (set_next(yourset, &pos, &entry)) {
515 * Refer to borrowed reference in entry->key.
518 * CAUTION: In general, it isn't safe to use set_next in a loop that
519 * mutates the table.
521 static int
522 set_next(PySetObject *so, Py_ssize_t *pos_ptr, setentry **entry_ptr)
524 Py_ssize_t i;
525 Py_ssize_t mask;
526 register setentry *table;
528 assert (PyAnySet_Check(so));
529 i = *pos_ptr;
530 assert(i >= 0);
531 table = so->table;
532 mask = so->mask;
533 while (i <= mask && (table[i].key == NULL || table[i].key == dummy))
534 i++;
535 *pos_ptr = i+1;
536 if (i > mask)
537 return 0;
538 assert(table[i].key != NULL);
539 *entry_ptr = &table[i];
540 return 1;
543 static void
544 set_dealloc(PySetObject *so)
546 register setentry *entry;
547 Py_ssize_t fill = so->fill;
548 PyObject_GC_UnTrack(so);
549 Py_TRASHCAN_SAFE_BEGIN(so)
550 if (so->weakreflist != NULL)
551 PyObject_ClearWeakRefs((PyObject *) so);
553 for (entry = so->table; fill > 0; entry++) {
554 if (entry->key) {
555 --fill;
556 Py_DECREF(entry->key);
559 if (so->table != so->smalltable)
560 PyMem_DEL(so->table);
561 if (num_free_sets < MAXFREESETS && PyAnySet_CheckExact(so))
562 free_sets[num_free_sets++] = so;
563 else
564 so->ob_type->tp_free(so);
565 Py_TRASHCAN_SAFE_END(so)
568 static int
569 set_tp_print(PySetObject *so, FILE *fp, int flags)
571 setentry *entry;
572 Py_ssize_t pos=0;
573 char *emit = ""; /* No separator emitted on first pass */
574 char *separator = ", ";
575 int status = Py_ReprEnter((PyObject*)so);
577 if (status != 0) {
578 if (status < 0)
579 return status;
580 fprintf(fp, "%s(...)", so->ob_type->tp_name);
581 return 0;
584 fprintf(fp, "%s([", so->ob_type->tp_name);
585 while (set_next(so, &pos, &entry)) {
586 fputs(emit, fp);
587 emit = separator;
588 if (PyObject_Print(entry->key, fp, 0) != 0) {
589 Py_ReprLeave((PyObject*)so);
590 return -1;
593 fputs("])", fp);
594 Py_ReprLeave((PyObject*)so);
595 return 0;
598 static PyObject *
599 set_repr(PySetObject *so)
601 PyObject *keys, *result=NULL, *listrepr;
602 int status = Py_ReprEnter((PyObject*)so);
604 if (status != 0) {
605 if (status < 0)
606 return NULL;
607 return PyString_FromFormat("%s(...)", so->ob_type->tp_name);
610 keys = PySequence_List((PyObject *)so);
611 if (keys == NULL)
612 goto done;
613 listrepr = PyObject_Repr(keys);
614 Py_DECREF(keys);
615 if (listrepr == NULL)
616 goto done;
618 result = PyString_FromFormat("%s(%s)", so->ob_type->tp_name,
619 PyString_AS_STRING(listrepr));
620 Py_DECREF(listrepr);
621 done:
622 Py_ReprLeave((PyObject*)so);
623 return result;
626 static Py_ssize_t
627 set_len(PyObject *so)
629 return ((PySetObject *)so)->used;
632 static int
633 set_merge(PySetObject *so, PyObject *otherset)
635 PySetObject *other;
636 register Py_ssize_t i;
637 register setentry *entry;
639 assert (PyAnySet_Check(so));
640 assert (PyAnySet_Check(otherset));
642 other = (PySetObject*)otherset;
643 if (other == so || other->used == 0)
644 /* a.update(a) or a.update({}); nothing to do */
645 return 0;
646 /* Do one big resize at the start, rather than
647 * incrementally resizing as we insert new keys. Expect
648 * that there will be no (or few) overlapping keys.
650 if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
651 if (set_table_resize(so, (so->used + other->used)*2) != 0)
652 return -1;
654 for (i = 0; i <= other->mask; i++) {
655 entry = &other->table[i];
656 if (entry->key != NULL &&
657 entry->key != dummy) {
658 Py_INCREF(entry->key);
659 if (set_insert_key(so, entry->key, entry->hash) == -1) {
660 Py_DECREF(entry->key);
661 return -1;
665 return 0;
668 static int
669 set_contains_key(PySetObject *so, PyObject *key)
671 long hash;
672 setentry *entry;
674 if (!PyString_CheckExact(key) ||
675 (hash = ((PyStringObject *) key)->ob_shash) == -1) {
676 hash = PyObject_Hash(key);
677 if (hash == -1)
678 return -1;
680 entry = (so->lookup)(so, key, hash);
681 if (entry == NULL)
682 return -1;
683 key = entry->key;
684 return key != NULL && key != dummy;
687 static int
688 set_contains_entry(PySetObject *so, setentry *entry)
690 PyObject *key;
691 setentry *lu_entry;
693 lu_entry = (so->lookup)(so, entry->key, entry->hash);
694 if (lu_entry == NULL)
695 return -1;
696 key = lu_entry->key;
697 return key != NULL && key != dummy;
700 static PyObject *
701 set_pop(PySetObject *so)
703 register Py_ssize_t i = 0;
704 register setentry *entry;
705 PyObject *key;
707 assert (PyAnySet_Check(so));
708 if (so->used == 0) {
709 PyErr_SetString(PyExc_KeyError, "pop from an empty set");
710 return NULL;
713 /* Set entry to "the first" unused or dummy set entry. We abuse
714 * the hash field of slot 0 to hold a search finger:
715 * If slot 0 has a value, use slot 0.
716 * Else slot 0 is being used to hold a search finger,
717 * and we use its hash value as the first index to look.
719 entry = &so->table[0];
720 if (entry->key == NULL || entry->key == dummy) {
721 i = entry->hash;
722 /* The hash field may be a real hash value, or it may be a
723 * legit search finger, or it may be a once-legit search
724 * finger that's out of bounds now because it wrapped around
725 * or the table shrunk -- simply make sure it's in bounds now.
727 if (i > so->mask || i < 1)
728 i = 1; /* skip slot 0 */
729 while ((entry = &so->table[i])->key == NULL || entry->key==dummy) {
730 i++;
731 if (i > so->mask)
732 i = 1;
735 key = entry->key;
736 Py_INCREF(dummy);
737 entry->key = dummy;
738 so->used--;
739 so->table[0].hash = i + 1; /* next place to start */
740 return key;
743 PyDoc_STRVAR(pop_doc, "Remove and return an arbitrary set element.");
745 static int
746 set_traverse(PySetObject *so, visitproc visit, void *arg)
748 Py_ssize_t pos = 0;
749 setentry *entry;
751 while (set_next(so, &pos, &entry))
752 Py_VISIT(entry->key);
753 return 0;
756 static long
757 frozenset_hash(PyObject *self)
759 PySetObject *so = (PySetObject *)self;
760 long h, hash = 1927868237L;
761 setentry *entry;
762 Py_ssize_t pos = 0;
764 if (so->hash != -1)
765 return so->hash;
767 hash *= PySet_GET_SIZE(self) + 1;
768 while (set_next(so, &pos, &entry)) {
769 /* Work to increase the bit dispersion for closely spaced hash
770 values. The is important because some use cases have many
771 combinations of a small number of elements with nearby
772 hashes so that many distinct combinations collapse to only
773 a handful of distinct hash values. */
774 h = entry->hash;
775 hash ^= (h ^ (h << 16) ^ 89869747L) * 3644798167u;
777 hash = hash * 69069L + 907133923L;
778 if (hash == -1)
779 hash = 590923713L;
780 so->hash = hash;
781 return hash;
784 static long
785 set_nohash(PyObject *self)
787 PyErr_SetString(PyExc_TypeError, "set objects are unhashable");
788 return -1;
791 /***** Set iterator type ***********************************************/
793 typedef struct {
794 PyObject_HEAD
795 PySetObject *si_set; /* Set to NULL when iterator is exhausted */
796 Py_ssize_t si_used;
797 Py_ssize_t si_pos;
798 Py_ssize_t len;
799 } setiterobject;
801 static void
802 setiter_dealloc(setiterobject *si)
804 Py_XDECREF(si->si_set);
805 PyObject_Del(si);
808 static PyObject *
809 setiter_len(setiterobject *si)
811 Py_ssize_t len = 0;
812 if (si->si_set != NULL && si->si_used == si->si_set->used)
813 len = si->len;
814 return PyInt_FromLong(len);
817 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
819 static PyMethodDef setiter_methods[] = {
820 {"__length_hint__", (PyCFunction)setiter_len, METH_NOARGS, length_hint_doc},
821 {NULL, NULL} /* sentinel */
824 static PyObject *setiter_iternext(setiterobject *si)
826 PyObject *key;
827 register Py_ssize_t i, mask;
828 register setentry *entry;
829 PySetObject *so = si->si_set;
831 if (so == NULL)
832 return NULL;
833 assert (PyAnySet_Check(so));
835 if (si->si_used != so->used) {
836 PyErr_SetString(PyExc_RuntimeError,
837 "Set changed size during iteration");
838 si->si_used = -1; /* Make this state sticky */
839 return NULL;
842 i = si->si_pos;
843 assert(i>=0);
844 entry = so->table;
845 mask = so->mask;
846 while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy))
847 i++;
848 si->si_pos = i+1;
849 if (i > mask)
850 goto fail;
851 si->len--;
852 key = entry[i].key;
853 Py_INCREF(key);
854 return key;
856 fail:
857 Py_DECREF(so);
858 si->si_set = NULL;
859 return NULL;
862 static PyTypeObject PySetIter_Type = {
863 PyObject_HEAD_INIT(&PyType_Type)
864 0, /* ob_size */
865 "setiterator", /* tp_name */
866 sizeof(setiterobject), /* tp_basicsize */
867 0, /* tp_itemsize */
868 /* methods */
869 (destructor)setiter_dealloc, /* tp_dealloc */
870 0, /* tp_print */
871 0, /* tp_getattr */
872 0, /* tp_setattr */
873 0, /* tp_compare */
874 0, /* tp_repr */
875 0, /* tp_as_number */
876 0, /* tp_as_sequence */
877 0, /* tp_as_mapping */
878 0, /* tp_hash */
879 0, /* tp_call */
880 0, /* tp_str */
881 PyObject_GenericGetAttr, /* tp_getattro */
882 0, /* tp_setattro */
883 0, /* tp_as_buffer */
884 Py_TPFLAGS_DEFAULT, /* tp_flags */
885 0, /* tp_doc */
886 0, /* tp_traverse */
887 0, /* tp_clear */
888 0, /* tp_richcompare */
889 0, /* tp_weaklistoffset */
890 PyObject_SelfIter, /* tp_iter */
891 (iternextfunc)setiter_iternext, /* tp_iternext */
892 setiter_methods, /* tp_methods */
896 static PyObject *
897 set_iter(PySetObject *so)
899 setiterobject *si = PyObject_New(setiterobject, &PySetIter_Type);
900 if (si == NULL)
901 return NULL;
902 Py_INCREF(so);
903 si->si_set = so;
904 si->si_used = so->used;
905 si->si_pos = 0;
906 si->len = so->used;
907 return (PyObject *)si;
910 static int
911 set_update_internal(PySetObject *so, PyObject *other)
913 PyObject *key, *it;
915 if (PyAnySet_Check(other))
916 return set_merge(so, other);
918 if (PyDict_Check(other)) {
919 PyObject *value;
920 Py_ssize_t pos = 0;
921 while (PyDict_Next(other, &pos, &key, &value)) {
922 if (set_add_key(so, key) == -1)
923 return -1;
925 return 0;
928 it = PyObject_GetIter(other);
929 if (it == NULL)
930 return -1;
932 while ((key = PyIter_Next(it)) != NULL) {
933 if (set_add_key(so, key) == -1) {
934 Py_DECREF(it);
935 Py_DECREF(key);
936 return -1;
938 Py_DECREF(key);
940 Py_DECREF(it);
941 if (PyErr_Occurred())
942 return -1;
943 return 0;
946 static PyObject *
947 set_update(PySetObject *so, PyObject *other)
949 if (set_update_internal(so, other) == -1)
950 return NULL;
951 Py_RETURN_NONE;
954 PyDoc_STRVAR(update_doc,
955 "Update a set with the union of itself and another.");
957 static PyObject *
958 make_new_set(PyTypeObject *type, PyObject *iterable)
960 register PySetObject *so = NULL;
962 if (dummy == NULL) { /* Auto-initialize dummy */
963 dummy = PyString_FromString("<dummy key>");
964 if (dummy == NULL)
965 return NULL;
968 /* create PySetObject structure */
969 if (num_free_sets &&
970 (type == &PySet_Type || type == &PyFrozenSet_Type)) {
971 so = free_sets[--num_free_sets];
972 assert (so != NULL && PyAnySet_CheckExact(so));
973 so->ob_type = type;
974 _Py_NewReference((PyObject *)so);
975 EMPTY_TO_MINSIZE(so);
976 PyObject_GC_Track(so);
977 } else {
978 so = (PySetObject *)type->tp_alloc(type, 0);
979 if (so == NULL)
980 return NULL;
981 /* tp_alloc has already zeroed the structure */
982 assert(so->table == NULL && so->fill == 0 && so->used == 0);
983 INIT_NONZERO_SET_SLOTS(so);
986 so->lookup = set_lookkey_string;
987 so->weakreflist = NULL;
989 if (iterable != NULL) {
990 if (set_update_internal(so, iterable) == -1) {
991 Py_DECREF(so);
992 return NULL;
996 return (PyObject *)so;
999 /* The empty frozenset is a singleton */
1000 static PyObject *emptyfrozenset = NULL;
1002 static PyObject *
1003 frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1005 PyObject *iterable = NULL, *result;
1007 if (type == &PyFrozenSet_Type && !_PyArg_NoKeywords("frozenset()", kwds))
1008 return NULL;
1010 if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
1011 return NULL;
1013 if (type != &PyFrozenSet_Type)
1014 return make_new_set(type, iterable);
1016 if (iterable != NULL) {
1017 /* frozenset(f) is idempotent */
1018 if (PyFrozenSet_CheckExact(iterable)) {
1019 Py_INCREF(iterable);
1020 return iterable;
1022 result = make_new_set(type, iterable);
1023 if (result == NULL || PySet_GET_SIZE(result))
1024 return result;
1025 Py_DECREF(result);
1027 /* The empty frozenset is a singleton */
1028 if (emptyfrozenset == NULL)
1029 emptyfrozenset = make_new_set(type, NULL);
1030 Py_XINCREF(emptyfrozenset);
1031 return emptyfrozenset;
1034 void
1035 PySet_Fini(void)
1037 PySetObject *so;
1039 while (num_free_sets) {
1040 num_free_sets--;
1041 so = free_sets[num_free_sets];
1042 PyObject_GC_Del(so);
1044 Py_CLEAR(dummy);
1045 Py_CLEAR(emptyfrozenset);
1048 static PyObject *
1049 set_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1051 if (type == &PySet_Type && !_PyArg_NoKeywords("set()", kwds))
1052 return NULL;
1054 return make_new_set(type, NULL);
1057 /* set_swap_bodies() switches the contents of any two sets by moving their
1058 internal data pointers and, if needed, copying the internal smalltables.
1059 Semantically equivalent to:
1061 t=set(a); a.clear(); a.update(b); b.clear(); b.update(t); del t
1063 The function always succeeds and it leaves both objects in a stable state.
1064 Useful for creating temporary frozensets from sets for membership testing
1065 in __contains__(), discard(), and remove(). Also useful for operations
1066 that update in-place (by allowing an intermediate result to be swapped
1067 into one of the original inputs).
1070 static void
1071 set_swap_bodies(PySetObject *a, PySetObject *b)
1073 Py_ssize_t t;
1074 setentry *u;
1075 setentry *(*f)(PySetObject *so, PyObject *key, long hash);
1076 setentry tab[PySet_MINSIZE];
1077 long h;
1079 t = a->fill; a->fill = b->fill; b->fill = t;
1080 t = a->used; a->used = b->used; b->used = t;
1081 t = a->mask; a->mask = b->mask; b->mask = t;
1083 u = a->table;
1084 if (a->table == a->smalltable)
1085 u = b->smalltable;
1086 a->table = b->table;
1087 if (b->table == b->smalltable)
1088 a->table = a->smalltable;
1089 b->table = u;
1091 f = a->lookup; a->lookup = b->lookup; b->lookup = f;
1093 if (a->table == a->smalltable || b->table == b->smalltable) {
1094 memcpy(tab, a->smalltable, sizeof(tab));
1095 memcpy(a->smalltable, b->smalltable, sizeof(tab));
1096 memcpy(b->smalltable, tab, sizeof(tab));
1099 if (PyType_IsSubtype(a->ob_type, &PyFrozenSet_Type) &&
1100 PyType_IsSubtype(b->ob_type, &PyFrozenSet_Type)) {
1101 h = a->hash; a->hash = b->hash; b->hash = h;
1102 } else {
1103 a->hash = -1;
1104 b->hash = -1;
1108 static PyObject *
1109 set_copy(PySetObject *so)
1111 return make_new_set(so->ob_type, (PyObject *)so);
1114 static PyObject *
1115 frozenset_copy(PySetObject *so)
1117 if (PyFrozenSet_CheckExact(so)) {
1118 Py_INCREF(so);
1119 return (PyObject *)so;
1121 return set_copy(so);
1124 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
1126 static PyObject *
1127 set_clear(PySetObject *so)
1129 set_clear_internal(so);
1130 Py_RETURN_NONE;
1133 PyDoc_STRVAR(clear_doc, "Remove all elements from this set.");
1135 static PyObject *
1136 set_union(PySetObject *so, PyObject *other)
1138 PySetObject *result;
1140 result = (PySetObject *)set_copy(so);
1141 if (result == NULL)
1142 return NULL;
1143 if ((PyObject *)so == other)
1144 return (PyObject *)result;
1145 if (set_update_internal(result, other) == -1) {
1146 Py_DECREF(result);
1147 return NULL;
1149 return (PyObject *)result;
1152 PyDoc_STRVAR(union_doc,
1153 "Return the union of two sets as a new set.\n\
1155 (i.e. all elements that are in either set.)");
1157 static PyObject *
1158 set_or(PySetObject *so, PyObject *other)
1160 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1161 Py_INCREF(Py_NotImplemented);
1162 return Py_NotImplemented;
1164 return set_union(so, other);
1167 static PyObject *
1168 set_ior(PySetObject *so, PyObject *other)
1170 if (!PyAnySet_Check(other)) {
1171 Py_INCREF(Py_NotImplemented);
1172 return Py_NotImplemented;
1174 if (set_update_internal(so, other) == -1)
1175 return NULL;
1176 Py_INCREF(so);
1177 return (PyObject *)so;
1180 static PyObject *
1181 set_intersection(PySetObject *so, PyObject *other)
1183 PySetObject *result;
1184 PyObject *key, *it, *tmp;
1186 if ((PyObject *)so == other)
1187 return set_copy(so);
1189 result = (PySetObject *)make_new_set(so->ob_type, NULL);
1190 if (result == NULL)
1191 return NULL;
1193 if (PyAnySet_Check(other)) {
1194 Py_ssize_t pos = 0;
1195 setentry *entry;
1197 if (PySet_GET_SIZE(other) > PySet_GET_SIZE(so)) {
1198 tmp = (PyObject *)so;
1199 so = (PySetObject *)other;
1200 other = tmp;
1203 while (set_next((PySetObject *)other, &pos, &entry)) {
1204 int rv = set_contains_entry(so, entry);
1205 if (rv == -1) {
1206 Py_DECREF(result);
1207 return NULL;
1209 if (rv) {
1210 if (set_add_entry(result, entry) == -1) {
1211 Py_DECREF(result);
1212 return NULL;
1216 return (PyObject *)result;
1219 it = PyObject_GetIter(other);
1220 if (it == NULL) {
1221 Py_DECREF(result);
1222 return NULL;
1225 while ((key = PyIter_Next(it)) != NULL) {
1226 int rv;
1227 setentry entry;
1228 long hash = PyObject_Hash(key);
1230 if (hash == -1) {
1231 Py_DECREF(it);
1232 Py_DECREF(result);
1233 Py_DECREF(key);
1234 return NULL;
1236 entry.hash = hash;
1237 entry.key = key;
1238 rv = set_contains_entry(so, &entry);
1239 if (rv == -1) {
1240 Py_DECREF(it);
1241 Py_DECREF(result);
1242 Py_DECREF(key);
1243 return NULL;
1245 if (rv) {
1246 if (set_add_entry(result, &entry) == -1) {
1247 Py_DECREF(it);
1248 Py_DECREF(result);
1249 Py_DECREF(key);
1250 return NULL;
1253 Py_DECREF(key);
1255 Py_DECREF(it);
1256 if (PyErr_Occurred()) {
1257 Py_DECREF(result);
1258 return NULL;
1260 return (PyObject *)result;
1263 PyDoc_STRVAR(intersection_doc,
1264 "Return the intersection of two sets as a new set.\n\
1266 (i.e. all elements that are in both sets.)");
1268 static PyObject *
1269 set_intersection_update(PySetObject *so, PyObject *other)
1271 PyObject *tmp;
1273 tmp = set_intersection(so, other);
1274 if (tmp == NULL)
1275 return NULL;
1276 set_swap_bodies(so, (PySetObject *)tmp);
1277 Py_DECREF(tmp);
1278 Py_RETURN_NONE;
1281 PyDoc_STRVAR(intersection_update_doc,
1282 "Update a set with the intersection of itself and another.");
1284 static PyObject *
1285 set_and(PySetObject *so, PyObject *other)
1287 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1288 Py_INCREF(Py_NotImplemented);
1289 return Py_NotImplemented;
1291 return set_intersection(so, other);
1294 static PyObject *
1295 set_iand(PySetObject *so, PyObject *other)
1297 PyObject *result;
1299 if (!PyAnySet_Check(other)) {
1300 Py_INCREF(Py_NotImplemented);
1301 return Py_NotImplemented;
1303 result = set_intersection_update(so, other);
1304 if (result == NULL)
1305 return NULL;
1306 Py_DECREF(result);
1307 Py_INCREF(so);
1308 return (PyObject *)so;
1311 static int
1312 set_difference_update_internal(PySetObject *so, PyObject *other)
1314 if ((PyObject *)so == other)
1315 return set_clear_internal(so);
1317 if (PyAnySet_Check(other)) {
1318 setentry *entry;
1319 Py_ssize_t pos = 0;
1321 while (set_next((PySetObject *)other, &pos, &entry))
1322 if (set_discard_entry(so, entry) == -1)
1323 return -1;
1324 } else {
1325 PyObject *key, *it;
1326 it = PyObject_GetIter(other);
1327 if (it == NULL)
1328 return -1;
1330 while ((key = PyIter_Next(it)) != NULL) {
1331 if (set_discard_key(so, key) == -1) {
1332 Py_DECREF(it);
1333 Py_DECREF(key);
1334 return -1;
1336 Py_DECREF(key);
1338 Py_DECREF(it);
1339 if (PyErr_Occurred())
1340 return -1;
1342 /* If more than 1/5 are dummies, then resize them away. */
1343 if ((so->fill - so->used) * 5 < so->mask)
1344 return 0;
1345 return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
1348 static PyObject *
1349 set_difference_update(PySetObject *so, PyObject *other)
1351 if (set_difference_update_internal(so, other) != -1)
1352 Py_RETURN_NONE;
1353 return NULL;
1356 PyDoc_STRVAR(difference_update_doc,
1357 "Remove all elements of another set from this set.");
1359 static PyObject *
1360 set_difference(PySetObject *so, PyObject *other)
1362 PyObject *result;
1363 setentry *entry;
1364 Py_ssize_t pos = 0;
1366 if (!PyAnySet_Check(other) && !PyDict_Check(other)) {
1367 result = set_copy(so);
1368 if (result == NULL)
1369 return NULL;
1370 if (set_difference_update_internal((PySetObject *)result, other) != -1)
1371 return result;
1372 Py_DECREF(result);
1373 return NULL;
1376 result = make_new_set(so->ob_type, NULL);
1377 if (result == NULL)
1378 return NULL;
1380 if (PyDict_Check(other)) {
1381 while (set_next(so, &pos, &entry)) {
1382 setentry entrycopy;
1383 entrycopy.hash = entry->hash;
1384 entrycopy.key = entry->key;
1385 if (!PyDict_Contains(other, entry->key)) {
1386 if (set_add_entry((PySetObject *)result, &entrycopy) == -1) {
1387 Py_DECREF(result);
1388 return NULL;
1392 return result;
1395 while (set_next(so, &pos, &entry)) {
1396 int rv = set_contains_entry((PySetObject *)other, entry);
1397 if (rv == -1) {
1398 Py_DECREF(result);
1399 return NULL;
1401 if (!rv) {
1402 if (set_add_entry((PySetObject *)result, entry) == -1) {
1403 Py_DECREF(result);
1404 return NULL;
1408 return result;
1411 PyDoc_STRVAR(difference_doc,
1412 "Return the difference of two sets as a new set.\n\
1414 (i.e. all elements that are in this set but not the other.)");
1415 static PyObject *
1416 set_sub(PySetObject *so, PyObject *other)
1418 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1419 Py_INCREF(Py_NotImplemented);
1420 return Py_NotImplemented;
1422 return set_difference(so, other);
1425 static PyObject *
1426 set_isub(PySetObject *so, PyObject *other)
1428 PyObject *result;
1430 if (!PyAnySet_Check(other)) {
1431 Py_INCREF(Py_NotImplemented);
1432 return Py_NotImplemented;
1434 result = set_difference_update(so, other);
1435 if (result == NULL)
1436 return NULL;
1437 Py_DECREF(result);
1438 Py_INCREF(so);
1439 return (PyObject *)so;
1442 static PyObject *
1443 set_symmetric_difference_update(PySetObject *so, PyObject *other)
1445 PySetObject *otherset;
1446 PyObject *key;
1447 Py_ssize_t pos = 0;
1448 setentry *entry;
1450 if ((PyObject *)so == other)
1451 return set_clear(so);
1453 if (PyDict_Check(other)) {
1454 PyObject *value;
1455 int rv;
1456 while (PyDict_Next(other, &pos, &key, &value)) {
1457 setentry an_entry;
1458 long hash = PyObject_Hash(key);
1460 if (hash == -1)
1461 return NULL;
1462 an_entry.hash = hash;
1463 an_entry.key = key;
1464 rv = set_discard_entry(so, &an_entry);
1465 if (rv == -1)
1466 return NULL;
1467 if (rv == DISCARD_NOTFOUND) {
1468 if (set_add_entry(so, &an_entry) == -1)
1469 return NULL;
1472 Py_RETURN_NONE;
1475 if (PyAnySet_Check(other)) {
1476 Py_INCREF(other);
1477 otherset = (PySetObject *)other;
1478 } else {
1479 otherset = (PySetObject *)make_new_set(so->ob_type, other);
1480 if (otherset == NULL)
1481 return NULL;
1484 while (set_next(otherset, &pos, &entry)) {
1485 int rv = set_discard_entry(so, entry);
1486 if (rv == -1) {
1487 Py_DECREF(otherset);
1488 return NULL;
1490 if (rv == DISCARD_NOTFOUND) {
1491 if (set_add_entry(so, entry) == -1) {
1492 Py_DECREF(otherset);
1493 return NULL;
1497 Py_DECREF(otherset);
1498 Py_RETURN_NONE;
1501 PyDoc_STRVAR(symmetric_difference_update_doc,
1502 "Update a set with the symmetric difference of itself and another.");
1504 static PyObject *
1505 set_symmetric_difference(PySetObject *so, PyObject *other)
1507 PyObject *rv;
1508 PySetObject *otherset;
1510 otherset = (PySetObject *)make_new_set(so->ob_type, other);
1511 if (otherset == NULL)
1512 return NULL;
1513 rv = set_symmetric_difference_update(otherset, (PyObject *)so);
1514 if (rv == NULL)
1515 return NULL;
1516 Py_DECREF(rv);
1517 return (PyObject *)otherset;
1520 PyDoc_STRVAR(symmetric_difference_doc,
1521 "Return the symmetric difference of two sets as a new set.\n\
1523 (i.e. all elements that are in exactly one of the sets.)");
1525 static PyObject *
1526 set_xor(PySetObject *so, PyObject *other)
1528 if (!PyAnySet_Check(so) || !PyAnySet_Check(other)) {
1529 Py_INCREF(Py_NotImplemented);
1530 return Py_NotImplemented;
1532 return set_symmetric_difference(so, other);
1535 static PyObject *
1536 set_ixor(PySetObject *so, PyObject *other)
1538 PyObject *result;
1540 if (!PyAnySet_Check(other)) {
1541 Py_INCREF(Py_NotImplemented);
1542 return Py_NotImplemented;
1544 result = set_symmetric_difference_update(so, other);
1545 if (result == NULL)
1546 return NULL;
1547 Py_DECREF(result);
1548 Py_INCREF(so);
1549 return (PyObject *)so;
1552 static PyObject *
1553 set_issubset(PySetObject *so, PyObject *other)
1555 setentry *entry;
1556 Py_ssize_t pos = 0;
1558 if (!PyAnySet_Check(other)) {
1559 PyObject *tmp, *result;
1560 tmp = make_new_set(&PySet_Type, other);
1561 if (tmp == NULL)
1562 return NULL;
1563 result = set_issubset(so, tmp);
1564 Py_DECREF(tmp);
1565 return result;
1567 if (PySet_GET_SIZE(so) > PySet_GET_SIZE(other))
1568 Py_RETURN_FALSE;
1570 while (set_next(so, &pos, &entry)) {
1571 int rv = set_contains_entry((PySetObject *)other, entry);
1572 if (rv == -1)
1573 return NULL;
1574 if (!rv)
1575 Py_RETURN_FALSE;
1577 Py_RETURN_TRUE;
1580 PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set.");
1582 static PyObject *
1583 set_issuperset(PySetObject *so, PyObject *other)
1585 PyObject *tmp, *result;
1587 if (!PyAnySet_Check(other)) {
1588 tmp = make_new_set(&PySet_Type, other);
1589 if (tmp == NULL)
1590 return NULL;
1591 result = set_issuperset(so, tmp);
1592 Py_DECREF(tmp);
1593 return result;
1595 return set_issubset((PySetObject *)other, (PyObject *)so);
1598 PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set.");
1600 static PyObject *
1601 set_richcompare(PySetObject *v, PyObject *w, int op)
1603 PyObject *r1, *r2;
1605 if(!PyAnySet_Check(w)) {
1606 if (op == Py_EQ)
1607 Py_RETURN_FALSE;
1608 if (op == Py_NE)
1609 Py_RETURN_TRUE;
1610 PyErr_SetString(PyExc_TypeError, "can only compare to a set");
1611 return NULL;
1613 switch (op) {
1614 case Py_EQ:
1615 if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w))
1616 Py_RETURN_FALSE;
1617 if (v->hash != -1 &&
1618 ((PySetObject *)w)->hash != -1 &&
1619 v->hash != ((PySetObject *)w)->hash)
1620 Py_RETURN_FALSE;
1621 return set_issubset(v, w);
1622 case Py_NE:
1623 r1 = set_richcompare(v, w, Py_EQ);
1624 if (r1 == NULL)
1625 return NULL;
1626 r2 = PyBool_FromLong(PyObject_Not(r1));
1627 Py_DECREF(r1);
1628 return r2;
1629 case Py_LE:
1630 return set_issubset(v, w);
1631 case Py_GE:
1632 return set_issuperset(v, w);
1633 case Py_LT:
1634 if (PySet_GET_SIZE(v) >= PySet_GET_SIZE(w))
1635 Py_RETURN_FALSE;
1636 return set_issubset(v, w);
1637 case Py_GT:
1638 if (PySet_GET_SIZE(v) <= PySet_GET_SIZE(w))
1639 Py_RETURN_FALSE;
1640 return set_issuperset(v, w);
1642 Py_INCREF(Py_NotImplemented);
1643 return Py_NotImplemented;
1646 static int
1647 set_nocmp(PyObject *self, PyObject *other)
1649 PyErr_SetString(PyExc_TypeError, "cannot compare sets using cmp()");
1650 return -1;
1653 static PyObject *
1654 set_add(PySetObject *so, PyObject *key)
1656 if (set_add_key(so, key) == -1)
1657 return NULL;
1658 Py_RETURN_NONE;
1661 PyDoc_STRVAR(add_doc,
1662 "Add an element to a set.\n\
1664 This has no effect if the element is already present.");
1666 static int
1667 set_contains(PySetObject *so, PyObject *key)
1669 PyObject *tmpkey;
1670 int rv;
1672 rv = set_contains_key(so, key);
1673 if (rv == -1) {
1674 if (!PyAnySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1675 return -1;
1676 PyErr_Clear();
1677 tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
1678 if (tmpkey == NULL)
1679 return -1;
1680 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1681 rv = set_contains(so, tmpkey);
1682 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1683 Py_DECREF(tmpkey);
1685 return rv;
1688 static PyObject *
1689 set_direct_contains(PySetObject *so, PyObject *key)
1691 long result;
1693 result = set_contains(so, key);
1694 if (result == -1)
1695 return NULL;
1696 return PyBool_FromLong(result);
1699 PyDoc_STRVAR(contains_doc, "x.__contains__(y) <==> y in x.");
1701 static PyObject *
1702 set_remove(PySetObject *so, PyObject *key)
1704 PyObject *tmpkey, *result;
1705 int rv;
1707 rv = set_discard_key(so, key);
1708 if (rv == -1) {
1709 if (!PyAnySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1710 return NULL;
1711 PyErr_Clear();
1712 tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
1713 if (tmpkey == NULL)
1714 return NULL;
1715 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1716 result = set_remove(so, tmpkey);
1717 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1718 Py_DECREF(tmpkey);
1719 return result;
1720 } else if (rv == DISCARD_NOTFOUND) {
1721 set_key_error(key);
1722 return NULL;
1724 Py_RETURN_NONE;
1727 PyDoc_STRVAR(remove_doc,
1728 "Remove an element from a set; it must be a member.\n\
1730 If the element is not a member, raise a KeyError.");
1732 static PyObject *
1733 set_discard(PySetObject *so, PyObject *key)
1735 PyObject *tmpkey, *result;
1736 int rv;
1738 rv = set_discard_key(so, key);
1739 if (rv == -1) {
1740 if (!PyAnySet_Check(key) || !PyErr_ExceptionMatches(PyExc_TypeError))
1741 return NULL;
1742 PyErr_Clear();
1743 tmpkey = make_new_set(&PyFrozenSet_Type, NULL);
1744 if (tmpkey == NULL)
1745 return NULL;
1746 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1747 result = set_discard(so, tmpkey);
1748 set_swap_bodies((PySetObject *)tmpkey, (PySetObject *)key);
1749 Py_DECREF(tmpkey);
1750 return result;
1752 Py_RETURN_NONE;
1755 PyDoc_STRVAR(discard_doc,
1756 "Remove an element from a set if it is a member.\n\
1758 If the element is not a member, do nothing.");
1760 static PyObject *
1761 set_reduce(PySetObject *so)
1763 PyObject *keys=NULL, *args=NULL, *result=NULL, *dict=NULL;
1765 keys = PySequence_List((PyObject *)so);
1766 if (keys == NULL)
1767 goto done;
1768 args = PyTuple_Pack(1, keys);
1769 if (args == NULL)
1770 goto done;
1771 dict = PyObject_GetAttrString((PyObject *)so, "__dict__");
1772 if (dict == NULL) {
1773 PyErr_Clear();
1774 dict = Py_None;
1775 Py_INCREF(dict);
1777 result = PyTuple_Pack(3, so->ob_type, args, dict);
1778 done:
1779 Py_XDECREF(args);
1780 Py_XDECREF(keys);
1781 Py_XDECREF(dict);
1782 return result;
1785 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
1787 static int
1788 set_init(PySetObject *self, PyObject *args, PyObject *kwds)
1790 PyObject *iterable = NULL;
1792 if (!PyAnySet_Check(self))
1793 return -1;
1794 if (!PyArg_UnpackTuple(args, self->ob_type->tp_name, 0, 1, &iterable))
1795 return -1;
1796 set_clear_internal(self);
1797 self->hash = -1;
1798 if (iterable == NULL)
1799 return 0;
1800 return set_update_internal(self, iterable);
1803 static PySequenceMethods set_as_sequence = {
1804 set_len, /* sq_length */
1805 0, /* sq_concat */
1806 0, /* sq_repeat */
1807 0, /* sq_item */
1808 0, /* sq_slice */
1809 0, /* sq_ass_item */
1810 0, /* sq_ass_slice */
1811 (objobjproc)set_contains, /* sq_contains */
1814 /* set object ********************************************************/
1816 #ifdef Py_DEBUG
1817 static PyObject *test_c_api(PySetObject *so);
1819 PyDoc_STRVAR(test_c_api_doc, "Exercises C API. Returns True.\n\
1820 All is well if assertions don't fail.");
1821 #endif
1823 static PyMethodDef set_methods[] = {
1824 {"add", (PyCFunction)set_add, METH_O,
1825 add_doc},
1826 {"clear", (PyCFunction)set_clear, METH_NOARGS,
1827 clear_doc},
1828 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
1829 contains_doc},
1830 {"copy", (PyCFunction)set_copy, METH_NOARGS,
1831 copy_doc},
1832 {"discard", (PyCFunction)set_discard, METH_O,
1833 discard_doc},
1834 {"difference", (PyCFunction)set_difference, METH_O,
1835 difference_doc},
1836 {"difference_update", (PyCFunction)set_difference_update, METH_O,
1837 difference_update_doc},
1838 {"intersection",(PyCFunction)set_intersection, METH_O,
1839 intersection_doc},
1840 {"intersection_update",(PyCFunction)set_intersection_update, METH_O,
1841 intersection_update_doc},
1842 {"issubset", (PyCFunction)set_issubset, METH_O,
1843 issubset_doc},
1844 {"issuperset", (PyCFunction)set_issuperset, METH_O,
1845 issuperset_doc},
1846 {"pop", (PyCFunction)set_pop, METH_NOARGS,
1847 pop_doc},
1848 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
1849 reduce_doc},
1850 {"remove", (PyCFunction)set_remove, METH_O,
1851 remove_doc},
1852 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
1853 symmetric_difference_doc},
1854 {"symmetric_difference_update",(PyCFunction)set_symmetric_difference_update, METH_O,
1855 symmetric_difference_update_doc},
1856 #ifdef Py_DEBUG
1857 {"test_c_api", (PyCFunction)test_c_api, METH_NOARGS,
1858 test_c_api_doc},
1859 #endif
1860 {"union", (PyCFunction)set_union, METH_O,
1861 union_doc},
1862 {"update", (PyCFunction)set_update, METH_O,
1863 update_doc},
1864 {NULL, NULL} /* sentinel */
1867 static PyNumberMethods set_as_number = {
1868 0, /*nb_add*/
1869 (binaryfunc)set_sub, /*nb_subtract*/
1870 0, /*nb_multiply*/
1871 0, /*nb_divide*/
1872 0, /*nb_remainder*/
1873 0, /*nb_divmod*/
1874 0, /*nb_power*/
1875 0, /*nb_negative*/
1876 0, /*nb_positive*/
1877 0, /*nb_absolute*/
1878 0, /*nb_nonzero*/
1879 0, /*nb_invert*/
1880 0, /*nb_lshift*/
1881 0, /*nb_rshift*/
1882 (binaryfunc)set_and, /*nb_and*/
1883 (binaryfunc)set_xor, /*nb_xor*/
1884 (binaryfunc)set_or, /*nb_or*/
1885 0, /*nb_coerce*/
1886 0, /*nb_int*/
1887 0, /*nb_long*/
1888 0, /*nb_float*/
1889 0, /*nb_oct*/
1890 0, /*nb_hex*/
1891 0, /*nb_inplace_add*/
1892 (binaryfunc)set_isub, /*nb_inplace_subtract*/
1893 0, /*nb_inplace_multiply*/
1894 0, /*nb_inplace_divide*/
1895 0, /*nb_inplace_remainder*/
1896 0, /*nb_inplace_power*/
1897 0, /*nb_inplace_lshift*/
1898 0, /*nb_inplace_rshift*/
1899 (binaryfunc)set_iand, /*nb_inplace_and*/
1900 (binaryfunc)set_ixor, /*nb_inplace_xor*/
1901 (binaryfunc)set_ior, /*nb_inplace_or*/
1904 PyDoc_STRVAR(set_doc,
1905 "set(iterable) --> set object\n\
1907 Build an unordered collection of unique elements.");
1909 PyTypeObject PySet_Type = {
1910 PyObject_HEAD_INIT(&PyType_Type)
1911 0, /* ob_size */
1912 "set", /* tp_name */
1913 sizeof(PySetObject), /* tp_basicsize */
1914 0, /* tp_itemsize */
1915 /* methods */
1916 (destructor)set_dealloc, /* tp_dealloc */
1917 (printfunc)set_tp_print, /* tp_print */
1918 0, /* tp_getattr */
1919 0, /* tp_setattr */
1920 set_nocmp, /* tp_compare */
1921 (reprfunc)set_repr, /* tp_repr */
1922 &set_as_number, /* tp_as_number */
1923 &set_as_sequence, /* tp_as_sequence */
1924 0, /* tp_as_mapping */
1925 set_nohash, /* tp_hash */
1926 0, /* tp_call */
1927 0, /* tp_str */
1928 PyObject_GenericGetAttr, /* tp_getattro */
1929 0, /* tp_setattro */
1930 0, /* tp_as_buffer */
1931 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
1932 Py_TPFLAGS_BASETYPE, /* tp_flags */
1933 set_doc, /* tp_doc */
1934 (traverseproc)set_traverse, /* tp_traverse */
1935 (inquiry)set_clear_internal, /* tp_clear */
1936 (richcmpfunc)set_richcompare, /* tp_richcompare */
1937 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
1938 (getiterfunc)set_iter, /* tp_iter */
1939 0, /* tp_iternext */
1940 set_methods, /* tp_methods */
1941 0, /* tp_members */
1942 0, /* tp_getset */
1943 0, /* tp_base */
1944 0, /* tp_dict */
1945 0, /* tp_descr_get */
1946 0, /* tp_descr_set */
1947 0, /* tp_dictoffset */
1948 (initproc)set_init, /* tp_init */
1949 PyType_GenericAlloc, /* tp_alloc */
1950 set_new, /* tp_new */
1951 PyObject_GC_Del, /* tp_free */
1954 /* frozenset object ********************************************************/
1957 static PyMethodDef frozenset_methods[] = {
1958 {"__contains__",(PyCFunction)set_direct_contains, METH_O | METH_COEXIST,
1959 contains_doc},
1960 {"copy", (PyCFunction)frozenset_copy, METH_NOARGS,
1961 copy_doc},
1962 {"difference", (PyCFunction)set_difference, METH_O,
1963 difference_doc},
1964 {"intersection",(PyCFunction)set_intersection, METH_O,
1965 intersection_doc},
1966 {"issubset", (PyCFunction)set_issubset, METH_O,
1967 issubset_doc},
1968 {"issuperset", (PyCFunction)set_issuperset, METH_O,
1969 issuperset_doc},
1970 {"__reduce__", (PyCFunction)set_reduce, METH_NOARGS,
1971 reduce_doc},
1972 {"symmetric_difference",(PyCFunction)set_symmetric_difference, METH_O,
1973 symmetric_difference_doc},
1974 {"union", (PyCFunction)set_union, METH_O,
1975 union_doc},
1976 {NULL, NULL} /* sentinel */
1979 static PyNumberMethods frozenset_as_number = {
1980 0, /*nb_add*/
1981 (binaryfunc)set_sub, /*nb_subtract*/
1982 0, /*nb_multiply*/
1983 0, /*nb_divide*/
1984 0, /*nb_remainder*/
1985 0, /*nb_divmod*/
1986 0, /*nb_power*/
1987 0, /*nb_negative*/
1988 0, /*nb_positive*/
1989 0, /*nb_absolute*/
1990 0, /*nb_nonzero*/
1991 0, /*nb_invert*/
1992 0, /*nb_lshift*/
1993 0, /*nb_rshift*/
1994 (binaryfunc)set_and, /*nb_and*/
1995 (binaryfunc)set_xor, /*nb_xor*/
1996 (binaryfunc)set_or, /*nb_or*/
1999 PyDoc_STRVAR(frozenset_doc,
2000 "frozenset(iterable) --> frozenset object\n\
2002 Build an immutable unordered collection of unique elements.");
2004 PyTypeObject PyFrozenSet_Type = {
2005 PyObject_HEAD_INIT(&PyType_Type)
2006 0, /* ob_size */
2007 "frozenset", /* tp_name */
2008 sizeof(PySetObject), /* tp_basicsize */
2009 0, /* tp_itemsize */
2010 /* methods */
2011 (destructor)set_dealloc, /* tp_dealloc */
2012 (printfunc)set_tp_print, /* tp_print */
2013 0, /* tp_getattr */
2014 0, /* tp_setattr */
2015 set_nocmp, /* tp_compare */
2016 (reprfunc)set_repr, /* tp_repr */
2017 &frozenset_as_number, /* tp_as_number */
2018 &set_as_sequence, /* tp_as_sequence */
2019 0, /* tp_as_mapping */
2020 frozenset_hash, /* tp_hash */
2021 0, /* tp_call */
2022 0, /* tp_str */
2023 PyObject_GenericGetAttr, /* tp_getattro */
2024 0, /* tp_setattro */
2025 0, /* tp_as_buffer */
2026 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_CHECKTYPES |
2027 Py_TPFLAGS_BASETYPE, /* tp_flags */
2028 frozenset_doc, /* tp_doc */
2029 (traverseproc)set_traverse, /* tp_traverse */
2030 (inquiry)set_clear_internal, /* tp_clear */
2031 (richcmpfunc)set_richcompare, /* tp_richcompare */
2032 offsetof(PySetObject, weakreflist), /* tp_weaklistoffset */
2033 (getiterfunc)set_iter, /* tp_iter */
2034 0, /* tp_iternext */
2035 frozenset_methods, /* tp_methods */
2036 0, /* tp_members */
2037 0, /* tp_getset */
2038 0, /* tp_base */
2039 0, /* tp_dict */
2040 0, /* tp_descr_get */
2041 0, /* tp_descr_set */
2042 0, /* tp_dictoffset */
2043 0, /* tp_init */
2044 PyType_GenericAlloc, /* tp_alloc */
2045 frozenset_new, /* tp_new */
2046 PyObject_GC_Del, /* tp_free */
2050 /***** C API functions *************************************************/
2052 PyObject *
2053 PySet_New(PyObject *iterable)
2055 return make_new_set(&PySet_Type, iterable);
2058 PyObject *
2059 PyFrozenSet_New(PyObject *iterable)
2061 PyObject *args, *result;
2063 if (iterable == NULL)
2064 args = PyTuple_New(0);
2065 else
2066 args = PyTuple_Pack(1, iterable);
2067 if (args == NULL)
2068 return NULL;
2069 result = frozenset_new(&PyFrozenSet_Type, args, NULL);
2070 Py_DECREF(args);
2071 return result;
2074 Py_ssize_t
2075 PySet_Size(PyObject *anyset)
2077 if (!PyAnySet_Check(anyset)) {
2078 PyErr_BadInternalCall();
2079 return -1;
2081 return PySet_GET_SIZE(anyset);
2085 PySet_Clear(PyObject *set)
2087 if (!PyType_IsSubtype(set->ob_type, &PySet_Type)) {
2088 PyErr_BadInternalCall();
2089 return -1;
2091 return set_clear_internal((PySetObject *)set);
2095 PySet_Contains(PyObject *anyset, PyObject *key)
2097 if (!PyAnySet_Check(anyset)) {
2098 PyErr_BadInternalCall();
2099 return -1;
2101 return set_contains_key((PySetObject *)anyset, key);
2105 PySet_Discard(PyObject *set, PyObject *key)
2107 if (!PyType_IsSubtype(set->ob_type, &PySet_Type)) {
2108 PyErr_BadInternalCall();
2109 return -1;
2111 return set_discard_key((PySetObject *)set, key);
2115 PySet_Add(PyObject *set, PyObject *key)
2117 if (!PyType_IsSubtype(set->ob_type, &PySet_Type)) {
2118 PyErr_BadInternalCall();
2119 return -1;
2121 return set_add_key((PySetObject *)set, key);
2125 _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **entry)
2127 setentry *entry_ptr;
2129 if (!PyAnySet_Check(set)) {
2130 PyErr_BadInternalCall();
2131 return -1;
2133 if (set_next((PySetObject *)set, pos, &entry_ptr) == 0)
2134 return 0;
2135 *entry = entry_ptr->key;
2136 return 1;
2139 PyObject *
2140 PySet_Pop(PyObject *set)
2142 if (!PyType_IsSubtype(set->ob_type, &PySet_Type)) {
2143 PyErr_BadInternalCall();
2144 return NULL;
2146 return set_pop((PySetObject *)set);
2150 _PySet_Update(PyObject *set, PyObject *iterable)
2152 if (!PyType_IsSubtype(set->ob_type, &PySet_Type)) {
2153 PyErr_BadInternalCall();
2154 return -1;
2156 return set_update_internal((PySetObject *)set, iterable);
2159 #ifdef Py_DEBUG
2161 /* Test code to be called with any three element set.
2162 Returns True and original set is restored. */
2164 #define assertRaises(call_return_value, exception) \
2165 do { \
2166 assert(call_return_value); \
2167 assert(PyErr_ExceptionMatches(exception)); \
2168 PyErr_Clear(); \
2169 } while(0)
2171 static PyObject *
2172 test_c_api(PySetObject *so)
2174 Py_ssize_t count;
2175 char *s;
2176 Py_ssize_t i;
2177 PyObject *elem, *dup, *t, *f, *dup2;
2178 PyObject *ob = (PyObject *)so;
2180 /* Verify preconditions and exercise type/size checks */
2181 assert(PyAnySet_Check(ob));
2182 assert(PyAnySet_CheckExact(ob));
2183 assert(!PyFrozenSet_CheckExact(ob));
2184 assert(PySet_Size(ob) == 3);
2185 assert(PySet_GET_SIZE(ob) == 3);
2187 /* Raise TypeError for non-iterable constructor arguments */
2188 assertRaises(PySet_New(Py_None) == NULL, PyExc_TypeError);
2189 assertRaises(PyFrozenSet_New(Py_None) == NULL, PyExc_TypeError);
2191 /* Raise TypeError for unhashable key */
2192 dup = PySet_New(ob);
2193 assertRaises(PySet_Discard(ob, dup) == -1, PyExc_TypeError);
2194 assertRaises(PySet_Contains(ob, dup) == -1, PyExc_TypeError);
2195 assertRaises(PySet_Add(ob, dup) == -1, PyExc_TypeError);
2197 /* Exercise successful pop, contains, add, and discard */
2198 elem = PySet_Pop(ob);
2199 assert(PySet_Contains(ob, elem) == 0);
2200 assert(PySet_GET_SIZE(ob) == 2);
2201 assert(PySet_Add(ob, elem) == 0);
2202 assert(PySet_Contains(ob, elem) == 1);
2203 assert(PySet_GET_SIZE(ob) == 3);
2204 assert(PySet_Discard(ob, elem) == 1);
2205 assert(PySet_GET_SIZE(ob) == 2);
2206 assert(PySet_Discard(ob, elem) == 0);
2207 assert(PySet_GET_SIZE(ob) == 2);
2209 /* Exercise clear */
2210 dup2 = PySet_New(dup);
2211 assert(PySet_Clear(dup2) == 0);
2212 assert(PySet_Size(dup2) == 0);
2213 Py_DECREF(dup2);
2215 /* Raise SystemError on clear or update of frozen set */
2216 f = PyFrozenSet_New(dup);
2217 assertRaises(PySet_Clear(f) == -1, PyExc_SystemError);
2218 assertRaises(_PySet_Update(f, dup) == -1, PyExc_SystemError);
2219 Py_DECREF(f);
2221 /* Exercise direct iteration */
2222 i = 0, count = 0;
2223 while (_PySet_Next((PyObject *)dup, &i, &elem)) {
2224 s = PyString_AsString(elem);
2225 assert(s && (s[0] == 'a' || s[0] == 'b' || s[0] == 'c'));
2226 count++;
2228 assert(count == 3);
2230 /* Exercise updates */
2231 dup2 = PySet_New(NULL);
2232 assert(_PySet_Update(dup2, dup) == 0);
2233 assert(PySet_Size(dup2) == 3);
2234 assert(_PySet_Update(dup2, dup) == 0);
2235 assert(PySet_Size(dup2) == 3);
2236 Py_DECREF(dup2);
2238 /* Raise SystemError when self argument is not a set or frozenset. */
2239 t = PyTuple_New(0);
2240 assertRaises(PySet_Size(t) == -1, PyExc_SystemError);
2241 assertRaises(PySet_Contains(t, elem) == -1, PyExc_SystemError);
2242 Py_DECREF(t);
2244 /* Raise SystemError when self argument is not a set. */
2245 f = PyFrozenSet_New(dup);
2246 assert(PySet_Size(f) == 3);
2247 assert(PyFrozenSet_CheckExact(f));
2248 assertRaises(PySet_Add(f, elem) == -1, PyExc_SystemError);
2249 assertRaises(PySet_Discard(f, elem) == -1, PyExc_SystemError);
2250 assertRaises(PySet_Pop(f) == NULL, PyExc_SystemError);
2251 Py_DECREF(f);
2253 /* Raise KeyError when popping from an empty set */
2254 assert(PyNumber_InPlaceSubtract(ob, ob) == ob);
2255 Py_DECREF(ob);
2256 assert(PySet_GET_SIZE(ob) == 0);
2257 assertRaises(PySet_Pop(ob) == NULL, PyExc_KeyError);
2259 /* Restore the set from the copy using the PyNumber API */
2260 assert(PyNumber_InPlaceOr(ob, dup) == ob);
2261 Py_DECREF(ob);
2263 /* Verify constructors accept NULL arguments */
2264 f = PySet_New(NULL);
2265 assert(f != NULL);
2266 assert(PySet_GET_SIZE(f) == 0);
2267 Py_DECREF(f);
2268 f = PyFrozenSet_New(NULL);
2269 assert(f != NULL);
2270 assert(PyFrozenSet_CheckExact(f));
2271 assert(PySet_GET_SIZE(f) == 0);
2272 Py_DECREF(f);
2274 Py_DECREF(elem);
2275 Py_DECREF(dup);
2276 Py_RETURN_TRUE;
2279 #undef assertRaises
2281 #endif