Don't abbreviate ABS, use long name ABSOLUTE.
[python.git] / Modules / gcmodule.c
blob206d34a3573b55e223d880d057ce44c7e4578609
1 /*
3 Reference Cycle Garbage Collection
4 ==================================
6 Neil Schemenauer <nas@arctrix.com>
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
11 http://www.arctrix.com/nas/python/gc/
12 http://www.python.org/pipermail/python-dev/2000-March/003869.html
13 http://www.python.org/pipermail/python-dev/2000-March/004010.html
14 http://www.python.org/pipermail/python-dev/2000-March/004022.html
16 For a highlevel view of the collection process, read the collect
17 function.
21 #include "Python.h"
/* Map an object pointer to the PyGC_Head that sits immediately before
 * it in memory. */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Inverse of AS_GC: recover the object that follows a GC head. */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
29 /*** Global GC state ***/
31 struct gc_generation {
32 PyGC_Head head;
33 int threshold; /* collection threshold */
34 int count; /* count of allocations or collections of younger
35 generations */
38 #define NUM_GENERATIONS 3
39 #define GEN_HEAD(n) (&generations[n].head)
41 /* linked lists of container objects */
42 static struct gc_generation generations[NUM_GENERATIONS] = {
43 /* PyGC_Head, threshold, count */
44 {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
45 {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
46 {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
49 PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
51 static int enabled = 1; /* automatic collection enabled? */
53 /* true if we are currently running the collector */
54 static int collecting = 0;
56 /* list of uncollectable objects */
57 static PyObject *garbage = NULL;
59 /* Python string to use if unhandled exception occurs */
60 static PyObject *gc_str = NULL;
62 /* Python string used to look for __del__ attribute. */
63 static PyObject *delstr = NULL;
/* Debugging flags, set via gc.set_debug() into `debug` below. */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_INSTANCES         (1<<3) /* print instances */
#define DEBUG_OBJECTS           (1<<4) /* print other objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
/* Fully parenthesized: the original expansion was a bare `a | b | ...`
 * chain, which mis-binds inside any expression with a higher-precedence
 * operator (e.g. `x & DEBUG_LEAK` parsed as `(x & a) | b | ...`). */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                                 DEBUG_UNCOLLECTABLE | \
                                 DEBUG_INSTANCES | \
                                 DEBUG_OBJECTS | \
                                 DEBUG_SAVEALL)
static int debug;
/*--------------------------------------------------------------------------
gc_refs values.

Between collections, every gc'ed object has one of two gc_refs values:

GC_UNTRACKED
    The initial state; objects returned by PyObject_GC_Malloc are in this
    state.  The object lives in no generation list, and its tp_traverse
    slot must not be called.

GC_REACHABLE
    The object lives in some generation list, and its tp_traverse slot is
    safe to call.  An object enters this state via PyObject_GC_Track.

During a collection, gc_refs can temporarily take on other states:

>= 0
    At the start of a collection, update_refs() copies each object's true
    refcount into gc_refs for the generation being collected.
    subtract_refs() then lowers gc_refs to the number of times the object
    is referenced directly from *outside* that generation.
    gc_refs stays >= 0 throughout these steps.

GC_TENTATIVELY_UNREACHABLE
    move_unreachable() moves objects not reachable from outside the
    generation (directly or indirectly) into an "unreachable" set, marking
    them GC_TENTATIVELY_UNREACHABLE; objects found reachable are restored
    to GC_REACHABLE.  "Tentatively" because the pass can't be sure until
    it finishes, and an object may transition back to GC_REACHABLE.

    Only objects still marked GC_TENTATIVELY_UNREACHABLE at the end are
    collection candidates.  If such an object is spared (e.g. it has a
    __del__ method), its gc_refs is restored to GC_REACHABLE.
----------------------------------------------------------------------------
*/
#define GC_UNTRACKED                    _PyGC_REFS_UNTRACKED
#define GC_REACHABLE                    _PyGC_REFS_REACHABLE
#define GC_TENTATIVELY_UNREACHABLE      _PyGC_REFS_TENTATIVELY_UNREACHABLE

#define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
    (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
127 /*** list functions ***/
129 static void
130 gc_list_init(PyGC_Head *list)
132 list->gc.gc_prev = list;
133 list->gc.gc_next = list;
136 static int
137 gc_list_is_empty(PyGC_Head *list)
139 return (list->gc.gc_next == list);
#if 0
/* This became unused after gc_list_move() was introduced. */
/* Link `node` in just before the head of `list`, i.e. append it. */
static void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *tail = list->gc.gc_prev;
    node->gc.gc_next = list;
    node->gc.gc_prev = tail;
    tail->gc.gc_next = node;
    list->gc.gc_prev = node;
}
#endif
155 /* Remove `node` from the gc list it's currently in. */
156 static void
157 gc_list_remove(PyGC_Head *node)
159 node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
160 node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
161 node->gc.gc_next = NULL; /* object is not currently tracked */
164 /* Move `node` from the gc list it's currently in (which is not explicitly
165 * named here) to the end of `list`. This is semantically the same as
166 * gc_list_remove(node) followed by gc_list_append(node, list).
168 static void
169 gc_list_move(PyGC_Head *node, PyGC_Head *list)
171 PyGC_Head *new_prev;
172 PyGC_Head *current_prev = node->gc.gc_prev;
173 PyGC_Head *current_next = node->gc.gc_next;
174 /* Unlink from current list. */
175 current_prev->gc.gc_next = current_next;
176 current_next->gc.gc_prev = current_prev;
177 /* Relink at end of new list. */
178 new_prev = node->gc.gc_prev = list->gc.gc_prev;
179 new_prev->gc.gc_next = list->gc.gc_prev = node;
180 node->gc.gc_next = list;
183 /* append list `from` onto list `to`; `from` becomes an empty list */
184 static void
185 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
187 PyGC_Head *tail;
188 assert(from != to);
189 if (!gc_list_is_empty(from)) {
190 tail = to->gc.gc_prev;
191 tail->gc.gc_next = from->gc.gc_next;
192 tail->gc.gc_next->gc.gc_prev = tail;
193 to->gc.gc_prev = from->gc.gc_prev;
194 to->gc.gc_prev->gc.gc_next = to;
196 gc_list_init(from);
199 static Py_ssize_t
200 gc_list_size(PyGC_Head *list)
202 PyGC_Head *gc;
203 Py_ssize_t n = 0;
204 for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
205 n++;
207 return n;
210 /* Append objects in a GC list to a Python list.
211 * Return 0 if all OK, < 0 if error (out of memory for list).
213 static int
214 append_objects(PyObject *py_list, PyGC_Head *gc_list)
216 PyGC_Head *gc;
217 for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
218 PyObject *op = FROM_GC(gc);
219 if (op != py_list) {
220 if (PyList_Append(py_list, op)) {
221 return -1; /* exception */
225 return 0;
228 /*** end of list stuff ***/
231 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
232 * in containers, and is GC_REACHABLE for all tracked gc objects not in
233 * containers.
235 static void
236 update_refs(PyGC_Head *containers)
238 PyGC_Head *gc = containers->gc.gc_next;
239 for (; gc != containers; gc = gc->gc.gc_next) {
240 assert(gc->gc.gc_refs == GC_REACHABLE);
241 gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
242 /* Python's cyclic gc should never see an incoming refcount
243 * of 0: if something decref'ed to 0, it should have been
244 * deallocated immediately at that time.
245 * Possible cause (if the assert triggers): a tp_dealloc
246 * routine left a gc-aware object tracked during its teardown
247 * phase, and did something-- or allowed something to happen --
248 * that called back into Python. gc can trigger then, and may
249 * see the still-tracked dying object. Before this assert
250 * was added, such mistakes went on to allow gc to try to
251 * delete the object again. In a debug build, that caused
252 * a mysterious segfault, when _Py_ForgetReference tried
253 * to remove the object from the doubly-linked list of all
254 * objects a second time. In a release build, an actual
255 * double deallocation occurred, which leads to corruption
256 * of the allocator's internal bookkeeping pointers. That's
257 * so serious that maybe this should be a release-build
258 * check instead of an assert?
260 assert(gc->gc.gc_refs != 0);
264 /* A traversal callback for subtract_refs. */
265 static int
266 visit_decref(PyObject *op, void *data)
268 assert(op != NULL);
269 if (PyObject_IS_GC(op)) {
270 PyGC_Head *gc = AS_GC(op);
271 /* We're only interested in gc_refs for objects in the
272 * generation being collected, which can be recognized
273 * because only they have positive gc_refs.
275 assert(gc->gc.gc_refs != 0); /* else refcount was too small */
276 if (gc->gc.gc_refs > 0)
277 gc->gc.gc_refs--;
279 return 0;
282 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
283 * for all objects in containers, and is GC_REACHABLE for all tracked gc
284 * objects not in containers. The ones with gc_refs > 0 are directly
285 * reachable from outside containers, and so can't be collected.
287 static void
288 subtract_refs(PyGC_Head *containers)
290 traverseproc traverse;
291 PyGC_Head *gc = containers->gc.gc_next;
292 for (; gc != containers; gc=gc->gc.gc_next) {
293 traverse = FROM_GC(gc)->ob_type->tp_traverse;
294 (void) traverse(FROM_GC(gc),
295 (visitproc)visit_decref,
296 NULL);
300 /* A traversal callback for move_unreachable. */
301 static int
302 visit_reachable(PyObject *op, PyGC_Head *reachable)
304 if (PyObject_IS_GC(op)) {
305 PyGC_Head *gc = AS_GC(op);
306 const Py_ssize_t gc_refs = gc->gc.gc_refs;
308 if (gc_refs == 0) {
309 /* This is in move_unreachable's 'young' list, but
310 * the traversal hasn't yet gotten to it. All
311 * we need to do is tell move_unreachable that it's
312 * reachable.
314 gc->gc.gc_refs = 1;
316 else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
317 /* This had gc_refs = 0 when move_unreachable got
318 * to it, but turns out it's reachable after all.
319 * Move it back to move_unreachable's 'young' list,
320 * and move_unreachable will eventually get to it
321 * again.
323 gc_list_move(gc, reachable);
324 gc->gc.gc_refs = 1;
326 /* Else there's nothing to do.
327 * If gc_refs > 0, it must be in move_unreachable's 'young'
328 * list, and move_unreachable will eventually get to it.
329 * If gc_refs == GC_REACHABLE, it's either in some other
330 * generation so we don't care about it, or move_unreachable
331 * already dealt with it.
332 * If gc_refs == GC_UNTRACKED, it must be ignored.
334 else {
335 assert(gc_refs > 0
336 || gc_refs == GC_REACHABLE
337 || gc_refs == GC_UNTRACKED);
340 return 0;
343 /* Move the unreachable objects from young to unreachable. After this,
344 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
345 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
346 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
347 * All objects in young after this are directly or indirectly reachable
348 * from outside the original young; and all objects in unreachable are
349 * not.
351 static void
352 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
354 PyGC_Head *gc = young->gc.gc_next;
356 /* Invariants: all objects "to the left" of us in young have gc_refs
357 * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
358 * from outside the young list as it was at entry. All other objects
359 * from the original young "to the left" of us are in unreachable now,
360 * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
361 * left of us in 'young' now have been scanned, and no objects here
362 * or to the right have been scanned yet.
365 while (gc != young) {
366 PyGC_Head *next;
368 if (gc->gc.gc_refs) {
369 /* gc is definitely reachable from outside the
370 * original 'young'. Mark it as such, and traverse
371 * its pointers to find any other objects that may
372 * be directly reachable from it. Note that the
373 * call to tp_traverse may append objects to young,
374 * so we have to wait until it returns to determine
375 * the next object to visit.
377 PyObject *op = FROM_GC(gc);
378 traverseproc traverse = op->ob_type->tp_traverse;
379 assert(gc->gc.gc_refs > 0);
380 gc->gc.gc_refs = GC_REACHABLE;
381 (void) traverse(op,
382 (visitproc)visit_reachable,
383 (void *)young);
384 next = gc->gc.gc_next;
386 else {
387 /* This *may* be unreachable. To make progress,
388 * assume it is. gc isn't directly reachable from
389 * any object we've already traversed, but may be
390 * reachable from an object we haven't gotten to yet.
391 * visit_reachable will eventually move gc back into
392 * young if that's so, and we'll see it again.
394 next = gc->gc.gc_next;
395 gc_list_move(gc, unreachable);
396 gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
398 gc = next;
402 /* Return true if object has a finalization method.
403 * CAUTION: An instance of an old-style class has to be checked for a
404 *__del__ method, and earlier versions of this used to call PyObject_HasAttr,
405 * which in turn could call the class's __getattr__ hook (if any). That
406 * could invoke arbitrary Python code, mutating the object graph in arbitrary
407 * ways, and that was the source of some excruciatingly subtle bugs.
409 static int
410 has_finalizer(PyObject *op)
412 if (PyInstance_Check(op)) {
413 assert(delstr != NULL);
414 return _PyInstance_Lookup(op, delstr) != NULL;
416 else
417 return op->ob_type->tp_del != NULL;
420 /* Move the objects in unreachable with __del__ methods into `finalizers`.
421 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
422 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
424 static void
425 move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
427 PyGC_Head *gc;
428 PyGC_Head *next;
430 /* March over unreachable. Move objects with finalizers into
431 * `finalizers`.
433 for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
434 PyObject *op = FROM_GC(gc);
436 assert(IS_TENTATIVELY_UNREACHABLE(op));
437 next = gc->gc.gc_next;
439 if (has_finalizer(op)) {
440 gc_list_move(gc, finalizers);
441 gc->gc.gc_refs = GC_REACHABLE;
446 /* A traversal callback for move_finalizer_reachable. */
447 static int
448 visit_move(PyObject *op, PyGC_Head *tolist)
450 if (PyObject_IS_GC(op)) {
451 if (IS_TENTATIVELY_UNREACHABLE(op)) {
452 PyGC_Head *gc = AS_GC(op);
453 gc_list_move(gc, tolist);
454 gc->gc.gc_refs = GC_REACHABLE;
457 return 0;
460 /* Move objects that are reachable from finalizers, from the unreachable set
461 * into finalizers set.
463 static void
464 move_finalizer_reachable(PyGC_Head *finalizers)
466 traverseproc traverse;
467 PyGC_Head *gc = finalizers->gc.gc_next;
468 for (; gc != finalizers; gc = gc->gc.gc_next) {
469 /* Note that the finalizers list may grow during this. */
470 traverse = FROM_GC(gc)->ob_type->tp_traverse;
471 (void) traverse(FROM_GC(gc),
472 (visitproc)visit_move,
473 (void *)finalizers);
477 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
478 * callback, invoke it if necessary. Note that it's possible for such
479 * weakrefs to be outside the unreachable set -- indeed, those are precisely
480 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
481 * overview & some details. Some weakrefs with callbacks may be reclaimed
482 * directly by this routine; the number reclaimed is the return value. Other
483 * weakrefs with callbacks may be moved into the `old` generation. Objects
484 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
485 * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
486 * no object in `unreachable` is weakly referenced anymore.
488 static int
489 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
491 PyGC_Head *gc;
492 PyObject *op; /* generally FROM_GC(gc) */
493 PyWeakReference *wr; /* generally a cast of op */
494 PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
495 PyGC_Head *next;
496 int num_freed = 0;
498 gc_list_init(&wrcb_to_call);
500 /* Clear all weakrefs to the objects in unreachable. If such a weakref
501 * also has a callback, move it into `wrcb_to_call` if the callback
502 * needs to be invoked. Note that we cannot invoke any callbacks until
503 * all weakrefs to unreachable objects are cleared, lest the callback
504 * resurrect an unreachable object via a still-active weakref. We
505 * make another pass over wrcb_to_call, invoking callbacks, after this
506 * pass completes.
508 for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
509 PyWeakReference **wrlist;
511 op = FROM_GC(gc);
512 assert(IS_TENTATIVELY_UNREACHABLE(op));
513 next = gc->gc.gc_next;
515 if (! PyType_SUPPORTS_WEAKREFS(op->ob_type))
516 continue;
518 /* It supports weakrefs. Does it have any? */
519 wrlist = (PyWeakReference **)
520 PyObject_GET_WEAKREFS_LISTPTR(op);
522 /* `op` may have some weakrefs. March over the list, clear
523 * all the weakrefs, and move the weakrefs with callbacks
524 * that must be called into wrcb_to_call.
526 for (wr = *wrlist; wr != NULL; wr = *wrlist) {
527 PyGC_Head *wrasgc; /* AS_GC(wr) */
529 /* _PyWeakref_ClearRef clears the weakref but leaves
530 * the callback pointer intact. Obscure: it also
531 * changes *wrlist.
533 assert(wr->wr_object == op);
534 _PyWeakref_ClearRef(wr);
535 assert(wr->wr_object == Py_None);
536 if (wr->wr_callback == NULL)
537 continue; /* no callback */
539 /* Headache time. `op` is going away, and is weakly referenced by
540 * `wr`, which has a callback. Should the callback be invoked? If wr
541 * is also trash, no:
543 * 1. There's no need to call it. The object and the weakref are
544 * both going away, so it's legitimate to pretend the weakref is
545 * going away first. The user has to ensure a weakref outlives its
546 * referent if they want a guarantee that the wr callback will get
547 * invoked.
549 * 2. It may be catastrophic to call it. If the callback is also in
550 * cyclic trash (CT), then although the CT is unreachable from
551 * outside the current generation, CT may be reachable from the
552 * callback. Then the callback could resurrect insane objects.
554 * Since the callback is never needed and may be unsafe in this case,
555 * wr is simply left in the unreachable set. Note that because we
556 * already called _PyWeakref_ClearRef(wr), its callback will never
557 * trigger.
559 * OTOH, if wr isn't part of CT, we should invoke the callback: the
560 * weakref outlived the trash. Note that since wr isn't CT in this
561 * case, its callback can't be CT either -- wr acted as an external
562 * root to this generation, and therefore its callback did too. So
563 * nothing in CT is reachable from the callback either, so it's hard
564 * to imagine how calling it later could create a problem for us. wr
565 * is moved to wrcb_to_call in this case.
567 if (IS_TENTATIVELY_UNREACHABLE(wr))
568 continue;
569 assert(IS_REACHABLE(wr));
571 /* Create a new reference so that wr can't go away
572 * before we can process it again.
574 Py_INCREF(wr);
576 /* Move wr to wrcb_to_call, for the next pass. */
577 wrasgc = AS_GC(wr);
578 assert(wrasgc != next); /* wrasgc is reachable, but
579 next isn't, so they can't
580 be the same */
581 gc_list_move(wrasgc, &wrcb_to_call);
585 /* Invoke the callbacks we decided to honor. It's safe to invoke them
586 * because they can't reference unreachable objects.
588 while (! gc_list_is_empty(&wrcb_to_call)) {
589 PyObject *temp;
590 PyObject *callback;
592 gc = wrcb_to_call.gc.gc_next;
593 op = FROM_GC(gc);
594 assert(IS_REACHABLE(op));
595 assert(PyWeakref_Check(op));
596 wr = (PyWeakReference *)op;
597 callback = wr->wr_callback;
598 assert(callback != NULL);
600 /* copy-paste of weakrefobject.c's handle_callback() */
601 temp = PyObject_CallFunction(callback, "O", wr);
602 if (temp == NULL)
603 PyErr_WriteUnraisable(callback);
604 else
605 Py_DECREF(temp);
607 /* Give up the reference we created in the first pass. When
608 * op's refcount hits 0 (which it may or may not do right now),
609 * op's tp_dealloc will decref op->wr_callback too. Note
610 * that the refcount probably will hit 0 now, and because this
611 * weakref was reachable to begin with, gc didn't already
612 * add it to its count of freed objects. Example: a reachable
613 * weak value dict maps some key to this reachable weakref.
614 * The callback removes this key->weakref mapping from the
615 * dict, leaving no other references to the weakref (excepting
616 * ours).
618 Py_DECREF(op);
619 if (wrcb_to_call.gc.gc_next == gc) {
620 /* object is still alive -- move it */
621 gc_list_move(gc, old);
623 else
624 ++num_freed;
627 return num_freed;
630 static void
631 debug_instance(char *msg, PyInstanceObject *inst)
633 char *cname;
634 /* simple version of instance_repr */
635 PyObject *classname = inst->in_class->cl_name;
636 if (classname != NULL && PyString_Check(classname))
637 cname = PyString_AsString(classname);
638 else
639 cname = "?";
640 PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
641 msg, cname, inst);
644 static void
645 debug_cycle(char *msg, PyObject *op)
647 if ((debug & DEBUG_INSTANCES) && PyInstance_Check(op)) {
648 debug_instance(msg, (PyInstanceObject *)op);
650 else if (debug & DEBUG_OBJECTS) {
651 PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
652 msg, op->ob_type->tp_name, op);
656 /* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
657 * only from such cycles).
658 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
659 * garbage list (a Python list), else only the objects in finalizers with
660 * __del__ methods are appended to garbage. All objects in finalizers are
661 * merged into the old list regardless.
662 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
663 * The finalizers list is made empty on a successful return.
665 static int
666 handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
668 PyGC_Head *gc = finalizers->gc.gc_next;
670 if (garbage == NULL) {
671 garbage = PyList_New(0);
672 if (garbage == NULL)
673 Py_FatalError("gc couldn't create gc.garbage list");
675 for (; gc != finalizers; gc = gc->gc.gc_next) {
676 PyObject *op = FROM_GC(gc);
678 if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) {
679 if (PyList_Append(garbage, op) < 0)
680 return -1;
684 gc_list_merge(finalizers, old);
685 return 0;
688 /* Break reference cycles by clearing the containers involved. This is
689 * tricky business as the lists can be changing and we don't know which
690 * objects may be freed. It is possible I screwed something up here.
692 static void
693 delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
695 inquiry clear;
697 while (!gc_list_is_empty(collectable)) {
698 PyGC_Head *gc = collectable->gc.gc_next;
699 PyObject *op = FROM_GC(gc);
701 assert(IS_TENTATIVELY_UNREACHABLE(op));
702 if (debug & DEBUG_SAVEALL) {
703 PyList_Append(garbage, op);
705 else {
706 if ((clear = op->ob_type->tp_clear) != NULL) {
707 Py_INCREF(op);
708 clear(op);
709 Py_DECREF(op);
712 if (collectable->gc.gc_next == gc) {
713 /* object is still alive, move it, it may die later */
714 gc_list_move(gc, old);
715 gc->gc.gc_refs = GC_REACHABLE;
720 /* This is the main function. Read this to understand how the
721 * collection process works. */
722 static Py_ssize_t
723 collect(int generation)
725 int i;
726 Py_ssize_t m = 0; /* # objects collected */
727 Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
728 PyGC_Head *young; /* the generation we are examining */
729 PyGC_Head *old; /* next older generation */
730 PyGC_Head unreachable; /* non-problematic unreachable trash */
731 PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
732 PyGC_Head *gc;
734 if (delstr == NULL) {
735 delstr = PyString_InternFromString("__del__");
736 if (delstr == NULL)
737 Py_FatalError("gc couldn't allocate \"__del__\"");
740 if (debug & DEBUG_STATS) {
741 PySys_WriteStderr("gc: collecting generation %d...\n",
742 generation);
743 PySys_WriteStderr("gc: objects in each generation:");
744 for (i = 0; i < NUM_GENERATIONS; i++)
745 PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d",
746 gc_list_size(GEN_HEAD(i)));
747 PySys_WriteStderr("\n");
750 /* update collection and allocation counters */
751 if (generation+1 < NUM_GENERATIONS)
752 generations[generation+1].count += 1;
753 for (i = 0; i <= generation; i++)
754 generations[i].count = 0;
756 /* merge younger generations with one we are currently collecting */
757 for (i = 0; i < generation; i++) {
758 gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
761 /* handy references */
762 young = GEN_HEAD(generation);
763 if (generation < NUM_GENERATIONS-1)
764 old = GEN_HEAD(generation+1);
765 else
766 old = young;
768 /* Using ob_refcnt and gc_refs, calculate which objects in the
769 * container set are reachable from outside the set (i.e., have a
770 * refcount greater than 0 when all the references within the
771 * set are taken into account).
773 update_refs(young);
774 subtract_refs(young);
776 /* Leave everything reachable from outside young in young, and move
777 * everything else (in young) to unreachable.
778 * NOTE: This used to move the reachable objects into a reachable
779 * set instead. But most things usually turn out to be reachable,
780 * so it's more efficient to move the unreachable things.
782 gc_list_init(&unreachable);
783 move_unreachable(young, &unreachable);
785 /* Move reachable objects to next generation. */
786 if (young != old)
787 gc_list_merge(young, old);
789 /* All objects in unreachable are trash, but objects reachable from
790 * finalizers can't safely be deleted. Python programmers should take
791 * care not to create such things. For Python, finalizers means
792 * instance objects with __del__ methods. Weakrefs with callbacks
793 * can also call arbitrary Python code but they will be dealt with by
794 * handle_weakrefs().
796 gc_list_init(&finalizers);
797 move_finalizers(&unreachable, &finalizers);
798 /* finalizers contains the unreachable objects with a finalizer;
799 * unreachable objects reachable *from* those are also uncollectable,
800 * and we move those into the finalizers list too.
802 move_finalizer_reachable(&finalizers);
804 /* Collect statistics on collectable objects found and print
805 * debugging information.
807 for (gc = unreachable.gc.gc_next; gc != &unreachable;
808 gc = gc->gc.gc_next) {
809 m++;
810 if (debug & DEBUG_COLLECTABLE) {
811 debug_cycle("collectable", FROM_GC(gc));
815 /* Clear weakrefs and invoke callbacks as necessary. */
816 m += handle_weakrefs(&unreachable, old);
818 /* Call tp_clear on objects in the unreachable set. This will cause
819 * the reference cycles to be broken. It may also cause some objects
820 * in finalizers to be freed.
822 delete_garbage(&unreachable, old);
824 /* Collect statistics on uncollectable objects found and print
825 * debugging information. */
826 for (gc = finalizers.gc.gc_next;
827 gc != &finalizers;
828 gc = gc->gc.gc_next) {
829 n++;
830 if (debug & DEBUG_UNCOLLECTABLE)
831 debug_cycle("uncollectable", FROM_GC(gc));
833 if (debug & DEBUG_STATS) {
834 if (m == 0 && n == 0)
835 PySys_WriteStderr("gc: done.\n");
836 else
837 PySys_WriteStderr(
838 "gc: done, "
839 "%" PY_FORMAT_SIZE_T "d unreachable, "
840 "%" PY_FORMAT_SIZE_T "d uncollectable.\n",
841 n+m, n);
844 /* Append instances in the uncollectable set to a Python
845 * reachable list of garbage. The programmer has to deal with
846 * this if they insist on creating this type of structure.
848 (void)handle_finalizers(&finalizers, old);
850 if (PyErr_Occurred()) {
851 if (gc_str == NULL)
852 gc_str = PyString_FromString("garbage collection");
853 PyErr_WriteUnraisable(gc_str);
854 Py_FatalError("unexpected exception during garbage collection");
856 return n+m;
859 static Py_ssize_t
860 collect_generations(void)
862 int i;
863 Py_ssize_t n = 0;
865 /* Find the oldest generation (higest numbered) where the count
866 * exceeds the threshold. Objects in the that generation and
867 * generations younger than it will be collected. */
868 for (i = NUM_GENERATIONS-1; i >= 0; i--) {
869 if (generations[i].count > generations[i].threshold) {
870 n = collect(i);
871 break;
874 return n;
877 PyDoc_STRVAR(gc_enable__doc__,
878 "enable() -> None\n"
879 "\n"
880 "Enable automatic garbage collection.\n");
882 static PyObject *
883 gc_enable(PyObject *self, PyObject *noargs)
885 enabled = 1;
886 Py_INCREF(Py_None);
887 return Py_None;
890 PyDoc_STRVAR(gc_disable__doc__,
891 "disable() -> None\n"
892 "\n"
893 "Disable automatic garbage collection.\n");
895 static PyObject *
896 gc_disable(PyObject *self, PyObject *noargs)
898 enabled = 0;
899 Py_INCREF(Py_None);
900 return Py_None;
903 PyDoc_STRVAR(gc_isenabled__doc__,
904 "isenabled() -> status\n"
905 "\n"
906 "Returns true if automatic garbage collection is enabled.\n");
908 static PyObject *
909 gc_isenabled(PyObject *self, PyObject *noargs)
911 return PyBool_FromLong((long)enabled);
914 PyDoc_STRVAR(gc_collect__doc__,
915 "collect([generation]) -> n\n"
916 "\n"
917 "With no arguments, run a full collection. The optional argument\n"
918 "may be an integer specifying which generation to collect. A ValueError\n"
919 "is raised if the generation number is invalid.\n\n"
920 "The number of unreachable objects is returned.\n");
922 static PyObject *
923 gc_collect(PyObject *self, PyObject *args, PyObject *kws)
925 static char *keywords[] = {"generation", NULL};
926 int genarg = NUM_GENERATIONS - 1;
927 Py_ssize_t n;
929 if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
930 return NULL;
932 else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
933 PyErr_SetString(PyExc_ValueError, "invalid generation");
934 return NULL;
937 if (collecting)
938 n = 0; /* already collecting, don't do anything */
939 else {
940 collecting = 1;
941 n = collect(genarg);
942 collecting = 0;
945 return PyInt_FromSsize_t(n);
948 PyDoc_STRVAR(gc_set_debug__doc__,
949 "set_debug(flags) -> None\n"
950 "\n"
951 "Set the garbage collection debugging flags. Debugging information is\n"
952 "written to sys.stderr.\n"
953 "\n"
954 "flags is an integer and can have the following bits turned on:\n"
955 "\n"
956 " DEBUG_STATS - Print statistics during collection.\n"
957 " DEBUG_COLLECTABLE - Print collectable objects found.\n"
958 " DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
959 " DEBUG_INSTANCES - Print instance objects.\n"
960 " DEBUG_OBJECTS - Print objects other than instances.\n"
961 " DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
962 " DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");
964 static PyObject *
965 gc_set_debug(PyObject *self, PyObject *args)
967 if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
968 return NULL;
970 Py_INCREF(Py_None);
971 return Py_None;
974 PyDoc_STRVAR(gc_get_debug__doc__,
975 "get_debug() -> flags\n"
976 "\n"
977 "Get the garbage collection debugging flags.\n");
979 static PyObject *
980 gc_get_debug(PyObject *self, PyObject *noargs)
982 return Py_BuildValue("i", debug);
985 PyDoc_STRVAR(gc_set_thresh__doc__,
986 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
987 "\n"
988 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
989 "collection.\n");
991 static PyObject *
992 gc_set_thresh(PyObject *self, PyObject *args)
994 int i;
995 if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
996 &generations[0].threshold,
997 &generations[1].threshold,
998 &generations[2].threshold))
999 return NULL;
1000 for (i = 2; i < NUM_GENERATIONS; i++) {
1001 /* generations higher than 2 get the same threshold */
1002 generations[i].threshold = generations[2].threshold;
1005 Py_INCREF(Py_None);
1006 return Py_None;
1009 PyDoc_STRVAR(gc_get_thresh__doc__,
1010 "get_threshold() -> (threshold0, threshold1, threshold2)\n"
1011 "\n"
1012 "Return the current collection thresholds\n");
1014 static PyObject *
1015 gc_get_thresh(PyObject *self, PyObject *noargs)
1017 return Py_BuildValue("(iii)",
1018 generations[0].threshold,
1019 generations[1].threshold,
1020 generations[2].threshold);
1023 PyDoc_STRVAR(gc_get_count__doc__,
1024 "get_count() -> (count0, count1, count2)\n"
1025 "\n"
1026 "Return the current collection counts\n");
1028 static PyObject *
1029 gc_get_count(PyObject *self, PyObject *noargs)
1031 return Py_BuildValue("(iii)",
1032 generations[0].count,
1033 generations[1].count,
1034 generations[2].count);
1037 static int
1038 referrersvisit(PyObject* obj, PyObject *objs)
1040 int i;
1041 for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1042 if (PyTuple_GET_ITEM(objs, i) == obj)
1043 return 1;
1044 return 0;
1047 static int
1048 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1050 PyGC_Head *gc;
1051 PyObject *obj;
1052 traverseproc traverse;
1053 for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
1054 obj = FROM_GC(gc);
1055 traverse = obj->ob_type->tp_traverse;
1056 if (obj == objs || obj == resultlist)
1057 continue;
1058 if (traverse(obj, (visitproc)referrersvisit, objs)) {
1059 if (PyList_Append(resultlist, obj) < 0)
1060 return 0; /* error */
1063 return 1; /* no error */
1066 PyDoc_STRVAR(gc_get_referrers__doc__,
1067 "get_referrers(*objs) -> list\n\
1068 Return the list of objects that directly refer to any of objs.");
1070 static PyObject *
1071 gc_get_referrers(PyObject *self, PyObject *args)
1073 int i;
1074 PyObject *result = PyList_New(0);
1075 if (!result) return NULL;
1077 for (i = 0; i < NUM_GENERATIONS; i++) {
1078 if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
1079 Py_DECREF(result);
1080 return NULL;
1083 return result;
1086 /* Append obj to list; return true if error (out of memory), false if OK. */
1087 static int
1088 referentsvisit(PyObject *obj, PyObject *list)
1090 return PyList_Append(list, obj) < 0;
1093 PyDoc_STRVAR(gc_get_referents__doc__,
1094 "get_referents(*objs) -> list\n\
1095 Return the list of objects that are directly referred to by objs.");
1097 static PyObject *
1098 gc_get_referents(PyObject *self, PyObject *args)
1100 int i;
1101 PyObject *result = PyList_New(0);
1103 if (result == NULL)
1104 return NULL;
1106 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1107 traverseproc traverse;
1108 PyObject *obj = PyTuple_GET_ITEM(args, i);
1110 if (! PyObject_IS_GC(obj))
1111 continue;
1112 traverse = obj->ob_type->tp_traverse;
1113 if (! traverse)
1114 continue;
1115 if (traverse(obj, (visitproc)referentsvisit, result)) {
1116 Py_DECREF(result);
1117 return NULL;
1120 return result;
1123 PyDoc_STRVAR(gc_get_objects__doc__,
1124 "get_objects() -> [...]\n"
1125 "\n"
1126 "Return a list of objects tracked by the collector (excluding the list\n"
1127 "returned).\n");
1129 static PyObject *
1130 gc_get_objects(PyObject *self, PyObject *noargs)
1132 int i;
1133 PyObject* result;
1135 result = PyList_New(0);
1136 if (result == NULL)
1137 return NULL;
1138 for (i = 0; i < NUM_GENERATIONS; i++) {
1139 if (append_objects(result, GEN_HEAD(i))) {
1140 Py_DECREF(result);
1141 return NULL;
1144 return result;
1148 PyDoc_STRVAR(gc__doc__,
1149 "This module provides access to the garbage collector for reference cycles.\n"
1150 "\n"
1151 "enable() -- Enable automatic garbage collection.\n"
1152 "disable() -- Disable automatic garbage collection.\n"
1153 "isenabled() -- Returns true if automatic collection is enabled.\n"
1154 "collect() -- Do a full collection right now.\n"
1155 "set_debug() -- Set debugging flags.\n"
1156 "get_debug() -- Get debugging flags.\n"
1157 "set_threshold() -- Set the collection thresholds.\n"
1158 "get_threshold() -- Return the current the collection thresholds.\n"
1159 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1160 "get_referrers() -- Return the list of objects that refer to an object.\n"
1161 "get_referents() -- Return the list of objects that an object refers to.\n");
1163 static PyMethodDef GcMethods[] = {
1164 {"enable", gc_enable, METH_NOARGS, gc_enable__doc__},
1165 {"disable", gc_disable, METH_NOARGS, gc_disable__doc__},
1166 {"isenabled", gc_isenabled, METH_NOARGS, gc_isenabled__doc__},
1167 {"set_debug", gc_set_debug, METH_VARARGS, gc_set_debug__doc__},
1168 {"get_debug", gc_get_debug, METH_NOARGS, gc_get_debug__doc__},
1169 {"get_count", gc_get_count, METH_NOARGS, gc_get_count__doc__},
1170 {"set_threshold", gc_set_thresh, METH_VARARGS, gc_set_thresh__doc__},
1171 {"get_threshold", gc_get_thresh, METH_NOARGS, gc_get_thresh__doc__},
1172 {"collect", (PyCFunction)gc_collect,
1173 METH_VARARGS | METH_KEYWORDS, gc_collect__doc__},
1174 {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__},
1175 {"get_referrers", gc_get_referrers, METH_VARARGS,
1176 gc_get_referrers__doc__},
1177 {"get_referents", gc_get_referents, METH_VARARGS,
1178 gc_get_referents__doc__},
1179 {NULL, NULL} /* Sentinel */
1182 PyMODINIT_FUNC
1183 initgc(void)
1185 PyObject *m;
1187 m = Py_InitModule4("gc",
1188 GcMethods,
1189 gc__doc__,
1190 NULL,
1191 PYTHON_API_VERSION);
1192 if (m == NULL)
1193 return;
1195 if (garbage == NULL) {
1196 garbage = PyList_New(0);
1197 if (garbage == NULL)
1198 return;
1200 Py_INCREF(garbage);
1201 if (PyModule_AddObject(m, "garbage", garbage) < 0)
1202 return;
1203 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return
1204 ADD_INT(DEBUG_STATS);
1205 ADD_INT(DEBUG_COLLECTABLE);
1206 ADD_INT(DEBUG_UNCOLLECTABLE);
1207 ADD_INT(DEBUG_INSTANCES);
1208 ADD_INT(DEBUG_OBJECTS);
1209 ADD_INT(DEBUG_SAVEALL);
1210 ADD_INT(DEBUG_LEAK);
1211 #undef ADD_INT
1214 /* API to invoke gc.collect() from C */
1215 Py_ssize_t
1216 PyGC_Collect(void)
1218 Py_ssize_t n;
1220 if (collecting)
1221 n = 0; /* already collecting, don't do anything */
1222 else {
1223 collecting = 1;
1224 n = collect(NUM_GENERATIONS - 1);
1225 collecting = 0;
1228 return n;
1231 /* for debugging */
1232 void
1233 _PyGC_Dump(PyGC_Head *g)
1235 _PyObject_Dump(FROM_GC(g));
/* extension modules might be compiled with GC support so these
   functions must always be available */

#undef PyObject_GC_Track
#undef PyObject_GC_UnTrack
#undef PyObject_GC_Del
#undef _PyObject_GC_Malloc

/* Out-of-line version of the tracking macro, kept for the ABI. */
void
PyObject_GC_Track(void *op)
{
        _PyObject_GC_TRACK(op);
}
1252 /* for binary compatibility with 2.2 */
1253 void
1254 _PyObject_GC_Track(PyObject *op)
1256 PyObject_GC_Track(op);
void
PyObject_GC_UnTrack(void *op)
{
        /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
         * call PyObject_GC_UnTrack twice on an object, so only untrack
         * when the object is still tracked.
         */
        if (IS_TRACKED(op))
                _PyObject_GC_UNTRACK(op);
}
1269 /* for binary compatibility with 2.2 */
1270 void
1271 _PyObject_GC_UnTrack(PyObject *op)
1273 PyObject_GC_UnTrack(op);
1276 PyObject *
1277 _PyObject_GC_Malloc(size_t basicsize)
1279 PyObject *op;
1280 PyGC_Head *g = PyObject_MALLOC(sizeof(PyGC_Head) + basicsize);
1281 if (g == NULL)
1282 return PyErr_NoMemory();
1283 g->gc.gc_refs = GC_UNTRACKED;
1284 generations[0].count++; /* number of allocated GC objects */
1285 if (generations[0].count > generations[0].threshold &&
1286 enabled &&
1287 generations[0].threshold &&
1288 !collecting &&
1289 !PyErr_Occurred()) {
1290 collecting = 1;
1291 collect_generations();
1292 collecting = 0;
1294 op = FROM_GC(g);
1295 return op;
1298 PyObject *
1299 _PyObject_GC_New(PyTypeObject *tp)
1301 PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
1302 if (op != NULL)
1303 op = PyObject_INIT(op, tp);
1304 return op;
1307 PyVarObject *
1308 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
1310 const size_t size = _PyObject_VAR_SIZE(tp, nitems);
1311 PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size);
1312 if (op != NULL)
1313 op = PyObject_INIT_VAR(op, tp, nitems);
1314 return op;
1317 PyVarObject *
1318 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
1320 const size_t basicsize = _PyObject_VAR_SIZE(op->ob_type, nitems);
1321 PyGC_Head *g = AS_GC(op);
1322 g = PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
1323 if (g == NULL)
1324 return (PyVarObject *)PyErr_NoMemory();
1325 op = (PyVarObject *) FROM_GC(g);
1326 op->ob_size = nitems;
1327 return op;
1330 void
1331 PyObject_GC_Del(void *op)
1333 PyGC_Head *g = AS_GC(op);
1334 if (IS_TRACKED(op))
1335 gc_list_remove(g);
1336 if (generations[0].count > 0) {
1337 generations[0].count--;
1339 PyObject_FREE(g);
1342 /* for binary compatibility with 2.2 */
1343 #undef _PyObject_GC_Del
1344 void
1345 _PyObject_GC_Del(PyObject *op)
1347 PyObject_GC_Del(op);