/* Modules/gcmodule.c */
/*

  Reference Cycle Garbage Collection
  ==================================

  Neil Schemenauer <nas@arctrix.com>

  Based on a post on the python-dev list.  Ideas from Guido van Rossum,
  Eric Tiedemann, and various others.

  http://www.arctrix.com/nas/python/gc/
  http://www.python.org/pipermail/python-dev/2000-March/003869.html
  http://www.python.org/pipermail/python-dev/2000-March/004010.html
  http://www.python.org/pipermail/python-dev/2000-March/004022.html

  For a high-level view of the collection process, read the collect()
  function.

*/

#include "Python.h"
/* Get an object's GC head */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))

/*** Global GC state ***/

struct gc_generation {
    PyGC_Head head;
    int threshold;  /* collection threshold */
    int count;      /* count of allocations or collections of younger
                       generations */
};

#define NUM_GENERATIONS 3
#define GEN_HEAD(n) (&generations[n].head)

/* linked lists of container objects */
static struct gc_generation generations[NUM_GENERATIONS] = {
    /* PyGC_Head,                       threshold,  count */
    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},   700,        0},
    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},   10,         0},
    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},   10,         0},
};
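/* Summary of how the counters drive collection (see _PyObject_GC_Malloc
 * and collect() below): generation 0's count is the net number of tracked
 * allocations since the last collection; once it exceeds threshold0 (700
 * by default), generation 0 is collected.  Each older generation's count
 * records collections of the next younger generation, so with the default
 * thresholds a generation-1 collection runs after every 10 generation-0
 * collections, and a generation-2 (full) collection after every 10
 * generation-1 collections.
 */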
PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);

static int enabled = 1; /* automatic collection enabled? */

/* true if we are currently running the collector */
static int collecting = 0;

/* list of uncollectable objects */
static PyObject *garbage = NULL;

/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;

/* Python string used to look for __del__ attribute. */
static PyObject *delstr = NULL;

/* set for debugging information */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_INSTANCES         (1<<3) /* print instances */
#define DEBUG_OBJECTS           (1<<4) /* print other objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                                 DEBUG_UNCOLLECTABLE | \
                                 DEBUG_INSTANCES | \
                                 DEBUG_OBJECTS | \
                                 DEBUG_SAVEALL)
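/* Note: DEBUG_LEAK turns on everything except DEBUG_STATS.  In particular
 * it includes DEBUG_SAVEALL, so with DEBUG_LEAK set, otherwise-collectable
 * cycles are appended to gc.garbage for inspection instead of being freed.
 */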
static int debug;
static PyObject *tmod = NULL;

/*--------------------------------------------------------------------------
gc_refs values.

Between collections, every gc'ed object has one of two gc_refs values:

GC_UNTRACKED
    The initial state; objects returned by PyObject_GC_Malloc are in this
    state.  The object doesn't live in any generation list, and its
    tp_traverse slot must not be called.

GC_REACHABLE
    The object lives in some generation list, and its tp_traverse is safe to
    call.  An object transitions to GC_REACHABLE when PyObject_GC_Track
    is called.

During a collection, gc_refs can temporarily take on other states:

>= 0
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.
    gc_refs remains >= 0 throughout these steps.

GC_TENTATIVELY_UNREACHABLE
    move_unreachable() then moves objects not reachable (whether directly or
    indirectly) from outside the generation into an "unreachable" set.
    Objects that are found to be reachable have gc_refs set to GC_REACHABLE
    again.  Objects that are found to be unreachable have gc_refs set to
    GC_TENTATIVELY_UNREACHABLE.  It's "tentatively" because the pass doing
    this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
    transition back to GC_REACHABLE.

    Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
    for collection.  If it's decided not to collect such an object (e.g.,
    it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
----------------------------------------------------------------------------
*/
#define GC_UNTRACKED                    _PyGC_REFS_UNTRACKED
#define GC_REACHABLE                    _PyGC_REFS_REACHABLE
#define GC_TENTATIVELY_UNREACHABLE      _PyGC_REFS_TENTATIVELY_UNREACHABLE

#define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
    (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
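/* Worked example (illustrative, not part of the original comments):
 * suppose the generation being collected holds objects A and B with
 * A -> B -> A, plus one external (outside-the-generation) reference to A:
 *
 *   update_refs:      gc_refs(A) = 2, gc_refs(B) = 1   (true refcounts)
 *   subtract_refs:    gc_refs(A) = 1, gc_refs(B) = 0   (internal refs gone)
 *   move_unreachable: A has gc_refs > 0, so it's marked GC_REACHABLE and
 *                     traversed; visiting B restores gc_refs(B) to 1, so B
 *                     is (moved back if needed and) found reachable too.
 *
 * Without the external reference, both end at gc_refs = 0 and are moved
 * to the unreachable set as GC_TENTATIVELY_UNREACHABLE.
 */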
/*** list functions ***/

static void
gc_list_init(PyGC_Head *list)
{
    list->gc.gc_prev = list;
    list->gc.gc_next = list;
}

static int
gc_list_is_empty(PyGC_Head *list)
{
    return (list->gc.gc_next == list);
}

#if 0
/* This became unused after gc_list_move() was introduced. */
/* Append `node` to `list`. */
static void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    node->gc.gc_next = list;
    node->gc.gc_prev = list->gc.gc_prev;
    node->gc.gc_prev->gc.gc_next = node;
    list->gc.gc_prev = node;
}
#endif

/* Remove `node` from the gc list it's currently in. */
static void
gc_list_remove(PyGC_Head *node)
{
    node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
    node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
    node->gc.gc_next = NULL; /* object is not currently tracked */
}

/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *new_prev;
    PyGC_Head *current_prev = node->gc.gc_prev;
    PyGC_Head *current_next = node->gc.gc_next;
    /* Unlink from current list. */
    current_prev->gc.gc_next = current_next;
    current_next->gc.gc_prev = current_prev;
    /* Relink at end of new list. */
    new_prev = node->gc.gc_prev = list->gc.gc_prev;
    new_prev->gc.gc_next = list->gc.gc_prev = node;
    node->gc.gc_next = list;
}

/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    PyGC_Head *tail;
    assert(from != to);
    if (!gc_list_is_empty(from)) {
        tail = to->gc.gc_prev;
        tail->gc.gc_next = from->gc.gc_next;
        tail->gc.gc_next->gc.gc_prev = tail;
        to->gc.gc_prev = from->gc.gc_prev;
        to->gc.gc_prev->gc.gc_next = to;
    }
    gc_list_init(from);
}

static Py_ssize_t
gc_list_size(PyGC_Head *list)
{
    PyGC_Head *gc;
    Py_ssize_t n = 0;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        n++;
    }
    return n;
}

/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list).
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{
    PyGC_Head *gc;
    for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);
        if (op != py_list) {
            if (PyList_Append(py_list, op)) {
                return -1; /* exception */
            }
        }
    }
    return 0;
}

/*** end of list stuff ***/
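/* Note (summary of the invariant the functions above maintain): every gc
 * list is circular and doubly linked, with a dummy PyGC_Head acting as
 * sentinel.  An empty list is one whose head points at itself both ways:
 *
 *     PyGC_Head head;
 *     gc_list_init(&head);
 *     assert(gc_list_is_empty(&head));   (head.next == head.prev == &head)
 *
 * Iteration therefore runs from head.gc.gc_next until the sentinel comes
 * around again, as in gc_list_size() and append_objects() above.
 */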
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 for all objects
 * in containers, and is GC_REACHABLE for all tracked gc objects not in
 * containers.
 */
static void
update_refs(PyGC_Head *containers)
{
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        assert(gc->gc.gc_refs == GC_REACHABLE);
        gc->gc.gc_refs = Py_REFCNT(FROM_GC(gc));
        /* Python's cyclic gc should never see an incoming refcount
         * of 0:  if something decref'ed to 0, it should have been
         * deallocated immediately at that time.
         * Possible cause (if the assert triggers):  a tp_dealloc
         * routine left a gc-aware object tracked during its teardown
         * phase, and did something -- or allowed something to happen --
         * that called back into Python.  gc can trigger then, and may
         * see the still-tracked dying object.  Before this assert
         * was added, such mistakes went on to allow gc to try to
         * delete the object again.  In a debug build, that caused
         * a mysterious segfault, when _Py_ForgetReference tried
         * to remove the object from the doubly-linked list of all
         * objects a second time.  In a release build, an actual
         * double deallocation occurred, which leads to corruption
         * of the allocator's internal bookkeeping pointers.  That's
         * so serious that maybe this should be a release-build
         * check instead of an assert?
         */
        assert(gc->gc.gc_refs != 0);
    }
}
/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *data)
{
    assert(op != NULL);
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* We're only interested in gc_refs for objects in the
         * generation being collected, which can be recognized
         * because only they have positive gc_refs.
         */
        assert(gc->gc.gc_refs != 0); /* else refcount was too small */
        if (gc->gc.gc_refs > 0)
            gc->gc.gc_refs--;
    }
    return 0;
}

/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers, and is GC_REACHABLE for all tracked gc
 * objects not in containers.  The ones with gc_refs > 0 are directly
 * reachable from outside containers, and so can't be collected.
 */
static void
subtract_refs(PyGC_Head *containers)
{
    traverseproc traverse;
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_decref,
                        NULL);
    }
}
/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
{
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        const Py_ssize_t gc_refs = gc->gc.gc_refs;

        if (gc_refs == 0) {
            /* This is in move_unreachable's 'young' list, but
             * the traversal hasn't yet gotten to it.  All
             * we need to do is tell move_unreachable that it's
             * reachable.
             */
            gc->gc.gc_refs = 1;
        }
        else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
            /* This had gc_refs = 0 when move_unreachable got
             * to it, but turns out it's reachable after all.
             * Move it back to move_unreachable's 'young' list,
             * and move_unreachable will eventually get to it
             * again.
             */
            gc_list_move(gc, reachable);
            gc->gc.gc_refs = 1;
        }
        /* Else there's nothing to do.
         * If gc_refs > 0, it must be in move_unreachable's 'young'
         * list, and move_unreachable will eventually get to it.
         * If gc_refs == GC_REACHABLE, it's either in some other
         * generation so we don't care about it, or move_unreachable
         * already dealt with it.
         * If gc_refs == GC_UNTRACKED, it must be ignored.
         */
        else {
            assert(gc_refs > 0
                   || gc_refs == GC_REACHABLE
                   || gc_refs == GC_UNTRACKED);
        }
    }
    return 0;
}
/* Move the unreachable objects from young to unreachable.  After this,
 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All tracked
 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young; and all objects in unreachable are
 * not.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
    PyGC_Head *gc = young->gc.gc_next;

    /* Invariants:  all objects "to the left" of us in young have gc_refs
     * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
     * from outside the young list as it was at entry.  All other objects
     * from the original young "to the left" of us are in unreachable now,
     * and have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */

    while (gc != young) {
        PyGC_Head *next;

        if (gc->gc.gc_refs) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
             * its pointers to find any other objects that may
             * be directly reachable from it.  Note that the
             * call to tp_traverse may append objects to young,
             * so we have to wait until it returns to determine
             * the next object to visit.
             */
            PyObject *op = FROM_GC(gc);
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
            assert(gc->gc.gc_refs > 0);
            gc->gc.gc_refs = GC_REACHABLE;
            (void) traverse(op,
                            (visitproc)visit_reachable,
                            (void *)young);
            next = gc->gc.gc_next;
        }
        else {
            /* This *may* be unreachable.  To make progress,
             * assume it is.  gc isn't directly reachable from
             * any object we've already traversed, but may be
             * reachable from an object we haven't gotten to yet.
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
            next = gc->gc.gc_next;
            gc_list_move(gc, unreachable);
            gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
        }
        gc = next;
    }
}
/* Return true if object has a finalization method.
 * CAUTION:  An instance of an old-style class has to be checked for a
 * __del__ method, and earlier versions of this used to call
 * PyObject_HasAttr, which in turn could call the class's __getattr__ hook
 * (if any).  That could invoke arbitrary Python code, mutating the object
 * graph in arbitrary ways, and that was the source of some excruciatingly
 * subtle bugs.
 */
static int
has_finalizer(PyObject *op)
{
    if (PyInstance_Check(op)) {
        assert(delstr != NULL);
        return _PyInstance_Lookup(op, delstr) != NULL;
    }
    else if (PyType_HasFeature(op->ob_type, Py_TPFLAGS_HEAPTYPE))
        return op->ob_type->tp_del != NULL;
    else if (PyGen_CheckExact(op))
        return PyGen_NeedsFinalizing((PyGenObject *)op);
    else
        return 0;
}
/* Move the objects in unreachable with __del__ methods into `finalizers`.
 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
 */
static void
move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{
    PyGC_Head *gc;
    PyGC_Head *next;

    /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (has_finalizer(op)) {
            gc_list_move(gc, finalizers);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
}
/* A traversal callback for move_finalizer_reachable. */
static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
    if (PyObject_IS_GC(op)) {
        if (IS_TENTATIVELY_UNREACHABLE(op)) {
            PyGC_Head *gc = AS_GC(op);
            gc_list_move(gc, tolist);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
    return 0;
}

/* Move objects that are reachable from finalizers, from the unreachable
 * set into the finalizers set.
 */
static void
move_finalizer_reachable(PyGC_Head *finalizers)
{
    traverseproc traverse;
    PyGC_Head *gc = finalizers->gc.gc_next;
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        /* Note that the finalizers list may grow during this. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_move,
                        (void *)finalizers);
    }
}
/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
 * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyObject *op;               /* generally FROM_GC(gc) */
    PyWeakReference *wr;        /* generally a cast of op */
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;

    gc_list_init(&wrcb_to_call);

    /* Clear all weakrefs to the objects in unreachable.  If such a weakref
     * also has a callback, move it into `wrcb_to_call` if the callback
     * needs to be invoked.  Note that we cannot invoke any callbacks until
     * all weakrefs to unreachable objects are cleared, lest the callback
     * resurrect an unreachable object via a still-active weakref.  We
     * make another pass over wrcb_to_call, invoking callbacks, after this
     * pass completes.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        op = FROM_GC(gc);
        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
            continue;

        /* It supports weakrefs.  Does it have any? */
        wrlist = (PyWeakReference **)
                     PyObject_GET_WEAKREFS_LISTPTR(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs, and move the weakrefs with callbacks
         * that must be called into wrcb_to_call.
         */
        for (wr = *wrlist; wr != NULL; wr = *wrlist) {
            PyGC_Head *wrasgc;  /* AS_GC(wr) */

            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            assert(wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            assert(wr->wr_object == Py_None);
            if (wr->wr_callback == NULL)
                continue;       /* no callback */

            /* Headache time.  `op` is going away, and is weakly referenced
             * by `wr`, which has a callback.  Should the callback be
             * invoked?  If wr is also trash, no:
             *
             * 1. There's no need to call it.  The object and the weakref
             *    are both going away, so it's legitimate to pretend the
             *    weakref is going away first.  The user has to ensure a
             *    weakref outlives its referent if they want a guarantee
             *    that the wr callback will get invoked.
             *
             * 2. It may be catastrophic to call it.  If the callback is
             *    also in cyclic trash (CT), then although the CT is
             *    unreachable from outside the current generation, CT may
             *    be reachable from the callback.  Then the callback could
             *    resurrect insane objects.
             *
             * Since the callback is never needed and may be unsafe in this
             * case, wr is simply left in the unreachable set.  Note that
             * because we already called _PyWeakref_ClearRef(wr), its
             * callback will never trigger.
             *
             * OTOH, if wr isn't part of CT, we should invoke the callback:
             * the weakref outlived the trash.  Note that since wr isn't CT
             * in this case, its callback can't be CT either -- wr acted as
             * an external root to this generation, and therefore its
             * callback did too.  So nothing in CT is reachable from the
             * callback either, so it's hard to imagine how calling it later
             * could create a problem for us.  wr is moved to wrcb_to_call
             * in this case.
             */
            if (IS_TENTATIVELY_UNREACHABLE(wr))
                continue;
            assert(IS_REACHABLE(wr));

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            wrasgc = AS_GC(wr);
            assert(wrasgc != next); /* wrasgc is reachable, but
                                       next isn't, so they can't
                                       be the same */
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = wrcb_to_call.gc.gc_next;
        op = FROM_GC(gc);
        assert(IS_REACHABLE(op));
        assert(PyWeakref_Check(op));
        wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        assert(callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallFunctionObjArgs(callback, (PyObject *)wr, NULL);
        if (temp == NULL)
            PyErr_WriteUnraisable(callback);
        else
            Py_DECREF(temp);

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        if (wrcb_to_call.gc.gc_next == gc) {
            /* object is still alive -- move it */
            gc_list_move(gc, old);
        }
        else
            ++num_freed;
    }

    return num_freed;
}
static void
debug_instance(char *msg, PyInstanceObject *inst)
{
    char *cname;
    /* simple version of instance_repr */
    PyObject *classname = inst->in_class->cl_name;
    if (classname != NULL && PyString_Check(classname))
        cname = PyString_AsString(classname);
    else
        cname = "?";
    PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
                      msg, cname, inst);
}

static void
debug_cycle(char *msg, PyObject *op)
{
    if ((debug & DEBUG_INSTANCES) && PyInstance_Check(op)) {
        debug_instance(msg, (PyInstanceObject *)op);
    }
    else if (debug & DEBUG_OBJECTS) {
        PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
                          msg, Py_TYPE(op)->tp_name, op);
    }
}
/* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
 * only from such cycles).
 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
 * The finalizers list is made empty on a successful return.
 */
static int
handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
{
    PyGC_Head *gc = finalizers->gc.gc_next;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            Py_FatalError("gc couldn't create gc.garbage list");
    }
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);

        if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) {
            if (PyList_Append(garbage, op) < 0)
                return -1;
        }
    }

    gc_list_merge(finalizers, old);
    return 0;
}
/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
{
    inquiry clear;

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = collectable->gc.gc_next;
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        if (debug & DEBUG_SAVEALL) {
            PyList_Append(garbage, op);
        }
        else {
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                /* Hold a temporary reference so that clearing the
                 * object cannot deallocate it out from under us. */
                Py_INCREF(op);
                clear(op);
                Py_DECREF(op);
            }
        }
        if (collectable->gc.gc_next == gc) {
            /* object is still alive, move it, it may die later */
            gc_list_move(gc, old);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
}
/* This is the main function.  Read this to understand how the
 * collection process works. */
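/* Outline of a collection, as implemented below (added summary):
 *   1. merge generations younger than the chosen one into it;
 *   2. update_refs / subtract_refs: compute external reference counts;
 *   3. move_unreachable: split the generation into reachable survivors
 *      and a tentatively-unreachable set;
 *   4. move_finalizers / move_finalizer_reachable: set aside objects with
 *      __del__ methods and everything reachable from them;
 *   5. handle_weakrefs: clear weakrefs to dying objects and invoke the
 *      callbacks that are safe to invoke;
 *   6. delete_garbage: break the remaining cycles via tp_clear;
 *   7. handle_finalizers: park uncollectable objects in gc.garbage.
 */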
static Py_ssize_t
collect(int generation)
{
    int i;
    Py_ssize_t m = 0;       /* # objects collected */
    Py_ssize_t n = 0;       /* # unreachable objects that couldn't be collected */
    PyGC_Head *young;       /* the generation we are examining */
    PyGC_Head *old;         /* next older generation */
    PyGC_Head unreachable;  /* non-problematic unreachable trash */
    PyGC_Head finalizers;   /* objects with, & reachable from, __del__ */
    PyGC_Head *gc;
    double t1 = 0.0;

    if (delstr == NULL) {
        delstr = PyString_InternFromString("__del__");
        if (delstr == NULL)
            Py_FatalError("gc couldn't allocate \"__del__\"");
    }

    if (debug & DEBUG_STATS) {
        if (tmod != NULL) {
            PyObject *f = PyObject_CallMethod(tmod, "time", NULL);
            if (f == NULL) {
                PyErr_Clear();
            }
            else {
                t1 = PyFloat_AsDouble(f);
                Py_DECREF(f);
            }
        }
        PySys_WriteStderr("gc: collecting generation %d...\n",
                          generation);
        PySys_WriteStderr("gc: objects in each generation:");
        for (i = 0; i < NUM_GENERATIONS; i++)
            PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d",
                              gc_list_size(GEN_HEAD(i)));
        PySys_WriteStderr("\n");
    }

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
        generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
        generations[i].count = 0;

    /* merge younger generations with one we are currently collecting */
    for (i = 0; i < generation; i++) {
        gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
    }

    /* handy references */
    young = GEN_HEAD(generation);
    if (generation < NUM_GENERATIONS-1)
        old = GEN_HEAD(generation+1);
    else
        old = young;

    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    update_refs(young);
    subtract_refs(young);

    /* Leave everything reachable from outside young in young, and move
     * everything else (in young) to unreachable.
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.
     */
    gc_list_init(&unreachable);
    move_unreachable(young, &unreachable);

    /* Move reachable objects to next generation. */
    if (young != old)
        gc_list_merge(young, old);

    /* All objects in unreachable are trash, but objects reachable from
     * finalizers can't safely be deleted.  Python programmers should take
     * care not to create such things.  For Python, finalizers means
     * instance objects with __del__ methods.  Weakrefs with callbacks
     * can also call arbitrary Python code but they will be dealt with by
     * handle_weakrefs().
     */
    gc_list_init(&finalizers);
    move_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_finalizer_reachable(&finalizers);

    /* Collect statistics on collectable objects found and print
     * debugging information.
     */
    for (gc = unreachable.gc.gc_next; gc != &unreachable;
            gc = gc->gc.gc_next) {
        m++;
        if (debug & DEBUG_COLLECTABLE) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }
    if (tmod != NULL && (debug & DEBUG_STATS)) {
        PyObject *f = PyObject_CallMethod(tmod, "time", NULL);
        if (f == NULL) {
            PyErr_Clear();
        }
        else {
            t1 = PyFloat_AsDouble(f)-t1;
            Py_DECREF(f);
            PySys_WriteStderr("gc: %.4fs elapsed.\n", t1);
        }
    }

    /* Clear weakrefs and invoke callbacks as necessary. */
    m += handle_weakrefs(&unreachable, old);

    /* Call tp_clear on objects in the unreachable set.  This will cause
     * the reference cycles to be broken.  It may also cause some objects
     * in finalizers to be freed.
     */
    delete_garbage(&unreachable, old);

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    for (gc = finalizers.gc.gc_next;
         gc != &finalizers;
         gc = gc->gc.gc_next) {
        n++;
        if (debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    if (debug & DEBUG_STATS) {
        if (m == 0 && n == 0)
            PySys_WriteStderr("gc: done.\n");
        else
            PySys_WriteStderr(
                "gc: done, "
                "%" PY_FORMAT_SIZE_T "d unreachable, "
                "%" PY_FORMAT_SIZE_T "d uncollectable.\n",
                n+m, n);
    }

    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    (void)handle_finalizers(&finalizers, old);

    if (PyErr_Occurred()) {
        if (gc_str == NULL)
            gc_str = PyString_FromString("garbage collection");
        PyErr_WriteUnraisable(gc_str);
        Py_FatalError("unexpected exception during garbage collection");
    }
    return n+m;
}
static Py_ssize_t
collect_generations(void)
{
    int i;
    Py_ssize_t n = 0;

    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
    for (i = NUM_GENERATIONS-1; i >= 0; i--) {
        if (generations[i].count > generations[i].threshold) {
            n = collect(i);
            break;
        }
    }
    return n;
}
PyDoc_STRVAR(gc_enable__doc__,
"enable() -> None\n"
"\n"
"Enable automatic garbage collection.\n");

static PyObject *
gc_enable(PyObject *self, PyObject *noargs)
{
    enabled = 1;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_disable__doc__,
"disable() -> None\n"
"\n"
"Disable automatic garbage collection.\n");

static PyObject *
gc_disable(PyObject *self, PyObject *noargs)
{
    enabled = 0;
    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_isenabled__doc__,
"isenabled() -> status\n"
"\n"
"Returns true if automatic garbage collection is enabled.\n");

static PyObject *
gc_isenabled(PyObject *self, PyObject *noargs)
{
    return PyBool_FromLong((long)enabled);
}
PyDoc_STRVAR(gc_collect__doc__,
"collect([generation]) -> n\n"
"\n"
"With no arguments, run a full collection.  The optional argument\n"
"may be an integer specifying which generation to collect.  A ValueError\n"
"is raised if the generation number is invalid.\n\n"
"The number of unreachable objects is returned.\n");

static PyObject *
gc_collect(PyObject *self, PyObject *args, PyObject *kws)
{
    static char *keywords[] = {"generation", NULL};
    int genarg = NUM_GENERATIONS - 1;
    Py_ssize_t n;

    if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
        return NULL;

    else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
        PyErr_SetString(PyExc_ValueError, "invalid generation");
        return NULL;
    }

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect(genarg);
        collecting = 0;
    }

    return PyInt_FromSsize_t(n);
}
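/* Example (illustrative sketch, not part of the module): gc.collect() can
 * also be driven from C through the abstract object API.  This assumes an
 * initialized interpreter; the "0" argument selects generation 0 only:
 *
 *     PyObject *gcmod = PyImport_ImportModule("gc");
 *     if (gcmod != NULL) {
 *         PyObject *res = PyObject_CallMethod(gcmod, "collect", "i", 0);
 *         Py_XDECREF(res);   (res is an int: # of unreachable objects)
 *         Py_DECREF(gcmod);
 *     }
 *
 * C code inside the interpreter itself normally calls PyGC_Collect()
 * (defined near the end of this file) instead.
 */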
PyDoc_STRVAR(gc_set_debug__doc__,
"set_debug(flags) -> None\n"
"\n"
"Set the garbage collection debugging flags.  Debugging information is\n"
"written to sys.stderr.\n"
"\n"
"flags is an integer and can have the following bits turned on:\n"
"\n"
"  DEBUG_STATS - Print statistics during collection.\n"
"  DEBUG_COLLECTABLE - Print collectable objects found.\n"
"  DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
"  DEBUG_INSTANCES - Print instance objects.\n"
"  DEBUG_OBJECTS - Print objects other than instances.\n"
"  DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
"  DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");

static PyObject *
gc_set_debug(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
        return NULL;

    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_get_debug__doc__,
"get_debug() -> flags\n"
"\n"
"Get the garbage collection debugging flags.\n");

static PyObject *
gc_get_debug(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("i", debug);
}
PyDoc_STRVAR(gc_set_thresh__doc__,
"set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
"\n"
"Sets the collection thresholds.  Setting threshold0 to zero disables\n"
"collection.\n");

static PyObject *
gc_set_thresh(PyObject *self, PyObject *args)
{
    int i;
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
                          &generations[0].threshold,
                          &generations[1].threshold,
                          &generations[2].threshold))
        return NULL;
    for (i = 2; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
        generations[i].threshold = generations[2].threshold;
    }

    Py_INCREF(Py_None);
    return Py_None;
}

PyDoc_STRVAR(gc_get_thresh__doc__,
"get_threshold() -> (threshold0, threshold1, threshold2)\n"
"\n"
"Return the current collection thresholds.\n");

static PyObject *
gc_get_thresh(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].threshold,
                         generations[1].threshold,
                         generations[2].threshold);
}

PyDoc_STRVAR(gc_get_count__doc__,
"get_count() -> (count0, count1, count2)\n"
"\n"
"Return the current collection counts.\n");

static PyObject *
gc_get_count(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].count,
                         generations[1].count,
                         generations[2].count);
}
static int
referrersvisit(PyObject* obj, PyObject *objs)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
        if (PyTuple_GET_ITEM(objs, i) == obj)
            return 1;
    return 0;
}

static int
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
{
    PyGC_Head *gc;
    PyObject *obj;
    traverseproc traverse;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        obj = FROM_GC(gc);
        traverse = Py_TYPE(obj)->tp_traverse;
        if (obj == objs || obj == resultlist)
            continue;
        if (traverse(obj, (visitproc)referrersvisit, objs)) {
            if (PyList_Append(resultlist, obj) < 0)
                return 0; /* error */
        }
    }
    return 1; /* no error */
}

PyDoc_STRVAR(gc_get_referrers__doc__,
"get_referrers(*objs) -> list\n\
Return the list of objects that directly refer to any of objs.");

static PyObject *
gc_get_referrers(PyObject *self, PyObject *args)
{
    int i;
    PyObject *result = PyList_New(0);
    if (!result)
        return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
/* Append obj to list; return true if error (out of memory), false if OK. */
static int
referentsvisit(PyObject *obj, PyObject *list)
{
    return PyList_Append(list, obj) < 0;
}

PyDoc_STRVAR(gc_get_referents__doc__,
"get_referents(*objs) -> list\n\
Return the list of objects that are directly referred to by objs.");

static PyObject *
gc_get_referents(PyObject *self, PyObject *args)
{
    Py_ssize_t i;
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        traverseproc traverse;
        PyObject *obj = PyTuple_GET_ITEM(args, i);

        if (! PyObject_IS_GC(obj))
            continue;
        traverse = Py_TYPE(obj)->tp_traverse;
        if (! traverse)
            continue;
        if (traverse(obj, (visitproc)referentsvisit, result)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}

PyDoc_STRVAR(gc_get_objects__doc__,
"get_objects() -> [...]\n"
"\n"
"Return a list of objects tracked by the collector (excluding the list\n"
"returned).\n");

static PyObject *
gc_get_objects(PyObject *self, PyObject *noargs)
{
    int i;
    PyObject* result;

    result = PyList_New(0);
    if (result == NULL)
        return NULL;
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (append_objects(result, GEN_HEAD(i))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
"\n"
"enable() -- Enable automatic garbage collection.\n"
"disable() -- Disable automatic garbage collection.\n"
"isenabled() -- Returns true if automatic collection is enabled.\n"
"collect() -- Do a full collection right now.\n"
"get_count() -- Return the current collection counts.\n"
"set_debug() -- Set debugging flags.\n"
"get_debug() -- Get debugging flags.\n"
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n");

static PyMethodDef GcMethods[] = {
    {"enable",         gc_enable,      METH_NOARGS,  gc_enable__doc__},
    {"disable",        gc_disable,     METH_NOARGS,  gc_disable__doc__},
    {"isenabled",      gc_isenabled,   METH_NOARGS,  gc_isenabled__doc__},
    {"set_debug",      gc_set_debug,   METH_VARARGS, gc_set_debug__doc__},
    {"get_debug",      gc_get_debug,   METH_NOARGS,  gc_get_debug__doc__},
    {"get_count",      gc_get_count,   METH_NOARGS,  gc_get_count__doc__},
    {"set_threshold",  gc_set_thresh,  METH_VARARGS, gc_set_thresh__doc__},
    {"get_threshold",  gc_get_thresh,  METH_NOARGS,  gc_get_thresh__doc__},
    {"collect",        (PyCFunction)gc_collect,
        METH_VARARGS | METH_KEYWORDS,                gc_collect__doc__},
    {"get_objects",    gc_get_objects, METH_NOARGS,  gc_get_objects__doc__},
    {"get_referrers",  gc_get_referrers, METH_VARARGS,
        gc_get_referrers__doc__},
    {"get_referents",  gc_get_referents, METH_VARARGS,
        gc_get_referents__doc__},
    {NULL,      NULL}           /* Sentinel */
};
PyMODINIT_FUNC
initgc(void)
{
    PyObject *m;

    m = Py_InitModule4("gc",
                       GcMethods,
                       gc__doc__,
                       NULL,
                       PYTHON_API_VERSION);
    if (m == NULL)
        return;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            return;
    }
    Py_INCREF(garbage);
    if (PyModule_AddObject(m, "garbage", garbage) < 0)
        return;

    /* Importing can't be done in collect() because collect()
     * can be called via PyGC_Collect() in Py_Finalize().
     * This wouldn't be a problem, except that <initialized> is
     * reset to 0 before calling collect which trips up
     * the import and triggers an assertion.
     */
    if (tmod == NULL) {
        tmod = PyImport_ImportModuleNoBlock("time");
        if (tmod == NULL)
            PyErr_Clear();
    }

#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return
    ADD_INT(DEBUG_STATS);
    ADD_INT(DEBUG_COLLECTABLE);
    ADD_INT(DEBUG_UNCOLLECTABLE);
    ADD_INT(DEBUG_INSTANCES);
    ADD_INT(DEBUG_OBJECTS);
    ADD_INT(DEBUG_SAVEALL);
    ADD_INT(DEBUG_LEAK);
#undef ADD_INT
}
/* API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{
    Py_ssize_t n;

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect(NUM_GENERATIONS - 1);
        collecting = 0;
    }

    return n;
}
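/* Example (illustrative sketch): an embedding application can force a
 * full collection from C after dropping references to a large object
 * graph.  The interpreter must be initialized, and like gc.collect()
 * this is a no-op when a collection is already in progress:
 *
 *     Py_ssize_t n = PyGC_Collect();
 *     fprintf(stderr, "gc: reclaimed %ld cycle objects\n", (long)n);
 */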
/* for debugging */
void
_PyGC_Dump(PyGC_Head *g)
{
    _PyObject_Dump(FROM_GC(g));
}

/* extension modules might be compiled with GC support so these
   functions must always be available */

#undef PyObject_GC_Track
#undef PyObject_GC_UnTrack
#undef PyObject_GC_Del
#undef _PyObject_GC_Malloc

void
PyObject_GC_Track(void *op)
{
    _PyObject_GC_TRACK(op);
}

/* for binary compatibility with 2.2 */
void
_PyObject_GC_Track(PyObject *op)
{
    PyObject_GC_Track(op);
}

void
PyObject_GC_UnTrack(void *op)
{
    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (IS_TRACKED(op))
        _PyObject_GC_UNTRACK(op);
}

/* for binary compatibility with 2.2 */
void
_PyObject_GC_UnTrack(PyObject *op)
{
    PyObject_GC_UnTrack(op);
}
PyObject *
_PyObject_GC_Malloc(size_t basicsize)
{
    PyObject *op;
    PyGC_Head *g = (PyGC_Head *)PyObject_MALLOC(
        sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return PyErr_NoMemory();
    g->gc.gc_refs = GC_UNTRACKED;
    generations[0].count++; /* number of allocated GC objects */
    if (generations[0].count > generations[0].threshold &&
        enabled &&
        generations[0].threshold &&
        !collecting &&
        !PyErr_Occurred()) {
        collecting = 1;
        collect_generations();
        collecting = 0;
    }
    op = FROM_GC(g);
    return op;
}

PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
    if (op != NULL)
        op = PyObject_INIT(op, tp);
    return op;
}

PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{
    const size_t size = _PyObject_VAR_SIZE(tp, nitems);
    PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size);
    if (op != NULL)
        op = PyObject_INIT_VAR(op, tp, nitems);
    return op;
}

PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
    PyGC_Head *g = AS_GC(op);
    g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return (PyVarObject *)PyErr_NoMemory();
    op = (PyVarObject *) FROM_GC(g);
    Py_SIZE(op) = nitems;
    return op;
}

void
PyObject_GC_Del(void *op)
{
    PyGC_Head *g = AS_GC(op);
    if (IS_TRACKED(op))
        gc_list_remove(g);
    if (generations[0].count > 0) {
        generations[0].count--;
    }
    PyObject_FREE(g);
}

/* for binary compatibility with 2.2 */
#undef _PyObject_GC_Del
void
_PyObject_GC_Del(PyObject *op)
{
    PyObject_GC_Del(op);
}
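/* Usage sketch (illustrative, assuming only the documented Python 2.x C
 * API; the "Pair" type and its members are hypothetical): a GC-aware
 * extension type allocates through the functions above via the
 * PyObject_GC_* macros, tracks itself once its object pointers are valid,
 * and untracks itself before tearing them down:
 *
 *     typedef struct {
 *         PyObject_HEAD
 *         PyObject *first;
 *         PyObject *second;
 *     } PairObject;
 *
 *     static int
 *     Pair_traverse(PairObject *self, visitproc visit, void *arg)
 *     {
 *         Py_VISIT(self->first);     (reports each referent to the gc)
 *         Py_VISIT(self->second);
 *         return 0;
 *     }
 *
 *     static int
 *     Pair_clear(PairObject *self)
 *     {
 *         Py_CLEAR(self->first);     (drops references to break cycles)
 *         Py_CLEAR(self->second);
 *         return 0;
 *     }
 *
 *     static void
 *     Pair_dealloc(PairObject *self)
 *     {
 *         PyObject_GC_UnTrack(self);
 *         Pair_clear(self);
 *         Py_TYPE(self)->tp_free((PyObject *)self);
 *     }
 *
 * The type must set Py_TPFLAGS_HAVE_GC, and instances would be created
 * with PyObject_GC_New(PairObject, &Pair_Type) followed by
 * PyObject_GC_Track once first/second are initialized.  tp_traverse and
 * tp_clear are what let the collector above (update_refs, subtract_refs,
 * delete_garbage) find and break cycles through this type.
 */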