3 Reference Cycle Garbage Collection
4 ==================================
6 Neil Schemenauer <nas@arctrix.com>
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
11 http://www.arctrix.com/nas/python/gc/
12 http://www.python.org/pipermail/python-dev/2000-March/003869.html
13 http://www.python.org/pipermail/python-dev/2000-March/004010.html
14 http://www.python.org/pipermail/python-dev/2000-March/004022.html
16 For a high-level view of the collection process, read the collect
22 #include "frameobject.h" /* for PyFrame_ClearFreeList */
/* Get an object's GC head.
 * The head sits exactly one PyGC_Head before the object pointer `o`. */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Get the object given the GC head.
 * Inverse of AS_GC: the object starts one PyGC_Head past `g`.  The argument
 * is parenthesized so that an expression argument (e.g. `p + n`) is cast as
 * a whole instead of having only its first operand cast. */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)(g))+1))
30 /*** Global GC state ***/
32 struct gc_generation
{
34 int threshold
; /* collection threshold */
35 int count
; /* count of allocations or collections of younger
/* Number of entries in the `generations` table. */
#define NUM_GENERATIONS 3

/* Address of the list head belonging to generation `n`. */
#define GEN_HEAD(n) (&(generations[(n)].head))
42 /* linked lists of container objects */
43 static struct gc_generation generations
[NUM_GENERATIONS
] = {
44 /* PyGC_Head, threshold, count */
45 {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
46 {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
47 {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
50 PyGC_Head
*_PyGC_generation0
= GEN_HEAD(0);
52 static int enabled
= 1; /* automatic collection enabled? */
54 /* true if we are currently running the collector */
55 static int collecting
= 0;
57 /* list of uncollectable objects */
58 static PyObject
*garbage
= NULL
;
60 /* Python string to use if unhandled exception occurs */
61 static PyObject
*gc_str
= NULL
;
63 /* Python string used to look for __del__ attribute. */
64 static PyObject
*delstr
= NULL
;
66 /* This is the number of objects that survived the last full collection. It
67 approximates the number of long lived objects tracked by the GC.
69 (by "full collection", we mean a collection of the oldest generation).
71 static Py_ssize_t long_lived_total
= 0;
73 /* This is the number of objects that survived all "non-full" collections,
74 and are waiting to undergo a full collection for the first time.
77 static Py_ssize_t long_lived_pending
= 0;
80 NOTE: about the counting of long-lived objects.
82 To limit the cost of garbage collection, there are two strategies:
83 - make each collection faster, e.g. by scanning fewer objects
85 This heuristic is about the latter strategy.
87 In addition to the various configurable thresholds, we only trigger a
88 full collection if the ratio
89 long_lived_pending / long_lived_total
90 is above a given value (hardwired to 25%).
92 The reason is that, while "non-full" collections (i.e., collections of
93 the young and middle generations) will always examine roughly the same
94 number of objects -- determined by the aforementioned thresholds --,
95 the cost of a full collection is proportional to the total number of
96 long-lived objects, which is virtually unbounded.
98 Indeed, it has been remarked that doing a full collection every
99 <constant number> of object creations entails a dramatic performance
100 degradation in workloads that consist of creating and storing lots of
101 long-lived objects (e.g. building a large list of GC-tracked objects would
102 show quadratic performance, instead of linear as expected: see issue #4074).
104 Using the above ratio, instead, yields amortized linear performance in
105 the total number of objects (the effect of which can be summarized
106 thusly: "each full garbage collection is more and more costly as the
107 number of objects grows, but we do fewer and fewer of them").
109 This heuristic was suggested by Martin von Löwis on python-dev in
110 June 2008. His original analysis and proposal can be found at:
111 http://mail.python.org/pipermail/python-dev/2008-June/080579.html
/* Bit flags selecting the collector's debugging output; each flag is a
 * distinct single bit so they can be OR-ed together. */
#define DEBUG_STATS          (1 << 0)  /* print collection statistics */
#define DEBUG_COLLECTABLE    (1 << 1)  /* print collectable objects */
#define DEBUG_UNCOLLECTABLE  (1 << 2)  /* print uncollectable objects */
#define DEBUG_INSTANCES      (1 << 3)  /* print instances */
#define DEBUG_OBJECTS        (1 << 4)  /* print other objects */
#define DEBUG_SAVEALL        (1 << 5)  /* save all garbage in gc.garbage */
122 #define DEBUG_LEAK DEBUG_COLLECTABLE | \
123 DEBUG_UNCOLLECTABLE | \
128 static PyObject
*tmod
= NULL
;
130 /*--------------------------------------------------------------------------
133 Between collections, every gc'ed object has one of two gc_refs values:
136 The initial state; objects returned by PyObject_GC_Malloc are in this
137 state. The object doesn't live in any generation list, and its
138 tp_traverse slot must not be called.
141 The object lives in some generation list, and its tp_traverse is safe to
142 call. An object transitions to GC_REACHABLE when PyObject_GC_Track
145 During a collection, gc_refs can temporarily take on other states:
148 At the start of a collection, update_refs() copies the true refcount
149 to gc_refs, for each object in the generation being collected.
150 subtract_refs() then adjusts gc_refs so that it equals the number of
151 times an object is referenced directly from outside the generation
153 gc_refs remains >= 0 throughout these steps.
155 GC_TENTATIVELY_UNREACHABLE
156 move_unreachable() then moves objects not reachable (whether directly or
157 indirectly) from outside the generation into an "unreachable" set.
158 Objects that are found to be reachable have gc_refs set to GC_REACHABLE
159 again. Objects that are found to be unreachable have gc_refs set to
160 GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing
161 this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
162 transition back to GC_REACHABLE.
164 Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
165 for collection. If it's decided not to collect such an object (e.g.,
166 it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
167 ----------------------------------------------------------------------------
/* Short local names for the special gc_refs values. */
#define GC_UNTRACKED                _PyGC_REFS_UNTRACKED
#define GC_REACHABLE                _PyGC_REFS_REACHABLE
#define GC_TENTATIVELY_UNREACHABLE  _PyGC_REFS_TENTATIVELY_UNREACHABLE

/* Predicates on an object's gc_refs state, read through its GC head. */
#define IS_TRACKED(o) \
    ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
#define IS_REACHABLE(o) \
    ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
#define IS_TENTATIVELY_UNREACHABLE(o) \
    ((AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
178 /*** list functions ***/
181 gc_list_init(PyGC_Head
*list
)
183 list
->gc
.gc_prev
= list
;
184 list
->gc
.gc_next
= list
;
188 gc_list_is_empty(PyGC_Head
*list
)
190 return (list
->gc
.gc_next
== list
);
194 /* This became unused after gc_list_move() was introduced. */
195 /* Append `node` to `list`. */
197 gc_list_append(PyGC_Head
*node
, PyGC_Head
*list
)
199 node
->gc
.gc_next
= list
;
200 node
->gc
.gc_prev
= list
->gc
.gc_prev
;
201 node
->gc
.gc_prev
->gc
.gc_next
= node
;
202 list
->gc
.gc_prev
= node
;
206 /* Remove `node` from the gc list it's currently in. */
208 gc_list_remove(PyGC_Head
*node
)
210 node
->gc
.gc_prev
->gc
.gc_next
= node
->gc
.gc_next
;
211 node
->gc
.gc_next
->gc
.gc_prev
= node
->gc
.gc_prev
;
212 node
->gc
.gc_next
= NULL
; /* object is not currently tracked */
215 /* Move `node` from the gc list it's currently in (which is not explicitly
216 * named here) to the end of `list`. This is semantically the same as
217 * gc_list_remove(node) followed by gc_list_append(node, list).
220 gc_list_move(PyGC_Head
*node
, PyGC_Head
*list
)
223 PyGC_Head
*current_prev
= node
->gc
.gc_prev
;
224 PyGC_Head
*current_next
= node
->gc
.gc_next
;
225 /* Unlink from current list. */
226 current_prev
->gc
.gc_next
= current_next
;
227 current_next
->gc
.gc_prev
= current_prev
;
228 /* Relink at end of new list. */
229 new_prev
= node
->gc
.gc_prev
= list
->gc
.gc_prev
;
230 new_prev
->gc
.gc_next
= list
->gc
.gc_prev
= node
;
231 node
->gc
.gc_next
= list
;
234 /* append list `from` onto list `to`; `from` becomes an empty list */
236 gc_list_merge(PyGC_Head
*from
, PyGC_Head
*to
)
240 if (!gc_list_is_empty(from
)) {
241 tail
= to
->gc
.gc_prev
;
242 tail
->gc
.gc_next
= from
->gc
.gc_next
;
243 tail
->gc
.gc_next
->gc
.gc_prev
= tail
;
244 to
->gc
.gc_prev
= from
->gc
.gc_prev
;
245 to
->gc
.gc_prev
->gc
.gc_next
= to
;
251 gc_list_size(PyGC_Head
*list
)
255 for (gc
= list
->gc
.gc_next
; gc
!= list
; gc
= gc
->gc
.gc_next
) {
261 /* Append objects in a GC list to a Python list.
262 * Return 0 if all OK, < 0 if error (out of memory for list).
265 append_objects(PyObject
*py_list
, PyGC_Head
*gc_list
)
268 for (gc
= gc_list
->gc
.gc_next
; gc
!= gc_list
; gc
= gc
->gc
.gc_next
) {
269 PyObject
*op
= FROM_GC(gc
);
271 if (PyList_Append(py_list
, op
)) {
272 return -1; /* exception */
279 /*** end of list stuff ***/
282 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
283 * in containers, and is GC_REACHABLE for all tracked gc objects not in
287 update_refs(PyGC_Head
*containers
)
289 PyGC_Head
*gc
= containers
->gc
.gc_next
;
290 for (; gc
!= containers
; gc
= gc
->gc
.gc_next
) {
291 assert(gc
->gc
.gc_refs
== GC_REACHABLE
);
292 gc
->gc
.gc_refs
= Py_REFCNT(FROM_GC(gc
));
293 /* Python's cyclic gc should never see an incoming refcount
294 * of 0: if something decref'ed to 0, it should have been
295 * deallocated immediately at that time.
296 * Possible cause (if the assert triggers): a tp_dealloc
297 * routine left a gc-aware object tracked during its teardown
298 * phase, and did something -- or allowed something to happen --
299 * that called back into Python. gc can trigger then, and may
300 * see the still-tracked dying object. Before this assert
301 * was added, such mistakes went on to allow gc to try to
302 * delete the object again. In a debug build, that caused
303 * a mysterious segfault, when _Py_ForgetReference tried
304 * to remove the object from the doubly-linked list of all
305 * objects a second time. In a release build, an actual
306 * double deallocation occurred, which leads to corruption
307 * of the allocator's internal bookkeeping pointers. That's
308 * so serious that maybe this should be a release-build
309 * check instead of an assert?
311 assert(gc
->gc
.gc_refs
!= 0);
315 /* A traversal callback for subtract_refs. */
317 visit_decref(PyObject
*op
, void *data
)
320 if (PyObject_IS_GC(op
)) {
321 PyGC_Head
*gc
= AS_GC(op
);
322 /* We're only interested in gc_refs for objects in the
323 * generation being collected, which can be recognized
324 * because only they have positive gc_refs.
326 assert(gc
->gc
.gc_refs
!= 0); /* else refcount was too small */
327 if (gc
->gc
.gc_refs
> 0)
333 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
334 * for all objects in containers, and is GC_REACHABLE for all tracked gc
335 * objects not in containers. The ones with gc_refs > 0 are directly
336 * reachable from outside containers, and so can't be collected.
339 subtract_refs(PyGC_Head
*containers
)
341 traverseproc traverse
;
342 PyGC_Head
*gc
= containers
->gc
.gc_next
;
343 for (; gc
!= containers
; gc
=gc
->gc
.gc_next
) {
344 traverse
= Py_TYPE(FROM_GC(gc
))->tp_traverse
;
345 (void) traverse(FROM_GC(gc
),
346 (visitproc
)visit_decref
,
351 /* A traversal callback for move_unreachable. */
353 visit_reachable(PyObject
*op
, PyGC_Head
*reachable
)
355 if (PyObject_IS_GC(op
)) {
356 PyGC_Head
*gc
= AS_GC(op
);
357 const Py_ssize_t gc_refs
= gc
->gc
.gc_refs
;
360 /* This is in move_unreachable's 'young' list, but
361 * the traversal hasn't yet gotten to it. All
362 * we need to do is tell move_unreachable that it's
367 else if (gc_refs
== GC_TENTATIVELY_UNREACHABLE
) {
368 /* This had gc_refs = 0 when move_unreachable got
369 * to it, but turns out it's reachable after all.
370 * Move it back to move_unreachable's 'young' list,
371 * and move_unreachable will eventually get to it
374 gc_list_move(gc
, reachable
);
377 /* Else there's nothing to do.
378 * If gc_refs > 0, it must be in move_unreachable's 'young'
379 * list, and move_unreachable will eventually get to it.
380 * If gc_refs == GC_REACHABLE, it's either in some other
381 * generation so we don't care about it, or move_unreachable
382 * already dealt with it.
383 * If gc_refs == GC_UNTRACKED, it must be ignored.
387 || gc_refs
== GC_REACHABLE
388 || gc_refs
== GC_UNTRACKED
);
394 /* Move the unreachable objects from young to unreachable. After this,
395 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
396 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
397 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
398 * All objects in young after this are directly or indirectly reachable
399 * from outside the original young; and all objects in unreachable are
403 move_unreachable(PyGC_Head
*young
, PyGC_Head
*unreachable
)
405 PyGC_Head
*gc
= young
->gc
.gc_next
;
407 /* Invariants: all objects "to the left" of us in young have gc_refs
408 * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
409 * from outside the young list as it was at entry. All other objects
410 * from the original young "to the left" of us are in unreachable now,
411 * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
412 * left of us in 'young' now have been scanned, and no objects here
413 * or to the right have been scanned yet.
416 while (gc
!= young
) {
419 if (gc
->gc
.gc_refs
) {
420 /* gc is definitely reachable from outside the
421 * original 'young'. Mark it as such, and traverse
422 * its pointers to find any other objects that may
423 * be directly reachable from it. Note that the
424 * call to tp_traverse may append objects to young,
425 * so we have to wait until it returns to determine
426 * the next object to visit.
428 PyObject
*op
= FROM_GC(gc
);
429 traverseproc traverse
= Py_TYPE(op
)->tp_traverse
;
430 assert(gc
->gc
.gc_refs
> 0);
431 gc
->gc
.gc_refs
= GC_REACHABLE
;
433 (visitproc
)visit_reachable
,
435 next
= gc
->gc
.gc_next
;
436 if (PyTuple_CheckExact(op
)) {
437 _PyTuple_MaybeUntrack(op
);
439 else if (PyDict_CheckExact(op
)) {
440 _PyDict_MaybeUntrack(op
);
444 /* This *may* be unreachable. To make progress,
445 * assume it is. gc isn't directly reachable from
446 * any object we've already traversed, but may be
447 * reachable from an object we haven't gotten to yet.
448 * visit_reachable will eventually move gc back into
449 * young if that's so, and we'll see it again.
451 next
= gc
->gc
.gc_next
;
452 gc_list_move(gc
, unreachable
);
453 gc
->gc
.gc_refs
= GC_TENTATIVELY_UNREACHABLE
;
459 /* Return true if object has a finalization method.
460 * CAUTION: An instance of an old-style class has to be checked for a
461 * __del__ method, and earlier versions of this used to call PyObject_HasAttr,
462 * which in turn could call the class's __getattr__ hook (if any). That
463 * could invoke arbitrary Python code, mutating the object graph in arbitrary
464 * ways, and that was the source of some excruciatingly subtle bugs.
467 has_finalizer(PyObject
*op
)
469 if (PyInstance_Check(op
)) {
470 assert(delstr
!= NULL
);
471 return _PyInstance_Lookup(op
, delstr
) != NULL
;
473 else if (PyType_HasFeature(op
->ob_type
, Py_TPFLAGS_HEAPTYPE
))
474 return op
->ob_type
->tp_del
!= NULL
;
475 else if (PyGen_CheckExact(op
))
476 return PyGen_NeedsFinalizing((PyGenObject
*)op
);
481 /* Move the objects in unreachable with __del__ methods into `finalizers`.
482 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
483 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
486 move_finalizers(PyGC_Head
*unreachable
, PyGC_Head
*finalizers
)
491 /* March over unreachable. Move objects with finalizers into
494 for (gc
= unreachable
->gc
.gc_next
; gc
!= unreachable
; gc
= next
) {
495 PyObject
*op
= FROM_GC(gc
);
497 assert(IS_TENTATIVELY_UNREACHABLE(op
));
498 next
= gc
->gc
.gc_next
;
500 if (has_finalizer(op
)) {
501 gc_list_move(gc
, finalizers
);
502 gc
->gc
.gc_refs
= GC_REACHABLE
;
507 /* A traversal callback for move_finalizer_reachable. */
509 visit_move(PyObject
*op
, PyGC_Head
*tolist
)
511 if (PyObject_IS_GC(op
)) {
512 if (IS_TENTATIVELY_UNREACHABLE(op
)) {
513 PyGC_Head
*gc
= AS_GC(op
);
514 gc_list_move(gc
, tolist
);
515 gc
->gc
.gc_refs
= GC_REACHABLE
;
521 /* Move objects that are reachable from finalizers, from the unreachable set
522 * into finalizers set.
525 move_finalizer_reachable(PyGC_Head
*finalizers
)
527 traverseproc traverse
;
528 PyGC_Head
*gc
= finalizers
->gc
.gc_next
;
529 for (; gc
!= finalizers
; gc
= gc
->gc
.gc_next
) {
530 /* Note that the finalizers list may grow during this. */
531 traverse
= Py_TYPE(FROM_GC(gc
))->tp_traverse
;
532 (void) traverse(FROM_GC(gc
),
533 (visitproc
)visit_move
,
538 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
539 * callback, invoke it if necessary. Note that it's possible for such
540 * weakrefs to be outside the unreachable set -- indeed, those are precisely
541 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
542 * overview & some details. Some weakrefs with callbacks may be reclaimed
543 * directly by this routine; the number reclaimed is the return value. Other
544 * weakrefs with callbacks may be moved into the `old` generation. Objects
545 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
546 * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
547 * no object in `unreachable` is weakly referenced anymore.
550 handle_weakrefs(PyGC_Head
*unreachable
, PyGC_Head
*old
)
553 PyObject
*op
; /* generally FROM_GC(gc) */
554 PyWeakReference
*wr
; /* generally a cast of op */
555 PyGC_Head wrcb_to_call
; /* weakrefs with callbacks to call */
559 gc_list_init(&wrcb_to_call
);
561 /* Clear all weakrefs to the objects in unreachable. If such a weakref
562 * also has a callback, move it into `wrcb_to_call` if the callback
563 * needs to be invoked. Note that we cannot invoke any callbacks until
564 * all weakrefs to unreachable objects are cleared, lest the callback
565 * resurrect an unreachable object via a still-active weakref. We
566 * make another pass over wrcb_to_call, invoking callbacks, after this
569 for (gc
= unreachable
->gc
.gc_next
; gc
!= unreachable
; gc
= next
) {
570 PyWeakReference
**wrlist
;
573 assert(IS_TENTATIVELY_UNREACHABLE(op
));
574 next
= gc
->gc
.gc_next
;
576 if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op
)))
579 /* It supports weakrefs. Does it have any? */
580 wrlist
= (PyWeakReference
**)
581 PyObject_GET_WEAKREFS_LISTPTR(op
);
583 /* `op` may have some weakrefs. March over the list, clear
584 * all the weakrefs, and move the weakrefs with callbacks
585 * that must be called into wrcb_to_call.
587 for (wr
= *wrlist
; wr
!= NULL
; wr
= *wrlist
) {
588 PyGC_Head
*wrasgc
; /* AS_GC(wr) */
590 /* _PyWeakref_ClearRef clears the weakref but leaves
591 * the callback pointer intact. Obscure: it also
594 assert(wr
->wr_object
== op
);
595 _PyWeakref_ClearRef(wr
);
596 assert(wr
->wr_object
== Py_None
);
597 if (wr
->wr_callback
== NULL
)
598 continue; /* no callback */
600 /* Headache time. `op` is going away, and is weakly referenced by
601 * `wr`, which has a callback. Should the callback be invoked? If wr
604 * 1. There's no need to call it. The object and the weakref are
605 * both going away, so it's legitimate to pretend the weakref is
606 * going away first. The user has to ensure a weakref outlives its
607 * referent if they want a guarantee that the wr callback will get
610 * 2. It may be catastrophic to call it. If the callback is also in
611 * cyclic trash (CT), then although the CT is unreachable from
612 * outside the current generation, CT may be reachable from the
613 * callback. Then the callback could resurrect insane objects.
615 * Since the callback is never needed and may be unsafe in this case,
616 * wr is simply left in the unreachable set. Note that because we
617 * already called _PyWeakref_ClearRef(wr), its callback will never
620 * OTOH, if wr isn't part of CT, we should invoke the callback: the
621 * weakref outlived the trash. Note that since wr isn't CT in this
622 * case, its callback can't be CT either -- wr acted as an external
623 * root to this generation, and therefore its callback did too. So
624 * nothing in CT is reachable from the callback either, so it's hard
625 * to imagine how calling it later could create a problem for us. wr
626 * is moved to wrcb_to_call in this case.
628 if (IS_TENTATIVELY_UNREACHABLE(wr
))
630 assert(IS_REACHABLE(wr
));
632 /* Create a new reference so that wr can't go away
633 * before we can process it again.
637 /* Move wr to wrcb_to_call, for the next pass. */
639 assert(wrasgc
!= next
); /* wrasgc is reachable, but
640 next isn't, so they can't
642 gc_list_move(wrasgc
, &wrcb_to_call
);
646 /* Invoke the callbacks we decided to honor. It's safe to invoke them
647 * because they can't reference unreachable objects.
649 while (! gc_list_is_empty(&wrcb_to_call
)) {
653 gc
= wrcb_to_call
.gc
.gc_next
;
655 assert(IS_REACHABLE(op
));
656 assert(PyWeakref_Check(op
));
657 wr
= (PyWeakReference
*)op
;
658 callback
= wr
->wr_callback
;
659 assert(callback
!= NULL
);
661 /* copy-paste of weakrefobject.c's handle_callback() */
662 temp
= PyObject_CallFunctionObjArgs(callback
, wr
, NULL
);
664 PyErr_WriteUnraisable(callback
);
668 /* Give up the reference we created in the first pass. When
669 * op's refcount hits 0 (which it may or may not do right now),
670 * op's tp_dealloc will decref op->wr_callback too. Note
671 * that the refcount probably will hit 0 now, and because this
672 * weakref was reachable to begin with, gc didn't already
673 * add it to its count of freed objects. Example: a reachable
674 * weak value dict maps some key to this reachable weakref.
675 * The callback removes this key->weakref mapping from the
676 * dict, leaving no other references to the weakref (excepting
680 if (wrcb_to_call
.gc
.gc_next
== gc
) {
681 /* object is still alive -- move it */
682 gc_list_move(gc
, old
);
692 debug_instance(char *msg
, PyInstanceObject
*inst
)
695 /* simple version of instance_repr */
696 PyObject
*classname
= inst
->in_class
->cl_name
;
697 if (classname
!= NULL
&& PyString_Check(classname
))
698 cname
= PyString_AsString(classname
);
701 PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
706 debug_cycle(char *msg
, PyObject
*op
)
708 if ((debug
& DEBUG_INSTANCES
) && PyInstance_Check(op
)) {
709 debug_instance(msg
, (PyInstanceObject
*)op
);
711 else if (debug
& DEBUG_OBJECTS
) {
712 PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
713 msg
, Py_TYPE(op
)->tp_name
, op
);
717 /* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
718 * only from such cycles).
719 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
720 * garbage list (a Python list), else only the objects in finalizers with
721 * __del__ methods are appended to garbage. All objects in finalizers are
722 * merged into the old list regardless.
723 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
724 * The finalizers list is made empty on a successful return.
727 handle_finalizers(PyGC_Head
*finalizers
, PyGC_Head
*old
)
729 PyGC_Head
*gc
= finalizers
->gc
.gc_next
;
731 if (garbage
== NULL
) {
732 garbage
= PyList_New(0);
734 Py_FatalError("gc couldn't create gc.garbage list");
736 for (; gc
!= finalizers
; gc
= gc
->gc
.gc_next
) {
737 PyObject
*op
= FROM_GC(gc
);
739 if ((debug
& DEBUG_SAVEALL
) || has_finalizer(op
)) {
740 if (PyList_Append(garbage
, op
) < 0)
745 gc_list_merge(finalizers
, old
);
749 /* Break reference cycles by clearing the containers involved. This is
750 * tricky business as the lists can be changing and we don't know which
751 * objects may be freed. It is possible I screwed something up here.
754 delete_garbage(PyGC_Head
*collectable
, PyGC_Head
*old
)
758 while (!gc_list_is_empty(collectable
)) {
759 PyGC_Head
*gc
= collectable
->gc
.gc_next
;
760 PyObject
*op
= FROM_GC(gc
);
762 assert(IS_TENTATIVELY_UNREACHABLE(op
));
763 if (debug
& DEBUG_SAVEALL
) {
764 PyList_Append(garbage
, op
);
767 if ((clear
= Py_TYPE(op
)->tp_clear
) != NULL
) {
773 if (collectable
->gc
.gc_next
== gc
) {
774 /* object is still alive, move it, it may die later */
775 gc_list_move(gc
, old
);
776 gc
->gc
.gc_refs
= GC_REACHABLE
;
781 /* Clear all free lists
782 * All free lists are cleared during the collection of the highest generation.
783 * Allocated items in the free list may keep a pymalloc arena occupied.
784 * Clearing the free lists may give back memory to the OS earlier.
787 clear_freelists(void)
789 (void)PyMethod_ClearFreeList();
790 (void)PyFrame_ClearFreeList();
791 (void)PyCFunction_ClearFreeList();
792 (void)PyTuple_ClearFreeList();
793 #ifdef Py_USING_UNICODE
794 (void)PyUnicode_ClearFreeList();
796 (void)PyInt_ClearFreeList();
797 (void)PyFloat_ClearFreeList();
805 PyObject
*f
= PyObject_CallMethod(tmod
, "time", NULL
);
810 if (PyFloat_Check(f
))
811 result
= PyFloat_AsDouble(f
);
818 /* This is the main function. Read this to understand how the
819 * collection process works. */
821 collect(int generation
)
824 Py_ssize_t m
= 0; /* # objects collected */
825 Py_ssize_t n
= 0; /* # unreachable objects that couldn't be collected */
826 PyGC_Head
*young
; /* the generation we are examining */
827 PyGC_Head
*old
; /* next older generation */
828 PyGC_Head unreachable
; /* non-problematic unreachable trash */
829 PyGC_Head finalizers
; /* objects with, & reachable from, __del__ */
833 if (delstr
== NULL
) {
834 delstr
= PyString_InternFromString("__del__");
836 Py_FatalError("gc couldn't allocate \"__del__\"");
839 if (debug
& DEBUG_STATS
) {
841 PySys_WriteStderr("gc: collecting generation %d...\n",
843 PySys_WriteStderr("gc: objects in each generation:");
844 for (i
= 0; i
< NUM_GENERATIONS
; i
++)
845 PySys_WriteStderr(" %" PY_FORMAT_SIZE_T
"d",
846 gc_list_size(GEN_HEAD(i
)));
847 PySys_WriteStderr("\n");
850 /* update collection and allocation counters */
851 if (generation
+1 < NUM_GENERATIONS
)
852 generations
[generation
+1].count
+= 1;
853 for (i
= 0; i
<= generation
; i
++)
854 generations
[i
].count
= 0;
856 /* merge younger generations with one we are currently collecting */
857 for (i
= 0; i
< generation
; i
++) {
858 gc_list_merge(GEN_HEAD(i
), GEN_HEAD(generation
));
861 /* handy references */
862 young
= GEN_HEAD(generation
);
863 if (generation
< NUM_GENERATIONS
-1)
864 old
= GEN_HEAD(generation
+1);
868 /* Using ob_refcnt and gc_refs, calculate which objects in the
869 * container set are reachable from outside the set (i.e., have a
870 * refcount greater than 0 when all the references within the
871 * set are taken into account).
874 subtract_refs(young
);
876 /* Leave everything reachable from outside young in young, and move
877 * everything else (in young) to unreachable.
878 * NOTE: This used to move the reachable objects into a reachable
879 * set instead. But most things usually turn out to be reachable,
880 * so it's more efficient to move the unreachable things.
882 gc_list_init(&unreachable
);
883 move_unreachable(young
, &unreachable
);
885 /* Move reachable objects to next generation. */
887 if (generation
== NUM_GENERATIONS
- 2) {
888 long_lived_pending
+= gc_list_size(young
);
890 gc_list_merge(young
, old
);
893 long_lived_pending
= 0;
894 long_lived_total
= gc_list_size(young
);
897 /* All objects in unreachable are trash, but objects reachable from
898 * finalizers can't safely be deleted. Python programmers should take
899 * care not to create such things. For Python, finalizers means
900 * instance objects with __del__ methods. Weakrefs with callbacks
901 * can also call arbitrary Python code but they will be dealt with by
904 gc_list_init(&finalizers
);
905 move_finalizers(&unreachable
, &finalizers
);
906 /* finalizers contains the unreachable objects with a finalizer;
907 * unreachable objects reachable *from* those are also uncollectable,
908 * and we move those into the finalizers list too.
910 move_finalizer_reachable(&finalizers
);
912 /* Collect statistics on collectable objects found and print
913 * debugging information.
915 for (gc
= unreachable
.gc
.gc_next
; gc
!= &unreachable
;
916 gc
= gc
->gc
.gc_next
) {
918 if (debug
& DEBUG_COLLECTABLE
) {
919 debug_cycle("collectable", FROM_GC(gc
));
923 /* Clear weakrefs and invoke callbacks as necessary. */
924 m
+= handle_weakrefs(&unreachable
, old
);
926 /* Call tp_clear on objects in the unreachable set. This will cause
927 * the reference cycles to be broken. It may also cause some objects
928 * in finalizers to be freed.
930 delete_garbage(&unreachable
, old
);
932 /* Collect statistics on uncollectable objects found and print
933 * debugging information. */
934 for (gc
= finalizers
.gc
.gc_next
;
936 gc
= gc
->gc
.gc_next
) {
938 if (debug
& DEBUG_UNCOLLECTABLE
)
939 debug_cycle("uncollectable", FROM_GC(gc
));
941 if (debug
& DEBUG_STATS
) {
942 double t2
= get_time();
943 if (m
== 0 && n
== 0)
944 PySys_WriteStderr("gc: done");
948 "%" PY_FORMAT_SIZE_T
"d unreachable, "
949 "%" PY_FORMAT_SIZE_T
"d uncollectable",
952 PySys_WriteStderr(", %.4fs elapsed", t2
-t1
);
954 PySys_WriteStderr(".\n");
957 /* Append instances in the uncollectable set to a Python
958 * reachable list of garbage. The programmer has to deal with
959 * this if they insist on creating this type of structure.
961 (void)handle_finalizers(&finalizers
, old
);
963 /* Clear free list only during the collection of the highest
965 if (generation
== NUM_GENERATIONS
-1) {
969 if (PyErr_Occurred()) {
971 gc_str
= PyString_FromString("garbage collection");
972 PyErr_WriteUnraisable(gc_str
);
973 Py_FatalError("unexpected exception during garbage collection");
979 collect_generations(void)
984 /* Find the oldest generation (highest numbered) where the count
985 * exceeds the threshold. Objects in that generation and
986 * generations younger than it will be collected. */
987 for (i
= NUM_GENERATIONS
-1; i
>= 0; i
--) {
988 if (generations
[i
].count
> generations
[i
].threshold
) {
989 /* Avoid quadratic performance degradation in number
990 of tracked objects. See comments at the beginning
991 of this file, and issue #4074.
993 if (i
== NUM_GENERATIONS
- 1
994 && long_lived_pending
< long_lived_total
/ 4)
1003 PyDoc_STRVAR(gc_enable__doc__
,
1004 "enable() -> None\n"
1006 "Enable automatic garbage collection.\n");
1009 gc_enable(PyObject
*self
, PyObject
*noargs
)
1016 PyDoc_STRVAR(gc_disable__doc__
,
1017 "disable() -> None\n"
1019 "Disable automatic garbage collection.\n");
1022 gc_disable(PyObject
*self
, PyObject
*noargs
)
1029 PyDoc_STRVAR(gc_isenabled__doc__
,
1030 "isenabled() -> status\n"
1032 "Returns true if automatic garbage collection is enabled.\n");
1035 gc_isenabled(PyObject
*self
, PyObject
*noargs
)
1037 return PyBool_FromLong((long)enabled
);
1040 PyDoc_STRVAR(gc_collect__doc__
,
1041 "collect([generation]) -> n\n"
1043 "With no arguments, run a full collection. The optional argument\n"
1044 "may be an integer specifying which generation to collect. A ValueError\n"
1045 "is raised if the generation number is invalid.\n\n"
1046 "The number of unreachable objects is returned.\n");
1049 gc_collect(PyObject
*self
, PyObject
*args
, PyObject
*kws
)
1051 static char *keywords
[] = {"generation", NULL
};
1052 int genarg
= NUM_GENERATIONS
- 1;
1055 if (!PyArg_ParseTupleAndKeywords(args
, kws
, "|i", keywords
, &genarg
))
1058 else if (genarg
< 0 || genarg
>= NUM_GENERATIONS
) {
1059 PyErr_SetString(PyExc_ValueError
, "invalid generation");
1064 n
= 0; /* already collecting, don't do anything */
1067 n
= collect(genarg
);
1071 return PyInt_FromSsize_t(n
);
/* gc.set_debug(flags): docstring and C entry point.
 * The visible code parses exactly one C int (format "i:set_debug") straight
 * into `debug`; that variable is declared outside this chunk, presumably the
 * module-wide debug flag word (confirm against its definition).
 * The failure branch and the return statement are not present in this
 * listing; the docstring states the Python-level result is None. */
1074 PyDoc_STRVAR(gc_set_debug__doc__
,
1075 "set_debug(flags) -> None\n"
1077 "Set the garbage collection debugging flags. Debugging information is\n"
1078 "written to sys.stderr.\n"
1080 "flags is an integer and can have the following bits turned on:\n"
1082 " DEBUG_STATS - Print statistics during collection.\n"
1083 " DEBUG_COLLECTABLE - Print collectable objects found.\n"
1084 " DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
1085 " DEBUG_INSTANCES - Print instance objects.\n"
1086 " DEBUG_OBJECTS - Print objects other than instances.\n"
1087 " DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
1088 " DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");
1091 gc_set_debug(PyObject
*self
, PyObject
*args
)
1093 if (!PyArg_ParseTuple(args
, "i:set_debug", &debug
))
/* gc.get_debug(): docstring and C entry point.
 * Returns the current value of `debug` as a Python int (format "i").
 * Registered in the method table as METH_NOARGS. */
1100 PyDoc_STRVAR(gc_get_debug__doc__
,
1101 "get_debug() -> flags\n"
1103 "Get the garbage collection debugging flags.\n");
1106 gc_get_debug(PyObject
*self
, PyObject
*noargs
)
1108 return Py_BuildValue("i", debug
);
/* gc.set_threshold(threshold0[, threshold1, threshold2]): docstring (only
 * its first lines are present in this listing) and C entry point.
 *
 * The argument parser takes one required and two optional C ints
 * ("i|ii:set_threshold") and stores them directly into the `threshold`
 * fields of generations[0..2].
 *
 * NOTE(review): because the parse targets are the live table entries, a
 * call whose later argument fails conversion may already have overwritten
 * an earlier threshold -- confirm whether that partial update matters.
 * The failure branch and the return statement are not present in this
 * listing. */
1111 PyDoc_STRVAR(gc_set_thresh__doc__
,
1112 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1114 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1118 gc_set_thresh(PyObject
*self
, PyObject
*args
)
1121 if (!PyArg_ParseTuple(args
, "i|ii:set_threshold",
1122 &generations
[0].threshold
,
1123 &generations
[1].threshold
,
1124 &generations
[2].threshold
))
/* NOTE(review): the loop starts at index 2, so with NUM_GENERATIONS == 3
 * its single iteration assigns generations[2].threshold to itself.  The
 * comment below speaks of generations "higher than 2", which suggests a
 * start index of 3 was intended -- confirm. */
1126 for (i
= 2; i
< NUM_GENERATIONS
; i
++) {
1127 /* generations higher than 2 get the same threshold */
1128 generations
[i
].threshold
= generations
[2].threshold
;
/* gc.get_threshold(): docstring and C entry point.
 * Builds and returns a 3-tuple of Python ints holding the `threshold`
 * field of generations 0, 1 and 2, in that order.
 * Registered in the method table as METH_NOARGS. */
1135 PyDoc_STRVAR(gc_get_thresh__doc__
,
1136 "get_threshold() -> (threshold0, threshold1, threshold2)\n"
1138 "Return the current collection thresholds\n");
1141 gc_get_thresh(PyObject
*self
, PyObject
*noargs
)
1143 return Py_BuildValue("(iii)",
1144 generations
[0].threshold
,
1145 generations
[1].threshold
,
1146 generations
[2].threshold
);
/* gc.get_count(): docstring and C entry point.
 * Builds and returns a 3-tuple of Python ints holding the `count` field of
 * generations 0, 1 and 2, in that order.
 * Registered in the method table as METH_NOARGS. */
1149 PyDoc_STRVAR(gc_get_count__doc__
,
1150 "get_count() -> (count0, count1, count2)\n"
1152 "Return the current collection counts\n");
1155 gc_get_count(PyObject
*self
, PyObject
*noargs
)
1157 return Py_BuildValue("(iii)",
1158 generations
[0].count
,
1159 generations
[1].count
,
1160 generations
[2].count
);
/* Visit callback handed to tp_traverse by gc_referrers_for().
 *   obj  - the object currently being visited by the traversal.
 *   objs - tuple of objects whose referrers are being searched for.
 * Scans `objs` and compares each element to `obj` by pointer identity.
 * The statements executed on a match and after the loop are not present in
 * this listing; presumably a match yields a nonzero result, since the
 * caller treats a nonzero traverse result as "refers to one of objs". */
1164 referrersvisit(PyObject
* obj
, PyObject
*objs
)
1167 for (i
= 0; i
< PyTuple_GET_SIZE(objs
); i
++)
1168 if (PyTuple_GET_ITEM(objs
, i
) == obj
)
/* Collect, from one GC list, the objects that refer to any member of objs.
 *   objs       - tuple of target objects (passed through to referrersvisit).
 *   list       - head of a circular list of PyGC_Head entries to scan.
 *   resultlist - Python list that receives each referrer found.
 * Returns 1 on success and 0 if appending to resultlist failed.
 * The line that derives `obj` from `gc` and the statement taken by the
 * skip test are not present in this listing (obj is presumably FROM_GC(gc)
 * and the skip presumably continues the loop -- confirm). */
1174 gc_referrers_for(PyObject
*objs
, PyGC_Head
*list
, PyObject
*resultlist
)
1178 traverseproc traverse
;
/* Walk the ring until we are back at the list head. */
1179 for (gc
= list
->gc
.gc_next
; gc
!= list
; gc
= gc
->gc
.gc_next
) {
1181 traverse
= Py_TYPE(obj
)->tp_traverse
;
/* The argument tuple and the result list are themselves special-cased
 * here so they are not handled as ordinary candidates. */
1182 if (obj
== objs
|| obj
== resultlist
)
/* A nonzero traverse result means referrersvisit reported a hit. */
1184 if (traverse(obj
, (visitproc
)referrersvisit
, objs
)) {
1185 if (PyList_Append(resultlist
, obj
) < 0)
1186 return 0; /* error */
1189 return 1; /* no error */
/* gc.get_referrers(*objs): docstring and C entry point.
 * Creates an empty result list (returning NULL if that allocation fails)
 * and runs gc_referrers_for() over every generation's list head, passing
 * the argument tuple as the set of targets.  The handling of a failed
 * gc_referrers_for() call and the final return are not present in this
 * listing. */
1192 PyDoc_STRVAR(gc_get_referrers__doc__
,
1193 "get_referrers(*objs) -> list\n\
1194 Return the list of objects that directly refer to any of objs.");
1197 gc_get_referrers(PyObject
*self
, PyObject
*args
)
1200 PyObject
*result
= PyList_New(0);
1201 if (!result
) return NULL
;
/* Scan youngest to oldest generation. */
1203 for (i
= 0; i
< NUM_GENERATIONS
; i
++) {
/* gc_referrers_for() returns 0 on error, hence the negation. */
1204 if (!(gc_referrers_for(args
, GEN_HEAD(i
), result
))) {
/* Visit callback used by gc_get_referents() with tp_traverse.
 *   obj  - object reported by the traversal.
 *   list - Python list collecting the referents.
 * The result is 1 when PyList_Append() fails and 0 otherwise. */
1212 /* Append obj to list; return true if error (out of memory), false if OK. */
1214 referentsvisit(PyObject
*obj
, PyObject
*list
)
1216 return PyList_Append(list
, obj
) < 0;
/* gc.get_referents(*objs): docstring and C entry point.
 * For each positional argument the visible code tests PyObject_IS_GC(),
 * fetches the type's tp_traverse slot and runs it with referentsvisit() so
 * that every directly referenced object is appended to `result`.
 * The statements taken when an argument is not a GC object, when the
 * traversal reports an error, and the final return are not present in this
 * listing. */
1219 PyDoc_STRVAR(gc_get_referents__doc__
,
1220 "get_referents(*objs) -> list\n\
1221 Return the list of objects that are directly referred to by objs.");
1224 gc_get_referents(PyObject
*self
, PyObject
*args
)
1227 PyObject
*result
= PyList_New(0);
1232 for (i
= 0; i
< PyTuple_GET_SIZE(args
); i
++) {
1233 traverseproc traverse
;
/* Borrowed reference taken from the argument tuple. */
1234 PyObject
*obj
= PyTuple_GET_ITEM(args
, i
);
1236 if (! PyObject_IS_GC(obj
))
1238 traverse
= Py_TYPE(obj
)->tp_traverse
;
/* referentsvisit() returns nonzero only when the append failed. */
1241 if (traverse(obj
, (visitproc
)referentsvisit
, result
)) {
/* gc.get_objects(): docstring (only partly present in this listing) and C
 * entry point.  Creates an empty list and calls append_objects() on every
 * generation's list head; a nonzero result from append_objects() enters a
 * branch whose body is not shown here (presumably the error path).
 * append_objects() is defined outside this chunk.
 * Registered in the method table as METH_NOARGS. */
1249 PyDoc_STRVAR(gc_get_objects__doc__
,
1250 "get_objects() -> [...]\n"
1252 "Return a list of objects tracked by the collector (excluding the list\n"
1256 gc_get_objects(PyObject
*self
, PyObject
*noargs
)
1261 result
= PyList_New(0);
1264 for (i
= 0; i
< NUM_GENERATIONS
; i
++) {
1265 if (append_objects(result
, GEN_HEAD(i
))) {
/* gc.is_tracked(obj): docstring (only partly present in this listing) and
 * C entry point.  Registered as METH_O, so `obj` is the single argument
 * itself rather than a tuple.  The visible test requires both that the
 * object's type supports GC (PyObject_IS_GC) and that the object is
 * currently tracked (IS_TRACKED); the values produced by each branch are
 * not shown here. */
1273 PyDoc_STRVAR(gc_is_tracked__doc__
,
1274 "is_tracked(obj) -> bool\n"
1276 "Returns true if the object is tracked by the garbage collector.\n"
1277 "Simple atomic objects will return false.\n"
1281 gc_is_tracked(PyObject
*self
, PyObject
*obj
)
1285 if (PyObject_IS_GC(obj
) && IS_TRACKED(obj
))
/* Module-level docstring for `gc`: a one-line summary followed by one line
 * per exported function.
 * NOTE(review): the get_threshold() entry reads "Return the current the
 * collection thresholds" (duplicated "the").  It is runtime text, so it is
 * left untouched here. */
1294 PyDoc_STRVAR(gc__doc__
,
1295 "This module provides access to the garbage collector for reference cycles.\n"
1297 "enable() -- Enable automatic garbage collection.\n"
1298 "disable() -- Disable automatic garbage collection.\n"
1299 "isenabled() -- Returns true if automatic collection is enabled.\n"
1300 "collect() -- Do a full collection right now.\n"
1301 "get_count() -- Return the current collection counts.\n"
1302 "set_debug() -- Set debugging flags.\n"
1303 "get_debug() -- Get debugging flags.\n"
1304 "set_threshold() -- Set the collection thresholds.\n"
1305 "get_threshold() -- Return the current the collection thresholds.\n"
1306 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1307 "is_tracked() -- Returns true if a given object is tracked.\n"
1308 "get_referrers() -- Return the list of objects that refer to an object.\n"
1309 "get_referents() -- Return the list of objects that an object refers to.\n");
/* Method table for the `gc` module.  Each row gives the Python-visible
 * name, the C implementation, the calling convention and the docstring.
 * collect() is the only entry taking keywords (METH_VARARGS|METH_KEYWORDS),
 * which is why its function pointer is cast to PyCFunction; is_tracked()
 * uses METH_O (single object argument).  The table ends with a NULL
 * sentinel row. */
1311 static PyMethodDef GcMethods
[] = {
1312 {"enable", gc_enable
, METH_NOARGS
, gc_enable__doc__
},
1313 {"disable", gc_disable
, METH_NOARGS
, gc_disable__doc__
},
1314 {"isenabled", gc_isenabled
, METH_NOARGS
, gc_isenabled__doc__
},
1315 {"set_debug", gc_set_debug
, METH_VARARGS
, gc_set_debug__doc__
},
1316 {"get_debug", gc_get_debug
, METH_NOARGS
, gc_get_debug__doc__
},
1317 {"get_count", gc_get_count
, METH_NOARGS
, gc_get_count__doc__
},
1318 {"set_threshold", gc_set_thresh
, METH_VARARGS
, gc_set_thresh__doc__
},
1319 {"get_threshold", gc_get_thresh
, METH_NOARGS
, gc_get_thresh__doc__
},
1320 {"collect", (PyCFunction
)gc_collect
,
1321 METH_VARARGS
| METH_KEYWORDS
, gc_collect__doc__
},
1322 {"get_objects", gc_get_objects
,METH_NOARGS
, gc_get_objects__doc__
},
1323 {"is_tracked", gc_is_tracked
, METH_O
, gc_is_tracked__doc__
},
1324 {"get_referrers", gc_get_referrers
, METH_VARARGS
,
1325 gc_get_referrers__doc__
},
1326 {"get_referents", gc_get_referents
, METH_VARARGS
,
1327 gc_get_referents__doc__
},
1328 {NULL
, NULL
} /* Sentinel */
/* Body of the module initialisation routine for "gc" (the function header
 * and several statements are not present in this listing).  Visible steps:
 *   1. Create the module with Py_InitModule4("gc", ..., PYTHON_API_VERSION).
 *   2. Lazily allocate the file-level `garbage` list if it is still NULL and
 *      expose it on the module under the name "garbage".
 *   3. Import the "time" module up front via PyImport_ImportModuleNoBlock();
 *      the in-code comment explains this cannot be deferred to collect()
 *      because collect() may run during interpreter finalisation.
 *   4. Publish the DEBUG_* flag values as module-level int constants through
 *      the ADD_INT helper macro, which returns from the routine as soon as
 *      one registration fails. */
1336 m
= Py_InitModule4("gc",
1340 PYTHON_API_VERSION
);
1344 if (garbage
== NULL
) {
1345 garbage
= PyList_New(0);
1346 if (garbage
== NULL
)
1350 if (PyModule_AddObject(m
, "garbage", garbage
) < 0)
1353 /* Importing can't be done in collect() because collect()
1354 * can be called via PyGC_Collect() in Py_Finalize().
1355 * This wouldn't be a problem, except that <initialized> is
1356 * reset to 0 before calling collect which trips up
1357 * the import and triggers an assertion.
1360 tmod
= PyImport_ImportModuleNoBlock("time");
1365 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return
1366 ADD_INT(DEBUG_STATS
);
1367 ADD_INT(DEBUG_COLLECTABLE
);
1368 ADD_INT(DEBUG_UNCOLLECTABLE
);
1369 ADD_INT(DEBUG_INSTANCES
);
1370 ADD_INT(DEBUG_OBJECTS
);
1371 ADD_INT(DEBUG_SAVEALL
);
1372 ADD_INT(DEBUG_LEAK
);
/* C-level counterpart of gc.collect() (function header, the test that
 * selects between the two assignments, and the return are not present in
 * this listing).  When a collection is requested it always targets the
 * oldest generation (NUM_GENERATIONS - 1), i.e. a full collection; `n` is
 * set to 0 instead when a collection is already in progress, per the inline
 * comment. */
1376 /* API to invoke gc.collect() from C */
1383 n
= 0; /* already collecting, don't do anything */
1386 n
= collect(NUM_GENERATIONS
- 1);
/* Debugging aid: given a GC header `g`, dump the object that follows it.
 * FROM_GC() converts the header pointer to the object pointer, which is
 * then handed to _PyObject_Dump(). */
1395 _PyGC_Dump(PyGC_Head
*g
)
1397 _PyObject_Dump(FROM_GC(g
));
/* The #undef lines below remove any macro definitions of these four names
 * so that the function definitions that follow in this file are real,
 * linkable symbols (see the original comment directly below). */
1400 /* extension modules might be compiled with GC support so these
1401 functions must always be available */
1403 #undef PyObject_GC_Track
1404 #undef PyObject_GC_UnTrack
1405 #undef PyObject_GC_Del
1406 #undef _PyObject_GC_Malloc
/* Function form of the tracking operation: forwards `op` to the
 * _PyObject_GC_TRACK() macro. */
1409 PyObject_GC_Track(void *op
)
1411 _PyObject_GC_TRACK(op
);
/* Underscore-prefixed alias kept for older binaries: simply forwards to
 * PyObject_GC_Track(). */
1414 /* for binary compatibility with 2.2 */
1416 _PyObject_GC_Track(PyObject
*op
)
1418 PyObject_GC_Track(op
);
/* Function form of the untracking operation: the visible statement forwards
 * `op` to the _PyObject_GC_UNTRACK() macro.  The original comment states
 * that calling this twice on the same object must be supported (needed by
 * the Py_TRASHCAN mechanism); the guard that makes the second call safe is
 * not present in this listing -- confirm against the full source. */
1422 PyObject_GC_UnTrack(void *op
)
1424 /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
1425 * call PyObject_GC_UnTrack twice on an object.
1428 _PyObject_GC_UNTRACK(op
);
/* Underscore-prefixed alias kept for older binaries: simply forwards to
 * PyObject_GC_UnTrack(). */
1431 /* for binary compatibility with 2.2 */
1433 _PyObject_GC_UnTrack(PyObject
*op
)
1435 PyObject_GC_UnTrack(op
);
/* Allocate storage for a GC-aware object of `basicsize` bytes, preceded by
 * a PyGC_Head.
 *   basicsize - size of the object proper, excluding the GC header.
 * On the visible failure paths the result is that of PyErr_NoMemory().
 * Side effects shown here: the new header is marked GC_UNTRACKED,
 * generations[0].count is incremented, and collect_generations() may be
 * invoked.  Some lines (declarations, the allocation-failure test, parts of
 * the trigger condition and the success return) are not present in this
 * listing. */
1439 _PyObject_GC_Malloc(size_t basicsize
)
/* Refuse sizes for which header + object would exceed PY_SSIZE_T_MAX. */
1443 if (basicsize
> PY_SSIZE_T_MAX
- sizeof(PyGC_Head
))
1444 return PyErr_NoMemory();
1445 g
= (PyGC_Head
*)PyObject_MALLOC(
1446 sizeof(PyGC_Head
) + basicsize
);
1448 return PyErr_NoMemory();
/* A freshly allocated object starts out untracked. */
1449 g
->gc
.gc_refs
= GC_UNTRACKED
;
1450 generations
[0].count
++; /* number of allocated GC objects */
/* Trigger an automatic collection once the youngest generation's count
 * exceeds its threshold; a zero threshold never triggers, and nothing is
 * collected while an exception is pending.  Two further conjuncts of this
 * condition are missing from the listing. */
1451 if (generations
[0].count
> generations
[0].threshold
&&
1453 generations
[0].threshold
&&
1455 !PyErr_Occurred()) {
1457 collect_generations();
/* Allocate and initialise a fixed-size GC-aware object of type `tp`.
 * Storage of _PyObject_SIZE(tp) bytes comes from _PyObject_GC_Malloc();
 * PyObject_INIT() then initialises the object for type `tp`.  The NULL
 * check between the two steps and the return statement are not present in
 * this listing. */
1465 _PyObject_GC_New(PyTypeObject
*tp
)
1467 PyObject
*op
= _PyObject_GC_Malloc(_PyObject_SIZE(tp
));
1469 op
= PyObject_INIT(op
, tp
);
/* Allocate and initialise a variable-size GC-aware object.
 *   tp     - type of the new object.
 *   nitems - number of items the object must hold.
 * The byte size is computed with _PyObject_VAR_SIZE(tp, nitems), storage
 * comes from _PyObject_GC_Malloc(), and PyObject_INIT_VAR() initialises the
 * object with type `tp` and item count `nitems`.  The NULL check and the
 * return statement are not present in this listing. */
1474 _PyObject_GC_NewVar(PyTypeObject
*tp
, Py_ssize_t nitems
)
1476 const size_t size
= _PyObject_VAR_SIZE(tp
, nitems
);
1477 PyVarObject
*op
= (PyVarObject
*) _PyObject_GC_Malloc(size
);
1479 op
= PyObject_INIT_VAR(op
, tp
, nitems
);
/* Resize a variable-size GC-aware object so it can hold `nitems` items.
 *   op     - object to resize; its GC header is located with AS_GC().
 *   nitems - new item count.
 * On the visible failure paths the result is PyErr_NoMemory() cast to
 * PyVarObject *.  On success the object pointer is recomputed from the
 * (possibly moved) header and its size field is set to `nitems`.  The
 * allocation-failure test and the success return are not present in this
 * listing. */
1484 _PyObject_GC_Resize(PyVarObject
*op
, Py_ssize_t nitems
)
1486 const size_t basicsize
= _PyObject_VAR_SIZE(Py_TYPE(op
), nitems
);
1487 PyGC_Head
*g
= AS_GC(op
);
/* Refuse sizes for which header + object would exceed PY_SSIZE_T_MAX. */
1488 if (basicsize
> PY_SSIZE_T_MAX
- sizeof(PyGC_Head
))
1489 return (PyVarObject
*)PyErr_NoMemory();
/* The reallocation covers the header as well as the object body.
 * NOTE(review): `g` is overwritten with the result; the caller still holds
 * `op`, so the old block is presumably not lost on failure -- confirm. */
1490 g
= (PyGC_Head
*)PyObject_REALLOC(g
, sizeof(PyGC_Head
) + basicsize
);
1492 return (PyVarObject
*)PyErr_NoMemory();
/* The block may have moved: derive the object pointer again. */
1493 op
= (PyVarObject
*) FROM_GC(g
);
1494 Py_SIZE(op
) = nitems
;
/* Release a GC-aware object previously obtained from the GC allocator.
 *   op - pointer to the object (not to its GC header).
 * Visible steps: locate the header with AS_GC() and decrement
 * generations[0].count, never letting it drop below zero.  The remaining
 * statements (between the header lookup and the counter update, and the
 * actual release of the memory) are not present in this listing. */
1499 PyObject_GC_Del(void *op
)
1501 PyGC_Head
*g
= AS_GC(op
);
/* Keep the allocation counter non-negative. */
1504 if (generations
[0].count
> 0) {
1505 generations
[0].count
--;
/* Underscore-prefixed alias kept for older binaries: any macro of the same
 * name is removed first so a real function is emitted, which simply
 * forwards to PyObject_GC_Del(). */
1510 /* for binary compatibility with 2.2 */
1511 #undef _PyObject_GC_Del
1513 _PyObject_GC_Del(PyObject
*op
)
1515 PyObject_GC_Del(op
);