3 Reference Cycle Garbage Collection
4 ==================================
6 Neil Schemenauer <nas@arctrix.com>
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
11 http://www.arctrix.com/nas/python/gc/
13 The following mailing list threads provide a historical perspective on
14 the design of this module. Note that a fair amount of refinement has
15 occurred since those discussions.
17 http://mail.python.org/pipermail/python-dev/2000-March/002385.html
18 http://mail.python.org/pipermail/python-dev/2000-March/002434.html
19 http://mail.python.org/pipermail/python-dev/2000-March/002497.html
21 For a high-level view of the collection process, read the collect
27 #include "frameobject.h" /* for PyFrame_ClearFreeList */
29 /* Get an object's GC head */
30 #define AS_GC(o) ((PyGC_Head *)(o)-1)
32 /* Get the object given the GC head */
33 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
35 /*** Global GC state ***/
37 struct gc_generation
{
39 int threshold
; /* collection threshold */
40 int count
; /* count of allocations or collections of younger
44 #define NUM_GENERATIONS 3
45 #define GEN_HEAD(n) (&generations[n].head)
47 /* linked lists of container objects */
48 static struct gc_generation generations
[NUM_GENERATIONS
] = {
49 /* PyGC_Head, threshold, count */
50 {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
51 {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
52 {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
55 PyGC_Head
*_PyGC_generation0
= GEN_HEAD(0);
57 static int enabled
= 1; /* automatic collection enabled? */
59 /* true if we are currently running the collector */
60 static int collecting
= 0;
62 /* list of uncollectable objects */
63 static PyObject
*garbage
= NULL
;
65 /* Python string to use if unhandled exception occurs */
66 static PyObject
*gc_str
= NULL
;
68 /* Python string used to look for __del__ attribute. */
69 static PyObject
*delstr
= NULL
;
71 /* This is the number of objects who survived the last full collection. It
72 approximates the number of long lived objects tracked by the GC.
74 (by "full collection", we mean a collection of the oldest generation).
76 static Py_ssize_t long_lived_total
= 0;
78 /* This is the number of objects who survived all "non-full" collections,
79 and are awaiting their first full collection.
82 static Py_ssize_t long_lived_pending
= 0;
85 NOTE: about the counting of long-lived objects.
87 To limit the cost of garbage collection, there are two strategies;
88 - make each collection faster, e.g. by scanning fewer objects
90 This heuristic is about the latter strategy.
92 In addition to the various configurable thresholds, we only trigger a
93 full collection if the ratio
94 long_lived_pending / long_lived_total
95 is above a given value (hardwired to 25%).
97 The reason is that, while "non-full" collections (i.e., collections of
98 the young and middle generations) will always examine roughly the same
99 number of objects -- determined by the aforementioned thresholds --,
100 the cost of a full collection is proportional to the total number of
101 long-lived objects, which is virtually unbounded.
103 Indeed, it has been remarked that doing a full collection every
104 <constant number> of object creations entails a dramatic performance
105 degradation in workloads which consist in creating and storing lots of
106 long-lived objects (e.g. building a large list of GC-tracked objects would
107 show quadratic performance, instead of linear as expected: see issue #4074).
109 Using the above ratio, instead, yields amortized linear performance in
110 the total number of objects (the effect of which can be summarized
111 thusly: "each full garbage collection is more and more costly as the
112 number of objects grows, but we do fewer and fewer of them").
114 This heuristic was suggested by Martin von Löwis on python-dev in
115 June 2008. His original analysis and proposal can be found at:
116 http://mail.python.org/pipermail/python-dev/2008-June/080579.html
120 /* set for debugging information */
121 #define DEBUG_STATS (1<<0) /* print collection statistics */
122 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
123 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
124 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
125 #define DEBUG_LEAK DEBUG_COLLECTABLE | \
126 DEBUG_UNCOLLECTABLE | \
129 static PyObject
*tmod
= NULL
;
131 /*--------------------------------------------------------------------------
134 Between collections, every gc'ed object has one of two gc_refs values:
137 The initial state; objects returned by PyObject_GC_Malloc are in this
138 state. The object doesn't live in any generation list, and its
139 tp_traverse slot must not be called.
142 The object lives in some generation list, and its tp_traverse is safe to
143 call. An object transitions to GC_REACHABLE when PyObject_GC_Track
146 During a collection, gc_refs can temporarily take on other states:
149 At the start of a collection, update_refs() copies the true refcount
150 to gc_refs, for each object in the generation being collected.
151 subtract_refs() then adjusts gc_refs so that it equals the number of
152 times an object is referenced directly from outside the generation
154 gc_refs remains >= 0 throughout these steps.
156 GC_TENTATIVELY_UNREACHABLE
157 move_unreachable() then moves objects not reachable (whether directly or
158 indirectly) from outside the generation into an "unreachable" set.
159 Objects that are found to be reachable have gc_refs set to GC_REACHABLE
160 again. Objects that are found to be unreachable have gc_refs set to
161 GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing
162 this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
163 transition back to GC_REACHABLE.
165 Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
166 for collection. If it's decided not to collect such an object (e.g.,
167 it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
168 ----------------------------------------------------------------------------
170 #define GC_UNTRACKED _PyGC_REFS_UNTRACKED
171 #define GC_REACHABLE _PyGC_REFS_REACHABLE
172 #define GC_TENTATIVELY_UNREACHABLE _PyGC_REFS_TENTATIVELY_UNREACHABLE
174 #define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
175 #define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
176 #define IS_TENTATIVELY_UNREACHABLE(o) ( \
177 (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
179 /*** list functions ***/
182 gc_list_init(PyGC_Head
*list
)
184 list
->gc
.gc_prev
= list
;
185 list
->gc
.gc_next
= list
;
189 gc_list_is_empty(PyGC_Head
*list
)
191 return (list
->gc
.gc_next
== list
);
195 /* This became unused after gc_list_move() was introduced. */
196 /* Append `node` to `list`. */
198 gc_list_append(PyGC_Head
*node
, PyGC_Head
*list
)
200 node
->gc
.gc_next
= list
;
201 node
->gc
.gc_prev
= list
->gc
.gc_prev
;
202 node
->gc
.gc_prev
->gc
.gc_next
= node
;
203 list
->gc
.gc_prev
= node
;
207 /* Remove `node` from the gc list it's currently in. */
209 gc_list_remove(PyGC_Head
*node
)
211 node
->gc
.gc_prev
->gc
.gc_next
= node
->gc
.gc_next
;
212 node
->gc
.gc_next
->gc
.gc_prev
= node
->gc
.gc_prev
;
213 node
->gc
.gc_next
= NULL
; /* object is not currently tracked */
216 /* Move `node` from the gc list it's currently in (which is not explicitly
217 * named here) to the end of `list`. This is semantically the same as
218 * gc_list_remove(node) followed by gc_list_append(node, list).
221 gc_list_move(PyGC_Head
*node
, PyGC_Head
*list
)
224 PyGC_Head
*current_prev
= node
->gc
.gc_prev
;
225 PyGC_Head
*current_next
= node
->gc
.gc_next
;
226 /* Unlink from current list. */
227 current_prev
->gc
.gc_next
= current_next
;
228 current_next
->gc
.gc_prev
= current_prev
;
229 /* Relink at end of new list. */
230 new_prev
= node
->gc
.gc_prev
= list
->gc
.gc_prev
;
231 new_prev
->gc
.gc_next
= list
->gc
.gc_prev
= node
;
232 node
->gc
.gc_next
= list
;
235 /* append list `from` onto list `to`; `from` becomes an empty list */
237 gc_list_merge(PyGC_Head
*from
, PyGC_Head
*to
)
241 if (!gc_list_is_empty(from
)) {
242 tail
= to
->gc
.gc_prev
;
243 tail
->gc
.gc_next
= from
->gc
.gc_next
;
244 tail
->gc
.gc_next
->gc
.gc_prev
= tail
;
245 to
->gc
.gc_prev
= from
->gc
.gc_prev
;
246 to
->gc
.gc_prev
->gc
.gc_next
= to
;
252 gc_list_size(PyGC_Head
*list
)
256 for (gc
= list
->gc
.gc_next
; gc
!= list
; gc
= gc
->gc
.gc_next
) {
262 /* Append objects in a GC list to a Python list.
263 * Return 0 if all OK, < 0 if error (out of memory for list).
266 append_objects(PyObject
*py_list
, PyGC_Head
*gc_list
)
269 for (gc
= gc_list
->gc
.gc_next
; gc
!= gc_list
; gc
= gc
->gc
.gc_next
) {
270 PyObject
*op
= FROM_GC(gc
);
272 if (PyList_Append(py_list
, op
)) {
273 return -1; /* exception */
280 /*** end of list stuff ***/
283 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
284 * in containers, and is GC_REACHABLE for all tracked gc objects not in
288 update_refs(PyGC_Head
*containers
)
290 PyGC_Head
*gc
= containers
->gc
.gc_next
;
291 for (; gc
!= containers
; gc
= gc
->gc
.gc_next
) {
292 assert(gc
->gc
.gc_refs
== GC_REACHABLE
);
293 gc
->gc
.gc_refs
= Py_REFCNT(FROM_GC(gc
));
294 /* Python's cyclic gc should never see an incoming refcount
295 * of 0: if something decref'ed to 0, it should have been
296 * deallocated immediately at that time.
297 * Possible cause (if the assert triggers): a tp_dealloc
298 * routine left a gc-aware object tracked during its teardown
299 * phase, and did something-- or allowed something to happen --
300 * that called back into Python. gc can trigger then, and may
301 * see the still-tracked dying object. Before this assert
302 * was added, such mistakes went on to allow gc to try to
303 * delete the object again. In a debug build, that caused
304 * a mysterious segfault, when _Py_ForgetReference tried
305 * to remove the object from the doubly-linked list of all
306 * objects a second time. In a release build, an actual
307 * double deallocation occurred, which leads to corruption
308 * of the allocator's internal bookkeeping pointers. That's
309 * so serious that maybe this should be a release-build
310 * check instead of an assert?
312 assert(gc
->gc
.gc_refs
!= 0);
316 /* A traversal callback for subtract_refs. */
318 visit_decref(PyObject
*op
, void *data
)
321 if (PyObject_IS_GC(op
)) {
322 PyGC_Head
*gc
= AS_GC(op
);
323 /* We're only interested in gc_refs for objects in the
324 * generation being collected, which can be recognized
325 * because only they have positive gc_refs.
327 assert(gc
->gc
.gc_refs
!= 0); /* else refcount was too small */
328 if (gc
->gc
.gc_refs
> 0)
334 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
335 * for all objects in containers, and is GC_REACHABLE for all tracked gc
336 * objects not in containers. The ones with gc_refs > 0 are directly
337 * reachable from outside containers, and so can't be collected.
340 subtract_refs(PyGC_Head
*containers
)
342 traverseproc traverse
;
343 PyGC_Head
*gc
= containers
->gc
.gc_next
;
344 for (; gc
!= containers
; gc
=gc
->gc
.gc_next
) {
345 traverse
= Py_TYPE(FROM_GC(gc
))->tp_traverse
;
346 (void) traverse(FROM_GC(gc
),
347 (visitproc
)visit_decref
,
352 /* A traversal callback for move_unreachable. */
354 visit_reachable(PyObject
*op
, PyGC_Head
*reachable
)
356 if (PyObject_IS_GC(op
)) {
357 PyGC_Head
*gc
= AS_GC(op
);
358 const Py_ssize_t gc_refs
= gc
->gc
.gc_refs
;
361 /* This is in move_unreachable's 'young' list, but
362 * the traversal hasn't yet gotten to it. All
363 * we need to do is tell move_unreachable that it's
368 else if (gc_refs
== GC_TENTATIVELY_UNREACHABLE
) {
369 /* This had gc_refs = 0 when move_unreachable got
370 * to it, but turns out it's reachable after all.
371 * Move it back to move_unreachable's 'young' list,
372 * and move_unreachable will eventually get to it
375 gc_list_move(gc
, reachable
);
378 /* Else there's nothing to do.
379 * If gc_refs > 0, it must be in move_unreachable's 'young'
380 * list, and move_unreachable will eventually get to it.
381 * If gc_refs == GC_REACHABLE, it's either in some other
382 * generation so we don't care about it, or move_unreachable
383 * already dealt with it.
384 * If gc_refs == GC_UNTRACKED, it must be ignored.
388 || gc_refs
== GC_REACHABLE
389 || gc_refs
== GC_UNTRACKED
);
395 /* Move the unreachable objects from young to unreachable. After this,
396 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
397 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
398 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
399 * All objects in young after this are directly or indirectly reachable
400 * from outside the original young; and all objects in unreachable are
404 move_unreachable(PyGC_Head
*young
, PyGC_Head
*unreachable
)
406 PyGC_Head
*gc
= young
->gc
.gc_next
;
408 /* Invariants: all objects "to the left" of us in young have gc_refs
409 * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
410 * from outside the young list as it was at entry. All other objects
411 * from the original young "to the left" of us are in unreachable now,
412 * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
413 * left of us in 'young' now have been scanned, and no objects here
414 * or to the right have been scanned yet.
417 while (gc
!= young
) {
420 if (gc
->gc
.gc_refs
) {
421 /* gc is definitely reachable from outside the
422 * original 'young'. Mark it as such, and traverse
423 * its pointers to find any other objects that may
424 * be directly reachable from it. Note that the
425 * call to tp_traverse may append objects to young,
426 * so we have to wait until it returns to determine
427 * the next object to visit.
429 PyObject
*op
= FROM_GC(gc
);
430 traverseproc traverse
= Py_TYPE(op
)->tp_traverse
;
431 assert(gc
->gc
.gc_refs
> 0);
432 gc
->gc
.gc_refs
= GC_REACHABLE
;
434 (visitproc
)visit_reachable
,
436 next
= gc
->gc
.gc_next
;
437 if (PyTuple_CheckExact(op
)) {
438 _PyTuple_MaybeUntrack(op
);
440 else if (PyDict_CheckExact(op
)) {
441 _PyDict_MaybeUntrack(op
);
445 /* This *may* be unreachable. To make progress,
446 * assume it is. gc isn't directly reachable from
447 * any object we've already traversed, but may be
448 * reachable from an object we haven't gotten to yet.
449 * visit_reachable will eventually move gc back into
450 * young if that's so, and we'll see it again.
452 next
= gc
->gc
.gc_next
;
453 gc_list_move(gc
, unreachable
);
454 gc
->gc
.gc_refs
= GC_TENTATIVELY_UNREACHABLE
;
460 /* Return true if object has a finalization method. */
462 has_finalizer(PyObject
*op
)
464 if (PyGen_CheckExact(op
))
465 return PyGen_NeedsFinalizing((PyGenObject
*)op
);
467 return op
->ob_type
->tp_del
!= NULL
;
470 /* Move the objects in unreachable with __del__ methods into `finalizers`.
471 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
472 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
475 move_finalizers(PyGC_Head
*unreachable
, PyGC_Head
*finalizers
)
480 /* March over unreachable. Move objects with finalizers into
483 for (gc
= unreachable
->gc
.gc_next
; gc
!= unreachable
; gc
= next
) {
484 PyObject
*op
= FROM_GC(gc
);
486 assert(IS_TENTATIVELY_UNREACHABLE(op
));
487 next
= gc
->gc
.gc_next
;
489 if (has_finalizer(op
)) {
490 gc_list_move(gc
, finalizers
);
491 gc
->gc
.gc_refs
= GC_REACHABLE
;
496 /* A traversal callback for move_finalizer_reachable. */
498 visit_move(PyObject
*op
, PyGC_Head
*tolist
)
500 if (PyObject_IS_GC(op
)) {
501 if (IS_TENTATIVELY_UNREACHABLE(op
)) {
502 PyGC_Head
*gc
= AS_GC(op
);
503 gc_list_move(gc
, tolist
);
504 gc
->gc
.gc_refs
= GC_REACHABLE
;
510 /* Move objects that are reachable from finalizers, from the unreachable set
511 * into finalizers set.
514 move_finalizer_reachable(PyGC_Head
*finalizers
)
516 traverseproc traverse
;
517 PyGC_Head
*gc
= finalizers
->gc
.gc_next
;
518 for (; gc
!= finalizers
; gc
= gc
->gc
.gc_next
) {
519 /* Note that the finalizers list may grow during this. */
520 traverse
= Py_TYPE(FROM_GC(gc
))->tp_traverse
;
521 (void) traverse(FROM_GC(gc
),
522 (visitproc
)visit_move
,
527 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
528 * callback, invoke it if necessary. Note that it's possible for such
529 * weakrefs to be outside the unreachable set -- indeed, those are precisely
530 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
531 * overview & some details. Some weakrefs with callbacks may be reclaimed
532 * directly by this routine; the number reclaimed is the return value. Other
533 * weakrefs with callbacks may be moved into the `old` generation. Objects
534 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
535 * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
536 * no object in `unreachable` is weakly referenced anymore.
539 handle_weakrefs(PyGC_Head
*unreachable
, PyGC_Head
*old
)
542 PyObject
*op
; /* generally FROM_GC(gc) */
543 PyWeakReference
*wr
; /* generally a cast of op */
544 PyGC_Head wrcb_to_call
; /* weakrefs with callbacks to call */
548 gc_list_init(&wrcb_to_call
);
550 /* Clear all weakrefs to the objects in unreachable. If such a weakref
551 * also has a callback, move it into `wrcb_to_call` if the callback
552 * needs to be invoked. Note that we cannot invoke any callbacks until
553 * all weakrefs to unreachable objects are cleared, lest the callback
554 * resurrect an unreachable object via a still-active weakref. We
555 * make another pass over wrcb_to_call, invoking callbacks, after this
558 for (gc
= unreachable
->gc
.gc_next
; gc
!= unreachable
; gc
= next
) {
559 PyWeakReference
**wrlist
;
562 assert(IS_TENTATIVELY_UNREACHABLE(op
));
563 next
= gc
->gc
.gc_next
;
565 if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op
)))
568 /* It supports weakrefs. Does it have any? */
569 wrlist
= (PyWeakReference
**)
570 PyObject_GET_WEAKREFS_LISTPTR(op
);
572 /* `op` may have some weakrefs. March over the list, clear
573 * all the weakrefs, and move the weakrefs with callbacks
574 * that must be called into wrcb_to_call.
576 for (wr
= *wrlist
; wr
!= NULL
; wr
= *wrlist
) {
577 PyGC_Head
*wrasgc
; /* AS_GC(wr) */
579 /* _PyWeakref_ClearRef clears the weakref but leaves
580 * the callback pointer intact. Obscure: it also
583 assert(wr
->wr_object
== op
);
584 _PyWeakref_ClearRef(wr
);
585 assert(wr
->wr_object
== Py_None
);
586 if (wr
->wr_callback
== NULL
)
587 continue; /* no callback */
589 /* Headache time. `op` is going away, and is weakly referenced by
590 * `wr`, which has a callback. Should the callback be invoked? If wr
593 * 1. There's no need to call it. The object and the weakref are
594 * both going away, so it's legitimate to pretend the weakref is
595 * going away first. The user has to ensure a weakref outlives its
596 * referent if they want a guarantee that the wr callback will get
599 * 2. It may be catastrophic to call it. If the callback is also in
600 * cyclic trash (CT), then although the CT is unreachable from
601 * outside the current generation, CT may be reachable from the
602 * callback. Then the callback could resurrect insane objects.
604 * Since the callback is never needed and may be unsafe in this case,
605 * wr is simply left in the unreachable set. Note that because we
606 * already called _PyWeakref_ClearRef(wr), its callback will never
609 * OTOH, if wr isn't part of CT, we should invoke the callback: the
610 * weakref outlived the trash. Note that since wr isn't CT in this
611 * case, its callback can't be CT either -- wr acted as an external
612 * root to this generation, and therefore its callback did too. So
613 * nothing in CT is reachable from the callback either, so it's hard
614 * to imagine how calling it later could create a problem for us. wr
615 * is moved to wrcb_to_call in this case.
617 if (IS_TENTATIVELY_UNREACHABLE(wr
))
619 assert(IS_REACHABLE(wr
));
621 /* Create a new reference so that wr can't go away
622 * before we can process it again.
626 /* Move wr to wrcb_to_call, for the next pass. */
628 assert(wrasgc
!= next
); /* wrasgc is reachable, but
629 next isn't, so they can't
631 gc_list_move(wrasgc
, &wrcb_to_call
);
635 /* Invoke the callbacks we decided to honor. It's safe to invoke them
636 * because they can't reference unreachable objects.
638 while (! gc_list_is_empty(&wrcb_to_call
)) {
642 gc
= wrcb_to_call
.gc
.gc_next
;
644 assert(IS_REACHABLE(op
));
645 assert(PyWeakref_Check(op
));
646 wr
= (PyWeakReference
*)op
;
647 callback
= wr
->wr_callback
;
648 assert(callback
!= NULL
);
650 /* copy-paste of weakrefobject.c's handle_callback() */
651 temp
= PyObject_CallFunctionObjArgs(callback
, wr
, NULL
);
653 PyErr_WriteUnraisable(callback
);
657 /* Give up the reference we created in the first pass. When
658 * op's refcount hits 0 (which it may or may not do right now),
659 * op's tp_dealloc will decref op->wr_callback too. Note
660 * that the refcount probably will hit 0 now, and because this
661 * weakref was reachable to begin with, gc didn't already
662 * add it to its count of freed objects. Example: a reachable
663 * weak value dict maps some key to this reachable weakref.
664 * The callback removes this key->weakref mapping from the
665 * dict, leaving no other references to the weakref (excepting
669 if (wrcb_to_call
.gc
.gc_next
== gc
) {
670 /* object is still alive -- move it */
671 gc_list_move(gc
, old
);
681 debug_cycle(char *msg
, PyObject
*op
)
683 PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
684 msg
, Py_TYPE(op
)->tp_name
, op
);
687 /* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
688 * only from such cycles).
689 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
690 * garbage list (a Python list), else only the objects in finalizers with
691 * __del__ methods are appended to garbage. All objects in finalizers are
692 * merged into the old list regardless.
693 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
694 * The finalizers list is made empty on a successful return.
697 handle_finalizers(PyGC_Head
*finalizers
, PyGC_Head
*old
)
699 PyGC_Head
*gc
= finalizers
->gc
.gc_next
;
701 if (garbage
== NULL
) {
702 garbage
= PyList_New(0);
704 Py_FatalError("gc couldn't create gc.garbage list");
706 for (; gc
!= finalizers
; gc
= gc
->gc
.gc_next
) {
707 PyObject
*op
= FROM_GC(gc
);
709 if ((debug
& DEBUG_SAVEALL
) || has_finalizer(op
)) {
710 if (PyList_Append(garbage
, op
) < 0)
715 gc_list_merge(finalizers
, old
);
719 /* Break reference cycles by clearing the containers involved. This is
720 * tricky business as the lists can be changing and we don't know which
721 * objects may be freed. It is possible I screwed something up here.
724 delete_garbage(PyGC_Head
*collectable
, PyGC_Head
*old
)
728 while (!gc_list_is_empty(collectable
)) {
729 PyGC_Head
*gc
= collectable
->gc
.gc_next
;
730 PyObject
*op
= FROM_GC(gc
);
732 assert(IS_TENTATIVELY_UNREACHABLE(op
));
733 if (debug
& DEBUG_SAVEALL
) {
734 PyList_Append(garbage
, op
);
737 if ((clear
= Py_TYPE(op
)->tp_clear
) != NULL
) {
743 if (collectable
->gc
.gc_next
== gc
) {
744 /* object is still alive, move it, it may die later */
745 gc_list_move(gc
, old
);
746 gc
->gc
.gc_refs
= GC_REACHABLE
;
/* Clear all free lists.
 * All free lists are cleared during the collection of the highest generation.
 * Allocated items in the free list may keep a pymalloc arena occupied.
 * Clearing the free lists may give back memory to the OS earlier.
 */
static void
clear_freelists(void)
{
    /* Return values (counts of freed items) are deliberately ignored. */
    (void)PyMethod_ClearFreeList();
    (void)PyFrame_ClearFreeList();
    (void)PyCFunction_ClearFreeList();
    (void)PyTuple_ClearFreeList();
    (void)PyUnicode_ClearFreeList();
    (void)PyFloat_ClearFreeList();
}
772 PyObject
*f
= PyObject_CallMethod(tmod
, "time", NULL
);
777 if (PyFloat_Check(f
))
778 result
= PyFloat_AsDouble(f
);
785 /* This is the main function. Read this to understand how the
786 * collection process works. */
788 collect(int generation
)
791 Py_ssize_t m
= 0; /* # objects collected */
792 Py_ssize_t n
= 0; /* # unreachable objects that couldn't be collected */
793 PyGC_Head
*young
; /* the generation we are examining */
794 PyGC_Head
*old
; /* next older generation */
795 PyGC_Head unreachable
; /* non-problematic unreachable trash */
796 PyGC_Head finalizers
; /* objects with, & reachable from, __del__ */
800 if (delstr
== NULL
) {
801 delstr
= PyUnicode_InternFromString("__del__");
803 Py_FatalError("gc couldn't allocate \"__del__\"");
806 if (debug
& DEBUG_STATS
) {
807 PySys_WriteStderr("gc: collecting generation %d...\n",
809 PySys_WriteStderr("gc: objects in each generation:");
810 for (i
= 0; i
< NUM_GENERATIONS
; i
++)
811 PySys_WriteStderr(" %" PY_FORMAT_SIZE_T
"d",
812 gc_list_size(GEN_HEAD(i
)));
814 PySys_WriteStderr("\n");
817 /* update collection and allocation counters */
818 if (generation
+1 < NUM_GENERATIONS
)
819 generations
[generation
+1].count
+= 1;
820 for (i
= 0; i
<= generation
; i
++)
821 generations
[i
].count
= 0;
823 /* merge younger generations with one we are currently collecting */
824 for (i
= 0; i
< generation
; i
++) {
825 gc_list_merge(GEN_HEAD(i
), GEN_HEAD(generation
));
828 /* handy references */
829 young
= GEN_HEAD(generation
);
830 if (generation
< NUM_GENERATIONS
-1)
831 old
= GEN_HEAD(generation
+1);
835 /* Using ob_refcnt and gc_refs, calculate which objects in the
836 * container set are reachable from outside the set (i.e., have a
837 * refcount greater than 0 when all the references within the
838 * set are taken into account).
841 subtract_refs(young
);
843 /* Leave everything reachable from outside young in young, and move
844 * everything else (in young) to unreachable.
845 * NOTE: This used to move the reachable objects into a reachable
846 * set instead. But most things usually turn out to be reachable,
847 * so it's more efficient to move the unreachable things.
849 gc_list_init(&unreachable
);
850 move_unreachable(young
, &unreachable
);
852 /* Move reachable objects to next generation. */
854 if (generation
== NUM_GENERATIONS
- 2) {
855 long_lived_pending
+= gc_list_size(young
);
857 gc_list_merge(young
, old
);
860 long_lived_pending
= 0;
861 long_lived_total
= gc_list_size(young
);
864 /* All objects in unreachable are trash, but objects reachable from
865 * finalizers can't safely be deleted. Python programmers should take
866 * care not to create such things. For Python, finalizers means
867 * instance objects with __del__ methods. Weakrefs with callbacks
868 * can also call arbitrary Python code but they will be dealt with by
871 gc_list_init(&finalizers
);
872 move_finalizers(&unreachable
, &finalizers
);
873 /* finalizers contains the unreachable objects with a finalizer;
874 * unreachable objects reachable *from* those are also uncollectable,
875 * and we move those into the finalizers list too.
877 move_finalizer_reachable(&finalizers
);
879 /* Collect statistics on collectable objects found and print
880 * debugging information.
882 for (gc
= unreachable
.gc
.gc_next
; gc
!= &unreachable
;
883 gc
= gc
->gc
.gc_next
) {
885 if (debug
& DEBUG_COLLECTABLE
) {
886 debug_cycle("collectable", FROM_GC(gc
));
890 /* Clear weakrefs and invoke callbacks as necessary. */
891 m
+= handle_weakrefs(&unreachable
, old
);
893 /* Call tp_clear on objects in the unreachable set. This will cause
894 * the reference cycles to be broken. It may also cause some objects
895 * in finalizers to be freed.
897 delete_garbage(&unreachable
, old
);
899 /* Collect statistics on uncollectable objects found and print
900 * debugging information. */
901 for (gc
= finalizers
.gc
.gc_next
;
903 gc
= gc
->gc
.gc_next
) {
905 if (debug
& DEBUG_UNCOLLECTABLE
)
906 debug_cycle("uncollectable", FROM_GC(gc
));
908 if (debug
& DEBUG_STATS
) {
909 double t2
= get_time();
910 if (m
== 0 && n
== 0)
911 PySys_WriteStderr("gc: done");
915 "%" PY_FORMAT_SIZE_T
"d unreachable, "
916 "%" PY_FORMAT_SIZE_T
"d uncollectable",
919 PySys_WriteStderr(", %.4fs elapsed", t2
-t1
);
921 PySys_WriteStderr(".\n");
924 /* Append instances in the uncollectable set to a Python
925 * reachable list of garbage. The programmer has to deal with
926 * this if they insist on creating this type of structure.
928 (void)handle_finalizers(&finalizers
, old
);
930 /* Clear free list only during the collection of the highest
932 if (generation
== NUM_GENERATIONS
-1) {
936 if (PyErr_Occurred()) {
938 gc_str
= PyUnicode_FromString("garbage collection");
939 PyErr_WriteUnraisable(gc_str
);
940 Py_FatalError("unexpected exception during garbage collection");
946 collect_generations(void)
951 /* Find the oldest generation (highest numbered) where the count
952 * exceeds the threshold. Objects in the that generation and
953 * generations younger than it will be collected. */
954 for (i
= NUM_GENERATIONS
-1; i
>= 0; i
--) {
955 if (generations
[i
].count
> generations
[i
].threshold
) {
956 /* Avoid quadratic performance degradation in number
957 of tracked objects. See comments at the beginning
958 of this file, and issue #4074.
960 if (i
== NUM_GENERATIONS
- 1
961 && long_lived_pending
< long_lived_total
/ 4)
970 PyDoc_STRVAR(gc_enable__doc__
,
973 "Enable automatic garbage collection.\n");
976 gc_enable(PyObject
*self
, PyObject
*noargs
)
983 PyDoc_STRVAR(gc_disable__doc__
,
984 "disable() -> None\n"
986 "Disable automatic garbage collection.\n");
989 gc_disable(PyObject
*self
, PyObject
*noargs
)
996 PyDoc_STRVAR(gc_isenabled__doc__
,
997 "isenabled() -> status\n"
999 "Returns true if automatic garbage collection is enabled.\n");
1002 gc_isenabled(PyObject
*self
, PyObject
*noargs
)
1004 return PyBool_FromLong((long)enabled
);
1007 PyDoc_STRVAR(gc_collect__doc__
,
1008 "collect([generation]) -> n\n"
1010 "With no arguments, run a full collection. The optional argument\n"
1011 "may be an integer specifying which generation to collect. A ValueError\n"
1012 "is raised if the generation number is invalid.\n\n"
1013 "The number of unreachable objects is returned.\n");
1016 gc_collect(PyObject
*self
, PyObject
*args
, PyObject
*kws
)
1018 static char *keywords
[] = {"generation", NULL
};
1019 int genarg
= NUM_GENERATIONS
- 1;
1022 if (!PyArg_ParseTupleAndKeywords(args
, kws
, "|i", keywords
, &genarg
))
1025 else if (genarg
< 0 || genarg
>= NUM_GENERATIONS
) {
1026 PyErr_SetString(PyExc_ValueError
, "invalid generation");
1031 n
= 0; /* already collecting, don't do anything */
1034 n
= collect(genarg
);
1038 return PyLong_FromSsize_t(n
);
1041 PyDoc_STRVAR(gc_set_debug__doc__
,
1042 "set_debug(flags) -> None\n"
1044 "Set the garbage collection debugging flags. Debugging information is\n"
1045 "written to sys.stderr.\n"
1047 "flags is an integer and can have the following bits turned on:\n"
1049 " DEBUG_STATS - Print statistics during collection.\n"
1050 " DEBUG_COLLECTABLE - Print collectable objects found.\n"
1051 " DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
1052 " DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
1053 " DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");
1056 gc_set_debug(PyObject
*self
, PyObject
*args
)
1058 if (!PyArg_ParseTuple(args
, "i:set_debug", &debug
))
1065 PyDoc_STRVAR(gc_get_debug__doc__
,
1066 "get_debug() -> flags\n"
1068 "Get the garbage collection debugging flags.\n");
1071 gc_get_debug(PyObject
*self
, PyObject
*noargs
)
1073 return Py_BuildValue("i", debug
);
1076 PyDoc_STRVAR(gc_set_thresh__doc__
,
1077 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1079 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1083 gc_set_thresh(PyObject
*self
, PyObject
*args
)
1086 if (!PyArg_ParseTuple(args
, "i|ii:set_threshold",
1087 &generations
[0].threshold
,
1088 &generations
[1].threshold
,
1089 &generations
[2].threshold
))
1091 for (i
= 2; i
< NUM_GENERATIONS
; i
++) {
1092 /* generations higher than 2 get the same threshold */
1093 generations
[i
].threshold
= generations
[2].threshold
;
1100 PyDoc_STRVAR(gc_get_thresh__doc__
,
1101 "get_threshold() -> (threshold0, threshold1, threshold2)\n"
1103 "Return the current collection thresholds\n");
1106 gc_get_thresh(PyObject
*self
, PyObject
*noargs
)
1108 return Py_BuildValue("(iii)",
1109 generations
[0].threshold
,
1110 generations
[1].threshold
,
1111 generations
[2].threshold
);
1114 PyDoc_STRVAR(gc_get_count__doc__
,
1115 "get_count() -> (count0, count1, count2)\n"
1117 "Return the current collection counts\n");
1120 gc_get_count(PyObject
*self
, PyObject
*noargs
)
1122 return Py_BuildValue("(iii)",
1123 generations
[0].count
,
1124 generations
[1].count
,
1125 generations
[2].count
);
1129 referrersvisit(PyObject
* obj
, PyObject
*objs
)
1132 for (i
= 0; i
< PyTuple_GET_SIZE(objs
); i
++)
1133 if (PyTuple_GET_ITEM(objs
, i
) == obj
)
1139 gc_referrers_for(PyObject
*objs
, PyGC_Head
*list
, PyObject
*resultlist
)
1143 traverseproc traverse
;
1144 for (gc
= list
->gc
.gc_next
; gc
!= list
; gc
= gc
->gc
.gc_next
) {
1146 traverse
= Py_TYPE(obj
)->tp_traverse
;
1147 if (obj
== objs
|| obj
== resultlist
)
1149 if (traverse(obj
, (visitproc
)referrersvisit
, objs
)) {
1150 if (PyList_Append(resultlist
, obj
) < 0)
1151 return 0; /* error */
1154 return 1; /* no error */
1157 PyDoc_STRVAR(gc_get_referrers__doc__
,
1158 "get_referrers(*objs) -> list\n\
1159 Return the list of objects that directly refer to any of objs.");
1162 gc_get_referrers(PyObject
*self
, PyObject
*args
)
1165 PyObject
*result
= PyList_New(0);
1166 if (!result
) return NULL
;
1168 for (i
= 0; i
< NUM_GENERATIONS
; i
++) {
1169 if (!(gc_referrers_for(args
, GEN_HEAD(i
), result
))) {
1177 /* Append obj to list; return true if error (out of memory), false if OK. */
1179 referentsvisit(PyObject
*obj
, PyObject
*list
)
1181 return PyList_Append(list
, obj
) < 0;
1184 PyDoc_STRVAR(gc_get_referents__doc__
,
1185 "get_referents(*objs) -> list\n\
1186 Return the list of objects that are directly referred to by objs.");
1189 gc_get_referents(PyObject
*self
, PyObject
*args
)
1192 PyObject
*result
= PyList_New(0);
1197 for (i
= 0; i
< PyTuple_GET_SIZE(args
); i
++) {
1198 traverseproc traverse
;
1199 PyObject
*obj
= PyTuple_GET_ITEM(args
, i
);
1201 if (! PyObject_IS_GC(obj
))
1203 traverse
= Py_TYPE(obj
)->tp_traverse
;
1206 if (traverse(obj
, (visitproc
)referentsvisit
, result
)) {
1214 PyDoc_STRVAR(gc_get_objects__doc__
,
1215 "get_objects() -> [...]\n"
1217 "Return a list of objects tracked by the collector (excluding the list\n"
1221 gc_get_objects(PyObject
*self
, PyObject
*noargs
)
1226 result
= PyList_New(0);
1229 for (i
= 0; i
< NUM_GENERATIONS
; i
++) {
1230 if (append_objects(result
, GEN_HEAD(i
))) {
1238 PyDoc_STRVAR(gc_is_tracked__doc__
,
1239 "is_tracked(obj) -> bool\n"
1241 "Returns true if the object is tracked by the garbage collector.\n"
1242 "Simple atomic objects will return false.\n"
1246 gc_is_tracked(PyObject
*self
, PyObject
*obj
)
1250 if (PyObject_IS_GC(obj
) && IS_TRACKED(obj
))
1259 PyDoc_STRVAR(gc__doc__
,
1260 "This module provides access to the garbage collector for reference cycles.\n"
1262 "enable() -- Enable automatic garbage collection.\n"
1263 "disable() -- Disable automatic garbage collection.\n"
1264 "isenabled() -- Returns true if automatic collection is enabled.\n"
1265 "collect() -- Do a full collection right now.\n"
1266 "get_count() -- Return the current collection counts.\n"
1267 "set_debug() -- Set debugging flags.\n"
1268 "get_debug() -- Get debugging flags.\n"
1269 "set_threshold() -- Set the collection thresholds.\n"
1270 "get_threshold() -- Return the current the collection thresholds.\n"
1271 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1272 "is_tracked() -- Returns true if a given object is tracked.\n"
1273 "get_referrers() -- Return the list of objects that refer to an object.\n"
1274 "get_referents() -- Return the list of objects that an object refers to.\n");
1276 static PyMethodDef GcMethods
[] = {
1277 {"enable", gc_enable
, METH_NOARGS
, gc_enable__doc__
},
1278 {"disable", gc_disable
, METH_NOARGS
, gc_disable__doc__
},
1279 {"isenabled", gc_isenabled
, METH_NOARGS
, gc_isenabled__doc__
},
1280 {"set_debug", gc_set_debug
, METH_VARARGS
, gc_set_debug__doc__
},
1281 {"get_debug", gc_get_debug
, METH_NOARGS
, gc_get_debug__doc__
},
1282 {"get_count", gc_get_count
, METH_NOARGS
, gc_get_count__doc__
},
1283 {"set_threshold", gc_set_thresh
, METH_VARARGS
, gc_set_thresh__doc__
},
1284 {"get_threshold", gc_get_thresh
, METH_NOARGS
, gc_get_thresh__doc__
},
1285 {"collect", (PyCFunction
)gc_collect
,
1286 METH_VARARGS
| METH_KEYWORDS
, gc_collect__doc__
},
1287 {"get_objects", gc_get_objects
,METH_NOARGS
, gc_get_objects__doc__
},
1288 {"is_tracked", gc_is_tracked
, METH_O
, gc_is_tracked__doc__
},
1289 {"get_referrers", gc_get_referrers
, METH_VARARGS
,
1290 gc_get_referrers__doc__
},
1291 {"get_referents", gc_get_referents
, METH_VARARGS
,
1292 gc_get_referents__doc__
},
1293 {NULL
, NULL
} /* Sentinel */
1296 static struct PyModuleDef gcmodule
= {
1297 PyModuleDef_HEAD_INIT
,
1314 m
= PyModule_Create(&gcmodule
);
1319 if (garbage
== NULL
) {
1320 garbage
= PyList_New(0);
1321 if (garbage
== NULL
)
1325 if (PyModule_AddObject(m
, "garbage", garbage
) < 0)
1328 /* Importing can't be done in collect() because collect()
1329 * can be called via PyGC_Collect() in Py_Finalize().
1330 * This wouldn't be a problem, except that <initialized> is
1331 * reset to 0 before calling collect which trips up
1332 * the import and triggers an assertion.
1335 tmod
= PyImport_ImportModuleNoBlock("time");
1340 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
1341 ADD_INT(DEBUG_STATS
);
1342 ADD_INT(DEBUG_COLLECTABLE
);
1343 ADD_INT(DEBUG_UNCOLLECTABLE
);
1344 ADD_INT(DEBUG_SAVEALL
);
1345 ADD_INT(DEBUG_LEAK
);
1350 /* API to invoke gc.collect() from C */
1357 n
= 0; /* already collecting, don't do anything */
1360 n
= collect(NUM_GENERATIONS
- 1);
1369 _PyGC_Dump(PyGC_Head
*g
)
1371 _PyObject_Dump(FROM_GC(g
));
1374 /* extension modules might be compiled with GC support so these
1375 functions must always be available */
1377 #undef PyObject_GC_Track
1378 #undef PyObject_GC_UnTrack
1379 #undef PyObject_GC_Del
1380 #undef _PyObject_GC_Malloc
/* Begin tracking `op`: link it into generation 0. */
void
PyObject_GC_Track(void *op)
{
    _PyObject_GC_TRACK(op);
}
1388 /* for binary compatibility with 2.2 */
1390 _PyObject_GC_Track(PyObject
*op
)
1392 PyObject_GC_Track(op
);
void
PyObject_GC_UnTrack(void *op)
{
    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (IS_TRACKED(op))
        _PyObject_GC_UNTRACK(op);
}
1405 /* for binary compatibility with 2.2 */
1407 _PyObject_GC_UnTrack(PyObject
*op
)
1409 PyObject_GC_UnTrack(op
);
1413 _PyObject_GC_Malloc(size_t basicsize
)
1417 if (basicsize
> PY_SSIZE_T_MAX
- sizeof(PyGC_Head
))
1418 return PyErr_NoMemory();
1419 g
= (PyGC_Head
*)PyObject_MALLOC(
1420 sizeof(PyGC_Head
) + basicsize
);
1422 return PyErr_NoMemory();
1423 g
->gc
.gc_refs
= GC_UNTRACKED
;
1424 generations
[0].count
++; /* number of allocated GC objects */
1425 if (generations
[0].count
> generations
[0].threshold
&&
1427 generations
[0].threshold
&&
1429 !PyErr_Occurred()) {
1431 collect_generations();
1439 _PyObject_GC_New(PyTypeObject
*tp
)
1441 PyObject
*op
= _PyObject_GC_Malloc(_PyObject_SIZE(tp
));
1443 op
= PyObject_INIT(op
, tp
);
1448 _PyObject_GC_NewVar(PyTypeObject
*tp
, Py_ssize_t nitems
)
1450 const size_t size
= _PyObject_VAR_SIZE(tp
, nitems
);
1451 PyVarObject
*op
= (PyVarObject
*) _PyObject_GC_Malloc(size
);
1453 op
= PyObject_INIT_VAR(op
, tp
, nitems
);
1458 _PyObject_GC_Resize(PyVarObject
*op
, Py_ssize_t nitems
)
1460 const size_t basicsize
= _PyObject_VAR_SIZE(Py_TYPE(op
), nitems
);
1461 PyGC_Head
*g
= AS_GC(op
);
1462 if (basicsize
> PY_SSIZE_T_MAX
- sizeof(PyGC_Head
))
1463 return (PyVarObject
*)PyErr_NoMemory();
1464 g
= (PyGC_Head
*)PyObject_REALLOC(g
, sizeof(PyGC_Head
) + basicsize
);
1466 return (PyVarObject
*)PyErr_NoMemory();
1467 op
= (PyVarObject
*) FROM_GC(g
);
1468 Py_SIZE(op
) = nitems
;
1473 PyObject_GC_Del(void *op
)
1475 PyGC_Head
*g
= AS_GC(op
);
1478 if (generations
[0].count
> 0) {
1479 generations
[0].count
--;
1484 /* for binary compatibility with 2.2 */
1485 #undef _PyObject_GC_Del
1487 _PyObject_GC_Del(PyObject
*op
)
1489 PyObject_GC_Del(op
);