/*

  Reference Cycle Garbage Collection
  ==================================

  Neil Schemenauer <nas@arctrix.com>

  Based on a post on the python-dev list.  Ideas from Guido van Rossum,
  Eric Tiedemann, and various others.

  http://www.arctrix.com/nas/python/gc/
  http://www.python.org/pipermail/python-dev/2000-March/003869.html
  http://www.python.org/pipermail/python-dev/2000-March/004010.html
  http://www.python.org/pipermail/python-dev/2000-March/004022.html

  For a high-level view of the collection process, read the collect
  function.

*/
#include "Python.h"
#include "frameobject.h"        /* for PyFrame_ClearFreeList */
/* Get an object's GC head */
#define AS_GC(o) ((PyGC_Head *)(o)-1)

/* Get the object given the GC head */
#define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
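/* Illustrative layout sketch (editorial addition, not from the original
 * source): a GC-tracked object is allocated with its PyGC_Head immediately
 * before the PyObject proper, so AS_GC and FROM_GC are inverse pointer
 * adjustments over the same allocation:
 *
 *     +--------------+---------------------+
 *     |  PyGC_Head   |  PyObject ...       |
 *     +--------------+---------------------+
 *     ^ AS_GC(op)    ^ op == FROM_GC(gc)
 */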
/*** Global GC state ***/

struct gc_generation {
    PyGC_Head head;
    int threshold;              /* collection threshold */
    int count;                  /* count of allocations or collections of
                                   younger generations */
};

#define NUM_GENERATIONS 3
#define GEN_HEAD(n) (&generations[n].head)
/* linked lists of container objects */
static struct gc_generation generations[NUM_GENERATIONS] = {
    /* PyGC_Head,                               threshold,      count */
    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
};
PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);

static int enabled = 1; /* automatic collection enabled? */

/* true if we are currently running the collector */
static int collecting = 0;

/* list of uncollectable objects */
static PyObject *garbage = NULL;

/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;

/* Python string used to look for __del__ attribute. */
static PyObject *delstr = NULL;
/* This is the number of objects that survived the last full collection.  It
   approximates the number of long-lived objects tracked by the GC.

   (By "full collection", we mean a collection of the oldest generation.)
*/
static Py_ssize_t long_lived_total = 0;

/* This is the number of objects that survived all "non-full" collections,
   and are waiting to undergo a full collection for the first time.
*/
static Py_ssize_t long_lived_pending = 0;
/*
   NOTE: about the counting of long-lived objects.

   To limit the cost of garbage collection, there are two strategies:
     - make each collection faster, e.g. by scanning fewer objects;
     - do fewer collections.
   This heuristic is about the latter strategy.

   In addition to the various configurable thresholds, we only trigger a
   full collection if the ratio
    long_lived_pending / long_lived_total
   is above a given value (hardwired to 25%).

   The reason is that, while "non-full" collections (i.e., collections of
   the young and middle generations) will always examine roughly the same
   number of objects -- determined by the aforementioned thresholds --,
   the cost of a full collection is proportional to the total number of
   long-lived objects, which is virtually unbounded.

   Indeed, it has been remarked that doing a full collection every
   <constant number> of object creations entails a dramatic performance
   degradation in workloads that consist of creating and storing lots of
   long-lived objects (e.g. building a large list of GC-tracked objects would
   show quadratic performance, instead of linear as expected: see issue #4074).

   Using the above ratio, instead, yields amortized linear performance in
   the total number of objects (the effect of which can be summarized
   thusly: "each full garbage collection is more and more costly as the
   number of objects grows, but we do fewer and fewer of them").

   This heuristic was suggested by Martin von Löwis on python-dev in
   June 2008.  His original analysis and proposal can be found at:
    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
*/
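/* A minimal sketch (editorial illustration, not part of this module) of the
 * gate described above; collect_generations() below applies the same test
 * inline.  The function name is hypothetical. */
#if 0
static int
should_collect_full(void)
{
    /* Full collection only when pending long-lived objects exceed 25%
     * of the long-lived total. */
    return long_lived_pending > long_lived_total / 4;
}
#endif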
/* set for debugging information */
#define DEBUG_STATS             (1<<0) /* print collection statistics */
#define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
#define DEBUG_UNCOLLECTABLE     (1<<2) /* print uncollectable objects */
#define DEBUG_INSTANCES         (1<<3) /* print instances */
#define DEBUG_OBJECTS           (1<<4) /* print other objects */
#define DEBUG_SAVEALL           (1<<5) /* save all garbage in gc.garbage */
#define DEBUG_LEAK              (DEBUG_COLLECTABLE | \
                                 DEBUG_UNCOLLECTABLE | \
                                 DEBUG_INSTANCES | \
                                 DEBUG_OBJECTS | \
                                 DEBUG_SAVEALL)
static int debug;
static PyObject *tmod = NULL;
/*--------------------------------------------------------------------------
gc_refs values.

Between collections, every gc'ed object has one of two gc_refs values:

GC_UNTRACKED
    The initial state; objects returned by PyObject_GC_Malloc are in this
    state.  The object doesn't live in any generation list, and its
    tp_traverse slot must not be called.

GC_REACHABLE
    The object lives in some generation list, and its tp_traverse is safe to
    call.  An object transitions to GC_REACHABLE when PyObject_GC_Track
    is called.

During a collection, gc_refs can temporarily take on other states:

>= 0
    At the start of a collection, update_refs() copies the true refcount
    to gc_refs, for each object in the generation being collected.
    subtract_refs() then adjusts gc_refs so that it equals the number of
    times an object is referenced directly from outside the generation
    being collected.
    gc_refs remains >= 0 throughout these steps.

GC_TENTATIVELY_UNREACHABLE
    move_unreachable() then moves objects not reachable (whether directly or
    indirectly) from outside the generation into an "unreachable" set.
    Objects that are found to be reachable have gc_refs set to GC_REACHABLE
    again.  Objects that are found to be unreachable have gc_refs set to
    GC_TENTATIVELY_UNREACHABLE.  It's "tentatively" because the pass doing
    this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
    transition back to GC_REACHABLE.

    Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
    for collection.  If it's decided not to collect such an object (e.g.,
    it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
----------------------------------------------------------------------------
*/
#define GC_UNTRACKED                    _PyGC_REFS_UNTRACKED
#define GC_REACHABLE                    _PyGC_REFS_REACHABLE
#define GC_TENTATIVELY_UNREACHABLE      _PyGC_REFS_TENTATIVELY_UNREACHABLE

#define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
    (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
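/* Illustrative walk-through (editorial, not compiled) of the state
 * transitions documented above; SomeType, young, and unreachable are
 * hypothetical here. */
#if 0
    PyObject *op = _PyObject_GC_New(&SomeType); /* gc_refs == GC_UNTRACKED */
    PyObject_GC_Track(op);                      /* gc_refs == GC_REACHABLE */
    /* Inside collect(), for the generation being collected: */
    update_refs(young);     /* gc_refs == ob_refcnt, so gc_refs >= 0 */
    subtract_refs(young);   /* gc_refs == count of external references */
    move_unreachable(young, &unreachable);      /* back to GC_REACHABLE, or
                                                   GC_TENTATIVELY_UNREACHABLE */
#endif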
/*** list functions ***/

static void
gc_list_init(PyGC_Head *list)
{
    list->gc.gc_prev = list;
    list->gc.gc_next = list;
}
static int
gc_list_is_empty(PyGC_Head *list)
{
    return (list->gc.gc_next == list);
}
#if 0
/* This became unused after gc_list_move() was introduced. */
/* Append `node` to `list`. */
static void
gc_list_append(PyGC_Head *node, PyGC_Head *list)
{
    node->gc.gc_next = list;
    node->gc.gc_prev = list->gc.gc_prev;
    node->gc.gc_prev->gc.gc_next = node;
    list->gc.gc_prev = node;
}
#endif
/* Remove `node` from the gc list it's currently in. */
static void
gc_list_remove(PyGC_Head *node)
{
    node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
    node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
    node->gc.gc_next = NULL; /* object is not currently tracked */
}
/* Move `node` from the gc list it's currently in (which is not explicitly
 * named here) to the end of `list`.  This is semantically the same as
 * gc_list_remove(node) followed by gc_list_append(node, list).
 */
static void
gc_list_move(PyGC_Head *node, PyGC_Head *list)
{
    PyGC_Head *new_prev;
    PyGC_Head *current_prev = node->gc.gc_prev;
    PyGC_Head *current_next = node->gc.gc_next;
    /* Unlink from current list. */
    current_prev->gc.gc_next = current_next;
    current_next->gc.gc_prev = current_prev;
    /* Relink at end of new list. */
    new_prev = node->gc.gc_prev = list->gc.gc_prev;
    new_prev->gc.gc_next = list->gc.gc_prev = node;
    node->gc.gc_next = list;
}
/* append list `from` onto list `to`; `from` becomes an empty list */
static void
gc_list_merge(PyGC_Head *from, PyGC_Head *to)
{
    PyGC_Head *tail;
    assert(from != to);
    if (!gc_list_is_empty(from)) {
        tail = to->gc.gc_prev;
        tail->gc.gc_next = from->gc.gc_next;
        tail->gc.gc_next->gc.gc_prev = tail;
        to->gc.gc_prev = from->gc.gc_prev;
        to->gc.gc_prev->gc.gc_next = to;
    }
    gc_list_init(from);
}
static Py_ssize_t
gc_list_size(PyGC_Head *list)
{
    PyGC_Head *gc;
    Py_ssize_t n = 0;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        n++;
    }
    return n;
}
/* Append objects in a GC list to a Python list.
 * Return 0 if all OK, < 0 if error (out of memory for list).
 */
static int
append_objects(PyObject *py_list, PyGC_Head *gc_list)
{
    PyGC_Head *gc;
    for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);
        if (op != py_list) {
            if (PyList_Append(py_list, op)) {
                return -1; /* exception */
            }
        }
    }
    return 0;
}
/*** end of list stuff ***/
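/* Usage sketch (editorial illustration): how collect() below composes the
 * list primitives above.  `node` and `old` are hypothetical. */
#if 0
    PyGC_Head unreachable;
    gc_list_init(&unreachable);         /* empty circular list */
    gc_list_move(node, &unreachable);   /* unlink node, append to unreachable */
    gc_list_merge(&unreachable, old);   /* splice everything onto old;
                                           unreachable becomes empty */
#endif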
/* Set all gc_refs = ob_refcnt.  After this, gc_refs is > 0 for all objects
 * in containers, and is GC_REACHABLE for all tracked gc objects not in
 * containers.
 */
static void
update_refs(PyGC_Head *containers)
{
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        assert(gc->gc.gc_refs == GC_REACHABLE);
        gc->gc.gc_refs = Py_REFCNT(FROM_GC(gc));
        /* Python's cyclic gc should never see an incoming refcount
         * of 0:  if something decref'ed to 0, it should have been
         * deallocated immediately at that time.
         * Possible cause (if the assert triggers):  a tp_dealloc
         * routine left a gc-aware object tracked during its teardown
         * phase, and did something -- or allowed something to happen --
         * that called back into Python.  gc can trigger then, and may
         * see the still-tracked dying object.  Before this assert
         * was added, such mistakes went on to allow gc to try to
         * delete the object again.  In a debug build, that caused
         * a mysterious segfault, when _Py_ForgetReference tried
         * to remove the object from the doubly-linked list of all
         * objects a second time.  In a release build, an actual
         * double deallocation occurred, which leads to corruption
         * of the allocator's internal bookkeeping pointers.  That's
         * so serious that maybe this should be a release-build
         * check instead of an assert?
         */
        assert(gc->gc.gc_refs != 0);
    }
}
/* A traversal callback for subtract_refs. */
static int
visit_decref(PyObject *op, void *data)
{
    assert(op != NULL);
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        /* We're only interested in gc_refs for objects in the
         * generation being collected, which can be recognized
         * because only they have positive gc_refs.
         */
        assert(gc->gc.gc_refs != 0); /* else refcount was too small */
        if (gc->gc.gc_refs > 0)
            gc->gc.gc_refs--;
    }
    return 0;
}
/* Subtract internal references from gc_refs.  After this, gc_refs is >= 0
 * for all objects in containers, and is GC_REACHABLE for all tracked gc
 * objects not in containers.  The ones with gc_refs > 0 are directly
 * reachable from outside containers, and so can't be collected.
 */
static void
subtract_refs(PyGC_Head *containers)
{
    traverseproc traverse;
    PyGC_Head *gc = containers->gc.gc_next;
    for (; gc != containers; gc = gc->gc.gc_next) {
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_decref,
                        NULL);
    }
}
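/* Worked example (editorial illustration): suppose objects A and B form a
 * cycle (A -> B -> A) and one local variable outside the generation also
 * refers to A.  After update_refs(), gc_refs(A) == 2 and gc_refs(B) == 1.
 * subtract_refs() then removes the intra-generation edges: traversing A
 * drops gc_refs(B) to 0, traversing B drops gc_refs(A) to 1.  The surviving
 * gc_refs(A) == 1 is exactly the external reference.  Without that local
 * variable, both would reach 0 and move_unreachable() below would find the
 * whole cycle unreachable. */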
/* A traversal callback for move_unreachable. */
static int
visit_reachable(PyObject *op, PyGC_Head *reachable)
{
    if (PyObject_IS_GC(op)) {
        PyGC_Head *gc = AS_GC(op);
        const Py_ssize_t gc_refs = gc->gc.gc_refs;

        if (gc_refs == 0) {
            /* This is in move_unreachable's 'young' list, but
             * the traversal hasn't yet gotten to it.  All
             * we need to do is tell move_unreachable that it's
             * reachable.
             */
            gc->gc.gc_refs = 1;
        }
        else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
            /* This had gc_refs = 0 when move_unreachable got
             * to it, but turns out it's reachable after all.
             * Move it back to move_unreachable's 'young' list,
             * and move_unreachable will eventually get to it
             * again.
             */
            gc_list_move(gc, reachable);
            gc->gc.gc_refs = 1;
        }
        /* Else there's nothing to do.
         * If gc_refs > 0, it must be in move_unreachable's 'young'
         * list, and move_unreachable will eventually get to it.
         * If gc_refs == GC_REACHABLE, it's either in some other
         * generation so we don't care about it, or move_unreachable
         * already dealt with it.
         * If gc_refs == GC_UNTRACKED, it must be ignored.
         */
        else {
            assert(gc_refs > 0
                   || gc_refs == GC_REACHABLE
                   || gc_refs == GC_UNTRACKED);
        }
    }
    return 0;
}
/* Move the unreachable objects from young to unreachable.  After this,
 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All tracked
 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
 * All objects in young after this are directly or indirectly reachable
 * from outside the original young; and all objects in unreachable are
 * not.
 */
static void
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
{
    PyGC_Head *gc = young->gc.gc_next;

    /* Invariants:  all objects "to the left" of us in young have gc_refs
     * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
     * from outside the young list as it was at entry.  All other objects
     * from the original young "to the left" of us are in unreachable now,
     * and have gc_refs = GC_TENTATIVELY_UNREACHABLE.  All objects to the
     * left of us in 'young' now have been scanned, and no objects here
     * or to the right have been scanned yet.
     */

    while (gc != young) {
        PyGC_Head *next;

        if (gc->gc.gc_refs) {
            /* gc is definitely reachable from outside the
             * original 'young'.  Mark it as such, and traverse
             * its pointers to find any other objects that may
             * be directly reachable from it.  Note that the
             * call to tp_traverse may append objects to young,
             * so we have to wait until it returns to determine
             * the next object to visit.
             */
            PyObject *op = FROM_GC(gc);
            traverseproc traverse = Py_TYPE(op)->tp_traverse;
            assert(gc->gc.gc_refs > 0);
            gc->gc.gc_refs = GC_REACHABLE;
            (void) traverse(op,
                            (visitproc)visit_reachable,
                            (void *)young);
            next = gc->gc.gc_next;
        }
        else {
            /* This *may* be unreachable.  To make progress,
             * assume it is.  gc isn't directly reachable from
             * any object we've already traversed, but may be
             * reachable from an object we haven't gotten to yet.
             * visit_reachable will eventually move gc back into
             * young if that's so, and we'll see it again.
             */
            next = gc->gc.gc_next;
            gc_list_move(gc, unreachable);
            gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
        }
        gc = next;
    }
}
/* Return true if object has a finalization method.
 * CAUTION:  An instance of an old-style class has to be checked for a
 * __del__ method, and earlier versions of this used to call PyObject_HasAttr,
 * which in turn could call the class's __getattr__ hook (if any).  That
 * could invoke arbitrary Python code, mutating the object graph in arbitrary
 * ways, and that was the source of some excruciatingly subtle bugs.
 */
static int
has_finalizer(PyObject *op)
{
    if (PyInstance_Check(op)) {
        assert(delstr != NULL);
        return _PyInstance_Lookup(op, delstr) != NULL;
    }
    else if (PyType_HasFeature(op->ob_type, Py_TPFLAGS_HEAPTYPE))
        return op->ob_type->tp_del != NULL;
    else if (PyGen_CheckExact(op))
        return PyGen_NeedsFinalizing((PyGenObject *)op);
    else
        return 0;
}
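/* Illustrative note (editorial): this predicate is what makes some cycles
 * uncollectable.  If two objects whose type defines __del__ reference each
 * other, has_finalizer() returns true for both, move_finalizers() below
 * routes them into `finalizers`, and handle_finalizers() ultimately parks
 * them in gc.garbage instead of freeing them. */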
/* Move the objects in unreachable with __del__ methods into `finalizers`.
 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
 */
static void
move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
{
    PyGC_Head *gc;
    PyGC_Head *next;

    /* March over unreachable.  Move objects with finalizers into
     * `finalizers`.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (has_finalizer(op)) {
            gc_list_move(gc, finalizers);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
}
/* A traversal callback for move_finalizer_reachable. */
static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
    if (PyObject_IS_GC(op)) {
        if (IS_TENTATIVELY_UNREACHABLE(op)) {
            PyGC_Head *gc = AS_GC(op);
            gc_list_move(gc, tolist);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
    return 0;
}
/* Move objects that are reachable from finalizers, from the unreachable set
 * into the finalizers set.
 */
static void
move_finalizer_reachable(PyGC_Head *finalizers)
{
    traverseproc traverse;
    PyGC_Head *gc = finalizers->gc.gc_next;
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        /* Note that the finalizers list may grow during this. */
        traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
        (void) traverse(FROM_GC(gc),
                        (visitproc)visit_move,
                        (void *)finalizers);
    }
}
/* Clear all weakrefs to unreachable objects, and if such a weakref has a
 * callback, invoke it if necessary.  Note that it's possible for such
 * weakrefs to be outside the unreachable set -- indeed, those are precisely
 * the weakrefs whose callbacks must be invoked.  See gc_weakref.txt for
 * overview & some details.  Some weakrefs with callbacks may be reclaimed
 * directly by this routine; the number reclaimed is the return value.  Other
 * weakrefs with callbacks may be moved into the `old` generation.  Objects
 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
 * unreachable are left at GC_TENTATIVELY_UNREACHABLE.  When this returns,
 * no object in `unreachable` is weakly referenced anymore.
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
    PyGC_Head *gc;
    PyObject *op;               /* generally FROM_GC(gc) */
    PyWeakReference *wr;        /* generally a cast of op */
    PyGC_Head wrcb_to_call;     /* weakrefs with callbacks to call */
    PyGC_Head *next;
    int num_freed = 0;

    gc_list_init(&wrcb_to_call);

    /* Clear all weakrefs to the objects in unreachable.  If such a weakref
     * also has a callback, move it into `wrcb_to_call` if the callback
     * needs to be invoked.  Note that we cannot invoke any callbacks until
     * all weakrefs to unreachable objects are cleared, lest the callback
     * resurrect an unreachable object via a still-active weakref.  We
     * make another pass over wrcb_to_call, invoking callbacks, after this
     * pass completes.
     */
    for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
        PyWeakReference **wrlist;

        op = FROM_GC(gc);
        assert(IS_TENTATIVELY_UNREACHABLE(op));
        next = gc->gc.gc_next;

        if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
            continue;

        /* It supports weakrefs.  Does it have any? */
        wrlist = (PyWeakReference **)
                     PyObject_GET_WEAKREFS_LISTPTR(op);

        /* `op` may have some weakrefs.  March over the list, clear
         * all the weakrefs, and move the weakrefs with callbacks
         * that must be called into wrcb_to_call.
         */
        for (wr = *wrlist; wr != NULL; wr = *wrlist) {
            PyGC_Head *wrasgc;                  /* AS_GC(wr) */

            /* _PyWeakref_ClearRef clears the weakref but leaves
             * the callback pointer intact.  Obscure:  it also
             * changes *wrlist.
             */
            assert(wr->wr_object == op);
            _PyWeakref_ClearRef(wr);
            assert(wr->wr_object == Py_None);
            if (wr->wr_callback == NULL)
                continue;                       /* no callback */

    /* Headache time.  `op` is going away, and is weakly referenced by
     * `wr`, which has a callback.  Should the callback be invoked?  If wr
     * is also trash, no:
     *
     * 1. There's no need to call it.  The object and the weakref are
     *    both going away, so it's legitimate to pretend the weakref is
     *    going away first.  The user has to ensure a weakref outlives its
     *    referent if they want a guarantee that the wr callback will get
     *    invoked.
     *
     * 2. It may be catastrophic to call it.  If the callback is also in
     *    cyclic trash (CT), then although the CT is unreachable from
     *    outside the current generation, CT may be reachable from the
     *    callback.  Then the callback could resurrect insane objects.
     *
     * Since the callback is never needed and may be unsafe in this case,
     * wr is simply left in the unreachable set.  Note that because we
     * already called _PyWeakref_ClearRef(wr), its callback will never
     * trigger.
     *
     * OTOH, if wr isn't part of CT, we should invoke the callback:  the
     * weakref outlived the trash.  Note that since wr isn't CT in this
     * case, its callback can't be CT either -- wr acted as an external
     * root to this generation, and therefore its callback did too.  So
     * nothing in CT is reachable from the callback either, so it's hard
     * to imagine how calling it later could create a problem for us.  wr
     * is moved to wrcb_to_call in this case.
     */
            if (IS_TENTATIVELY_UNREACHABLE(wr))
                continue;
            assert(IS_REACHABLE(wr));

            /* Create a new reference so that wr can't go away
             * before we can process it again.
             */
            Py_INCREF(wr);

            /* Move wr to wrcb_to_call, for the next pass. */
            wrasgc = AS_GC(wr);
            assert(wrasgc != next); /* wrasgc is reachable, but
                                       next isn't, so they can't
                                       be the same */
            gc_list_move(wrasgc, &wrcb_to_call);
        }
    }

    /* Invoke the callbacks we decided to honor.  It's safe to invoke them
     * because they can't reference unreachable objects.
     */
    while (! gc_list_is_empty(&wrcb_to_call)) {
        PyObject *temp;
        PyObject *callback;

        gc = wrcb_to_call.gc.gc_next;
        op = FROM_GC(gc);
        assert(IS_REACHABLE(op));
        assert(PyWeakref_Check(op));
        wr = (PyWeakReference *)op;
        callback = wr->wr_callback;
        assert(callback != NULL);

        /* copy-paste of weakrefobject.c's handle_callback() */
        temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
        if (temp == NULL)
            PyErr_WriteUnraisable(callback);
        else
            Py_DECREF(temp);

        /* Give up the reference we created in the first pass.  When
         * op's refcount hits 0 (which it may or may not do right now),
         * op's tp_dealloc will decref op->wr_callback too.  Note
         * that the refcount probably will hit 0 now, and because this
         * weakref was reachable to begin with, gc didn't already
         * add it to its count of freed objects.  Example:  a reachable
         * weak value dict maps some key to this reachable weakref.
         * The callback removes this key->weakref mapping from the
         * dict, leaving no other references to the weakref (excepting
         * ours).
         */
        Py_DECREF(op);
        if (wrcb_to_call.gc.gc_next == gc) {
            /* object is still alive -- move it */
            gc_list_move(gc, old);
        }
        else
            ++num_freed;
    }

    return num_freed;
}
static void
debug_instance(char *msg, PyInstanceObject *inst)
{
    char *cname;
    /* simple version of instance_repr */
    PyObject *classname = inst->in_class->cl_name;
    if (classname != NULL && PyString_Check(classname))
        cname = PyString_AsString(classname);
    else
        cname = "?";
    PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
                      msg, cname, inst);
}
static void
debug_cycle(char *msg, PyObject *op)
{
    if ((debug & DEBUG_INSTANCES) && PyInstance_Check(op)) {
        debug_instance(msg, (PyInstanceObject *)op);
    }
    else if (debug & DEBUG_OBJECTS) {
        PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
                          msg, Py_TYPE(op)->tp_name, op);
    }
}
/* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
 * only from such cycles).
 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
 * garbage list (a Python list), else only the objects in finalizers with
 * __del__ methods are appended to garbage.  All objects in finalizers are
 * merged into the old list regardless.
 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
 * The finalizers list is made empty on a successful return.
 */
static int
handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
{
    PyGC_Head *gc = finalizers->gc.gc_next;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            Py_FatalError("gc couldn't create gc.garbage list");
    }
    for (; gc != finalizers; gc = gc->gc.gc_next) {
        PyObject *op = FROM_GC(gc);

        if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) {
            if (PyList_Append(garbage, op) < 0)
                return -1;
        }
    }

    gc_list_merge(finalizers, old);
    return 0;
}
/* Break reference cycles by clearing the containers involved.  This is
 * tricky business as the lists can be changing and we don't know which
 * objects may be freed.  It is possible I screwed something up here.
 */
static void
delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
{
    inquiry clear;

    while (!gc_list_is_empty(collectable)) {
        PyGC_Head *gc = collectable->gc.gc_next;
        PyObject *op = FROM_GC(gc);

        assert(IS_TENTATIVELY_UNREACHABLE(op));
        if (debug & DEBUG_SAVEALL) {
            PyList_Append(garbage, op);
        }
        else {
            if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
                Py_INCREF(op);
                clear(op);
                Py_DECREF(op);
            }
        }
        if (collectable->gc.gc_next == gc) {
            /* object is still alive, move it, it may die later */
            gc_list_move(gc, old);
            gc->gc.gc_refs = GC_REACHABLE;
        }
    }
}
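/* A minimal sketch (editorial illustration) of the tp_clear contract that
 * delete_garbage() relies on: drop the references that may participate in
 * cycles and return 0.  WrapperObject and its `item` member are
 * hypothetical. */
#if 0
static int
wrapper_clear(WrapperObject *self)
{
    Py_CLEAR(self->item);   /* breaks the cycle; safe if already NULL */
    return 0;
}
#endif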
/* Clear all free lists
 * All free lists are cleared during the collection of the highest generation.
 * Allocated items in the free list may keep a pymalloc arena occupied.
 * Clearing the free lists may give back memory to the OS earlier.
 */
static void
clear_freelists(void)
{
    (void)PyMethod_ClearFreeList();
    (void)PyFrame_ClearFreeList();
    (void)PyCFunction_ClearFreeList();
    (void)PyTuple_ClearFreeList();
    (void)PyUnicode_ClearFreeList();
    (void)PyInt_ClearFreeList();
    (void)PyFloat_ClearFreeList();
}
static double
get_time(void)
{
    double result = 0;
    if (tmod != NULL) {
        PyObject *f = PyObject_CallMethod(tmod, "time", NULL);
        if (f == NULL) {
            PyErr_Clear();
        }
        else {
            if (PyFloat_Check(f))
                result = PyFloat_AsDouble(f);
            Py_DECREF(f);
        }
    }
    return result;
}
/* This is the main function.  Read this to understand how the
 * collection process works. */
static Py_ssize_t
collect(int generation)
{
    int i;
    Py_ssize_t m = 0; /* # objects collected */
    Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
    PyGC_Head *young; /* the generation we are examining */
    PyGC_Head *old; /* next older generation */
    PyGC_Head unreachable; /* non-problematic unreachable trash */
    PyGC_Head finalizers;  /* objects with, & reachable from, __del__ */
    PyGC_Head *gc;
    double t1 = 0.0;

    if (delstr == NULL) {
        delstr = PyString_InternFromString("__del__");
        if (delstr == NULL)
            Py_FatalError("gc couldn't allocate \"__del__\"");
    }

    if (debug & DEBUG_STATS) {
        t1 = get_time();
        PySys_WriteStderr("gc: collecting generation %d...\n",
                          generation);
        PySys_WriteStderr("gc: objects in each generation:");
        for (i = 0; i < NUM_GENERATIONS; i++)
            PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d",
                              gc_list_size(GEN_HEAD(i)));
        PySys_WriteStderr("\n");
    }

    /* update collection and allocation counters */
    if (generation+1 < NUM_GENERATIONS)
        generations[generation+1].count += 1;
    for (i = 0; i <= generation; i++)
        generations[i].count = 0;

    /* merge younger generations with one we are currently collecting */
    for (i = 0; i < generation; i++) {
        gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
    }

    /* handy references */
    young = GEN_HEAD(generation);
    if (generation < NUM_GENERATIONS-1)
        old = GEN_HEAD(generation+1);
    else
        old = young;

    /* Using ob_refcnt and gc_refs, calculate which objects in the
     * container set are reachable from outside the set (i.e., have a
     * refcount greater than 0 when all the references within the
     * set are taken into account).
     */
    update_refs(young);
    subtract_refs(young);

    /* Leave everything reachable from outside young in young, and move
     * everything else (in young) to unreachable.
     * NOTE:  This used to move the reachable objects into a reachable
     * set instead.  But most things usually turn out to be reachable,
     * so it's more efficient to move the unreachable things.
     */
    gc_list_init(&unreachable);
    move_unreachable(young, &unreachable);

    /* Move reachable objects to next generation. */
    if (young != old) {
        if (generation == NUM_GENERATIONS - 2) {
            long_lived_pending += gc_list_size(young);
        }
        gc_list_merge(young, old);
    }
    else {
        long_lived_pending = 0;
        long_lived_total = gc_list_size(young);
    }

    /* All objects in unreachable are trash, but objects reachable from
     * finalizers can't safely be deleted.  Python programmers should take
     * care not to create such things.  For Python, finalizers means
     * instance objects with __del__ methods.  Weakrefs with callbacks
     * can also call arbitrary Python code but they will be dealt with by
     * handle_weakrefs().
     */
    gc_list_init(&finalizers);
    move_finalizers(&unreachable, &finalizers);
    /* finalizers contains the unreachable objects with a finalizer;
     * unreachable objects reachable *from* those are also uncollectable,
     * and we move those into the finalizers list too.
     */
    move_finalizer_reachable(&finalizers);

    /* Collect statistics on collectable objects found and print
     * debugging information.
     */
    for (gc = unreachable.gc.gc_next; gc != &unreachable;
                    gc = gc->gc.gc_next) {
        m++;
        if (debug & DEBUG_COLLECTABLE) {
            debug_cycle("collectable", FROM_GC(gc));
        }
    }

    /* Clear weakrefs and invoke callbacks as necessary. */
    m += handle_weakrefs(&unreachable, old);

    /* Call tp_clear on objects in the unreachable set.  This will cause
     * the reference cycles to be broken.  It may also cause some objects
     * in finalizers to be freed.
     */
    delete_garbage(&unreachable, old);

    /* Collect statistics on uncollectable objects found and print
     * debugging information. */
    for (gc = finalizers.gc.gc_next;
         gc != &finalizers;
         gc = gc->gc.gc_next) {
        n++;
        if (debug & DEBUG_UNCOLLECTABLE)
            debug_cycle("uncollectable", FROM_GC(gc));
    }
    if (debug & DEBUG_STATS) {
        double t2 = get_time();
        if (m == 0 && n == 0)
            PySys_WriteStderr("gc: done");
        else
            PySys_WriteStderr(
                "gc: done, "
                "%" PY_FORMAT_SIZE_T "d unreachable, "
                "%" PY_FORMAT_SIZE_T "d uncollectable",
                n+m, n);
        if (t1 && t2) {
            PySys_WriteStderr(", %.4fs elapsed", t2-t1);
        }
        PySys_WriteStderr(".\n");
    }

    /* Append instances in the uncollectable set to a Python
     * reachable list of garbage.  The programmer has to deal with
     * this if they insist on creating this type of structure.
     */
    (void)handle_finalizers(&finalizers, old);

    /* Clear free lists only during the collection of the highest
     * generation */
    if (generation == NUM_GENERATIONS-1) {
        clear_freelists();
    }

    if (PyErr_Occurred()) {
        if (gc_str == NULL)
            gc_str = PyString_FromString("garbage collection");
        PyErr_WriteUnraisable(gc_str);
        Py_FatalError("unexpected exception during garbage collection");
    }
    return n+m;
}
static Py_ssize_t
collect_generations(void)
{
    int i;
    Py_ssize_t n = 0;

    /* Find the oldest generation (highest numbered) where the count
     * exceeds the threshold.  Objects in that generation and
     * generations younger than it will be collected. */
    for (i = NUM_GENERATIONS-1; i >= 0; i--) {
        if (generations[i].count > generations[i].threshold) {
            /* Avoid quadratic performance degradation in number
               of tracked objects. See comments at the beginning
               of this file, and issue #4074.
            */
            if (i == NUM_GENERATIONS - 1
                && long_lived_pending < long_lived_total / 4)
                continue;
            n = collect(i);
            break;
        }
    }
    return n;
}
PyDoc_STRVAR(gc_enable__doc__,
"enable() -> None\n"
"\n"
"Enable automatic garbage collection.\n");

static PyObject *
gc_enable(PyObject *self, PyObject *noargs)
{
    enabled = 1;
    Py_INCREF(Py_None);
    return Py_None;
}
PyDoc_STRVAR(gc_disable__doc__,
"disable() -> None\n"
"\n"
"Disable automatic garbage collection.\n");

static PyObject *
gc_disable(PyObject *self, PyObject *noargs)
{
    enabled = 0;
    Py_INCREF(Py_None);
    return Py_None;
}
PyDoc_STRVAR(gc_isenabled__doc__,
"isenabled() -> status\n"
"\n"
"Returns true if automatic garbage collection is enabled.\n");

static PyObject *
gc_isenabled(PyObject *self, PyObject *noargs)
{
    return PyBool_FromLong((long)enabled);
}
PyDoc_STRVAR(gc_collect__doc__,
"collect([generation]) -> n\n"
"\n"
"With no arguments, run a full collection.  The optional argument\n"
"may be an integer specifying which generation to collect.  A ValueError\n"
"is raised if the generation number is invalid.\n\n"
"The number of unreachable objects is returned.\n");

static PyObject *
gc_collect(PyObject *self, PyObject *args, PyObject *kws)
{
    static char *keywords[] = {"generation", NULL};
    int genarg = NUM_GENERATIONS - 1;
    Py_ssize_t n;

    if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
        return NULL;

    else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
        PyErr_SetString(PyExc_ValueError, "invalid generation");
        return NULL;
    }

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect(genarg);
        collecting = 0;
    }

    return PyInt_FromSsize_t(n);
}
PyDoc_STRVAR(gc_set_debug__doc__,
"set_debug(flags) -> None\n"
"\n"
"Set the garbage collection debugging flags. Debugging information is\n"
"written to sys.stderr.\n"
"\n"
"flags is an integer and can have the following bits turned on:\n"
"\n"
"  DEBUG_STATS - Print statistics during collection.\n"
"  DEBUG_COLLECTABLE - Print collectable objects found.\n"
"  DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
"  DEBUG_INSTANCES - Print instance objects.\n"
"  DEBUG_OBJECTS - Print objects other than instances.\n"
"  DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
"  DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");

static PyObject *
gc_set_debug(PyObject *self, PyObject *args)
{
    if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
        return NULL;

    Py_INCREF(Py_None);
    return Py_None;
}
PyDoc_STRVAR(gc_get_debug__doc__,
"get_debug() -> flags\n"
"\n"
"Get the garbage collection debugging flags.\n");

static PyObject *
gc_get_debug(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("i", debug);
}
PyDoc_STRVAR(gc_set_thresh__doc__,
"set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
"\n"
"Sets the collection thresholds.  Setting threshold0 to zero disables\n"
"collection.\n");

static PyObject *
gc_set_thresh(PyObject *self, PyObject *args)
{
    int i;
    if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
                          &generations[0].threshold,
                          &generations[1].threshold,
                          &generations[2].threshold))
        return NULL;
    for (i = 2; i < NUM_GENERATIONS; i++) {
        /* generations higher than 2 get the same threshold */
        generations[i].threshold = generations[2].threshold;
    }

    Py_INCREF(Py_None);
    return Py_None;
}
PyDoc_STRVAR(gc_get_thresh__doc__,
"get_threshold() -> (threshold0, threshold1, threshold2)\n"
"\n"
"Return the current collection thresholds.\n");

static PyObject *
gc_get_thresh(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].threshold,
                         generations[1].threshold,
                         generations[2].threshold);
}
PyDoc_STRVAR(gc_get_count__doc__,
"get_count() -> (count0, count1, count2)\n"
"\n"
"Return the current collection counts.\n");

static PyObject *
gc_get_count(PyObject *self, PyObject *noargs)
{
    return Py_BuildValue("(iii)",
                         generations[0].count,
                         generations[1].count,
                         generations[2].count);
}
static int
referrersvisit(PyObject* obj, PyObject *objs)
{
    Py_ssize_t i;
    for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
        if (PyTuple_GET_ITEM(objs, i) == obj)
            return 1;
    return 0;
}
static int
gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
{
    PyGC_Head *gc;
    PyObject *obj;
    traverseproc traverse;
    for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
        obj = FROM_GC(gc);
        traverse = Py_TYPE(obj)->tp_traverse;
        if (obj == objs || obj == resultlist)
            continue;
        if (traverse(obj, (visitproc)referrersvisit, objs)) {
            if (PyList_Append(resultlist, obj) < 0)
                return 0; /* error */
        }
    }
    return 1; /* no error */
}
PyDoc_STRVAR(gc_get_referrers__doc__,
"get_referrers(*objs) -> list\n\
Return the list of objects that directly refer to any of objs.");

static PyObject *
gc_get_referrers(PyObject *self, PyObject *args)
{
    int i;
    PyObject *result = PyList_New(0);
    if (!result) return NULL;

    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
/* Append obj to list; return true if error (out of memory), false if OK. */
static int
referentsvisit(PyObject *obj, PyObject *list)
{
    return PyList_Append(list, obj) < 0;
}
PyDoc_STRVAR(gc_get_referents__doc__,
"get_referents(*objs) -> list\n\
Return the list of objects that are directly referred to by objs.");

static PyObject *
gc_get_referents(PyObject *self, PyObject *args)
{
    Py_ssize_t i;
    PyObject *result = PyList_New(0);

    if (result == NULL)
        return NULL;

    for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
        traverseproc traverse;
        PyObject *obj = PyTuple_GET_ITEM(args, i);

        if (! PyObject_IS_GC(obj))
            continue;
        traverse = Py_TYPE(obj)->tp_traverse;
        if (! traverse)
            continue;
        if (traverse(obj, (visitproc)referentsvisit, result)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
PyDoc_STRVAR(gc_get_objects__doc__,
"get_objects() -> [...]\n"
"\n"
"Return a list of objects tracked by the collector (excluding the list\n"
"returned).\n");

static PyObject *
gc_get_objects(PyObject *self, PyObject *noargs)
{
    int i;
    PyObject *result;

    result = PyList_New(0);
    if (result == NULL)
        return NULL;
    for (i = 0; i < NUM_GENERATIONS; i++) {
        if (append_objects(result, GEN_HEAD(i))) {
            Py_DECREF(result);
            return NULL;
        }
    }
    return result;
}
PyDoc_STRVAR(gc__doc__,
"This module provides access to the garbage collector for reference cycles.\n"
"\n"
"enable() -- Enable automatic garbage collection.\n"
"disable() -- Disable automatic garbage collection.\n"
"isenabled() -- Returns true if automatic collection is enabled.\n"
"collect() -- Do a full collection right now.\n"
"get_count() -- Return the current collection counts.\n"
"set_debug() -- Set debugging flags.\n"
"get_debug() -- Get debugging flags.\n"
"set_threshold() -- Set the collection thresholds.\n"
"get_threshold() -- Return the current collection thresholds.\n"
"get_objects() -- Return a list of all objects tracked by the collector.\n"
"get_referrers() -- Return the list of objects that refer to an object.\n"
"get_referents() -- Return the list of objects that an object refers to.\n");
static PyMethodDef GcMethods[] = {
    {"enable",          gc_enable,      METH_NOARGS,  gc_enable__doc__},
    {"disable",         gc_disable,     METH_NOARGS,  gc_disable__doc__},
    {"isenabled",       gc_isenabled,   METH_NOARGS,  gc_isenabled__doc__},
    {"set_debug",       gc_set_debug,   METH_VARARGS, gc_set_debug__doc__},
    {"get_debug",       gc_get_debug,   METH_NOARGS,  gc_get_debug__doc__},
    {"get_count",       gc_get_count,   METH_NOARGS,  gc_get_count__doc__},
    {"set_threshold",   gc_set_thresh,  METH_VARARGS, gc_set_thresh__doc__},
    {"get_threshold",   gc_get_thresh,  METH_NOARGS,  gc_get_thresh__doc__},
    {"collect",         (PyCFunction)gc_collect,
        METH_VARARGS | METH_KEYWORDS,                 gc_collect__doc__},
    {"get_objects",     gc_get_objects, METH_NOARGS,  gc_get_objects__doc__},
    {"get_referrers",   gc_get_referrers, METH_VARARGS,
        gc_get_referrers__doc__},
    {"get_referents",   gc_get_referents, METH_VARARGS,
        gc_get_referents__doc__},
    {NULL,      NULL}           /* Sentinel */
};
PyMODINIT_FUNC
initgc(void)
{
    PyObject *m;

    m = Py_InitModule4("gc",
                       GcMethods,
                       gc__doc__,
                       NULL,
                       PYTHON_API_VERSION);
    if (m == NULL)
        return;

    if (garbage == NULL) {
        garbage = PyList_New(0);
        if (garbage == NULL)
            return;
    }
    Py_INCREF(garbage);
    if (PyModule_AddObject(m, "garbage", garbage) < 0)
        return;

    /* Importing can't be done in collect() because collect()
     * can be called via PyGC_Collect() in Py_Finalize().
     * This wouldn't be a problem, except that <initialized> is
     * reset to 0 before calling collect which trips up
     * the import and triggers an assertion.
     */
    if (tmod == NULL) {
        tmod = PyImport_ImportModuleNoBlock("time");
        if (tmod == NULL)
            PyErr_Clear();
    }

#define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return
    ADD_INT(DEBUG_STATS);
    ADD_INT(DEBUG_COLLECTABLE);
    ADD_INT(DEBUG_UNCOLLECTABLE);
    ADD_INT(DEBUG_INSTANCES);
    ADD_INT(DEBUG_OBJECTS);
    ADD_INT(DEBUG_SAVEALL);
    ADD_INT(DEBUG_LEAK);
#undef ADD_INT
}
/* API to invoke gc.collect() from C */
Py_ssize_t
PyGC_Collect(void)
{
    Py_ssize_t n;

    if (collecting)
        n = 0; /* already collecting, don't do anything */
    else {
        collecting = 1;
        n = collect(NUM_GENERATIONS - 1);
        collecting = 0;
    }

    return n;
}
*g
)
1367 _PyObject_Dump(FROM_GC(g
));
/* extension modules might be compiled with GC support so these
   functions must always be available */

#undef PyObject_GC_Track
#undef PyObject_GC_UnTrack
#undef PyObject_GC_Del
#undef _PyObject_GC_Malloc

void
PyObject_GC_Track(void *op)
{
    _PyObject_GC_TRACK(op);
}
/* for binary compatibility with 2.2 */
void
_PyObject_GC_Track(PyObject *op)
{
    PyObject_GC_Track(op);
}
void
PyObject_GC_UnTrack(void *op)
{
    /* Obscure:  the Py_TRASHCAN mechanism requires that we be able to
     * call PyObject_GC_UnTrack twice on an object.
     */
    if (IS_TRACKED(op))
        _PyObject_GC_UNTRACK(op);
}
/* for binary compatibility with 2.2 */
void
_PyObject_GC_UnTrack(PyObject *op)
{
    PyObject_GC_UnTrack(op);
}
PyObject *
_PyObject_GC_Malloc(size_t basicsize)
{
    PyObject *op;
    PyGC_Head *g;
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return PyErr_NoMemory();
    g = (PyGC_Head *)PyObject_MALLOC(
        sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return PyErr_NoMemory();
    g->gc.gc_refs = GC_UNTRACKED;
    generations[0].count++; /* number of allocated GC objects */
    if (generations[0].count > generations[0].threshold &&
        enabled &&
        generations[0].threshold &&
        !collecting &&
        !PyErr_Occurred()) {
        collecting = 1;
        collect_generations();
        collecting = 0;
    }
    op = FROM_GC(g);
    return op;
}
PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
    if (op != NULL)
        op = PyObject_INIT(op, tp);
    return op;
}
PyVarObject *
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
{
    const size_t size = _PyObject_VAR_SIZE(tp, nitems);
    PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size);
    if (op != NULL)
        op = PyObject_INIT_VAR(op, tp, nitems);
    return op;
}
PyVarObject *
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
{
    const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
    PyGC_Head *g = AS_GC(op);
    if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
        return (PyVarObject *)PyErr_NoMemory();
    g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
    if (g == NULL)
        return (PyVarObject *)PyErr_NoMemory();
    op = (PyVarObject *) FROM_GC(g);
    Py_SIZE(op) = nitems;
    return op;
}
void
PyObject_GC_Del(void *op)
{
    PyGC_Head *g = AS_GC(op);
    if (IS_TRACKED(op))
        gc_list_remove(g);
    if (generations[0].count > 0) {
        generations[0].count--;
    }
    PyObject_FREE(g);
}
1481 #undef _PyObject_GC_Del
1483 _PyObject_GC_Del(PyObject
*op
)
1485 PyObject_GC_Del(op
);