src/runtime/gencgc.c (blob 1637d94e417249994a5a75471b3db95d41f62e5c)
1 /*
2 * GENerational Conservative Garbage Collector for SBCL
3 */
5 /*
6 * This software is part of the SBCL system. See the README file for
7 * more information.
9 * This software is derived from the CMU CL system, which was
10 * written at Carnegie Mellon University and released into the
11 * public domain. The software is in the public domain and is
12 * provided with absolutely no warranty. See the COPYING and CREDITS
13 * files for more information.
17 * For a review of garbage collection techniques (e.g. generational
18 * GC) and terminology (e.g. "scavenging") see Paul R. Wilson,
19 * "Uniprocessor Garbage Collection Techniques" available at
20 * <https://www.cs.rice.edu/~javaplt/311/Readings/wilson92uniprocessor.pdf>
21 * or
22 * <ftp://ftp.cs.utexas.edu/pub/garbage/bigsurv.ps>.
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <errno.h>
28 #include <string.h>
29 #include <inttypes.h>
30 #include "genesis/sbcl.h"
31 #include "runtime.h"
32 #include "os.h"
33 #include "interr.h"
34 #include "globals.h"
35 #include "interrupt.h"
36 #include "validate.h"
37 #include "lispregs.h"
38 #include "arch.h"
39 #include "gc.h"
40 #include "thread.h"
41 #include "pseudo-atomic.h"
42 #include "code.h"
43 #include "genesis/gc-tables.h"
44 #include "genesis/vector.h"
45 #include "genesis/weak-pointer.h"
46 #include "genesis/symbol.h"
47 #include "genesis/hash-table.h"
48 #include "genesis/instance.h"
49 #include "hopscotch.h"
50 #include "genesis/cons.h"
51 #include "genesis/brothertree.h"
52 #include "genesis/split-ordered-list.h"
53 #include "var-io.h"
55 /* forward declarations */
56 extern FILE *gc_activitylog();
58 /* Largest allocation seen since last GC. */
59 os_vm_size_t large_allocation = 0;
60 int n_lisp_gcs;
64 /* debugging */
67 /* the verbosity level. All non-error messages are disabled at level 0;
68 * and only a few rare messages are printed at level 1. */
69 int gencgc_verbose = 0;
72 /* GC structures and variables */
75 /* the total bytes allocated. These are seen by Lisp DYNAMIC-USAGE. */
76 os_vm_size_t bytes_allocated = 0;
77 os_vm_size_t auto_gc_trigger = 0;
79 /* the source and destination generations. These are set before a GC starts
80 * scavenging. */
81 generation_index_t from_space;
82 generation_index_t new_space;
84 /* Set to 1 when in GC */
85 bool gc_active_p = 0;
87 /* should the GC be conservative on the stack. If false (only right before
88 * saving a core), don't scan the stack / mark pages pinned. */
89 bool conservative_stack = 1;
90 int save_lisp_gc_iteration;
92 /* An array of page structures is allocated on gc initialization.
93 * This helps to quickly map between an address and its page structure.
94 * page_table_pages is set from the size of the dynamic space. */
95 page_index_t page_table_pages;
96 struct page *page_table;
97 unsigned char *gc_page_pins;
98 unsigned char *gc_card_mark;
99 // Filtered pins include code but not simple-funs,
100 // and must not include invalid pointers.
101 static lispobj* gc_filtered_pins;
102 static int pins_alloc_size;
103 static int gc_pin_count;
104 struct hopscotch_table pinned_objects;
106 /* This is always 0 except during gc_and_save() */
107 lispobj lisp_init_function;
109 static inline bool boxed_type_p(int type) { return type > 1; }
110 static inline bool page_boxed_p(page_index_t page) {
111 // ignore SINGLE_OBJECT_FLAG and OPEN_REGION_PAGE_FLAG
112 return boxed_type_p(page_table[page].type & PAGE_TYPE_MASK);
115 #ifndef LISP_FEATURE_SOFT_CARD_MARKS
116 static inline bool protect_page_p(page_index_t page, generation_index_t generation) {
117 return (page_boxed_p(page)
118 && !(page_table[page].type & OPEN_REGION_PAGE_FLAG)
119 && (page_words_used(page) != 0)
120 && !gc_page_pins[page]
121 && (page_table[page].gen == generation));
123 #endif
125 /* Calculate the start address for the given page number. */
126 inline char *page_address(page_index_t page_num)
128 return (void*)(DYNAMIC_SPACE_START + (page_num * GENCGC_PAGE_BYTES));
131 /* Calculate the address where the allocation region associated with
132 * the page starts. */
133 static inline void *
134 page_scan_start(page_index_t page_index)
136 return page_address(page_index)-page_scan_start_offset(page_index);
139 /* We maintain the invariant that pages with FREE_PAGE_FLAG have
140 * scan_start of zero, to optimize page_ends_contiguous_block_p().
141 * Clear all the flags that don't pertain to a free page.
142 * Particularly the 'need_zerofill' bit MUST remain as-is */
143 void reset_page_flags(page_index_t page) {
144 page_table[page].scan_start_offset_ = 0;
145 set_page_type(page_table[page], FREE_PAGE_FLAG);
146 gc_page_pins[page] = 0;
147 // Why can't the 'gen' get cleared? It caused failures. THIS MAKES NO SENSE!!!
148 // page_table[page].gen = 0;
149 // Free pages are dirty (MARKED) because MARKED is equivalent
150 // to not-write-protected, which is what you want for allocation.
151 assign_page_card_marks(page, CARD_MARKED);
154 #include "genesis/cardmarks.h"
155 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
156 int page_cards_all_marked_nonsticky(page_index_t page) {
157 return cardseq_all_marked_nonsticky(page_to_card_index(page));
159 #endif
161 /// External function for calling from Lisp.
162 page_index_t ext_find_page_index(void *addr) { return find_page_index(addr); }
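/* A hedged sketch (illustration only, guarded out) of the round-trip
 * invariant between page_address() and find_page_index(): mapping a valid
 * dynamic-space page index to an address and back is the identity,
 * including for the last byte of the page. */
#if 0
    page_index_t p = 0; /* stands for any index in [0, page_table_pages) */
    gc_assert(find_page_index(page_address(p)) == p);
    gc_assert(find_page_index(page_address(p) + GENCGC_PAGE_BYTES - 1) == p);
#endif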
164 /* an array of generation structures. There needs to be one more
165 * generation structure than actual generations as the oldest
166 * generation is temporarily raised then lowered. */
167 struct generation generations[NUM_GENERATIONS];
169 /* the oldest generation that will currently be GCed by default.
170 * Valid values are: 0, 1, ... HIGHEST_NORMAL_GENERATION
172 * The default of HIGHEST_NORMAL_GENERATION enables GC on all generations.
174 * Setting this to 0 effectively disables the generational nature of
175 * the GC. In some applications generational GC may not be useful
176 * because there are no long-lived objects.
178 * An intermediate value could be handy after moving long-lived data
179 * into an older generation so an unnecessary GC of this long-lived
180 * data can be avoided. */
181 generation_index_t gencgc_oldest_gen_to_gc = HIGHEST_NORMAL_GENERATION;
183 page_index_t next_free_page; // upper (exclusive) bound on used page range
185 #ifdef LISP_FEATURE_SB_THREAD
186 /* This lock is to prevent multiple threads from simultaneously
187 * allocating new regions which overlap each other. Note that the
188 * majority of GC is single-threaded, but alloc() may be called from
189 * >1 thread at a time and must be thread-safe. This lock must be
190 * seized before all accesses to generations[] or to parts of
191 * page_table[] that other threads may want to see */
192 #ifdef LISP_FEATURE_WIN32
193 static CRITICAL_SECTION free_pages_lock;
194 #else
195 static pthread_mutex_t free_pages_lock = PTHREAD_MUTEX_INITIALIZER;
196 #endif
197 #endif
199 void acquire_gc_page_table_lock() { ignore_value(mutex_acquire(&free_pages_lock)); }
200 void release_gc_page_table_lock() { ignore_value(mutex_release(&free_pages_lock)); }
202 extern os_vm_size_t gencgc_alloc_granularity;
203 os_vm_size_t gencgc_alloc_granularity = GENCGC_ALLOC_GRANULARITY;
207 /* miscellaneous heap functions */
210 static void show_pinnedobj_count()
212 page_index_t page;
213 int nwords = 0;
214 int n_pinned_largeobj = 0;
215 for (page = 0; page < next_free_page; ++page) {
216 if (page_table[page].gen == from_space && gc_page_pins[page]
217 && page_single_obj_p(page)) {
218 nwords += page_words_used(page);
219 if (page_starts_contiguous_block_p(page))
220 ++n_pinned_largeobj;
223 fprintf(stderr,
224 "/pinned objects(g%d): large=%d (%d words), small=%d\n",
225 from_space, n_pinned_largeobj, nwords, pinned_objects.count);
228 /* Work through the pages and add up the number of bytes used for the
229 * given generation. */
230 static __attribute__((unused)) os_vm_size_t
231 count_generation_bytes_allocated (generation_index_t gen)
233 page_index_t i;
234 os_vm_size_t result = 0;
235 for (i = 0; i < next_free_page; i++) {
236 if (!page_free_p(i) && page_table[i].gen == gen)
237 result += page_words_used(i);
239 return result*N_WORD_BYTES;
243 /* The generation currently being allocated to. */
244 static generation_index_t gc_alloc_generation;
246 __attribute__((unused)) static const char * const page_type_description[8] =
247 {0, "unboxed", "boxed", "mixed", "sm_mix", "cons", "?", "code"};
250 /* To support quick and inline allocation, regions of memory can be
251 * allocated and then allocated from with just a free pointer and a
252 * check against an end address.
254 * Since objects can be allocated to spaces with different properties
255 * e.g. boxed/unboxed, generation, ages; there may need to be many
256 * allocation regions.
258 * Each allocation region may start within a partly used page. Many
259 * features of memory use are noted on a per-page basis, e.g. the
260 * generation; so if a region starts within an existing allocated page
261 * it must be consistent with this page.
263 * During the scavenging of the newspace, objects will be transported
264 * into an allocation region, and pointers updated to point to this
265 * allocation region. It is possible that these pointers will be
266 * scavenged again before the allocation region is closed, e.g. due to
267 * trans_list which jumps all over the place to clean up the list. It
268 * is important to be able to determine properties of all objects
269 * pointed to when scavenging, e.g. to detect pointers to the oldspace.
270 * Thus it's important that the allocation regions have the correct
271 * properties set when allocated, and not just set when closed. The
272 * region allocation routines return regions with the specified
273 * properties, and grab all the pages, setting their properties
274 * appropriately, except that the amount used is not known.
276 * These regions are used to support quicker allocation using just a
277 * free pointer. The actual space used by the region is not reflected
278 * in the pages tables until it is closed. It can't be scavenged until
279 * closed.
281 * When finished with the region it should be closed, which will
282 * update the page tables for the actual space used returning unused
283 * space. Further, the region may be noted in the new areas, which is
284 * necessary when scavenging the newspace.
286 * Large objects may be allocated directly without an allocation
287 * region, the page table is updated immediately.
289 * Unboxed objects don't contain pointers to other objects and so
290 * don't need scavenging. Further they can't contain pointers to
291 * younger generations so WP is not needed. By allocating pages to
292 * unboxed objects the whole page never needs scavenging or
293 * write-protecting. */
295 /* We use five regions for the current newspace generation. */
296 struct alloc_region gc_alloc_region[6];
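/* A hedged sketch of the "free pointer + end check" fast path described in
 * the comment above. Guarded out; this is not the actual inline allocator,
 * and it assumes nothing beyond the alloc_region fields already used in
 * this file (free_pointer, end_addr). */
#if 0
static inline void *example_region_alloc(struct alloc_region *region, sword_t nbytes)
{
    char *new_free = (char*)region->free_pointer + nbytes;
    if (new_free > (char*)region->end_addr)
        return 0; /* caller would fall back to the slow path, which refills the region */
    void *obj = region->free_pointer;
    region->free_pointer = new_free;
    return obj;
}
#endif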
298 static page_index_t
299 alloc_start_pages[8], // one for each value of PAGE_TYPE_x
300 max_alloc_start_page; // the largest of any array element
301 page_index_t gencgc_alloc_start_page; // initializer for the preceding array
303 /* Each 'start_page' informs the region-opening logic where it should
304 * attempt to continue allocating after closing a region associated
305 * with a particular page type. We aren't very clever about this -
306 * either the start_page has space remaining or it doesn't, and when it
307 * doesn't, then we should hop over *all* allocated pages regardless of
308 * type that intervene between the page we couldn't use and next_free_page.
309 * It's kind of dumb that there is one start_page per type,
310 * other than it serves its purpose for picking up where it left off
311 * on a partially full page during GC */
312 #define RESET_ALLOC_START_PAGES() \
313 alloc_start_pages[0] = gencgc_alloc_start_page; \
314 alloc_start_pages[1] = gencgc_alloc_start_page; \
315 alloc_start_pages[2] = gencgc_alloc_start_page; \
316 alloc_start_pages[3] = gencgc_alloc_start_page; \
317 alloc_start_pages[4] = gencgc_alloc_start_page; \
318 alloc_start_pages[5] = gencgc_alloc_start_page; \
319 alloc_start_pages[6] = gencgc_alloc_start_page; \
320 alloc_start_pages[7] = gencgc_alloc_start_page; \
321     max_alloc_start_page = gencgc_alloc_start_page;
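/* Equivalently, as a hedged sketch (the unrolled macro above is what the
 * source uses; this loop is illustration only): */
#if 0
    { int i;
      for (i = 0; i < 8; ++i) alloc_start_pages[i] = gencgc_alloc_start_page;
      max_alloc_start_page = gencgc_alloc_start_page; }
#endif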
323 static page_index_t
324 get_alloc_start_page(unsigned int page_type)
326 if (page_type > 7) lose("bad page_type: %d", page_type);
327 struct thread* th = get_sb_vm_thread();
328 page_index_t global_start = alloc_start_pages[page_type];
329 page_index_t hint;
330 switch (page_type) {
331 case PAGE_TYPE_MIXED:
332 if ((hint = thread_extra_data(th)->mixed_page_hint) > 0 && hint <= global_start) {
333 thread_extra_data(th)->mixed_page_hint = - 1;
334 return hint;
336 break;
337 case PAGE_TYPE_CONS:
338 if ((hint = thread_extra_data(th)->cons_page_hint) > 0 && hint <= global_start) {
339 thread_extra_data(th)->cons_page_hint = - 1;
340 return hint;
342 break;
344 return global_start;
347 static inline void
348 set_alloc_start_page(unsigned int page_type, page_index_t page)
350 if (page_type > 7) lose("bad page_type: %d", page_type);
351 if (page > max_alloc_start_page) max_alloc_start_page = page;
352 alloc_start_pages[page_type] = page;
354 #include "private-cons.inc"
356 /* Find a new region with room for at least the given number of bytes.
358 * It starts looking at the current generation's alloc_start_page. So
359 * may pick up from the previous region if there is enough space. This
360 * keeps the allocation contiguous when scavenging the newspace.
362 * The alloc_region should have been closed by a call to
363 * gc_close_region(), and will thus be in an empty state.
365 * To assist the scavenging functions write-protected pages are not
366 * used. Free pages should not be write-protected.
368 * It is critical to the conservative GC that the start of regions be
369 * known. To help achieve this only small regions are allocated at a
370 * time.
372 * During scavenging, pointers may be found to within the current
373 * region and the page generation must be set so that pointers to the
374 * from space can be recognized. Therefore the generation of pages in
375 * the region are set to gc_alloc_generation. To prevent another
376 * allocation call using the same pages, all the pages in the region
377 * are allocated, although they will initially be empty. */
380 #ifdef LISP_FEATURE_ALLOCATOR_METRICS
381 #define INSTRUMENTING(expression, metric) { \
382 struct timespec t0, t1; clock_gettime(CLOCK_REALTIME, &t0); expression; \
383 clock_gettime(CLOCK_REALTIME, &t1); \
384 struct thread* th = get_sb_vm_thread(); \
385 th->metric += (t1.tv_sec - t0.tv_sec)*1000000000 + (t1.tv_nsec - t0.tv_nsec); }
386 #else
387 #define INSTRUMENTING(expression, metric) expression
388 #endif
390 /* Test whether page 'index' can continue a non-large-object region
391 * having the specified 'gen' and 'type' values. It must not be pinned
392 * (not referenced from the stack) and its cards must all be marked (not write-protected) */
393 static inline bool
394 page_extensible_p(page_index_t index, generation_index_t gen, int type) {
395 #ifdef LISP_FEATURE_BIG_ENDIAN /* TODO: implement this as single comparison */
396 int attributes_match =
397 page_table[index].type == type
398 && page_table[index].gen == gen
399 && !gc_page_pins[index];
400 #else
401 // FIXME: "warning: dereferencing type-punned pointer will break strict-aliasing rules"
402 int attributes_match =
403 *(int16_t*)&page_table[index].type == ((gen<<8)|type);
404 #endif
405 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
406 return attributes_match && page_cards_all_marked_nonsticky(index);
407 #else
408 return attributes_match && !PAGE_WRITEPROTECTED_P(index);
409 #endif
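/* Regarding the FIXME above about type-punning: a hedged sketch of one
 * standard-conforming alternative using memcpy. It relies only on what the
 * existing pun already assumes, namely that 'type' and 'gen' are adjacent
 * byte-sized fields of struct page on a little-endian target. Sketch, not a
 * proposed change: */
#if 0
    int16_t packed;
    memcpy(&packed, &page_table[index].type, sizeof packed);
    int attributes_match = packed == (int16_t)((gen << 8) | type);
#endif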
412 void gc_heap_exhausted_error_or_lose (sword_t available, sword_t requested) never_returns;
414 /* Find a single page for conses or SMALL_MIXED objects.
415 * CONS differs because:
416 * - not all GENCGC_PAGE_BYTES of the page can be used.
417 * - a region can't be extended from one page to the next
418 * (implied by the preceding restriction).
419 * SMALL_MIXED is similar to cons, but all bytes of the page can be used
420 * for storing objects, subject to the non-card-spanning constraint. */
421 static page_index_t find_single_page(int page_type, sword_t nbytes, generation_index_t gen)
423     page_index_t page = alloc_start_pages[page_type];
424 // Compute the max words that could already be used while satisfying the request.
425     page_words_t usage_allowance =
426         GENCGC_PAGE_BYTES/N_WORD_BYTES - (nbytes>>WORD_SHIFT);
427 if (page_type == PAGE_TYPE_CONS) {
428 gc_assert(nbytes <= CONS_PAGE_USABLE_BYTES);
429 usage_allowance = (CONS_SIZE*MAX_CONSES_PER_PAGE) - (nbytes>>WORD_SHIFT);
431 for ( ; page < page_table_pages ; ++page) {
432 if (page_words_used(page) <= usage_allowance
433 && (page_free_p(page) || page_extensible_p(page, gen, page_type))) return page;
435 /* Compute the "available" space for the lossage message. This is kept out of the
436 * search loop because it's needless overhead. Any free page would have been returned,
437 * so we just have to find the least full page meeting the gen+type criteria */
438 sword_t min_used = GENCGC_PAGE_WORDS;
439 for ( page = alloc_start_pages[page_type]; page < page_table_pages ; ++page) {
440 if (page_words_used(page) < min_used && page_extensible_p(page, gen, page_type))
441 min_used = page_words_used(page);
443 sword_t bytes_avail;
444 if (page_type == PAGE_TYPE_CONS) {
445 bytes_avail = CONS_PAGE_USABLE_BYTES - (min_used<<WORD_SHIFT);
446         /* The sentinel value initially in 'min_used' exceeds a cons
447 * page's capacity, so clip to 0 instead of showing a negative value
448 * if no page matched on gen+type */
449 if (bytes_avail < 0) bytes_avail = 0;
450 } else {
451 bytes_avail = GENCGC_PAGE_BYTES - (min_used<<WORD_SHIFT);
453 gc_heap_exhausted_error_or_lose(bytes_avail, nbytes);
456 #if 0
457 bool page_is_zeroed(page_index_t page)
459 int nwords_per_page = GENCGC_PAGE_BYTES/N_WORD_BYTES;
460 uword_t *pagebase = (void*)page_address(page);
461 int i;
462 for (i=0; i<nwords_per_page; ++i) if (pagebase[i]) return 0;
463 return 1;
465 #endif
467 static void*
468 gc_alloc_new_region(sword_t nbytes, int page_type, struct alloc_region *alloc_region, int unlock)
470 /* Check that the region is in a reset state. */
471 gc_dcheck(!alloc_region->start_addr);
473 if (page_type == PAGE_TYPE_CONS || page_type == PAGE_TYPE_SMALL_MIXED) {
474 // No mutex release, because either this is:
475 // - not called from Lisp, as in the SMALL_MIXED case
476 // - called from lisp_alloc() which does its own unlock
477 gc_dcheck(!unlock);
478 page_index_t page;
479 INSTRUMENTING(page = find_single_page(page_type, nbytes, gc_alloc_generation),
480 et_find_freeish_page);
481 if (page+1 > next_free_page) next_free_page = page+1;
482 page_table[page].gen = gc_alloc_generation;
483 set_page_type(page_table[page], OPEN_REGION_PAGE_FLAG | page_type);
484 if (!page_words_used(page))
485 prepare_pages(1, page, page, page_type, gc_alloc_generation);
486 // Don't need to set the scan_start_offset because free pages have it 0
487 // (and each of these page types starts a new contiguous block)
488 gc_dcheck(page_table[page].scan_start_offset_ == 0);
489 alloc_region->start_addr = page_address(page) + page_bytes_used(page);
490 if (page_type == PAGE_TYPE_CONS) {
491 alloc_region->end_addr = page_address(page) + CONS_PAGE_USABLE_BYTES;
492 } else {
493 alloc_region->end_addr =
494 (char*)ALIGN_DOWN((uword_t)alloc_region->start_addr, GENCGC_CARD_BYTES) + GENCGC_CARD_BYTES;
496 alloc_region->free_pointer = alloc_region->start_addr;
497 gc_assert(find_page_index(alloc_region->start_addr) == page);
498 return alloc_region->free_pointer;
501 page_index_t first_page = get_alloc_start_page(page_type), last_page;
503 INSTRUMENTING(
504 last_page = gc_find_freeish_pages(&first_page, nbytes,
505 ((nbytes >= (sword_t)GENCGC_PAGE_BYTES) ?
506 SINGLE_OBJECT_FLAG : 0) | page_type,
507 gc_alloc_generation),
508 et_find_freeish_page);
510 /* Set up the alloc_region. */
511 alloc_region->start_addr = page_address(first_page) + page_bytes_used(first_page);
512 alloc_region->free_pointer = alloc_region->start_addr;
513 alloc_region->end_addr = page_address(last_page+1);
514 gc_assert(find_page_index(alloc_region->start_addr) == first_page);
516 /* Set up the pages. */
518 /* The first page may have already been in use. */
519 /* If so, just assert that it's consistent, otherwise, set it up. */
520 if (page_words_used(first_page)) {
521 gc_assert(page_table[first_page].type == page_type);
522 gc_assert(page_table[first_page].gen == gc_alloc_generation);
523 } else {
524 page_table[first_page].gen = gc_alloc_generation;
526 set_page_type(page_table[first_page], OPEN_REGION_PAGE_FLAG | page_type);
528 page_index_t i;
529 for (i = first_page+1; i <= last_page; i++) {
530 set_page_type(page_table[i], OPEN_REGION_PAGE_FLAG | page_type);
531 page_table[i].gen = gc_alloc_generation;
532 set_page_scan_start_offset(i,
533 addr_diff(page_address(i), alloc_region->start_addr));
535 if (unlock) {
536 int __attribute__((unused)) ret = mutex_release(&free_pages_lock);
537 gc_assert(ret);
540 if (page_words_used(first_page)) ++first_page;
541 if (first_page <= last_page)
542 INSTRUMENTING(prepare_pages(1, first_page, last_page, page_type, gc_alloc_generation),
543 et_bzeroing);
545 return alloc_region->free_pointer;
548 /* The new_area structure holds the page, byte offset, and size of
549 * new regions of objects. Each new area is placed in the array of
550 * these structures pointed to by new_areas. new_areas_index holds the
551 * offset into new_areas.
553 * If new_areas overflows NUM_NEW_AREAS then it stops adding them. The
554 * later code must detect this and handle it, probably by doing a full
555 * scavenge of a generation. */
556 #define NUM_NEW_AREAS 512
558 /* 'record_new_regions_below' is the page number (strictly) below which
559 * allocations must be tracked. Choosing the boundary cases with care allows
560 * for all the required modes of operation without an additional control flag:
561 * (1) When allocating from Lisp code, we need not record regions into areas.
562 * In this case 'record_new_regions_below' is 0,
563 * because no page index is less than that value.
564 * (2) When performing a full scavenge of newspace, we record regions below the
565 * highest scavenged page thus far. Pages ahead of (at a higher index than)
566 * the pointer which walks all pages can be ignored, because those pages
567 * will be scavenged in the future regardless of where allocations occur.
568 * (3) When iteratively scavenging newspace, all regions are tracked in areas,
569 * so this variable is set to 1+page_table_pages,
570 * because every page index is less than that sentinel value. */
572 static page_index_t record_new_regions_below;
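/* A hedged illustration of the three modes described above. The values
 * follow from the comment; the real assignments happen elsewhere in the GC,
 * and 'walker_page' is a hypothetical name for the page-walking cursor. */
#if 0
    record_new_regions_below = 0;                    /* (1) Lisp allocation: record nothing */
    record_new_regions_below = walker_page;          /* (2) full scavenge: record below the walker */
    record_new_regions_below = 1 + page_table_pages; /* (3) iterative scavenge: record everything */
#endif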
573 struct new_area {
574 page_index_t page;
575 size_t offset;
576 size_t size;
578 static struct new_area *new_areas;
579 static int new_areas_index;
580 int new_areas_index_hwm; // high water mark
582 /* Add a new area to new_areas. */
583 static void
584 add_new_area(page_index_t first_page, size_t offset, size_t size)
586 if (!(first_page < record_new_regions_below))
587 return;
589 /* Ignore if full. */
590 // Technically overflow occurs at 1+ this number, but it's not worth
591 // losing sleep (or splitting hairs) over one potentially wasted array cell.
592 // i.e. overflow did not necessarily happen if we needed _exactly_ this
593 // many areas. But who cares? The limit should not be approached at all.
594 if (new_areas_index >= NUM_NEW_AREAS)
595 return;
597 size_t new_area_start = npage_bytes(first_page) + offset;
598 int i, c;
599 if (GC_LOGGING) {
600 char* base = page_address(first_page) + offset;
601 fprintf(gc_activitylog(), "enqueue rescan [%p:%p]\n", base, base+size);
603 /* Search backwards for a prior area that this follows from. If
604 found this will save adding a new area. */
605 for (i = new_areas_index-1, c = 0; (i >= 0) && (c < 8); i--, c++) {
606 size_t area_end =
607 npage_bytes(new_areas[i].page) + new_areas[i].offset + new_areas[i].size;
608 if (new_area_start == area_end) {
609 new_areas[i].size += size;
610 return;
614 new_areas[new_areas_index].page = first_page;
615 new_areas[new_areas_index].offset = offset;
616 new_areas[new_areas_index].size = size;
617 new_areas_index++;
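/* Hedged usage illustration of the coalescing rule above, with hypothetical
 * arguments: when the previously recorded area ends exactly where the new
 * one starts, the prior entry grows instead of consuming another slot. */
#if 0
    add_new_area(5, 0, 256);   /* records {page 5, offset 0, size 256} */
    add_new_area(5, 256, 128); /* extends that entry to size 384; no new slot used */
#endif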
620 /* Update the PTEs for the alloc_region. The region may be added to
621 * the new_areas.
623 * When done the alloc_region is set up so that the next quick alloc
624 * will fail safely and thus a new region will be allocated. Further
625 * it is safe to try to re-update the page table of this reset
626 * alloc_region.
628 * This is the internal implementation of ensure_region_closed(),
629 * and not to be invoked as the interface to closing a region.
631 * Note that in no case will closing a region alter the need_to_zero bit
632 * on any page in the region. It is legal to set that bit as late as possible,
633 * because we only have to know just-in-time - when changing the page
634 * (at some point later) from FREE to non-free - whether to zeroize it.
635 * Therefore, we can set the need_to_zero bit only when there is otherwise
636 * no way to detect that it ever held nonzero data, namely immediately
637 * before doing reset_page_flags() or setting the words_used to 0.
638 * Reflecting the words_used into that bit each time we update words_used
639 * from a region's free pointer would be redundant (newspace scavenging
640 * can open/close/open/close a region several times on the same page). */
642 void
643 gc_close_region(struct alloc_region *alloc_region, int page_type)
645 page_index_t first_page = find_page_index(alloc_region->start_addr);
646 page_index_t next_page = first_page+1;
647 char *page_base = page_address(first_page);
648 char *free_pointer = alloc_region->free_pointer;
650 #if defined LISP_FEATURE_SYSTEM_TLABS && defined DEBUG
651 if (alloc_region == &get_sb_vm_thread()->sys_mixed_tlab ||
652 alloc_region == &get_sb_vm_thread()->sys_cons_tlab) {
653 char msg[] = "NOTE: closing a system allocation region\n";
654 write(2, msg, sizeof msg-1); // signal-safe
656 #endif
658 // page_bytes_used() can be done without holding a lock. Nothing else
659 // affects the usage on the first page of a region owned by this thread.
660 page_bytes_t orig_first_page_bytes_used = page_bytes_used(first_page);
661 gc_assert(alloc_region->start_addr == page_base + orig_first_page_bytes_used);
663 // Mark the region as closed on its first page.
664 page_table[first_page].type &= ~(OPEN_REGION_PAGE_FLAG);
666 if (free_pointer != alloc_region->start_addr) {
667 /* some bytes were allocated in the region */
669 /* All the pages used need to be updated */
671 /* Update the first page. */
672 if (!orig_first_page_bytes_used)
673 gc_assert(page_starts_contiguous_block_p(first_page));
675 gc_assert(page_table[first_page].type == page_type);
676 gc_assert(page_table[first_page].gen == gc_alloc_generation);
678 /* Calculate the number of bytes used in this page. This is not
679 * always the number of new bytes, unless it was free. */
680 os_vm_size_t bytes_used = addr_diff(free_pointer, page_base);
681 bool more;
682 if ((more = (bytes_used > GENCGC_PAGE_BYTES)))
683 bytes_used = GENCGC_PAGE_BYTES;
684 set_page_bytes_used(first_page, bytes_used);
686 /* 'region_size' will be the sum of new bytes consumed by the region,
687 * EXCLUDING any part of the first page already in use,
688 * and any unused part of the final used page */
689 os_vm_size_t region_size = bytes_used - orig_first_page_bytes_used;
691 /* All the rest of the pages should be accounted for. */
692 while (more) {
693 gc_assert(page_table[next_page].type ==
694 (OPEN_REGION_PAGE_FLAG | page_type));
695 page_table[next_page].type ^= OPEN_REGION_PAGE_FLAG;
696 gc_assert(page_words_used(next_page) == 0);
697 gc_assert(page_table[next_page].gen == gc_alloc_generation);
698 page_base += GENCGC_PAGE_BYTES;
699 gc_assert(page_scan_start_offset(next_page) ==
700 addr_diff(page_base, alloc_region->start_addr));
702 /* Calculate the number of bytes used in this page. */
703 bytes_used = addr_diff(free_pointer, page_base);
704 if ((more = (bytes_used > GENCGC_PAGE_BYTES)))
705 bytes_used = GENCGC_PAGE_BYTES;
706 set_page_bytes_used(next_page, bytes_used);
707 region_size += bytes_used;
709 next_page++;
712 // Now 'next_page' is 1 page beyond those fully accounted for.
713 gc_assert(addr_diff(free_pointer, alloc_region->start_addr) == region_size);
714 // Update the global totals
715 bytes_allocated += region_size;
716 generations[gc_alloc_generation].bytes_allocated += region_size;
718 /* Set the alloc restart page to the last page of the region. */
719 set_alloc_start_page(page_type, next_page-1);
721 /* Add the region to the new_areas if requested. */
722 if (boxed_type_p(page_type))
723 add_new_area(first_page, orig_first_page_bytes_used, region_size);
725 } else if (!orig_first_page_bytes_used) {
726 /* The first page is completely unused. Unallocate it */
727 reset_page_flags(first_page);
730 /* Unallocate any unused pages. */
731 page_index_t region_last_page = find_page_index((char*)alloc_region->end_addr-1);
732 while (next_page <= region_last_page) {
733 gc_assert(page_words_used(next_page) == 0);
734 reset_page_flags(next_page);
735 next_page++;
737 gc_set_region_empty(alloc_region);
740 /* Allocate a possibly large object. */
741 void *gc_alloc_large(sword_t nbytes, int page_type)
743 page_index_t first_page, last_page;
744 // Large BOXED would serve no purpose beyond MIXED, and "small large" is illogical.
745 if (page_type == PAGE_TYPE_BOXED || page_type == PAGE_TYPE_SMALL_MIXED)
746 page_type = PAGE_TYPE_MIXED;
748 int locked = !gc_active_p;
749 if (locked) {
750 int __attribute__((unused)) ret = mutex_acquire(&free_pages_lock);
751 gc_assert(ret);
754 first_page = max_alloc_start_page;
755 INSTRUMENTING(
756 last_page = gc_find_freeish_pages(&first_page, nbytes,
757 SINGLE_OBJECT_FLAG | page_type,
758 gc_alloc_generation),
759 et_find_freeish_page);
760 // No need to check whether last_page > old max; it's gotta be.
761 max_alloc_start_page = last_page;
763 /* Set up the pages. */
764 page_index_t page;
765 for (page = first_page; page <= last_page; ++page) {
766 /* Large objects don't share pages with other objects. */
767 gc_assert(page_words_used(page) == 0);
768 set_page_type(page_table[page], SINGLE_OBJECT_FLAG | page_type);
769 page_table[page].gen = gc_alloc_generation;
772 #ifdef LISP_FEATURE_WIN32
773 // don't incur access violations
774 os_commit_memory(page_address(first_page), npage_bytes(1+last_page-first_page));
775 #endif
777 // Store a filler so that a linear heap walk does not try to examine
778 // these pages cons-by-cons (or whatever they happen to look like).
779 // A concurrent walk would probably crash anyway, and most certainly
780 // will if it uses the page tables while this allocation is partway
781 // through assigning bytes_used per page.
782 // The fix for that is clear: MAP-OBJECTS-IN-RANGE should acquire
783 // free_pages_lock when computing the extent of a contiguous block.
784 // Anyway it's best if the new page resembles a valid object ASAP.
785 uword_t nwords = nbytes >> WORD_SHIFT;
786 lispobj* addr = (lispobj*)page_address(first_page);
788 /* The test of whether to use THREAD_JIT_WP here is not based on 'page_type'
789 * but rather on how the page _is_mapped_now_. Conservatively do the call,
790 * because thinking about all 4 combinations of how-it-was-mapped x how-it-will-be-mapped,
791 * here and down below, is too confusing */
792 if (locked) { THREAD_JIT_WP(0); }
793 *addr = make_filler_header(nwords);
795 os_vm_size_t scan_start_offset = 0;
796 for (page = first_page; page < last_page; ++page) {
797 set_page_scan_start_offset(page, scan_start_offset);
798 set_page_bytes_used(page, GENCGC_PAGE_BYTES);
799 scan_start_offset += GENCGC_PAGE_BYTES;
801 page_bytes_t final_bytes_used = nbytes - scan_start_offset;
802 gc_dcheck((nbytes % GENCGC_PAGE_BYTES ? nbytes % GENCGC_PAGE_BYTES
803 : GENCGC_PAGE_BYTES) == final_bytes_used);
804 set_page_scan_start_offset(last_page, scan_start_offset);
805 set_page_bytes_used(last_page, final_bytes_used);
806 bytes_allocated += nbytes;
807 generations[gc_alloc_generation].bytes_allocated += nbytes;
809 if (locked) {
810 int __attribute__((unused)) ret = mutex_release(&free_pages_lock);
811 gc_assert(ret);
813 INSTRUMENTING(prepare_pages(0, first_page, last_page, page_type, gc_alloc_generation),
814 et_bzeroing);
816 /* Add the region to the new_areas if requested. */
817 if (boxed_type_p(page_type)) add_new_area(first_page, 0, nbytes);
819     // The page may not have needed zeroing, but the first word was stored,
820 // turning the putative object temporarily into a page filler object.
821 // Now turn it back into free space.
822 *addr = 0;
823 if (locked) { THREAD_JIT_WP(1); }
825 return addr;
828 /* Search for at least nbytes of space, possibly picking up any
829 * remaining space on the tail of a page that was not fully used.
831 * The found space is guaranteed to be page-aligned if the SINGLE_OBJECT_FLAG
832 * bit is set in page_type. */
834 page_index_t
835 gc_find_freeish_pages(page_index_t *restart_page_ptr, sword_t nbytes,
836 int page_type, generation_index_t gen)
838 page_index_t most_bytes_found_from = 0, most_bytes_found_to = 0;
839 page_index_t first_page, last_page, restart_page = *restart_page_ptr;
840 sword_t nbytes_goal = nbytes;
841 sword_t bytes_found = 0;
842 sword_t most_bytes_found = 0;
843 int multi_object = !(page_type & SINGLE_OBJECT_FLAG);
844 /* FIXME: assert(free_pages_lock is held); */
846 if (multi_object) {
847 if (nbytes_goal < (sword_t)gencgc_alloc_granularity)
848 nbytes_goal = gencgc_alloc_granularity;
849 #if !defined(LISP_FEATURE_64_BIT)
850 // Increase the region size to avoid excessive fragmentation
851 if (page_type == PAGE_TYPE_CODE && nbytes_goal < 65536)
852 nbytes_goal = 65536;
853 #endif
855 page_type &= ~SINGLE_OBJECT_FLAG;
857 gc_assert(nbytes>=0);
858 first_page = restart_page;
859 while (first_page < page_table_pages) {
860 bytes_found = 0;
861 if (page_free_p(first_page)) {
862 gc_dcheck(!page_words_used(first_page));
863 bytes_found = GENCGC_PAGE_BYTES;
864 } else if (multi_object &&
865 // Never return a range starting with a 100% full page
866 (bytes_found = GENCGC_PAGE_BYTES
867 - page_bytes_used(first_page)) > 0 &&
868 // "extensible" means all PTE fields are compatible
869 page_extensible_p(first_page, gen, page_type)) {
870 // TODO: Now that BOXED, CONS, and SMALL_MIXED pages exist, investigate
871 // whether the bias against returning partial pages is still useful.
872 // It probably isn't.
873 if (bytes_found < nbytes && !is_code(page_type)) {
874 if (bytes_found > most_bytes_found)
875 most_bytes_found = bytes_found;
876 first_page++;
877 continue;
879 } else {
880 first_page++;
881 continue;
883 #ifndef LISP_FEATURE_SOFT_CARD_MARKS
884 gc_dcheck(!PAGE_WRITEPROTECTED_P(first_page));
885 #endif
886 /* page_free_p() can legally be used at index 'page_table_pages'
887 * because the array dimension is 1+page_table_pages */
888 for (last_page = first_page+1;
889 bytes_found < nbytes_goal &&
890 page_free_p(last_page) && last_page < page_table_pages;
891 last_page++) {
892 /* page_free_p() implies 0 bytes used, thus GENCGC_PAGE_BYTES available.
893 * It also implies !write_protected (and if the OS's conception were
894 * otherwise, lossage would routinely occur in the fault handler). */
895 bytes_found += GENCGC_PAGE_BYTES;
896 gc_dcheck(!page_words_used(last_page));
897 #ifndef LISP_FEATURE_SOFT_CARD_MARKS
898 gc_dcheck(!PAGE_WRITEPROTECTED_P(last_page));
899 #endif
902 if (bytes_found > most_bytes_found) {
903 most_bytes_found = bytes_found;
904 most_bytes_found_from = first_page;
905 most_bytes_found_to = last_page;
907 if (bytes_found >= nbytes_goal)
908 break;
910 first_page = last_page;
913 bytes_found = most_bytes_found;
914 restart_page = first_page + 1;
916 /* Check for a failure */
917 if (bytes_found < nbytes) {
918 gc_assert(restart_page >= page_table_pages);
919 gc_heap_exhausted_error_or_lose(most_bytes_found, nbytes);
922 gc_assert(most_bytes_found_to);
923 // most_bytes_found_to is the upper exclusive bound on the found range.
924 // next_free_page is the high water mark of most_bytes_found_to.
925 if (most_bytes_found_to > next_free_page) next_free_page = most_bytes_found_to;
926 *restart_page_ptr = most_bytes_found_from;
927 return most_bytes_found_to-1;
930 /* Allocate bytes. The fast path of gc_general_alloc() calls this
931 * when it can't fit in the open region.
932 * This entry point is only for use within the GC itself.
933 * The Lisp region overflow handler either directly calls gc_alloc_large
934 * or closes and opens a region if the allocation is small.
936 * There are two general approaches to handling SMALL_MIXED allocations:
937 * 1. always open the alloc region as whole page, but hack up gc_general_alloc
938 * to avoid spanning cards in the fast case.
939 * 2. open the region as one card, and alter the slow case to try consuming
940 * the next card on the same page if it can.
941 * Choice 2 is better because choice 1 makes an extra test for page_type
942 * in each call to gc_general_alloc. */
944 static void *new_region(struct alloc_region* region, sword_t nbytes, int page_type)
946 ensure_region_closed(region, page_type);
947 void* new_obj = gc_alloc_new_region(nbytes, page_type, region, 0);
948 region->free_pointer = (char*)new_obj + nbytes;
949 gc_assert(region->free_pointer <= region->end_addr);
950 return new_obj;
952 void *collector_alloc_fallback(struct alloc_region* region, sword_t nbytes, int page_type)
954 /* If this is a normal GC - as opposed to "final" GC just prior to saving
955 * a core, then we should never copy a large object (not that that's the best
956 * strategy always, because it entirely precludes defragmenting those objects).
957 * But unfortunately we can't assert that only small objects are seen here,
958 * because genesis does not use large-object pages. So cold-init could fail,
959 * depending on whether objects in the cold core are sufficiently large that
960 * they ought to have gone on large object pages if they could have. */
961 if (nbytes >= LARGE_OBJECT_SIZE) return gc_alloc_large(nbytes, page_type);
963 if (page_type != PAGE_TYPE_SMALL_MIXED) return new_region(region, nbytes, page_type);
965 #define SMALL_MIXED_NWORDS_LIMIT 10
966 #define SMALL_MIXED_NBYTES_LIMIT (SMALL_MIXED_NWORDS_LIMIT * N_WORD_BYTES)
967 /* We want to try to place objects with mixed raw/tagged slots such that they don't span cards.
968 * There are essentially three cases:
969 * (1) If the object size exceeds one card, we go straight to the MIXED region.
970 * (2) If the object size is <= SMALL_MIXED_NWORDS_LIMIT, we will _always_ place it
971 * on one card. To do that, just align up to the next card or whole page
972 * if it would span cards based on the current free_pointer.
973 * This wastes at most SMALL_MIXED_NWORDS_LIMIT - 2 words, per card.
974 * (3) If the object is larger than that, we will waste at most the threshold number
975 * of words, but if it would waste more, we use the MIXED region.
976 * So this case opportunistically uses the subcard region if it can */
977 if ((int)nbytes > (int)GENCGC_CARD_BYTES)
978 return new_region(mixed_region, nbytes, PAGE_TYPE_MIXED);
979 if (!region->start_addr) { // region is not in an open state
980 /* Don't try to request too much, because that might return a brand new page,
981 * when we could have kept going on the same page with small objects.
982 * Better to put the threshold-exceeding object in the MIXED region */
983 int request = nbytes > SMALL_MIXED_NBYTES_LIMIT ? SMALL_MIXED_NBYTES_LIMIT : nbytes;
984 void* new_obj = gc_alloc_new_region(request, page_type, region, 0);
985 char* new_freeptr = (char*)new_obj + nbytes;
986 /* alloc_new_region() ensures that the page it returns has at least 'nbytes' more
987 * but does *not* ensure that there is that much space below the end of the region.
988 * This is a little weird, but doing things this way confines the filler insertion
989 * logic to just here instead of also being in alloc_new_region.
990 * You could try to put that logic only in alloc_new_region, but doing that has
991 * its own downside: to call alloc_new_region, you first have to close the region,
992 * which entails extra work in sync'ing the PTE when we don't really need to */
993 if (new_freeptr <= (char*)region->end_addr) {
994 region->free_pointer = new_freeptr;
995 return new_obj;
998 __attribute__((unused)) page_index_t fpi = find_page_index(region->start_addr);
999 __attribute__((unused)) page_index_t lpi = find_page_index((char*)region->end_addr-1);
1000 gc_assert(fpi == lpi);
1001 gc_assert(page_table[fpi].type & OPEN_REGION_PAGE_FLAG);
1002 // Region is open, but card at free_pointer lacks sufficient space.
1003 // See if there's another card on the same page.
1004 char* page_base = PTR_ALIGN_DOWN(region->start_addr, GENCGC_PAGE_BYTES);
1005 char* next_card = PTR_ALIGN_UP(region->free_pointer, GENCGC_CARD_BYTES);
1006 if (next_card < page_base + GENCGC_PAGE_BYTES) {
1007 int fill_nbytes = next_card - (char*)region->free_pointer;
1008 if (fill_nbytes) {
1009 int fill_nwords = fill_nbytes >> WORD_SHIFT;
1010 /* Object size might strictly exceed SMALL_MIXED_NWORDS_LIMIT.
1011 * Never insert that much filler */
1012 if (fill_nwords >= SMALL_MIXED_NWORDS_LIMIT)
1013 return new_region(mixed_region, nbytes, PAGE_TYPE_MIXED);
1014 *(lispobj*)region->free_pointer = make_filler_header(fill_nwords);
1016 region->free_pointer = next_card;
1017 region->end_addr = next_card + GENCGC_CARD_BYTES;
1018 void* new_obj = next_card;
1019 region->free_pointer = (char*)new_obj + nbytes;
1020 gc_assert(region->free_pointer <= region->end_addr);
1021 return new_obj;
1023 /* Now be careful not to waste too much at the end of the page in the following situation:
1024 * page has 20 words more, but we need 24 words. Use the MIXED region because the subcard
1025 * region has room for anywhere from 2 to 10 more objects depending on how small */
1026 if (nbytes > SMALL_MIXED_NBYTES_LIMIT) page_type = PAGE_TYPE_MIXED, region = mixed_region;
1027 return new_region(region, nbytes, page_type);
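/* A worked example of case (2) from the comment above, with hypothetical
 * numbers: suppose free_pointer sits 40 bytes below a card boundary and a
 * 10-word (80-byte, on 64-bit) request arrives. The object would span the
 * card, so a 5-word filler is written and the object starts on the next
 * card, as the code above does. Sketch only: */
#if 0
    char* next_card = PTR_ALIGN_UP(region->free_pointer, GENCGC_CARD_BYTES);
    int fill_nwords = (next_card - (char*)region->free_pointer) >> WORD_SHIFT; /* = 5 here */
    *(lispobj*)region->free_pointer = make_filler_header(fill_nwords);
    region->free_pointer = next_card; /* the object is then allocated card-aligned */
#endif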
1031 /* Free any trailing pages of the object starting at 'first_page'
1032 * that are currently unused due to object shrinkage.
1033 * Possibly assign different 'gen' and 'allocated' values.
1035 * maybe_adjust_large_object() specifies 'from_space' for 'new_gen'
1036 * and copy_potential_large_object() specifies 'new_space'
1038 * Note that creating a large object might not affect the 'need_to_zero'
1039 * flag on any of the pages consumed (it would if the page type demands prezeroing
1040 * and wasn't zero), but freeing the unused pages of a shrunken object DOES
1041 * set the need_to_zero bit unconditionally. We have to suppose that the object
1042 * constructor wrote bytes on each of its pages, and we don't know whether the tail
1043 * of the object got zeroed versus bashed into FILLER_WIDETAG + random bits. */
1046 static uword_t adjust_obj_ptes(page_index_t first_page,
1047 sword_t nwords,
1048 generation_index_t new_gen,
1049 int new_allocated)
1051 int old_allocated = page_table[first_page].type;
1052 sword_t remaining_bytes = nwords * N_WORD_BYTES;
1053 page_index_t n_full_pages = nwords / (GENCGC_PAGE_BYTES / N_WORD_BYTES);
1054 page_bytes_t excess = remaining_bytes & (GENCGC_PAGE_BYTES - 1);
1055 // page number of ending page of this object at its new size
1056 page_index_t final_page = first_page + (n_full_pages - 1) + (excess != 0);
1058 /* Decide whether there is anything to do by checking whether:
1059 * (1) the page at n_full_pages-1 beyond the first is fully used,
1060 * (2) the next fractional page, if any, has correct usage, and
1061 * (3) the page after that is not part of this object.
1062 * If all those conditions are met, this is the easy case,
1063 * though we may still have to change the generation and/or page type. */
1064 if ((!n_full_pages || page_words_used(first_page+(n_full_pages-1))
1065 == GENCGC_PAGE_WORDS) &&
1066 (!excess || page_bytes_used(final_page) == excess) &&
1067 page_starts_contiguous_block_p(1+final_page)) {
1068 /* The 'if' below has an 'else' which subsumes the 'then' in generality.
1069 * Why? Because usually we only need perform one assignment.
1070 * Moreover, after a further change which makes us not look at the 'gen'
1071 * of the *interior* of a page-spanning object, then the fast case
1072 * reduces to "page_table[first_page].gen = new_gen". And we're done.
1073 * At present, some logic assumes that every page's gen was updated */
1074 page_index_t page;
1075 if (old_allocated == new_allocated) { // Almost always true,
1076 // except when bignums or specialized arrays change from thread-local
1077 // (boxed) allocation to unboxed, for downstream efficiency.
1078 for (page = first_page; page <= final_page; ++page)
1079 page_table[page].gen = new_gen;
1080 } else {
1081 for (page = first_page; page <= final_page; ++page) {
1082 set_page_type(page_table[page], new_allocated);
1083 page_table[page].gen = new_gen;
1086 return 0;
1089 /* The assignments to the page table here affect only one object
1090 * since its pages can't be shared with other objects */
1091 #define CHECK_AND_SET_PTE_FIELDS() \
1092 gc_assert(page_table[page].type == old_allocated); \
1093 gc_assert(page_table[page].gen == from_space); \
1094 gc_assert(page_scan_start_offset(page) == npage_bytes(page-first_page)); \
1095 page_table[page].gen = new_gen; \
1096 set_page_type(page_table[page], new_allocated)
1098 gc_assert(page_starts_contiguous_block_p(first_page));
1099 page_index_t page = first_page;
1100 while (remaining_bytes > (sword_t)GENCGC_PAGE_BYTES) {
1101 gc_assert(page_words_used(page) == GENCGC_PAGE_WORDS);
1102 CHECK_AND_SET_PTE_FIELDS();
1103 remaining_bytes -= GENCGC_PAGE_BYTES;
1104 page++;
1107 /* Now at most one page of data in use by the object remains,
1108 * but there may be more unused pages beyond which will be freed. */
1110 /* This page must have at least as many bytes in use as expected */
1111 gc_assert((sword_t)page_bytes_used(page) >= remaining_bytes);
1112 CHECK_AND_SET_PTE_FIELDS();
1114 /* Adjust the bytes_used. */
1115 page_bytes_t prev_bytes_used = page_bytes_used(page);
1116 set_page_bytes_used(page, remaining_bytes);
1118 uword_t bytes_freed = prev_bytes_used - remaining_bytes;
1120 /* Free unused pages that were originally allocated to this object. */
1121 page++;
1122 while (prev_bytes_used == GENCGC_PAGE_BYTES &&
1123 page_table[page].gen == from_space &&
1124 page_table[page].type == old_allocated &&
1125 page_scan_start_offset(page) == npage_bytes(page - first_page)) {
1126 // These pages are part of oldspace, which was un-write-protected.
1127 gc_assert(page_cards_all_marked_nonsticky(page));
1129 /* Zeroing must have been done before shrinking the object.
1130 * (It is strictly necessary for correctness with objects other
1131 * than simple-vector, but pragmatically it reduces accidental
1132      * conservatism when done for simple-vectors as well) */
1133 #ifdef DEBUG
1134 { lispobj* words = (lispobj*)page_address(page);
1135 int i;
1136 for(i=0; i<(int)(GENCGC_PAGE_BYTES/N_WORD_BYTES); ++i)
1137 if (words[i])
1138 lose("non-zeroed trailer of shrunken object @ %p",
1139 page_address(first_page));
1141 #endif
1142 /* It checks out OK, free the page. */
1143 prev_bytes_used = page_bytes_used(page);
1144 set_page_need_to_zero(page, 1);
1145 set_page_bytes_used(page, 0);
1146 reset_page_flags(page);
1147 bytes_freed += prev_bytes_used;
1148 page++;
1151 // If this freed nothing, it ought to have gone through the fast path.
1152 gc_assert(bytes_freed != 0);
1153 return bytes_freed;
1156 /* "Copy" a large object. If the object is on large object pages,
1157 * and satisfies the condition to remain where it is,
1158 * it is simply promoted, else it is copied.
1159 * To stay on large-object pages, the object must either be at least
1160 * LARGE_OBJECT_SIZE, or must waste fewer than about 1% of the space
1161 * on its allocated pages. Using 32k pages as a reference point:
1162 * 3 pages - ok if size >= 97552
1163 * 2 pages - ... size >= 65040
1164 * 1 page - ... size >= 32528
1166 * Bignums and vectors may have shrunk. If the object is not copied,
1167 * the slack needs to be reclaimed, and the page_tables corrected.
1169 * Code objects can't shrink, but it's not worth adding an extra test
1170 * for large code just to avoid the loop that performs adjustment, so
1171 * go through the adjustment motions even though nothing happens. */
1174 lispobj
1175 copy_potential_large_object(lispobj object, sword_t nwords,
1176 struct alloc_region* region, int page_type)
1178 page_index_t first_page;
1180 CHECK_COPY_PRECONDITIONS(object, nwords);
1182 /* Check whether it's a large object. */
1183 first_page = find_page_index((void *)object);
1184 gc_dcheck(first_page >= 0);
1186 os_vm_size_t nbytes = nwords * N_WORD_BYTES;
1187 os_vm_size_t rounded = ALIGN_UP(nbytes, GENCGC_PAGE_BYTES);
1188 if (page_single_obj_p(first_page) &&
1189 (nbytes >= LARGE_OBJECT_SIZE || (rounded - nbytes < rounded / 128))) {
1191 // Large BOXED would serve no purpose beyond MIXED, and "small large" is illogical.
1192 if (page_type == PAGE_TYPE_BOXED || page_type == PAGE_TYPE_SMALL_MIXED)
1193 page_type = PAGE_TYPE_MIXED;
1194 os_vm_size_t bytes_freed =
1195 adjust_obj_ptes(first_page, nwords, new_space,
1196 SINGLE_OBJECT_FLAG | page_type);
1198 generations[from_space].bytes_allocated -= (bytes_freed + nbytes);
1199 generations[new_space].bytes_allocated += nbytes;
1200 bytes_allocated -= bytes_freed;
1202 /* Add the region to the new_areas if requested. */
1203 gc_in_situ_live_nwords += nbytes>>WORD_SHIFT;
1204 if (boxed_type_p(page_type)) add_new_area(first_page, 0, nbytes);
1206 return object;
1208 return gc_copy_object(object, nwords, region, page_type);
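/* A quick arithmetic check of the size thresholds quoted in the comment
 * before copy_potential_large_object, using its own retention criterion
 * (rounded - nbytes < rounded/128) with 32k pages. Hedged sketch, not part
 * of the GC: */
#if 0
    gc_assert(98304 - 97552 < 98304 / 128); /* 3 pages: slack 752 < 768 */
    gc_assert(65536 - 65040 < 65536 / 128); /* 2 pages: slack 496 < 512 */
    gc_assert(32768 - 32528 < 32768 / 128); /* 1 page : slack 240 < 256 */
#endif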
1211 /* to copy unboxed objects */
1212 lispobj
1213 copy_unboxed_object(lispobj object, sword_t nwords)
1215 return gc_copy_object(object, nwords, unboxed_region, PAGE_TYPE_UNBOXED);
1218 /* This WILL NOT reliably work for objects in a currently open allocation region,
1219 * because page_words_used() is not sync'ed to the free pointer until closing.
1220 * However it should work reliably for codeblobs, because if you can hold
1221 * a reference to the codeblob, then either you'll find it in the generation 0
1222 * tree, or else can linearly scan for it in an older generation */
1223 static lispobj dynspace_codeblob_tree_snapshot; // valid only during GC
1224 lispobj *search_dynamic_space(void *pointer)
1226 page_index_t page_index = find_page_index(pointer);
1228 /* The address may be invalid, so do some checks.
1229 * page_index -1 is legal, and page_free_p returns true in that case. */
1230 if (page_free_p(page_index)) return NULL;
1232 int type = page_table[page_index].type & PAGE_TYPE_MASK;
1233 // Generation 0 code is in the tree usually - it isn't for objects
1234 // in generation 0 following a non-promotion cycle.
1235 if (type == PAGE_TYPE_CODE && page_table[page_index].gen == 0) {
1236 lispobj tree = dynspace_codeblob_tree_snapshot ? dynspace_codeblob_tree_snapshot :
1237 SYMBOL(DYNSPACE_CODEBLOB_TREE)->value;
1238 lispobj node = brothertree_find_lesseql((uword_t)pointer, tree);
1239 if (node != NIL) {
1240 lispobj *found = (lispobj*)((struct binary_node*)INSTANCE(node))->uw_key;
1241 int widetag = widetag_of(found);
1242 if (widetag != CODE_HEADER_WIDETAG && widetag != FUNCALLABLE_INSTANCE_WIDETAG)
1243 lose("header not OK for code page: @ %p = %"OBJ_FMTX"\n", found, *found);
1244 sword_t nwords = object_size(found);
1245 lispobj *upper_bound = found + nwords;
1246 if (pointer < (void*)upper_bound) return found;
1249 char* limit = page_address(page_index) + page_bytes_used(page_index);
1250 if ((char*)pointer > limit) return NULL;
1251 if (type == PAGE_TYPE_CONS) {
1252 return (lispobj*)ALIGN_DOWN((uword_t)pointer, 2*N_WORD_BYTES);
1254 lispobj *start;
1255 if (type == PAGE_TYPE_SMALL_MIXED) { // find the nearest card boundary below 'pointer'
1256 start = (lispobj*)ALIGN_DOWN((uword_t)pointer, GENCGC_CARD_BYTES);
1257 } else {
1258 start = (lispobj *)page_scan_start(page_index);
1260 return gc_search_space(start, pointer);
1263 /* Return true if and only if everything on the specified page is NOT subject
1264 * to evacuation, i.e. either the page is not in 'from_space', or is entirely
1265 * pinned. "Entirely pinned" is predicated on being marked as pinned,
1266 * and satisfying one of two additional criteria:
1267 * 1. the page is a single-object page
1268 * 2. the page contains only code, and all code objects are pinned.
1270 * A non-large-object page that is marked "pinned" does not suffice
1271 * to be considered entirely pinned if it contains other than code. */
1273 int pin_all_dynamic_space_code;
1274 static inline int immune_set_memberp(page_index_t page)
1276 return (page_table[page].gen != from_space)
1277 || (gc_page_pins[page] &&
1278 (page_single_obj_p(page) ||
1279 (is_code(page_table[page].type) && pin_all_dynamic_space_code)));
1282 #ifndef LISP_FEATURE_WEAK_VECTOR_READBARRIER
1283 // Only a bignum, code blob, or vector could be on a single-object page.
1284 #define potential_largeobj_p(w) \
1285 (w==BIGNUM_WIDETAG || w==CODE_HEADER_WIDETAG || \
1286 (w>=SIMPLE_VECTOR_WIDETAG && w < COMPLEX_BASE_STRING_WIDETAG))
1287 #else
1288 // also include WEAK_POINTER_WIDETAG because it could be vector-like
1289 #define potential_largeobj_p(w) \
1290 (w==BIGNUM_WIDETAG || w==CODE_HEADER_WIDETAG || w==WEAK_POINTER_WIDETAG || \
1291 (w>=SIMPLE_VECTOR_WIDETAG && w < COMPLEX_BASE_STRING_WIDETAG))
1292 #endif
1294 static inline __attribute__((unused))
1295 int lowtag_ok_for_page_type(__attribute__((unused)) lispobj ptr,
1296 __attribute__((unused)) int page_type) {
1297 // If the young generation goes to mixed-region, this filter is not valid
1298 #ifdef LISP_FEATURE_USE_CONS_REGION
1299 // This doesn't currently decide on acceptability for code/non-code
1300 if (lowtag_of(ptr) == LIST_POINTER_LOWTAG) {
1301 if (page_type != PAGE_TYPE_CONS) return 0;
1302 } else {
1303 if (page_type == PAGE_TYPE_CONS) return 0;
1305 #endif
1306 return 1;
1310 /* We offer many variations on root scanning:
1311 * 1. X86: all refs from the stack are ambiguous, and pin their referent
1312 * if there is one. All refs from registers (interrupt contexts)
1313 * are ambiguous, and similarly pin their referent if there is one.
1314 * Interior pointers are disallowed to anything except code.
1315 * (FIXME: the PC to the jump instruction into an immobile fdefn
1316 * or self-contained trampoline GF - what does it do wrt pinning???)
1318 * 2. ARM64: interior code pointers from the stack are ambiguous
1319 * and pin their referent if there is one,
1320 * Non-code references are unambiguous, and do NOT pin their referent.
1321 * Only the call chain is scanned for code pointers.
1322 * Interrupt context registers are unambiguous, and can get
1323 * altered by GC.
1325 * 3. PPC64: interior code pointers from the stack are ambiguous roots,
1326 * and pin their referent if there is one.
1327 * FDEFN pointers may be untagged, and are therefore ambiguous.
1328 * They pin their referent if there is one, but only if the reference
1329 * is from a register in an interrupt context, not the control stack.
1330 * (codegen will never spill an untagged fdefn to the stack)
1331 * All other non-code object pointers are unambiguous, and do NOT pin
1332 * their referent from the stack.
1333 * Interrupt context registers are unambiguous and DO pin their referent.
1334 * The entire control stack is scanned for code pointers, thus avoiding
1335 * reliance on a correct backtrace. (I doubt the veracity of all claims
1336 * to the backtrace chain being correct in the presence of interrupts)
1338 * 4. All references from the stack are tagged, and precise, and none pin
1339 * their referent.
1340 * Interrupt context registers are unambiguous, and do not pin their referent.
1341 * (pertains to any architecture not specifically mentioned above)
1343 * A single boolean value for GENCGC_IS_PRECISE is inadequate to express
1344 * the possibilities. Anything except case 1 is considered "precise".
1345 * Because of the variations, there are many other #ifdefs surrounding
1346 * the logic pertaining to stack and interrupt context scanning.
1347 * Anyway, the above is the theory, but in practice, we have to treat
1348 * some unambiguous pointers as ambiguous for lack of information
1349 * in conservative_root_p about what the intent is. */
1351 #define AMBIGUOUS_POINTER 1
1352 #if !GENCGC_IS_PRECISE
1353 // Return the starting address of the object containing 'addr'
1354 // if and only if the object is one which would be evacuated from 'from_space'
1355 // were it allowed to be either discarded as garbage or moved.
1356 // 'addr_page_index' is the page containing 'addr' and must not be -1.
1357 // Return 0 if there is no such object - that is, if addr is past the
1358 // end of the used bytes, or its pages are not in 'from_space' etc.
1359 static lispobj conservative_root_p(lispobj addr, page_index_t addr_page_index)
1361 /* quick check 1: Address is quite likely to have been invalid. */
1362 struct page* page = &page_table[addr_page_index];
1363 bool enforce_lowtag = !is_code(page->type);
1365 if ((addr & (GENCGC_PAGE_BYTES - 1)) >= page_bytes_used(addr_page_index) ||
1366 (!is_lisp_pointer(addr) && enforce_lowtag) ||
1367 (compacting_p() && immune_set_memberp(addr_page_index)))
1368 return 0;
1369 gc_assert(!(page->type & OPEN_REGION_PAGE_FLAG));
1371 /* If this page can hold only one object, the test is very simple.
1372 * Code pages allow random interior pointers, but only a correctly
1373 * tagged pointer to the boxed words. Tagged interior pointers to SIMPLE-FUNs
1374 * are just as good as any untagged instruction pointer. */
1375 if (page_single_obj_p(addr_page_index)) {
1376 lispobj* object_start = page_scan_start(addr_page_index);
1377 int widetag = widetag_of(object_start);
1378 if (instruction_ptr_p((char*)addr, object_start) ||
1379 (potential_largeobj_p(widetag) &&
1380 // Conveniently all potential largeobjs are OTHER_POINTER
1381 make_lispobj(object_start, OTHER_POINTER_LOWTAG) == addr))
1382 return make_lispobj(object_start, OTHER_POINTER_LOWTAG);
1383 return 0;
1386 /* For pages of code:
1387 * - we can't enforce a particular lowtag on the pointer.
1388 * - we have to find the object base, because pinning a code object
1389 * pins its embedded simple-funs and vice-versa.
1390 * I don't know what to think about pointing to filler objects.
1391 * It seems like a bad idea, but what if Lisp code does that?
1392 * Can it crash if we free the page? I'll assume we're fine
1393 * unless someone can show otherwise */
1394 if (is_code(page->type)) {
1395 lispobj* object_start = search_dynamic_space((void*)addr);
1396 /* This search must not fail. We've already verified that the
1397 * pointer is within range for its page. */
1398 gc_assert(object_start);
1399 switch (widetag_of(object_start)) {
1400 case CODE_HEADER_WIDETAG:
1401 /* If 'addr' points anywhere beyond the boxed words, it's valid
1402 * (i.e. allow it even if an incorrectly tagged pointer to a simple-fun header)
1403 * FIXME: Do we want to allow pointing at the untagged base address too?
1404 * It'll find a key in the codeblob tree, but why would Lisp have the
1405 * untagged pointer and expect it to be a strong reference? */
1406 if (instruction_ptr_p((void*)addr, object_start)
1407 || addr == make_lispobj(object_start, OTHER_POINTER_LOWTAG))
1408 return make_lispobj(object_start, OTHER_POINTER_LOWTAG);
1409 return 0;
1410 #ifdef LISP_FEATURE_X86_64
1411 case FUNCALLABLE_INSTANCE_WIDETAG:
1412 // Allow any of these to pin a funcallable instance:
1413 // - pointer to embedded machine instructions
1414 // - untagged pointer to trampoline word
1415 // - correctly tagged pointer
1416 if ((addr >= (uword_t)(object_start+2) && addr < (uword_t)(object_start+4))
1417 || addr == (lispobj)(object_start+1)
1418 || addr == make_lispobj(object_start, FUN_POINTER_LOWTAG))
1419 return make_lispobj(object_start, FUN_POINTER_LOWTAG);
1420 return 0;
1421 #endif
1423 return 0;
1426 /* For non-code, the pointer's lowtag and widetag must correspond.
1427 * The putative object header can safely be read even if it turns out
1428 * that the pointer is not valid, because 'addr' was in bounds for the page.
1429 * Note that this can falsely pass if looking at the interior of an unboxed
1430 * array that masquerades as a Lisp object header by random chance. */
1431 if (widetag_of(native_pointer(addr)) != FILLER_WIDETAG
1432 && lowtag_ok_for_page_type(addr, page->type)
1433 && plausible_tag_p(addr)) return AMBIGUOUS_POINTER;
1435 // FIXME: I think there is a window of GC vulnerability regarding FINs
1436 // and FDEFNs containing executable bytes. In either case if the only pointer
1437 // to such an object is the program counter, the object could be considered
1438 // garbage because there is no _tagged_ pointer to it.
1439 // It is almost impossible for this situation to arise, but it seems worth some study.
1441 return 0;
1443 #elif defined LISP_FEATURE_MIPS || defined LISP_FEATURE_PPC64
1444 /* Consider interior pointers to code as roots, and untagged fdefn pointers.
1445 * But most other pointers are *unambiguous* conservative roots.
1446 * This is not "less conservative" per se, than the non-precise code,
1447 * because it's actually up to the user of this predicate to decide whether
1448 * the control stack as a whole is scanned for objects to pin.
1449 * The so-called "precise" code should generally NOT scan the stack,
1450 * and not call this on stack words.
1451 * Anyway, this code isn't as performance-critical as the x86 variant,
1452 * so it's not worth trying to optimize out the search for the object */
1453 static lispobj conservative_root_p(lispobj addr, page_index_t addr_page_index)
1455 struct page* page = &page_table[addr_page_index];
1457 // quick check: within from_space and within page usage
1458 if ((addr & (GENCGC_PAGE_BYTES - 1)) >= page_bytes_used(addr_page_index) ||
1459 (compacting_p() && immune_set_memberp(addr_page_index)))
1460 return 0;
1461 gc_assert(!(page->type & OPEN_REGION_PAGE_FLAG));
1463 /* Find the containing object, if any
1464 * This is slightly less quick than could be: if sticky_preserve_pointer() was
1465 * called on the contents of a boxed register, then we know that the value is
1466 * a properly tagged descriptor, and don't really need to "search" for an object.
1467 * (And in fact we should rule out fixnums up front)
1468 * Unfortunately sticky_preserve_pointer() does not inform conservative_root_p()
1469 * whether the pointer is known good. So we need a slightly different interface
1470 * to achieve that extra bit of efficiency */
1471 lispobj* object_start = search_dynamic_space((void*)addr);
1472 if (!object_start) return 0;
1474 // Untagged fdefn pointer or code pointer: ok
1475 if ((widetag_of(object_start) == FDEFN_WIDETAG && addr == (uword_t)object_start)
1476 || is_code(page->type))
1477 return make_lispobj(object_start, OTHER_POINTER_LOWTAG);
1479 /* Take special care not to return fillers. A real-world example:
1480 * - a boxed register contains 0x528b4000
1481 * - the object formerly at 0x528b4000 is a filler
1482 * - compute_lispobj(0x528b4000) returns 0x528b4000 because LOWTAG_FOR_WIDETAG
1483 * says that FILLER_WIDETAG has a 0 lowtag.
1484 * compute_lispobj simply ORs in the 0 which gives back the original address
1485 * and that of course satisfies the equality test. */
1487 // Correctly tagged pointer: ok
1488 if (addr == compute_lispobj(object_start)
1489 && widetag_of(object_start) != FILLER_WIDETAG)
1490 return addr;
1491 return 0;
1493 #endif
1495 /* Adjust large bignum and vector objects. This will adjust the
1496 * allocated region if the size has shrunk, and change boxed pages
1497 * into unboxed pages. The pages are not promoted here, and the
1498 * object is not added to the new_regions; this is really
1499 * only designed to be called from preserve_pointer(). Nothing should fail
1500 * if this is missed; it just may delay the moving of objects to unboxed
1501 * pages, and the freeing of pages. */
1502 static void
1503 maybe_adjust_large_object(lispobj* where, page_index_t first_page, sword_t nwords)
1505 int page_type;
1507 /* Check whether it's a vector or bignum object. */
1508 /* There is no difference between MIXED and BOXED for large objects,
1509 * because in any event we'll use the large simple-vector optimization
1510 * for root scavenging if applicable. */
1511 lispobj widetag = widetag_of(where);
1512 if (widetag == SIMPLE_VECTOR_WIDETAG)
1513 page_type = SINGLE_OBJECT_FLAG | PAGE_TYPE_MIXED;
1514 #ifndef LISP_FEATURE_UBSAN
1515 else if (specialized_vector_widetag_p(widetag) || widetag == BIGNUM_WIDETAG)
1516 page_type = SINGLE_OBJECT_FLAG | PAGE_TYPE_UNBOXED;
1517 #endif
1518 else
1519 return;
1521 os_vm_size_t bytes_freed =
1522 adjust_obj_ptes(first_page, nwords, from_space, page_type);
1523 generations[from_space].bytes_allocated -= bytes_freed;
1524 bytes_allocated -= bytes_freed;
1527 /* After scavenging of the roots is done, we go back to the pinned objects
1528 * and look within them for pointers. Additionally we delete any keys
1529 * from the list of pins that were not legal object addresses,
1530 * but passed through all the filters in conservative_root_p.
1532 #define SMALL_MAX_PINS 200
1533 static uword_t small_pins_vector[SMALL_MAX_PINS];
1535 uword_t gc_pinned_nwords;
1536 static void refine_ambiguous_roots()
1538 void gc_heapsort_uwords(uword_t*, int);
1540 int pre_deletion_count = pinned_objects.count;
1541 gc_pin_count = pre_deletion_count;
1542 if (pre_deletion_count == 0) return;
1544 /* We need a place to sort the keys of pinned_objects. If the key count is small,
1545 * use the small_pins vector; otherwise grab some memory via mmap */
1546 lispobj* workspace;
1547 if (pre_deletion_count < SMALL_MAX_PINS) { // leave room for sentinel at end
1548 workspace = small_pins_vector;
1549 } else {
1550 pins_alloc_size = ALIGN_UP((pre_deletion_count+1)*N_WORD_BYTES, BACKEND_PAGE_BYTES);
1551 workspace = (lispobj*)os_allocate(pins_alloc_size);
1552 gc_assert(workspace);
1554 gc_filtered_pins = workspace; // needed for obliterate_nonpinned_words
1555 lispobj key;
1556 int count = 0, index;
1557 for_each_hopscotch_key(index, key, pinned_objects) {
1558 gc_assert(is_lisp_pointer(key));
1559 // Preserve only the object base addresses, including any "false" pointers.
1560 if (listp(key) || widetag_of(native_pointer(key)) != SIMPLE_FUN_WIDETAG)
1561 workspace[count++] = key;
1563 gc_heapsort_uwords(workspace, count);
1564 /* Algorithm:
1565 * for each group of keys with the same page_scan_start
1566 * - scan the heap at the indicated start address
1567 * - "intersect" the list of objects visited with the list of
1568 * ambiguous roots (this is easy because the keys are sorted)
1569 * - change any missed key to 0 as we go
1571 lispobj *where = 0, // as is tradition
1572 *previous_scan_start = 0;
1573 int removed = 0;
1574 for (index = 0 ; index < count ; ++index) {
1575 lispobj* key = native_pointer(workspace[index]);
1576 lispobj* scan_start = page_scan_start(find_page_index(key));
1577 if (scan_start != previous_scan_start) where = previous_scan_start = scan_start;
1578 /* Scan forward from 'where'. This does not need a termination test based
1579 * on page_bytes_used because we know that 'key' was in-bounds for its page.
1580 * Therefore at least as many bytes are in use on the page as are needed
1581 * to enclose 'where'. If the next object we would visit is beyond it,
1582 * then we're done; the key was not found */
1583 while (1) {
1584 if (where < key) {
1585 where += object_size(where);
1586 } else if (where == key) {
1587 break;
1588 } else { // 'where' went past the key, so the key is bad
1589 workspace[index] = 0;
1590 removed = 1;
1591 break;
1595 // Delete any 0s
1596 if (removed) {
1597 int new_index = 0;
1598 for (index = 0 ; index < count; ++index) {
1599 key = workspace[index];
1600 if (key) workspace[new_index++] = key;
1602 gc_assert(new_index < count);
1603 count = new_index;
1605 gc_pin_count = count;
1606 if (!(gencgc_verbose & 4)) return;
1607 // Print in multiple columns to fit more on a screen
1608 // and sort like 'ls' (down varying fastest)
1609 char description[24];
1610 fprintf(stderr, "Sorted pin list (%d):\n", count);
1611 const int ncolumns = 4;
1612 int nrows = ALIGN_UP(count,ncolumns) / ncolumns;
1613 int row, col;
1614 for (row = 0; row < nrows; ++row) {
1615 for (col = 0; col < ncolumns; ++col) {
1616 int index = col * nrows + row;
1617 if (index < count) {
1618 lispobj* obj = native_pointer(workspace[index]);
1619 lispobj word = *obj;
1620 strcpy(description, "cons");
1621 if (is_header(word))
1622 snprintf(description, sizeof description, "%s,%ldw",
1623 widetag_names[header_widetag(word)>>2],
1624 (long)object_size(obj));
1625 fprintf(stderr, " %"OBJ_FMTX": %-24s", (uword_t)obj, description);
1628 putc('\n', stderr);
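/* A minimal standalone sketch of the "intersect sorted keys with a heap walk"
 * idea implemented above, assuming a single contiguous region and a
 * caller-supplied size function; the names (other than uword_t/sword_t) are
 * invented for illustration, and the real loop additionally restarts the walk
 * at each new page_scan_start. Keys that do not land on an object base are
 * dropped, just as the loop above zeroes them out. */
static int __attribute__((unused))
example_filter_sorted_keys(uword_t *keys, int nkeys,
                           uword_t *region_start, uword_t *region_end,
                           sword_t (*size_in_words)(uword_t *obj))
{
    uword_t *where = region_start;
    int in, out = 0;
    for (in = 0; in < nkeys; ++in) {
        uword_t *key = (uword_t*)keys[in];
        while (where < region_end && where < key)
            where += size_in_words(where);      // step forward object by object
        if (where == key)
            keys[out++] = keys[in];             // key is an object base: keep it
        // otherwise the walk stepped past it, so it was not a valid object start
    }
    return out;  // surviving keys are compacted to the front of 'keys'
}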
1632 /* After scavenging of the roots is done, we go back to the pinned objects
1633 * and look within them for pointers. */
1634 static void
1635 scavenge_pinned_ranges()
1637 int i;
1638 lispobj key;
1639 sword_t nwords = 0;
1640 for (i = 0; i < gc_pin_count; ++i) {
1641 key = gc_filtered_pins[i];
1642 gc_assert(is_lisp_pointer(key));
1643 lispobj* obj = native_pointer(key);
1644 if (listp(key)) {
1645 scavenge(obj, 2);
1646 nwords += 2;
1647 } else {
1648 lispobj header = *obj;
1649 nwords += scavtab[header_widetag(header)](obj, header);
1652 gc_pinned_nwords = nwords;
1655 /* visit_freed_objects() was designed to support post-GC actions such as
1656 * recycling of unused symbol TLS indices. However, I could not make this work
1657 * as claimed at the time that it gets called, so at best this is reserved
1658 * for debugging, and only when you can tolerate some inaccuracy.
1660 * The problem is that oldspace pages which were not pinned should eventually
1661 * be scanned en masse using contiguous blocks as large as possible without
1662 * encroaching on pinned pages. But we need to visit the dead objects on partially
1663 * pinned pages prior to turning those objects into page-filling objects.
1664 * Based on a real-life example, finding a correct approach is difficult.
1665 * Consider three pages all having the same scan_start of 0x1008e78000,
1666 * with the final page, and only the final page, containing a pinned object:
1668 * start: 0x1008e78000 0x1008e80000 0x1008e88000
1669 * pin: 0x1008e8bec0
1670 * ^------------------+------------------|
1671 * There is a page-spanning (SIMPLE-ARRAY (UNSIGNED-BYTE 64) 8192)
1672 * from 0x1008e78000 to 0x1008E88010 (exclusive). The penultimate word
1673 * of that array appears to be a valid widetag:
1675 * 0x1008e88000: 0x0000000000001df1
1676 * 0x1008e88008: 0x0000000000000000
1677 * followed by:
1678 * 0x1008e88010: 0x0000001006c798c7 CONS
1679 * 0x1008e88018: 0x0000001008e88447
1680 * 0x1008e88020: 0x00000000000000ad (SIMPLE-ARRAY (UNSIGNED-BYTE 64) 32)
1681 * 0x1008e88028: 0x0000000000000040
1682 * ... pretty much anything in here ...
1683 * 0x1008e8bec0: any valid pinned object
1685 * Page wiping ignores the pages based at 0x1008e78000 and 0x1008e80000
1686 * and it is only concerned with the range from 0x1008e88000..0x1008e8bec0
1687 * which becomes filler. The question is how to traverse objects in the filled
1688 * range. You can't start scanning dead objects at the page base address
1689 * of the final page because that would parse these objects as:
1691 * 0x1008e88000: 0x0000000000001df1 (complex-vector-nil) ; 30 words
1692 * 0x1008e880f0: any random garbage
1694 * But if you scan from the correct scan start of 0x1008e78000 then how do you
1695 * know to skip that page later (in free_oldspace), as it is entirely in oldspace,
1696 * but partially visited already? This is what in malloc/free terms would be
1697 * a "double free", and there is no obvious solution to that.
1699 void visit_freed_objects(char __attribute__((unused)) *start,
1700 sword_t __attribute__((unused)) nbytes)
1702 #ifdef TRAVERSE_FREED_OBJECTS
1703 /* At this point we could attempt to recycle unused TLS indices
1704 * as follows: For each now-garbage symbol that had a nonzero index,
1705 * return that index to a "free TLS index" pool, perhaps a linked list
1706 * or bitmap. Then either always try the free pool first (for better
1707 * locality) or if ALLOC-TLS-INDEX detects exhaustion (for speed). */
1708 lispobj* where = (lispobj*)start;
1709 lispobj* end = (lispobj*)(start + nbytes);
1710 while (where < end) {
1711 lispobj word = *where;
1712 if (forwarding_pointer_p(where)) { // live object
1713 /* CAUTION: This CAN NOT WORK RELIABLY. Due to gc_copy_object_resizing()
1714 * we might compute the wrong size because we take it from the copy.
1715 * Are there other places where we get this wrong??? I sure hope not */
1716 lispobj* fwd_where = native_pointer(forwarding_pointer_value(where));
1717 fprintf(stderr, "%p: -> %p\n", where, fwd_where);
1718 where += object_size(fwd_where);
1719 } else { // dead object
1720 fprintf(stderr, "%p: %"OBJ_FMTX" %"OBJ_FMTX"\n", where, where[0], where[1]);
1721 if (is_header(word)) {
1722 // Do something interesting
1723 where += headerobj_size(where, word);
1724 } else {
1725 /* Can't do much useful with conses because often we can't distinguish
1726 * filler from data. visit_freed_objects is called on ranges of pages
1727 * without regard to whether each intervening page was completely full.
1728 * (This is not usually the way, but freeing of pages is slightly
1729 * imprecise in that regard).
1730 * And it's probably broken, since we leave detritus on code pages */
1731 where += 2;
1735 #endif
1738 /* Deposit a FILLER_WIDETAG object covering one or more dead objects.
1739 * If using more than 1 card per page, scavenge_root_gens() is able to scan
1740 * some pages without aligning to object boundaries. For that to work,
1741 * it must not accidentally see a raw word or leftover garbage.
1742 * Note that while CONS and SMALL_MIXED pages never have card-spanning objects,
1743 * deposit_filler() deals with the "mirror image" of the pinned objects,
1744 * hence it might get a card-spanning filler. It has to do something to ensure
1745 * that no card will see garbage if scanned from its base address.
1746 * To achieve that, an extra filler may be needed at the start of any spanned card.
1747 * The sizes of extra fillers don't have to sum up to the total filler size.
1748 * They serve the vital purpose of getting descriptors_scavenge() to skip a
1749 * portion of the card they're on, but those fillers are never visited in a
1750 * heap walk that steps by object from a page's page_scan_start.
1751 * The final filler must be the correct size, so any algorithm that achieves
1752 * the desired end result is OK */
1753 void deposit_filler(char* from, char* to) {
1754 sword_t nbytes = to - from;
1755 if (!nbytes) return;
1756 gc_assert(nbytes > 0);
1757 sword_t nwords = nbytes >> WORD_SHIFT;
1758 gc_assert((nwords - 1) <= 0x7FFFFF);
1759 page_index_t page = find_page_index(from);
1760 gc_assert(find_page_index(to-1) == page);
1761 *(lispobj*)from = make_filler_header(nwords);
1762 long unsigned last_card;
1763 switch (page_table[page].type) {
1764 case PAGE_TYPE_BOXED:
1765 case PAGE_TYPE_CONS:
1766 case PAGE_TYPE_SMALL_MIXED:
1767 last_card = addr_to_card_index(to-1);
1768 while (addr_to_card_index(from) != last_card) {
1769 from = PTR_ALIGN_DOWN(from, GENCGC_CARD_BYTES) + GENCGC_CARD_BYTES;
1770 nwords = (to - from) >> WORD_SHIFT;
1771 *(lispobj*)from = make_filler_header(nwords);
1776 /* Deposit filler objects on small object pinned pages.
1777 * Also ensure that no scan_start_offset points to a page in
1778 * oldspace that will be freed.
1780 static void obliterate_nonpinned_words()
1782 if (!gc_pin_count) return;
1784 #define page_base(x) ALIGN_DOWN(x, GENCGC_PAGE_BYTES)
1785 // This macro asserts that space accounting happens exactly
1786 // once per affected page (a page with any pins, no matter how many)
1787 #define adjust_gen_usage(i) \
1788 gc_assert(page_table[i].gen == from_space); \
1789 bytes_moved += page_bytes_used(i); \
1790 page_table[i].gen = new_space
1792 lispobj* keys = gc_filtered_pins;
1793 int n_pins = gc_pin_count;
1794 // Store a sentinel at the end.
1795 // It is safe to write one more word than there are pins.
1796 keys[n_pins] = ~(uword_t)0;
1798 // Each pinned object begets two ranges of bytes to be turned into filler:
1799 // - the range preceding it back to its page start or predecessor object
1800 // - the range after it, up to the lesser of page bytes used or successor object
1802 // Prime the loop
1803 uword_t fill_from = page_base(keys[0]);
1804 os_vm_size_t bytes_moved = 0; // i.e. virtually moved
1805 int i;
1807 for (i = 0; i < n_pins; ++i) {
1808 lispobj* obj = native_pointer(keys[i]);
1809 page_index_t begin_page_index = find_page_index(obj);
1810 // Create a filler object occupying space from 'fill_from' up to but
1811 // excluding 'obj'.
1812 deposit_filler((char*)fill_from, (char*)obj);
1813 if (fill_from == page_base((uword_t)obj)) {
1814 adjust_gen_usage(begin_page_index);
1815 // This pinned object started a new page of pins.
1816 // scan_start must not see any page prior to this page,
1817 // as those might be in oldspace and about to be marked free.
1818 set_page_scan_start_offset(begin_page_index, 0);
1820 // If 'obj' spans pages, move its successive page(s) to newspace and
1821 // ensure that those pages' scan_starts point at the same address
1822 // that this page's scan start does, which could be this page or earlier.
1823 sword_t nwords = object_size(obj);
1824 uword_t obj_end = (uword_t)(obj + nwords); // non-inclusive address bound
1825 page_index_t end_page_index = find_page_index((char*)obj_end - 1); // inclusive bound
1827 if (end_page_index > begin_page_index) {
1828 char *scan_start = page_scan_start(begin_page_index);
1829 page_index_t index;
1830 for (index = begin_page_index + 1; index <= end_page_index; ++index) {
1831 set_page_scan_start_offset(index,
1832 addr_diff(page_address(index), scan_start));
1833 adjust_gen_usage(index);
1836 // Compute page base address of last page touched by this obj.
1837 uword_t obj_end_pageaddr = page_base(obj_end - 1);
1838 // See if there's another pinned object on this page.
1839 // There is always a next object, due to the sentinel.
1840 if (keys[i+1] < obj_end_pageaddr + GENCGC_PAGE_BYTES) {
1841 // Next object starts within the same page.
1842 fill_from = obj_end;
1843 } else {
1844 /* Next pinned object does not start on the same page this obj ends on.
1845 * Any bytes following 'obj' up to its page end are garbage.
1846 * The reason we don't merely reduce the page_bytes_used is that decreasing
1847 * the grand total bytes allocated had a tendency to delay triggering the
1848 * next GC. This phenomenon was especially bad if the only pinned objects
1849 * were at the start of a page, as it caused the entire rest of the page to
1850 * be unusable. :SMALLOBJ-AUTO-GC-TRIGGER from rev dfddbc8a tests this */
1851 deposit_filler((char*)obj_end,
1852 (char*)obj_end_pageaddr + page_bytes_used(end_page_index));
1853 fill_from = page_base(keys[i+1]);
1856 generations[from_space].bytes_allocated -= bytes_moved;
1857 generations[new_space].bytes_allocated += bytes_moved;
1858 #undef adjust_gen_usage
1859 #undef page_base
1860 if (pins_alloc_size) {
1861 os_deallocate((char*)gc_filtered_pins, pins_alloc_size);
1862 gc_filtered_pins = 0;
1863 gc_pin_count = 0;
1864 pins_alloc_size = 0;
1868 int sb_introspect_pinnedp(lispobj obj) {
1869 return hopscotch_containsp(&pinned_objects, obj);
1872 /* Add 'object' to the hashtable, and if the object is a code component,
1873 * then also add all of the embedded simple-funs.
1874 * It is OK to call this function on an object which is already pinned-
1875 * it will do nothing.
1876 * But it is not OK to call this if the object is not one which merits
1877 * pinning in the first place. i.e. It MUST be an object in from_space
1878 * and moreover must be in the condemned set, which means that it can't
1879 * be a code object if pin_all_dynamic_space_code is 1.
1881 * The rationale for doing some extra work on code components is that without it,
1882 * every call to pinned_p() would entail this logic:
1883 * if the object is a simple-fun then
1884 * read the header
1885 * if already forwarded then return "no"
1886 * else go backwards to the code header and test pinned_p().
1887 * But we can avoid that by making every embedded function pinned
1888 * whenever the containing object is pinned.
1889 * Experimentation bears out that this is the better technique.
1890 * Also, we wouldn't often expect code components in the collected generation
1891 * so the extra work here is quite minimal, even if it can generally add to
1892 * the number of keys in the hashtable.
1894 #define PAGE_PINNED 0xFF
1895 static void pin_object(lispobj object)
1897 if (!compacting_p()) {
1898 gc_mark_obj(object);
1899 return;
1902 lispobj* object_start = native_pointer(object);
1903 page_index_t page = find_page_index(object_start);
1905 /* Large object: the 'pinned' bit in the PTE on the first page should be definitive
1906 * for that object. However, all occupied pages have to be marked pinned,
1907 * because move_pinned_pages_to_newspace() looks at pages as if they're independent.
1908 * That seems to be the only place that cares how many pages' pinned bits are affected
1909 * here for large objects, though I do wonder why we can't move the object right now
1910 * and be done with it */
1911 if (page_single_obj_p(page)) {
1912 if (gc_page_pins[page]) return;
1913 sword_t nwords = object_size(object_start);
1914 maybe_adjust_large_object(object_start, page, nwords);
1915 page_index_t last_page = find_page_index(object_start + nwords - 1);
1916 while (page <= last_page) gc_page_pins[page++] = PAGE_PINNED;
1917 return;
1920 // Multi-object page (the usual case) - presence in the hash table is the pinned criterion.
1921 // The 'pinned' bit is a coarse-grained test of whether to bother looking in the table.
1922 if (hopscotch_containsp(&pinned_objects, object)) return;
1924 hopscotch_insert(&pinned_objects, object, 1);
1925 unsigned int addr_lowpart = object & (GENCGC_PAGE_BYTES-1);
1926 // Divide the page into 8 parts, mark that part pinned
1927 gc_page_pins[page] |= 1 << (addr_lowpart / (GENCGC_PAGE_BYTES/8));
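// For example, if GENCGC_PAGE_BYTES were 32768, each eighth would cover 4096
// bytes, so an object at page offset 13000 would set bit 3 (13000/4096 == 3).
// (The actual page size is target-dependent; 32768 is only for illustration.)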
1928 struct code* maybe_code = (struct code*)native_pointer(object);
1929 // Avoid iterating over embedded simple-funs until the debug info is set.
1930 // Prior to that, the unboxed payload will contain random bytes.
1931 // There can't be references to any of the simple-funs
1932 // until the object is fully constructed.
1933 if (widetag_of(&maybe_code->header) == CODE_HEADER_WIDETAG && maybe_code->debug_info) {
1934 for_each_simple_fun(i, fun, maybe_code, 0, {
1935 hopscotch_insert(&pinned_objects, make_lispobj(fun, FUN_POINTER_LOWTAG), 1);
1936 addr_lowpart = (uword_t)fun & (GENCGC_PAGE_BYTES-1);
1937 gc_page_pins[find_page_index(fun)] |=
1938 1 << (addr_lowpart / (GENCGC_PAGE_BYTES/8));
1943 /* Additional logic for soft marks: any word that is potentially a
1944 * tagged pointer to a page being written must preserve the mark regardless
1945 * of what update_writeprotection() thinks. That's because the mark is set
1946 * prior to storing. If GC occurs in between setting the mark and storing,
1947 * then resetting the mark would be wrong if the subsequent store
1948 * creates an old->young pointer.
1949 * Mark stickiness is checked only once per invocation of collect_garbage(),
1950 * when scanning interrupt contexts for generation 0 but not higher gens.
1951 * There are two cases:
1952 * (1) tagged pointer to a large simple-vector, but we scan card-by-card
1953 * for specifically the marked cards. This has to be checked first
1954 * so as not to fail to see subsequent cards if the first is marked.
1955 * (2) tagged pointer to an object that marks only the page containing
1956 * the object base.
1957 * And note a subtle point: only an already-marked card can acquire sticky
1958 * status. So we can ignore any unmarked (a/k/a WRITEPROTECTED_P) card
1959 * regardless of a context register pointing to it, because if a mark was not
1960 * stored, then the pointer was not stored. Without examining the next few
1961 * instructions, there's no reason even to suppose that a store occurs.
1962 * It seems like the stop-for-GC handler must be enforcing that GC sees things
1963 * stored in the correct order for out-of-order memory models */
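/* A tiny sketch of the one-way mark transition described above; the enum values
 * are invented for illustration (the real CARD_UNMARKED / CARD_MARKED /
 * STICKY_MARK constants are defined elsewhere in the runtime). The essential
 * property: only a card that is already marked may become sticky - an unmarked
 * card stays unmarked no matter what a context register points at. */
static inline unsigned char __attribute__((unused))
example_sticky_transition(unsigned char mark)
{
    enum { EX_CARD_UNMARKED, EX_CARD_MARKED, EX_STICKY_MARK };
    return mark == EX_CARD_MARKED ? EX_STICKY_MARK : mark;
}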
1964 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
1965 static void impart_mark_stickiness(lispobj word)
1967 // This function does not care whether 'word' points to a valid object.
1968 // At worst this will spuriously mark a card as sticky,
1969 // which can happen only if it was already marked as dirty.
1970 page_index_t page = find_page_index((void*)word);
1971 if (page >= 0 && page_boxed_p(page) // stores to raw bytes are uninteresting
1972 && (word & (GENCGC_PAGE_BYTES - 1)) < page_bytes_used(page)
1973 && page_table[page].gen != 0
1974 && lowtag_ok_for_page_type(word, page_table[page].type)
1975 && plausible_tag_p(word)) { // "plausible" is good enough
1976 /* if 'word' is the correctly-tagged pointer to the base of a SIMPLE-VECTOR,
1977 * then set the sticky mark on every marked card. The only other large
1978 * objects are CODE (writes to which are pseudo-atomic),
1979 * and BIGNUM (which aren't on boxed pages)
1980 * I'm not sure if it's inadvertent that this first 'if' is taken
1981 * for non-large simple-vectors. It probably can't hurt,
1982 * but I think it's not necessary */
1983 if (lowtag_of(word) == OTHER_POINTER_LOWTAG &&
1984 widetag_of(native_pointer(word)) == SIMPLE_VECTOR_WIDETAG) {
1985 generation_index_t gen = page_table[page].gen;
1986 while (1) {
1987 long card = page_to_card_index(page);
1988 int i;
1989 for(i=0; i<CARDS_PER_PAGE; ++i)
1990 if (gc_card_mark[card+i]==CARD_MARKED) gc_card_mark[card+i]=STICKY_MARK;
1991 if (page_ends_contiguous_block_p(page, gen)) return;
1992 ++page;
1994 } else if (gc_card_mark[addr_to_card_index((void*)word)] == CARD_MARKED) {
1995 gc_card_mark[addr_to_card_index((void*)word)] = STICKY_MARK;
1999 #endif
2001 #if !GENCGC_IS_PRECISE || defined LISP_FEATURE_MIPS || defined LISP_FEATURE_PPC64
2002 /* Take a possible pointer to a Lisp object and mark its page in the
2003 * page_table so that it will not be relocated during a GC.
2005 * This involves locating the page it points to, then backing up to
2006 * the start of its region, then marking all pages pinned from there
2007 * up to the first page that's not full or has a different generation
2009 * It is assumed that all the pages' pin flags have been cleared at
2010 * the start of a GC.
2012 * It is also assumed that the current gc_alloc() region has been
2013 * flushed and the tables updated. */
2015 static void NO_SANITIZE_MEMORY preserve_pointer(os_context_register_t word, void* arg)
2017 int contextp = arg == (void*)1;
2018 page_index_t page = find_page_index((void*)word);
2019 if (page < 0) {
2020 // Though immobile_space_preserve_pointer accepts any pointer,
2021 // there's a benefit to testing immobile_space_p first
2022 // because it's inlined. Either is a no-op if no immobile space.
2023 if (immobile_space_p(word))
2024 immobile_space_preserve_pointer((void*)word);
2025 return;
2028 // Special case for untagged instance pointers in registers. This might belong in
2029 // conservative_root_p() but the pointer has to be adjusted here or else the wrong
2030 // value will be inserted into 'pinned_objects' (which demands tagged pointers)
2031 if (contextp && lowtag_of(word) == 0 &&
2032 (page_table[page].type == PAGE_TYPE_MIXED ||
2033 page_table[page].type == PAGE_TYPE_SMALL_MIXED) &&
2034 widetag_of((lispobj*)word) == INSTANCE_WIDETAG)
2035 word |= INSTANCE_POINTER_LOWTAG;
2037 lispobj object = conservative_root_p(word, page);
2038 if (!object) return;
2039 if (object != AMBIGUOUS_POINTER) {
2040 pin_object(object);
2041 return;
2043 // It's a non-large non-code ambiguous pointer.
2044 if (compacting_p()) {
2045 if (!hopscotch_containsp(&pinned_objects, word)) {
2046 hopscotch_insert(&pinned_objects, word, 1);
2047 unsigned int addr_lowpart = word & (GENCGC_PAGE_BYTES-1);
2048 // Divide the page into 8 parts, mark that part pinned
2049 gc_page_pins[page] |= 1 << (addr_lowpart / (GENCGC_PAGE_BYTES/8));
2051 return;
2053 // Mark only: search for the object, because fullcgc can't handle random pointers
2054 lispobj* found = search_dynamic_space((void*)word);
2055 if (found) gc_mark_obj(compute_lispobj(found));
2057 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2058 static void sticky_preserve_pointer(os_context_register_t register_word, void* arg)
2060 // registers can be wider than words. This could accept uword_t as the arg type
2061 // but I like it to be directly callable with os_context_register.
2062 uword_t word = register_word;
2063 if (is_lisp_pointer(word)) impart_mark_stickiness(word);
2064 preserve_pointer(word, arg);
2066 #endif
2067 #endif
2069 /* Pin an unambiguous descriptor object which may or may not be a pointer.
2070 * Ignore immediate objects, and heuristically skip some objects that are
2071 * known to be pinned without looking in pinned_objects.
2072 * pin_object() will always do the right thing and ignore multiple
2073 * calls with the same object in the same collection pass.
2075 static void pin_exact_root(lispobj obj)
2077 // These tests are performed in approximate order of quickness to check.
2079 // 1. pointerness
2080 if (!is_lisp_pointer(obj)) return;
2081 // 2. If not moving, then pinning is irrelevant. 'obj' is a-priori live given
2082 // the reference from *PINNED-OBJECTS*, and obviously it won't move.
2083 if (!compacting_p()) return;
2084 // 3. If pointing off-heap, why are you pinning? Just ignore it.
2085 // Would this need to do anything if immobile-space were ported
2086 // to the precise GC platforms. FIXME?
2087 page_index_t page = find_page_index((void*)obj);
2088 if (page < 0) return;
2089 // 4. Ignore if not in the condemned set.
2090 if (immune_set_memberp(page)) return;
2092 // Never try to pin an interior pointer - always use base pointers.
2093 lispobj *object_start = native_pointer(obj);
2094 switch (widetag_of(object_start)) {
2095 case SIMPLE_FUN_WIDETAG:
2096 #ifdef RETURN_PC_WIDETAG
2097 case RETURN_PC_WIDETAG:
2098 #endif
2099 obj = make_lispobj(fun_code_header((struct simple_fun*)object_start),
2100 OTHER_POINTER_LOWTAG);
2102 pin_object(obj);
2106 /* Return true if 'ptr' is OK to be on a write-protected page
2107 * of an object in 'gen'. That is, if the pointer does not point to a younger object.
2108 * Note: 'ptr' is _sometimes_ an ambiguous pointer - we do not utilize the layout bitmap
2109 * when scanning instances for pointers, so we will occasionally see a raw word for 'ptr'.
2110 * Also, 'ptr' might not have a lowtag (such as a lockfree list node successor). */
2111 static bool ptr_ok_to_writeprotect(lispobj ptr, generation_index_t gen)
2113 page_index_t index;
2114 lispobj __attribute__((unused)) header;
2116 /* Check that it's in the dynamic space */
2117 if ((index = find_page_index((void*)ptr)) != -1) {
2118 int pointee_gen = page_table[index].gen;
2119 if (/* Does it point to a younger or the temp. generation? */
2120 (pointee_gen < gen || pointee_gen == SCRATCH_GENERATION) &&
2121 /* and an in-use part of the page?
2122 * Formerly this examined the bounds of each open region,
2123 * but that is extra work with little benefit. It is faster
2124 * to treat all of any page with an open region as in-use.
2125 * It will self-correct when the region gets closed */
2126 ((page_table[index].type & OPEN_REGION_PAGE_FLAG)
2127 || (ptr & (GENCGC_PAGE_BYTES-1)) < page_bytes_used(index)))
2128 return 0;
2130 #ifdef LISP_FEATURE_IMMOBILE_SPACE
2131 else if (immobile_space_p(ptr) &&
2132 other_immediate_lowtag_p(header = *native_pointer(ptr))) {
2133 // This is *possibly* a pointer to an object in immobile space,
2134 // given that above two conditions were satisfied.
2135 // But unlike in the dynamic space case, we need to read a byte
2136 // from the object to determine its generation, which requires care.
2137 // Consider an unboxed word that looks like a pointer to a word that
2138 // looks like simple-fun-widetag. We can't naively back up to the
2139 // underlying code object since the alleged header might not be one.
2140 int pointee_gen = gen; // Make comparison fail if we fall through
2141 switch (header_widetag(header)) {
2142 case SIMPLE_FUN_WIDETAG:
2143 if (functionp(ptr)) {
2144 lispobj* code = (lispobj*)fun_code_header(FUNCTION(ptr));
2145 // This is a heuristic, since we're not actually looking for
2146 // an object boundary. Precise scanning of 'page' would obviate
2147 // the guard conditions here.
2148 if (immobile_space_p((lispobj)code)
2149 && widetag_of(code) == CODE_HEADER_WIDETAG)
2150 pointee_gen = immobile_obj_generation(code);
2152 break;
2153 default:
2154 pointee_gen = immobile_obj_generation(native_pointer(ptr));
2156 // A bogus generation number implies a not-really-pointer,
2157 // but it won't cause misbehavior.
2158 if (pointee_gen < gen || pointee_gen == SCRATCH_GENERATION) {
2159 return 0;
2162 #endif
2163 return 1;
2166 #ifndef LISP_FEATURE_SOFT_CARD_MARKS
2167 static inline void protect_page(void* page_addr)
2169 os_protect((void *)page_addr, GENCGC_PAGE_BYTES, OS_VM_PROT_READ);
2170 gc_card_mark[addr_to_card_index(page_addr)] = CARD_UNMARKED;
2172 #endif
2174 #define LOCKFREE_LIST_NEXT(x) ((struct list_node*)x)->_node_next
2176 /* Helper function for update_writeprotection.
2177 * If the range [where,limit) contains an old->young pointer, then return
2178 * the address - or approximate address - containing such pointer.
2179 * The return value is used as a boolean, but if debugging, you might
2180 * want to see the address */
2181 static lispobj* range_dirty_p(lispobj* where, lispobj* limit, generation_index_t gen)
2183 sword_t nwords;
2184 for ( ; where < limit ; where += nwords ) {
2185 lispobj word = *where;
2186 if (is_cons_half(word)) {
2187 if (is_lisp_pointer(word) && !ptr_ok_to_writeprotect(word, gen)) return where;
2188 word = where[1];
2189 if (is_lisp_pointer(word) && !ptr_ok_to_writeprotect(word, gen)) return where;
2190 nwords = 2;
2191 continue;
2193 int widetag = widetag_of(where);
2194 gc_dcheck(widetag != CODE_HEADER_WIDETAG); // This can't be called on a code page
2195 nwords = sizetab[widetag](where);
2196 if (leaf_obj_widetag_p(widetag)) continue; // Do nothing
2197 #ifdef LISP_FEATURE_COMPACT_INSTANCE_HEADER
2198 if (instanceoid_widetag_p(widetag)) {
2199 // instance_layout works on funcallable or regular instances
2200 // and we have to specially check it because it's in the upper
2201 // bytes of the 0th word.
2202 lispobj layout = instance_layout(where);
2203 if (layout) {
2204 if (!ptr_ok_to_writeprotect(layout, gen)) return where;
2205 if (lockfree_list_node_layout_p(LAYOUT(layout)) &&
2206 !ptr_ok_to_writeprotect(LOCKFREE_LIST_NEXT(where), gen))
2207 return where;
2210 #else
2211 if (widetag == INSTANCE_WIDETAG) {
2212 // instance_layout works only on regular instances,
2213 // we don't have to treat it specially but we do have to
2214 // check for lockfree list nodes.
2215 lispobj layout = instance_layout(where);
2216 if (layout && lockfree_list_node_layout_p(LAYOUT(layout)) &&
2217 !ptr_ok_to_writeprotect(LOCKFREE_LIST_NEXT(where), gen))
2218 return where;
2220 #endif
2221 // Scan all the rest of the words even if some of them are raw bits.
2222 // At worst this overestimates the set of pointer words.
2223 sword_t index;
2224 for (index=1; index<nwords; ++index)
2225 if (is_lisp_pointer(where[index]) && !ptr_ok_to_writeprotect(where[index], gen))
2226 return where;
2228 return 0;
2231 /* Given a range of pages at least one of which is not WPed (logically or physically,
2232 * depending on SOFT_CARD_MARKS), scan all those pages for pointers to younger generations.
2233 * If no such pointers are found, then write-protect the range.
2235 * Care is taken to check for pointers to any open allocation regions,
2236 * which by design contain younger objects.
2238 * If we find a word which is a witness for the inability to apply write-protection,
2239 * then return the address of the object containing the witness pointer.
2240 * Otherwise return 0. The word address is just for debugging; there are cases
2241 * where we don't apply write protection, but nonetheless return 0.
2243 * This function is still buggy, but not in a fatal way.
2244 * The issue is that for any kind of weak object - hash-table vector,
2245 * weak pointer, or weak simple-vector, we skip scavenging the object
2246 * which might leave some pointers to younger generation objects
2247 * which will later be smashed when processing weak objects.
2248 * That is, the referent is non-live. But when we scanned this page range,
2249 * it looks like it still had the pointer to the younger object.
2250 * To get this really right, we would have to wait until after weak objects
2251 * have been processed.
2252 * It may or may not be possible to get verify_range to croak
2253 * about suboptimal application of WP. Possibly not, because of the hack
2254 * for pinned pages without soft card marking (which won't WP).
2256 * See also 'doc/internals-notes/fdefn-gc-safety' for execution schedules
2257 * that lead to invariant loss with FDEFNs. This might not be a problem
2258 * in practice. At least it seems like it never has been.
2260 static lispobj*
2261 update_writeprotection(page_index_t first_page, page_index_t last_page,
2262 lispobj* where, lispobj* limit)
2264 /* Shouldn't be a free page. */
2265 gc_dcheck(!page_free_p(first_page)); // Implied by the next assertion
2266 gc_assert(page_words_used(first_page) != 0);
2268 if (!ENABLE_PAGE_PROTECTION) return 0;
2269 if (!page_boxed_p(first_page)) return 0;
2271 page_index_t page;
2272 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2273 /* If any page is referenced from the stack (mark byte = 2), then we
2274 * cannot apply protection even if we see no witness, because the
2275 * absence of synchronization between mutator and GC means that the next
2276 * instruction issued when the mutator resumes might create the witness,
2277 * and it thinks it already marked a card */
2278 for (page = first_page; page <= last_page; ++page)
2279 if (cardseq_any_sticky_mark(page_to_card_index(page))) return 0;
2280 #else
2281 /* Skip if any page is pinned.
2282 * The 'pinned' check is sort of bogus but sort of necessary,
2283 * but doesn't completely fix the problem that it tries to, which is
2284 * passing a memory address to the OS for it to write into.
2285 * An object on a never-written protected page would still fail.
2286 * It's probably rare to pass boxed pages to the OS, but it could be
2287 * to read fixnums into a simple-vector. */
2288 for (page = first_page; page <= last_page; ++page)
2289 if (gc_page_pins[page]) return 0;
2290 #endif
2292 /* Now we attempt to find any 1 "witness" that the pages should NOT be protected.
2293 * If such witness is found, then return without doing anything, otherwise
2294 * apply protection to the range. */
2295 lispobj* witness = range_dirty_p(where, limit, page_table[first_page].gen);
2296 if (witness) return witness;
2298 for (page = first_page; page <= last_page; ++page) {
2299 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2300 // Don't worry, the cards are all clean - if any card mark was sticky,
2301 // then we would have bailed out as the first thing (way up above).
2302 assign_page_card_marks(page, CARD_UNMARKED);
2303 #else
2304 // Try to avoid a system call
2305 if (!PAGE_WRITEPROTECTED_P(page)) protect_page(page_address(page));
2306 #endif
2308 return 0;
2311 /* Decide if this single-object page holds a normal simple-vector.
2312 * "Normal" now includes non-weak address-insensitive k/v vectors */
2313 static inline bool large_scannable_vector_p(page_index_t page) {
2314 lispobj header = *(lispobj *)page_address(page);
2315 if (header_widetag(header) == SIMPLE_VECTOR_WIDETAG) {
2316 int mask = (flag_VectorWeak | flag_VectorAddrHashing) << ARRAY_FLAGS_POSITION;
2317 if (header & mask) return 0;
2318 if (vector_flagp(header, VectorHashing)) {
2319 lispobj* data = ((struct vector*)page_address(page))->data;
2320 // If not very full, use the normal path.
2321 // The exact boundary here doesn't matter too much.
2322 if (KV_PAIRS_HIGH_WATER_MARK(data) < (int)(GENCGC_PAGE_BYTES/N_WORD_BYTES))
2323 return 0;
2325 return 1;
2327 return 0;
2330 /* Attempt to re-protect code from first_page to last_page inclusive.
2331 * The object bounds are 'start' and 'limit', the former being redundant
2332 * with page_address(first_page).
2333 * Immobile space is dealt with in "immobile-space.c"
2335 static void
2336 update_code_writeprotection(page_index_t first_page, page_index_t last_page,
2337 lispobj* start, lispobj* limit)
2339 if (!ENABLE_PAGE_PROTECTION) return;
2340 page_index_t i;
2341 for (i=first_page; i <= last_page; ++i) {// last_page is inclusive
2342 gc_assert(is_code(page_table[i].type));
2343 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2344 if (cardseq_any_sticky_mark(page_to_card_index(i))) {
2345 return;
2347 #endif
2350 lispobj* where = start;
2351 for (; where < limit; where += headerobj_size(where)) {
2352 switch (widetag_of(where)) {
2353 case CODE_HEADER_WIDETAG:
2354 if (header_rememberedp(*where)) return;
2355 break;
2356 case FUNCALLABLE_INSTANCE_WIDETAG:
2357 if (range_dirty_p(where, where+headerobj_size(where), page_table[first_page].gen))
2358 return;
2359 break;
2362 for (i = first_page; i <= last_page; i++) assign_page_card_marks(i, CARD_UNMARKED);
2365 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2366 # define card_stickymarked_p(x) (gc_card_mark[x] == STICKY_MARK)
2367 #endif
2368 extern int descriptors_scavenge(lispobj *, lispobj*, generation_index_t, int);
2369 int root_boxed_words_scanned, root_vector_words_scanned, root_mixed_words_scanned;
2371 /* Special treatment for strictly boxed pages improves on the general case as follows:
2372 * - It can skip determining the extent of the contiguous block up front,
2373 * instead just blasting through the cards as it sees them.
2374 * - If only a subset of cards in a contiguous block are dirty, the scan
2375 * can be restricted to that subset. We don't need to align at object boundaries.
2376 * - It is not necessary to invoke a scavenge method specific to each object type.
2377 * - new write-protection status can be recomputed as we go.
2378 * This combination of aspects will be especially beneficial if cards
2379 * are much smaller than they currently are (like 1K)
2381 * We have two choices for object traversal: walk object-by-object,
2382 * or card-by-card just blasting through the words looking for pointers.
2383 * But the latter can fail on a card-spanning object if care is not taken.
2384 * Example: Suppose the card size is 1K, and an instance has 200 slots.
2385 * The instance consumes around 1600 bytes (@ 8 bytes/word), which conceivably
2386 * could use 3 cards: header + 10 slots on the end of the first card,
2387 * 128 slots on the next, and the remainder on the final card. The soft write
2388 * barrier marks only the card with the header, so we don't know exactly
2389 * which card contains a modified pointer. Therefore, in all cases when using
2390 * card-by-card scan that disregards object boundaries, we have to assume
2391 * that 1 card beyond any marked card contains part of a marked object,
2392 * if that next card has the same scan start as its predecessor.
2393 * But where to stop scanning under this assumption? We shouldn't assume
2394 * that any marked card implies scanning an unbounded number of cards.
2395 * Therefore, a big instance should not be put on a purely boxed card.
2396 * (And granted, a massive instance will go on single-object pages.)
2397 * The other purely boxed objects are cons-sized, so they don't have a problem.
2398 * And (SETF SVREF) does mark an exact card, so it's all good.
2399 * Also, the hardware write barrier does not have this concern.
2401 #define WORDS_PER_CARD (GENCGC_CARD_BYTES/N_WORD_BYTES)
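/* A back-of-the-envelope sketch of the example above, assuming 8-byte words and
 * the hypothetical 1K card size from the comment (neither value is read from
 * this file's constants): a 200-slot instance is 201 words = 1608 bytes, so if
 * it starts near the end of one card it can touch up to three consecutive cards. */
static inline unsigned __attribute__((unused))
example_cards_touched(uword_t start, unsigned nbytes, unsigned card_bytes)
{
    uword_t first = start / card_bytes;
    uword_t last  = (start + nbytes - 1) / card_bytes;
    return (unsigned)(last - first) + 1;        // inclusive count of cards spanned
}
/* e.g. example_cards_touched(1024 - 88, 1608, 1024) == 3: the header plus 10
 * slots end the first card, 128 slots fill the second, the rest land on the third. */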
2402 static page_index_t scan_boxed_root_cards_spanning(page_index_t page, generation_index_t gen)
2404 __attribute__((unused)) int prev_marked = 0;
2405 do {
2406 lispobj* start = (void*)page_address(page);
2407 lispobj* limit = start + page_words_used(page);
2408 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2409 long card = addr_to_card_index(start);
2410 /* Cards can change from marked to unmarked (just like with physical protection),
2411 * but also unmarked to marked, if transferring the card mark from the object's
2412 * header card to a cell in that object on a later card.
2413 * Lisp is given leeway because marking the header is easier. So the
2414 * algorithm accepts either way on input, but makes its output canonical.
2415 * (similar in spirit to Postel's Law) */
2416 if (prev_marked || cardseq_any_marked(card)) {
2417 if (GC_LOGGING) fprintf(gc_activitylog(), "scan_roots spanning %p\n", page_address(page));
2418 int j;
2419 for (j=0; j<CARDS_PER_PAGE; ++j, ++card, start += WORDS_PER_CARD) {
2420 int marked = card_dirtyp(card);
2421 if (marked || prev_marked) {
2422 lispobj* end = start + WORDS_PER_CARD;
2423 if (end > limit) end = limit;
2424 int dirty = descriptors_scavenge(start, end, gen, card_stickymarked_p(card));
2425 root_boxed_words_scanned += end - start;
2426 gc_card_mark[card] =
2427 (gc_card_mark[card] != STICKY_MARK) ? (dirty ? CARD_MARKED : CARD_UNMARKED) :
2428 STICKY_MARK;
2429 prev_marked = marked;
2433 #else
2434 if (!PAGE_WRITEPROTECTED_P(page)) {
2435 int dirty = descriptors_scavenge(start, limit, gen, 0);
2436 if (ENABLE_PAGE_PROTECTION && !dirty) protect_page(start);
2438 #endif
2439 ++page;
2440 } while (!page_ends_contiguous_block_p(page-1, gen));
2441 return page;
2444 /* Large simple-vectors and pages of conses are even easier than strictly boxed root pages
2445 * because individual cons cells can't span cards, and vectors always mark the card of a
2446 * specific element. So there is no looking back 1 card to check for a marked header */
2447 static page_index_t scan_boxed_root_cards_non_spanning(page_index_t page, generation_index_t gen)
2449 #ifndef LISP_FEATURE_SOFT_CARD_MARKS
2450 /* Physical protection doesn't distinguish between card-spanning and non-card-spanning,
2451 * because the write fault always occurs on the page that is getting dirtied by a store,
2452 * unlike soft marks which can mark an object header, but store onto the next card */
2453 return scan_boxed_root_cards_spanning(page, gen);
2454 #else
2455 do {
2456 lispobj* start = (void*)page_address(page);
2457 long card = addr_to_card_index(start);
2458 if (cardseq_any_marked(card)) {
2459 if (GC_LOGGING) fprintf(gc_activitylog(), "scan_roots non-spanning %p\n", page_address(page));
2460 lispobj* limit = start + page_words_used(page);
2461 int j;
2462 for (j=0; j<CARDS_PER_PAGE; ++j, ++card, start += WORDS_PER_CARD) {
2463 if (card_dirtyp(card)) {
2464 lispobj* end = start + WORDS_PER_CARD;
2465 if (end > limit) end = limit;
2466 int dirty = descriptors_scavenge(start, end, gen,
2467 card_stickymarked_p(card));
2468 root_vector_words_scanned += end - start;
2469 if (!dirty) gc_card_mark[card] = CARD_UNMARKED;
2473 ++page;
2474 } while (!page_ends_contiguous_block_p(page-1, gen));
2475 return page;
2476 #endif
2479 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2480 /* PAGE_TYPE_SMALL_MIXED roots are walked object-by-object to avoid affecting any raw word.
2481 * By construction, objects will never span cards */
2482 static page_index_t scan_mixed_root_cards(page_index_t page, generation_index_t gen)
2484 do {
2485 lispobj* start = (void*)page_address(page);
2486 long card = addr_to_card_index(start);
2487 if (cardseq_any_marked(card)) {
2488 if (GC_LOGGING) fprintf(gc_activitylog(), "scan_roots subcard mixed %p\n", page_address(page));
2489 lispobj* limit = start + page_words_used(page);
2490 int j;
2491 for (j=0; j<CARDS_PER_PAGE; ++j, ++card, start += WORDS_PER_CARD) {
2492 if (card_dirtyp(card)) {
2493 lispobj* end = start + WORDS_PER_CARD;
2494 if (end > limit) end = limit;
2495 // heap_scavenge doesn't take kindly to inverted start+end
2496 if (start < limit) {
2497 heap_scavenge(start, limit);
2498 if (!card_stickymarked_p(card) && !range_dirty_p(start, limit, gen))
2499 gc_card_mark[card] = CARD_UNMARKED;
2500 } else
2501 gc_card_mark[card] = CARD_UNMARKED;
2505 ++page;
2506 } while (!page_ends_contiguous_block_p(page-1, gen));
2507 return page;
2509 #endif
2511 /* Scavenge all generations greater than or equal to FROM.
2513 * Under the current scheme when a generation is GCed, the generations
2514 * younger than it are empty. So, when a generation is being GCed it
2515 * is only necessary to examine generations older than it for pointers.
2517 * Logical or physical write-protection is used to note pages that don't
2518 * contain old->young pointers. But pages can be written without having
2519 * such pointers. After the pages are scavenged here, they are examined
2520 * for old->young pointers, and are marked clean (unprotected) if there are none.
2522 * Write-protected pages will not have any pointers to the
2523 * from_space so do not need scavenging, but might be visited
2524 * as part of a contiguous range containing a relevant page.
2527 static void
2528 scavenge_root_gens(generation_index_t from)
2530 page_index_t i = 0;
2531 page_index_t limit = next_free_page;
2532 gc_dcheck(compacting_p());
2534 while (i < limit) {
2535 generation_index_t generation = page_table[i].gen;
2536 if (generation < from || generation == SCRATCH_GENERATION
2537 /* Not sure why words_used is checked. Probably because reset_page_flags()
2538 * does not change the page's gen to an unused number. Perhaps it should */
2539 || !page_boxed_p(i) || !page_words_used(i)) {
2540 ++i;
2541 continue;
2544 /* This should be the start of a region */
2545 gc_assert(page_starts_contiguous_block_p(i));
2547 if (page_table[i].type == PAGE_TYPE_BOXED) {
2548 i = scan_boxed_root_cards_spanning(i, generation);
2549 } else if ((page_table[i].type == PAGE_TYPE_CONS) ||
2550 (page_single_obj_p(i) && large_scannable_vector_p(i))) {
2551 i = scan_boxed_root_cards_non_spanning(i, generation);
2552 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2553 } else if (page_table[i].type == PAGE_TYPE_SMALL_MIXED) {
2554 i = scan_mixed_root_cards(i, generation);
2555 #endif
2556 } else {
2557 page_index_t last_page;
2558 int marked = 0;
2559 /* Now work forward until the end of the region */
2560 for (last_page = i; ; last_page++) {
2561 long card_index = page_to_card_index(last_page);
2562 marked = marked || cardseq_any_marked(card_index);
2563 if (page_ends_contiguous_block_p(last_page, generation))
2564 break;
2566 if (marked) {
2567 lispobj* start = (lispobj*)page_address(i);
2568 lispobj* limit =
2569 (lispobj*)page_address(last_page) + page_words_used(last_page);
2570 if (GC_LOGGING) fprintf(gc_activitylog(), "scan_roots mixed %p:%p\n", start, limit);
2571 root_mixed_words_scanned += limit - start;
2572 heap_scavenge(start, limit);
2573 /* Now scan the pages and write protect those that
2574 * don't have pointers to younger generations. */
2575 if (is_code(page_table[i].type))
2576 update_code_writeprotection(i, last_page, start, limit);
2577 else
2578 update_writeprotection(i, last_page, start, limit);
2580 i = 1 + last_page;
2586 /* Scavenge a newspace generation. As it is scavenged new objects may
2587 * be allocated to it; these will also need to be scavenged. This
2588 * repeats until there are no more objects unscavenged in the
2589 * newspace generation.
2591 * To help improve the efficiency, areas written are recorded by
2592 * gc_alloc() and only these scavenged. Sometimes a little more will be
2593 * scavenged, but this causes no harm. An easy check is done that the
2594 * scavenged bytes equals the number allocated in the previous
2595 * scavenge.
2597 * Write-protected pages are not scanned except if they are marked
2598 * pinned, in which case they may have been promoted and still have
2599 * pointers to the from space.
2601 * Write-protected pages could potentially be written by alloc however
2602 * to avoid having to handle re-scavenging of write-protected pages
2603 * gc_alloc() does not write to write-protected pages.
2605 * New areas of objects allocated are recorded alternately in the two
2606 * new_areas arrays below. */
2607 static struct new_area new_areas_1[NUM_NEW_AREAS];
2608 static struct new_area new_areas_2[NUM_NEW_AREAS];
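/* A minimal sketch of the double-buffered fixpoint pattern that
 * scavenge_newspace() below follows, with invented names: process the current
 * list while recording newly discovered areas into the other list, swap, and
 * stop once a pass produces no new work. The real loop also closes allocation
 * regions and runs weak-object triggers at each potential stopping point. */
struct example_worklist { int count; /* ... recorded areas ... */ };

static void __attribute__((unused))
example_scavenge_to_fixpoint(struct example_worklist *a, struct example_worklist *b,
                             int (*process)(struct example_worklist *in,
                                            struct example_worklist *out))
{
    struct example_worklist *cur = a, *next = b;
    for (;;) {
        next->count = 0;                          // record new areas into 'next'
        int produced = process(cur, next);        // scavenge everything noted in 'cur'
        if (produced == 0) break;                 // nothing new discovered: done
        struct example_worklist *tmp = cur; cur = next; next = tmp;  // swap buffers
    }
}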
2610 /* Do one full scan of the new space generation. This is not enough to
2611 * complete the job as new objects may be added to the generation in
2612 * the process which are not scavenged. */
2613 static void newspace_full_scavenge(generation_index_t generation)
2615 page_index_t i;
2617 for (i = 0; i < next_free_page; i++) {
2618 if ((page_table[i].gen == generation) && page_boxed_p(i)
2619 && (page_words_used(i) != 0)
2620 && cardseq_any_marked(page_to_card_index(i))) {
2621 page_index_t last_page;
2623 /* The scavenge will start at the scan_start_offset of
2624 * page i.
2626 * We need to find the full extent of this contiguous
2627 * block in case objects span pages. */
2628 for (last_page = i; ;last_page++) {
2629 /* Check whether this is the last page in this
2630 * contiguous block */
2631 if (page_ends_contiguous_block_p(last_page, generation))
2632 break;
2635 record_new_regions_below = 1 + last_page;
2636 heap_scavenge(page_scan_start(i),
2637 (lispobj*)page_address(last_page) + page_words_used(last_page));
2638 i = last_page;
2641 /* Enable recording of all new allocation regions */
2642 record_new_regions_below = 1 + page_table_pages;
2645 void gc_close_collector_regions(int flag)
2647 ensure_region_closed(code_region, flag|PAGE_TYPE_CODE);
2648 ensure_region_closed(boxed_region, PAGE_TYPE_BOXED);
2649 ensure_region_closed(unboxed_region, PAGE_TYPE_UNBOXED);
2650 ensure_region_closed(mixed_region, PAGE_TYPE_MIXED);
2651 ensure_region_closed(small_mixed_region, PAGE_TYPE_SMALL_MIXED);
2652 ensure_region_closed(cons_region, PAGE_TYPE_CONS);
2655 /* Do a complete scavenge of the newspace generation. */
2656 static void
2657 scavenge_newspace(generation_index_t generation)
2659 /* Flush the current regions updating the page table. */
2660 gc_close_collector_regions(0);
2662 /* Turn on the recording of new areas. */
2663 gc_assert(new_areas_index == 0);
2664 new_areas = new_areas_1;
2666 /* Start with a full scavenge. */
2667 if (GC_LOGGING) fprintf(gc_activitylog(), "newspace full scav\n");
2668 newspace_full_scavenge(generation);
2670 /* Flush the current regions updating the page table. */
2671 gc_close_collector_regions(0);
2673 while (1) {
2674 if (GC_LOGGING) fprintf(gc_activitylog(), "newspace loop\n");
2675 if (!new_areas_index && !immobile_scav_queue_count) { // possible stopping point
2676 if (!test_weak_triggers(0, 0))
2677 break; // no work to do
2678 // testing of triggers can't detect whether any triggering object
2679 // actually entails new work - it only knows which triggers were removed
2680 // from the pending list. So check again if allocations occurred,
2681 // which happens only if not all triggers referenced already-live objects.
2682 gc_close_collector_regions(0); // update new_areas from regions
2683 if (!new_areas_index && !immobile_scav_queue_count)
2684 break; // still no work to do
2686 /* Move the current to the previous new areas */
2687 struct new_area *previous_new_areas = new_areas;
2688 int previous_new_areas_index = new_areas_index;
2689 /* Note the max new_areas used. */
2690 if (new_areas_index > new_areas_index_hwm)
2691 new_areas_index_hwm = new_areas_index;
2693 /* Prepare to record new areas. Alternate between using new_areas_1 and 2 */
2694 new_areas = (new_areas == new_areas_1) ? new_areas_2 : new_areas_1;
2695 new_areas_index = 0;
2697 scavenge_immobile_newspace();
2698 /* Check whether previous_new_areas had overflowed. */
2699 if (previous_new_areas_index >= NUM_NEW_AREAS) {
2701 /* New areas of allocated objects have been lost, so we need to do a
2702 * full scan to be sure! If this becomes a problem, try
2703 * increasing NUM_NEW_AREAS. */
2704 newspace_full_scavenge(generation);
2706 } else {
2708 int i;
2709 /* Work through previous_new_areas. */
2710 for (i = 0; i < previous_new_areas_index; i++) {
2711 page_index_t page = previous_new_areas[i].page;
2712 size_t offset = previous_new_areas[i].offset;
2713 size_t size = previous_new_areas[i].size;
2714 gc_assert(size % (2*N_WORD_BYTES) == 0);
2715 lispobj *start = (lispobj*)(page_address(page) + offset);
2716 if (GC_LOGGING) fprintf(gc_activitylog(), "heap_scav %p..%p\n",
2717 start, (lispobj*)((char*)start + size));
2718 heap_scavenge(start, (lispobj*)((char*)start + size));
2722 /* Flush the current regions updating the page table. */
2723 gc_close_collector_regions(0);
2726 /* Turn off recording of allocation regions. */
2727 record_new_regions_below = 0;
2728 new_areas = NULL;
2729 new_areas_index = 0;
2732 /* Un-write-protect all the pages in from_space. This is done at the
2733 * start of a GC, else there may be many page faults while scavenging
2734 * the newspace (I've seen this drive the system time to 99%). These pages
2735 * would need to be unprotected anyway before unmapping in
2736 * free_oldspace; not sure what effect this has on paging.
2738 * Here is a real-life example of what can go wrong if we don't
2739 * unprotect oldspace:
2740 * Scenario:
2741 * - gc-with-promotion (raise=1) of gen2 to gen3
2742 * - symbol FOO in gen 3 on page 1000
2743 * - large vector 'v' in gen 2 on page 1300..1305
2744 * - 'v' points only to gen 2 objects (so it is unmarked, or "protected")
2745 * - symbol-value of FOO is 'v'
2746 * - root generations are 4 and higher
2747 * - no roots point to vector 'v' or any of its contents
2748 * Thence:
2749 * - scavenge_newspace_full_scan visits page 1000
2750 * - assigns 'record_new_regions_below' = 1001
2751 * - traces slots of FOO, calls copy_potential_large_object(v)
2752 * - 'v' is promoted into gen3
2753 * - call add_new_area on page 1300..1305
2754 * - 1300 exceeds 1001 so we skip this area
2755 * So because 'v' is ahead of the wavefront, and theoretically page 1300
2756 * will be picked up by the remainder of the full_scan loop, we optimized out
2757 * the addition of the area. But then the scan loop sees that page 1300
2758 * is protected and it decides that it can skip it even though it was
2759 * originally part of 'from_space' and points to other 'from_space' things.
2760 * The consequence is that everything 'v' pointed to in gen2 becomes freed
2761 * while 'v' holds dangling pointers to all that garbage.
2763 static void
2764 unprotect_oldspace(void)
2766 page_index_t i;
2768 /* Gen0 never has protection applied, so we can usually skip the un-protect step;
2769 * however, in the final GC, because everything got moved to gen0 by brute force
2770 * adjustment of the page table, we don't know the state of the protection.
2771 * Therefore only skip out if NOT in the final GC */
2772 if (conservative_stack && from_space == 0) return;
2774 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2775 for (i = 0; i < next_free_page; i++)
2776 /* Why does this even matter? Obviously it did for physical protection
2777 * (storing the forwarding pointers shouldn't fault)
2778 * but there's no physical protection, so ... why bother?
2779 * But I tried removing it and got assertion failures */
2780 if (page_words_used(i) && page_table[i].gen == from_space)
2781 assign_page_card_marks(i, CARD_MARKED);
2782 #else
2783 char *page_addr = 0;
2784 char *region_addr = 0;
2785 uword_t region_bytes = 0;
2786 for (i = 0; i < next_free_page; i++) {
2787 if ((page_words_used(i) != 0)
2788 && (page_table[i].gen == from_space)) {
2790 /* Remove any write-protection. We should be able to rely
2791 * on the write-protect flag to avoid redundant calls. */
2792 if (PAGE_WRITEPROTECTED_P(i)) {
2793 SET_PAGE_PROTECTED(i, 0);
2794 if (protection_mode(i) == PHYSICAL) {
2795 page_addr = page_address(i);
2796 if (!region_addr) {
2797 /* First region. */
2798 region_addr = page_addr;
2799 region_bytes = GENCGC_PAGE_BYTES;
2800 } else if (region_addr + region_bytes == page_addr) {
2801 /* Region continues. */
2802 region_bytes += GENCGC_PAGE_BYTES;
2803 } else {
2804 /* Unprotect previous region. */
2805 os_protect(region_addr, region_bytes, OS_VM_PROT_ALL);
2806 /* First page in new region. */
2807 region_addr = page_addr;
2808 region_bytes = GENCGC_PAGE_BYTES;
2814 if (region_addr) {
2815 /* Unprotect last region. */
2816 os_protect(region_addr, region_bytes, OS_VM_PROT_ALL);
2818 #endif
2821 /* Work through all the pages and free any in from_space.
2822 * Live non-pinned objects will have been copied to new pages.
2823 * Pinned objects are no longer in 'from_space', as the containing
2824 * page is now in a different generation.
2825 * Bytes_allocated and the generation bytes_allocated
2826 * counter are updated. */
2827 static void free_oldspace(void)
2829 uword_t bytes_freed = 0;
2830 page_index_t page;
2831 for (page = 0; page < next_free_page; ++page) {
2832 if (page_table[page].gen == from_space) {
2833 /* Should already be unprotected by unprotect_oldspace(). */
2834 gc_dcheck(page_cards_all_marked_nonsticky(page));
2835 /* Free the page. */
2836 int used = page_words_used(page);
2837 if (used) set_page_need_to_zero(page, 1);
2838 set_page_bytes_used(page, 0);
2839 reset_page_flags(page);
2840 bytes_freed += used << WORD_SHIFT;
2843 generations[from_space].bytes_allocated -= bytes_freed;
2844 bytes_allocated -= bytes_freed;
2846 void free_large_object(lispobj* where, lispobj* end)
2848 page_index_t first = find_page_index(where);
2849 page_index_t last = find_page_index((char*)end - 1);
2850 generation_index_t g = page_table[first].gen;
2851 gc_assert(page_ends_contiguous_block_p(last, g));
2852 uword_t bytes_freed = 0;
2853 page_index_t page;
2854 // Perform all assertions before clobbering anything
2855 for (page = first ; page <= last ; ++page) {
2856 gc_assert(page_single_obj_p(page)); // redundant for the first page
2857 gc_assert(page_table[page].gen == g); // also redundant
2858 gc_assert(page_scan_start(page) == where);
2859 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2860 gc_dcheck(page_cards_all_marked_nonsticky(page));
2861 #else
2862 /* Force the page to be writable. Although memory faults should not occur
2863 * during GC, they are allowed, and this step ensures writability. */
2864 *page_address(page) = 0;
2865 #endif
2867 // Copied from free_oldspace
2868 for (page = first ; page <= last ; ++page) {
2869 int used = page_words_used(page);
2870 if (used) set_page_need_to_zero(page, 1);
2871 set_page_bytes_used(page, 0);
2872 reset_page_flags(page);
2873 bytes_freed += used << WORD_SHIFT;
2875 generations[g].bytes_allocated -= bytes_freed;
2876 bytes_allocated -= bytes_freed;
2879 /* Call 'proc' with pairs of addresses demarcating ranges in the
2880 * specified generation.
2881 * Stop if any invocation returns non-zero, and return that value */
2882 uword_t
2883 walk_generation(uword_t (*proc)(lispobj*,lispobj*,uword_t),
2884 generation_index_t generation, uword_t extra)
2886 page_index_t i;
2887 int genmask = generation >= 0 ? 1 << generation : ~0;
2889 for (i = 0; i < next_free_page; i++) {
2890 if ((page_words_used(i) != 0) && ((1 << page_table[i].gen) & genmask)) {
2891 page_index_t last_page;
2893 /* This should be the start of a contiguous block */
2894 gc_assert(page_starts_contiguous_block_p(i));
2896 /* Need to find the full extent of this contiguous block in case
2897 objects span pages. */
2899 /* Now work forward until the end of this contiguous area is
2900 found. */
2901 for (last_page = i; ;last_page++)
2902 /* Check whether this is the last page in this contiguous
2903 * block. */
2904 if (page_ends_contiguous_block_p(last_page, page_table[i].gen))
2905 break;
2907 uword_t result =
2908 proc((lispobj*)page_address(i),
2909 (lispobj*)page_address(last_page) + page_words_used(last_page),
2910 extra);
2911 if (result) return result;
2913 i = last_page;
2916 return 0;
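/* Illustrative sketch (not compiled) of a walk_generation() callback: the
 * callback name and the use of 'extra' as an accumulator pointer are
 * hypothetical, but the signature matches the 'proc' parameter above. */
#if 0
static uword_t example_count_words(lispobj* start, lispobj* end, uword_t extra)
{
    *(uword_t*)extra += end - start; // tally the words in this contiguous range
    return 0;                        // zero means "keep walking"
}
/* Usage: uword_t total = 0; walk_generation(example_count_words, 1, (uword_t)&total); */
#endif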
2920 /* Write-protect all the dynamic boxed pages in the given generation. */
2921 static void
2922 write_protect_generation_pages(generation_index_t generation)
2924 // Neither 0 nor scratch can be protected. Additionally, protection of
2925 // pseudo-static space is applied only in gc_load_corefile_ptes().
2926 gc_assert(generation != 0 && generation != SCRATCH_GENERATION
2927 && generation != PSEUDO_STATIC_GENERATION);
2929 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
2930 page_index_t page;
2931 for (page = 0; page < next_free_page; ++page) {
2932 if (page_table[page].gen == generation && page_boxed_p(page)
2933 && page_words_used(page)) {
2934 long card = page_to_card_index(page);
2935 int j;
2936 // must not touch a card referenced from the control stack
2937 // because the next instruction executed by user code
2938 // might store an old->young pointer.
2939 // There's probably a clever SIMD-in-a-register algorithm for this...
2940 for (j=0; j<CARDS_PER_PAGE; ++j, card++)
2941 if (gc_card_mark[card] != STICKY_MARK) gc_card_mark[card] = CARD_UNMARKED;
2944 #else
2945 page_index_t start = 0, end;
2946 int n_hw_prot = 0, n_sw_prot = 0;
2948 while (start < next_free_page) {
2949 if (!protect_page_p(start, generation)) {
2950 ++start;
2951 continue;
2953 if (protection_mode(start) == LOGICAL) {
2954 SET_PAGE_PROTECTED(start, 1);
2955 ++n_sw_prot;
2956 ++start;
2957 continue;
2960 /* Note the page as protected in the page tables. */
2961 SET_PAGE_PROTECTED(start, 1);
2963 /* Find the extent of pages desiring physical protection */
2964 for (end = start + 1; end < next_free_page; end++) {
2965 if (!protect_page_p(end, generation) || protection_mode(end) == LOGICAL)
2966 break;
2967 SET_PAGE_PROTECTED(end, 1);
2970 n_hw_prot += end - start;
2971 os_protect(page_address(start), npage_bytes(end - start), OS_VM_PROT_READ);
2973 start = end;
2976 if (gencgc_verbose > 1) {
2977 printf("HW protected %d, SW protected %d\n", n_hw_prot, n_sw_prot);
2979 #endif
2982 static void
2983 move_pinned_pages_to_newspace()
2985 page_index_t i;
2987 /* scavenge() will evacuate all oldspace pages, but no newspace
2988 * pages. Pinned pages are precisely those pages which must not
2989 * be evacuated, so move them to newspace directly. */
2991 for (i = 0; i < next_free_page; i++) {
2992 /* 'pinned' is cleared lazily, so test the 'gen' field as well. */
2993 if (gc_page_pins[i] == PAGE_PINNED &&
2994 page_table[i].gen == from_space &&
2995 (page_single_obj_p(i) ||
2996 (is_code(page_table[i].type) && pin_all_dynamic_space_code))) {
2997 page_table[i].gen = new_space;
2998 /* And since we're moving the pages wholesale, also adjust
2999 * the generation allocation counters. */
3000 page_bytes_t used = page_bytes_used(i);
3001 generations[new_space].bytes_allocated += used;
3002 generations[from_space].bytes_allocated -= used;
3007 static void __attribute__((unused)) maybe_pin_code(lispobj addr) {
3008 page_index_t page = find_page_index((char*)addr);
3010 if (page < 0) {
3011 if (immobile_space_p(addr))
3012 immobile_space_preserve_pointer((void*)addr);
3013 return;
3015 if (immune_set_memberp(page)) return;
3017 struct code* code = (struct code*)dynamic_space_code_from_pc((char *)addr);
3018 if (code) {
3019 pin_exact_root(make_lispobj(code, OTHER_POINTER_LOWTAG));
3023 #if defined reg_RA
3024 static void conservative_pin_code_from_return_addresses(struct thread* th) {
3025 lispobj *object_ptr;
3026 // We need more information to reliably backtrace through a call
3027 // chain, as these backends may generate leaf functions where the
3028 // return address does not get spilled. Therefore, fall back to
3029 // scanning the entire stack for potential interior code pointers.
3030 for (object_ptr = th->control_stack_start;
3031 object_ptr < access_control_stack_pointer(th);
3032 object_ptr++)
3033 maybe_pin_code(*object_ptr);
3034 int i = fixnum_value(read_TLS(FREE_INTERRUPT_CONTEXT_INDEX,th));
3035 // Scan program counters and return registers in interrupted
3036 // frames: They may contain interior code pointers that weren't
3037 // spilled onto the stack, as is the case for leaf functions.
3038 for (i = i - 1; i >= 0; --i) {
3039 os_context_t* context = nth_interrupt_context(i, th);
3040 maybe_pin_code(os_context_pc(context));
3041 maybe_pin_code((lispobj)*os_context_register_addr(context, reg_RA));
3044 #endif
3046 #if defined LISP_FEATURE_MIPS || defined LISP_FEATURE_PPC64
3047 static void semiconservative_pin_stack(struct thread* th,
3048 generation_index_t gen) {
3049 /* Stack can only pin code, since it contains return addresses.
3050 * Non-code pointers on stack do *not* pin anything, and may be updated
3051 * when scavenging.
3052 * Interrupt contexts' boxed registers do pin their referents */
3053 lispobj *object_ptr;
3054 for (object_ptr = th->control_stack_start;
3055 object_ptr < access_control_stack_pointer(th);
3056 object_ptr++)
3057 maybe_pin_code(*object_ptr);
3058 int i = fixnum_value(read_TLS(FREE_INTERRUPT_CONTEXT_INDEX,th));
3059 for (i = i - 1; i >= 0; --i) {
3060 os_context_t* context = nth_interrupt_context(i, th);
3061 int j;
3062 #if defined LISP_FEATURE_MIPS
3063 mcontext_t *mctx = &context->uc_mcontext;
3064 for(j=1; j<32; ++j) {
3065 // context registers have more significant bits than lispobj.
3066 uword_t word = mctx->gregs[j];
3067 if (gen == 0) sticky_preserve_pointer(word, (void*)1);
3068 else preserve_pointer(word, (void*)1);
3070 #elif defined LISP_FEATURE_PPC64
3071 static int boxed_registers[] = BOXED_REGISTERS;
3072 for (j = (int)(sizeof boxed_registers / sizeof boxed_registers[0])-1; j >= 0; --j) {
3073 lispobj word = *os_context_register_addr(context, boxed_registers[j]);
3074 if (gen == 0) sticky_preserve_pointer(word, (void*)1);
3075 else preserve_pointer(word, (void*)1);
3077 // What kinds of data do we put in the Count register?
3078 // maybe it's count (raw word), maybe it's a PC. I just don't know.
3079 preserve_pointer(*os_context_lr_addr(context), (void*)1);
3080 preserve_pointer(*os_context_ctr_addr(context), (void*)1);
3081 #endif
3082 preserve_pointer(os_context_pc(context), (void*)1);
3085 #endif
3087 #if GENCGC_IS_PRECISE && !defined(reg_CODE)
3089 static int boxed_registers[] = BOXED_REGISTERS;
3091 /* Pin all (condemned) code objects pointed to by the chain of in-flight calls
3092 * based on scanning from the innermost frame pointer. This relies on an exact backtrace,
3093 * which some of our architectures have trouble obtaining. But it's theoretically
3094 * more efficient to do it this way versus looking at all stack words to see
3095 * whether each points to a code object. */
3096 static void pin_call_chain_and_boxed_registers(struct thread* th) {
3097 lispobj *cfp = access_control_frame_pointer(th);
3099 if (cfp) {
3100 while (1) {
3101 lispobj* ocfp = (lispobj *) cfp[0];
3102 lispobj lr = cfp[1];
3103 if (ocfp == 0)
3104 break;
3105 maybe_pin_code(lr);
3106 cfp = ocfp;
3109 int i = fixnum_value(read_TLS(FREE_INTERRUPT_CONTEXT_INDEX,th));
3110 for (i = i - 1; i >= 0; --i) {
3111 os_context_t* context = nth_interrupt_context(i, th);
3112 maybe_pin_code((lispobj)*os_context_register_addr(context, reg_LR));
3114 for (unsigned i = 0; i < (sizeof(boxed_registers) / sizeof(int)); i++) {
3115 lispobj word = *os_context_register_addr(context, boxed_registers[i]);
3116 if (is_lisp_pointer(word)) {
3117 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
3118 impart_mark_stickiness(word);
3119 #endif
3120 pin_exact_root(word);
3126 #endif
3128 #if !GENCGC_IS_PRECISE
3129 extern void visit_context_registers(void (*proc)(os_context_register_t, void*),
3130 os_context_t *context, void*);
3131 static void NO_SANITIZE_ADDRESS NO_SANITIZE_MEMORY
3132 conservative_stack_scan(struct thread* th,
3133 __attribute__((unused)) generation_index_t gen,
3134 // #+sb-safepoint uses os_get_csp() and not this arg
3135 __attribute__((unused)) lispobj* cur_thread_approx_stackptr)
3137 /* there are potentially two stacks for each thread: the main
3138 * stack, which may contain Lisp pointers, and the alternate stack.
3139 * We don't ever run Lisp code on the altstack, but it may
3140 * host a sigcontext with lisp objects in it.
3141 * Actually, STOP_FOR_GC has a signal context on the main stack,
3142 * and the values in it will be *above* the stack-pointer in it
3143 * at the point of interruption, so we would not scan all registers
3144 * unless the context is scanned.
3146 * For the thread which initiates GC there will usually not be a
3147 * sigcontext, though there could, in theory, be one if it performs
3148 * GC while handling an interruption */
3150 __attribute__((unused)) void (*context_method)(os_context_register_t,void*) =
3151 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
3152 gen == 0 ? sticky_preserve_pointer : preserve_pointer;
3153 #else
3154 preserve_pointer;
3155 #endif
3157 void* esp = (void*)-1;
3158 # if defined(LISP_FEATURE_SB_SAFEPOINT)
3159 /* Conservative collect_garbage is always invoked with a
3160 * foreign C call or an interrupt handler on top of every
3161 * existing thread, so the stored SP in each thread
3162 * structure is valid, no matter which thread we are looking
3163 * at. For threads that were running Lisp code, the pitstop
3164 * and edge functions maintain this value within the
3165 * interrupt or exception handler. */
3166 esp = os_get_csp(th);
3167 assert_on_stack(th, esp);
3169 /* And on platforms with interrupts: scavenge ctx registers. */
3171 /* Disabled on Windows, because it does not have an explicit
3172 * stack of `interrupt_contexts'. The reported CSP has been
3173 * chosen so that the current context on the stack is
3174 * covered by the stack scan. See also set_csp_from_context(). */
3175 # ifndef LISP_FEATURE_WIN32
3176 if (th != get_sb_vm_thread()) {
3177 int k = fixnum_value(read_TLS(FREE_INTERRUPT_CONTEXT_INDEX,th));
3178 while (k > 0) {
3179 os_context_t* context = nth_interrupt_context(--k, th);
3180 if (context)
3181 visit_context_registers(context_method, context, (void*)1);
3184 # endif
3185 # elif defined(LISP_FEATURE_SB_THREAD)
3186 int i;
3187 /* fprintf(stderr, "Thread %p, ici=%d stack[%p:%p] (%dw)",
3188 th, fixnum_value(read_TLS(FREE_INTERRUPT_CONTEXT_INDEX,th)),
3189 th->control_stack_start, th->control_stack_end,
3190 th->control_stack_end - th->control_stack_start); */
3191 for (i = fixnum_value(read_TLS(FREE_INTERRUPT_CONTEXT_INDEX,th))-1; i>=0; i--) {
3192 os_context_t *c = nth_interrupt_context(i, th);
3193 visit_context_registers(context_method, c, (void*)1);
3194 lispobj* esp1 = (lispobj*) *os_context_register_addr(c,reg_SP);
3195 if (esp1 >= th->control_stack_start && esp1 < th->control_stack_end && (void*)esp1 < esp)
3196 esp = esp1;
3198 if (th == get_sb_vm_thread()) {
3199 if ((void*)cur_thread_approx_stackptr < esp) esp = cur_thread_approx_stackptr;
3201 # else
3202 esp = cur_thread_approx_stackptr;
3203 # endif
3204 if (!esp || esp == (void*) -1)
3205 UNKNOWN_STACK_POINTER_ERROR("garbage_collect", th);
3206 /* fprintf(stderr, " SP=%p (%dw)%s\n",
3207 esp, (int)(th->control_stack_end - (lispobj*)esp),
3208 (th == get_sb_vm_thread()) ? " CURRENT":""); */
3210 // Words on the stack which point into the stack are likely
3211 // frame pointers or alien or DX object pointers. In any case
3212 // there's no need to call preserve_pointer on them since
3213 // they definitely don't point to the heap.
3214 // See the picture at alloc_thread_struct() as a reminder.
3215 #ifdef LISP_FEATURE_UNIX
3216 lispobj exclude_from = (lispobj)th->control_stack_start;
3217 lispobj exclude_to = (lispobj)th + dynamic_values_bytes;
3218 #define potential_heap_pointer(word) !(exclude_from <= word && word < exclude_to)
3219 #else
3220 // We can't use the heuristic of excluding words that appear to point into
3221 // 'struct thread' on win32 because ... I don't know why.
3222 // See https://groups.google.com/g/sbcl-devel/c/8s7mrapq56s/m/UaAjYPqKBAAJ
3223 #define potential_heap_pointer(word) 1
3224 #endif
3226 lispobj* ptr;
3227 for (ptr = esp; ptr < th->control_stack_end; ptr++) {
3228 lispobj word = *ptr;
3229 // Also note that we can eliminate small fixnums from consideration
3230 // since there is no memory on the 0th page.
3231 // (most OSes don't let users map memory there, though they used to).
3232 if (word >= BACKEND_PAGE_BYTES && potential_heap_pointer(word)) {
3233 preserve_pointer(word, 0);
3237 #endif
3239 static void scan_explicit_pins(__attribute__((unused)) struct thread* th)
3241 lispobj pin_list = read_TLS(PINNED_OBJECTS, th);
3242 for ( ; pin_list != NIL ; pin_list = CONS(pin_list)->cdr ) {
3243 lispobj object = CONS(pin_list)->car;
3244 pin_exact_root(object);
3245 if (lowtag_of(object) == INSTANCE_POINTER_LOWTAG) {
3246 struct instance* instance = INSTANCE(object);
3247 lispobj layout = instance_layout((lispobj*)instance);
3248 // Since we're still in the pinning phase of GC, layouts can't have moved yet,
3249 // so there is no forwarding check needed here.
3250 if (layout && lockfree_list_node_layout_p(LAYOUT(layout))) {
3251 /* A logically-deleted explicitly-pinned lockfree list node pins its
3252 * successor too, since Lisp reconstructs the next node's tagged pointer
3253 * from an untagged pointer currently stored in %NEXT of this node. */
3254 lispobj successor = ((struct list_node*)instance)->_node_next;
3255 // Be sure to ignore an uninitialized word containing 0.
3256 if (successor && fixnump(successor))
3257 pin_exact_root(successor | INSTANCE_POINTER_LOWTAG);
3263 /* Given the slightly asymmetric formulation of page_ends_contiguous_block_p()
3264 * you might think that it could cause the next page's assertion about start_block_p()
3265 * to fail, but it does not seem to. That's really weird! */
3266 __attribute__((unused)) static void check_contiguity()
3268 page_index_t first = 0;
3269 while (first < next_free_page) {
3270 if (!page_words_used(first)) { ++first; continue; }
3271 gc_assert(page_starts_contiguous_block_p(first));
3272 page_index_t last = first;
3273 while (!page_ends_contiguous_block_p(last, page_table[first].gen)) ++last;
3274 first = last + 1;
3278 static void finish_code_metadata();
3279 int show_gc_generation_throughput = 0;
3280 /* Garbage collect a generation. If raise is 0 then the remains of the
3281 * generation are not raised to the next generation. */
3282 void NO_SANITIZE_ADDRESS NO_SANITIZE_MEMORY
3283 garbage_collect_generation(generation_index_t generation, int raise,
3284 void* cur_thread_approx_stackptr)
3286 struct thread *th;
3288 if (gencgc_verbose > 2) fprintf(stderr, "BEGIN gc_gen(%d,%d)\n", generation, raise);
3290 #ifdef COLLECT_GC_STATS
3291 struct timespec t0;
3292 clock_gettime(CLOCK_MONOTONIC, &t0);
3293 uword_t gen_usage_at_start = generations[generation].bytes_allocated;
3294 uword_t higher_gen_usage_at_start =
3295 raise ? generations[generation+1].bytes_allocated : 0;
3296 #endif
3298 gc_assert(generation <= PSEUDO_STATIC_GENERATION);
3300 /* The oldest generation can't be raised. */
3301 gc_assert(!raise || generation < HIGHEST_NORMAL_GENERATION);
3303 /* Check that weak hash tables were processed in the previous GC. */
3304 gc_assert(weak_hash_tables == NULL);
3306 /* When a generation is not being raised it is transported to a
3307 * temporary generation (NUM_GENERATIONS), and lowered when
3308 * done. Set up this new generation. There should be no pages
3309 * allocated to it yet. */
3310 if (!raise) {
3311 gc_assert(generations[SCRATCH_GENERATION].bytes_allocated == 0);
3314 hopscotch_reset(&pinned_objects);
3316 #ifdef LISP_FEATURE_SB_THREAD
3317 pin_all_dynamic_space_code = 0;
3318 for_each_thread(th) {
3319 if (th->state_word.state != STATE_DEAD && \
3320 (read_TLS(GC_PIN_CODE_PAGES, th) & make_fixnum(1))) {
3321 pin_all_dynamic_space_code = 1;
3322 break;
3325 #else
3326 pin_all_dynamic_space_code = read_TLS(GC_PIN_CODE_PAGES, 0) & make_fixnum(1);
3327 #endif
3329 /* Set the global src and dest. generations */
3330 generation_index_t original_alloc_generation = gc_alloc_generation;
3332 if (generation < PSEUDO_STATIC_GENERATION) {
3334 from_space = generation;
3335 if (raise)
3336 new_space = generation+1;
3337 else
3338 new_space = SCRATCH_GENERATION;
3340 /* Change to a new space for allocation, resetting the alloc_start_page */
3341 gc_alloc_generation = new_space;
3342 RESET_ALLOC_START_PAGES();
3344 if (pin_all_dynamic_space_code) {
3345 /* This needs to happen before ambiguous root pinning, as the mechanisms
3346 * overlap in a way that all-code pinning wouldn't do the right thing if flipped.
3347 * FIXME: why would it not? More explanation needed!
3348 * Code objects should never get into the pins table in this case */
3349 page_index_t i;
3350 for (i = 0; i < next_free_page; i++) {
3351 if (page_table[i].gen == from_space
3352 && is_code(page_table[i].type) && page_words_used(i))
3353 gc_page_pins[i] = PAGE_PINNED;
3357 /* Un-write-protect the old-space pages. This is essential for the
3358 * promoted pages as they may contain pointers into the old-space
3359 * which need to be scavenged. It also helps avoid unnecessary page
3360 * faults as forwarding pointers are written into them. They need to
3361 * be un-protected anyway before unmapping later. */
3362 unprotect_oldspace();
3364 } else { // "full" [sic] GC
3366 gc_assert(!pin_all_dynamic_space_code); // not supported (but could be)
3368 /* This is a full mark-and-sweep of all generations without compacting
3369 * and without returning free space to the allocator. The intent is to
3370 * break chains of objects causing accidental reachability.
3371 * Subsequent GC cycles will compact and reclaim space as usual. */
3372 from_space = new_space = -1;
3374 // Allocate pages from dynamic space for the work queue.
3375 extern void prepare_for_full_mark_phase();
3376 prepare_for_full_mark_phase();
3380 /* Possibly pin stack roots and/or *PINNED-OBJECTS*, unless saving a core.
3381 * Scavenging (fixing up pointers) will occur later on */
3383 if (conservative_stack) {
3384 for_each_thread(th) {
3385 if (th->state_word.state == STATE_DEAD) continue;
3386 scan_explicit_pins(th);
3387 #if !GENCGC_IS_PRECISE
3388 /* Pin everything in fromspace with a stack root, and also set the
3389 * sticky card mark on any page (in any generation)
3390 * referenced from the stack. */
3391 conservative_stack_scan(th, generation, cur_thread_approx_stackptr);
3392 #elif defined LISP_FEATURE_MIPS || defined LISP_FEATURE_PPC64
3393 // Pin code if needed
3394 semiconservative_pin_stack(th, generation);
3395 #elif defined REG_RA
3396 conservative_pin_code_from_return_addresses(th);
3397 #elif !defined(reg_CODE)
3398 pin_call_chain_and_boxed_registers(th);
3399 #endif
3403 // Thread creation optionally no longer synchronizes the creating and
3404 // created thread. When synchronized, the parent thread is responsible
3405 // for pinning the start function for handoff to the created thread.
3406 // When not synchronized, the startup parameters are pinned via this list
3407 // which will always be NIL if the feature is not enabled.
3408 #ifdef STARTING_THREADS
3409 lispobj pin_list = SYMBOL(STARTING_THREADS)->value;
3410 for ( ; pin_list != NIL ; pin_list = CONS(pin_list)->cdr ) {
3411 lispobj thing = CONS(pin_list)->car;
3412 if (!thing) continue; // Nothing to worry about when 'thing' is already smashed
3413 // It might be tempting to say that only the SB-THREAD:THREAD instance
3414 // requires pinning - because right after we access it to extract the
3415 // primitive thread, we link into all_threads - but it may be that the code
3416 // emitted by the C compiler in new_thread_trampoline computes untagged pointers
3417 // when accessing the vector and the start function, so those would not be
3418 // seen as valid lisp pointers by the implicit pinning logic.
3419 // And the precisely GC'd platforms would not pin anything from C code.
3420 // The tests in 'threads.impure.lisp' are good at detecting omissions here.
3421 gc_assert(instancep(thing));
3422 struct thread_instance *lispthread = (void*)(thing - INSTANCE_POINTER_LOWTAG);
3423 lispobj info = lispthread->startup_info;
3424 // INFO gets set to a fixnum when the thread is exiting. I *think* it won't
3425 // ever be seen in the starting-threads list, but let's be cautious.
3426 if (is_lisp_pointer(info)) {
3427 gc_assert(simple_vector_p(info));
3428 gc_assert(vector_len(VECTOR(info)) >= 1);
3429 lispobj fun = VECTOR(info)->data[0];
3430 gc_assert(functionp(fun));
3431 #ifdef LISP_FEATURE_X86_64
3432 /* FIXME: re. the following remark that pin_exact_root() "does not
3433 * work", does it have to be that way? It seems the issue is that
3434 * pin_exact_root does absolutely nothing for objects in immobile space.
3435 * Are there other objects we call it on which could be in immobile-space
3436 * and should it be made to deal with them? */
3437 // slight KLUDGE: 'fun' is a simple-fun in immobile-space,
3438 // and pin_exact_root() doesn't work. In all probability 'fun'
3439 // is pseudo-static, but let's use the right pinning function.
3440 // (This line of code is so rarely executed that it doesn't
3441 // impact performance to search for the object)
3442 preserve_pointer(fun, 0);
3443 #else
3444 pin_exact_root(fun);
3445 #endif
3446 // pin_exact_root is more efficient than preserve_pointer()
3447 // because it does not search for the object.
3448 pin_exact_root(thing);
3449 pin_exact_root(info);
3452 #endif
3454 /* Remove any key from pinned_objects that does not identify an object.
3455 * This is done more efficiently by delaying it until after all keys are
3456 * inserted rather than doing it at each insertion */
3457 refine_ambiguous_roots();
3459 if (gencgc_verbose > 1) {
3460 extern void dump_marked_objects();
3461 if (compacting_p()) show_pinnedobj_count(); /*else dump_marked_objects();*/
3464 /* Now that all of the pinned pages are known, and
3465 * before we start to scavenge (and thus relocate) objects,
3466 * relocate the pinned pages to newspace, so that the scavenger
3467 * will not attempt to relocate their contents. */
3468 if (compacting_p())
3469 move_pinned_pages_to_newspace();
3471 /* Scavenge all the rest of the roots. */
3473 #if GENCGC_IS_PRECISE
3475 * If not x86, we need to scavenge the interrupt context(s) and the
3476 * control stack, unless this is the final GC, in which case we don't.
3478 if (conservative_stack) {
3479 struct thread *th;
3480 for_each_thread(th) {
3481 #if !defined(LISP_FEATURE_MIPS) && defined(reg_CODE) // interrupt contexts already pinned everything they see
3482 scavenge_interrupt_contexts(th);
3483 #endif
3484 scavenge_control_stack(th);
3487 # ifdef LISP_FEATURE_SB_SAFEPOINT
3488 /* In this case, scrub all stacks right here from the GCing thread
3489 * instead of doing what the comment below says. Suboptimal, but
3490 * easier. */
3491 for_each_thread(th)
3492 scrub_thread_control_stack(th);
3493 # else
3494 /* Scrub the unscavenged control stack space, so that we can't run
3495 * into any stale pointers in a later GC (this is done by the
3496 * stop-for-gc handler in the other threads). */
3497 scrub_control_stack();
3498 # endif
3500 #endif
3502 /* Scavenge the Lisp functions of the interrupt handlers */
3503 if (GC_LOGGING) fprintf(gc_activitylog(), "begin scavenge sighandlers\n");
3504 if (compacting_p())
3505 scavenge(lisp_sig_handlers, NSIG);
3506 else
3507 gc_mark_range(lisp_sig_handlers, NSIG);
3509 /* Scavenge the binding stacks. */
3510 if (GC_LOGGING) fprintf(gc_activitylog(), "begin scavenge thread roots\n");
3512 struct thread *th;
3513 for_each_thread(th) {
3514 scav_binding_stack((lispobj*)th->binding_stack_start,
3515 (lispobj*)get_binding_stack_pointer(th),
3516 compacting_p() ? 0 : gc_mark_obj);
3517 /* do the tls as well */
3518 lispobj* from = &th->lisp_thread;
3519 lispobj* to = (lispobj*)(SymbolValue(FREE_TLS_INDEX,0) + (char*)th);
3520 sword_t nwords = to - from;
3521 if (compacting_p())
3522 scavenge(from, nwords);
3523 else
3524 gc_mark_range(from, nwords);
3528 if (!compacting_p()) {
3529 extern void execute_full_mark_phase();
3530 extern void execute_full_sweep_phase();
3531 execute_full_mark_phase();
3532 execute_full_sweep_phase();
3533 goto maybe_verify;
3536 if (GC_LOGGING) fprintf(gc_activitylog(), "begin scavenge static roots\n");
3537 heap_scavenge((lispobj*)NIL_SYMBOL_SLOTS_START, (lispobj*)NIL_SYMBOL_SLOTS_END);
3538 heap_scavenge((lispobj*)STATIC_SPACE_OBJECTS_START, static_space_free_pointer);
3539 heap_scavenge((lispobj*)PERMGEN_SPACE_START, permgen_space_free_pointer);
3540 #ifndef LISP_FEATURE_IMMOBILE_SPACE
3541 // TODO: use an explicit remembered set of modified objects in this range
3542 if (TEXT_SPACE_START) heap_scavenge((lispobj*)TEXT_SPACE_START, text_space_highwatermark);
3543 #endif
3544 #ifdef LISP_FEATURE_SYSTEM_TLABS
3545 extern void gc_scavenge_arenas();
3546 gc_scavenge_arenas();
3547 #endif
3549 /* All generations but the generation being GCed need to be
3550 * scavenged. The new_space generation needs special handling as
3551 * objects may be moved in - it is handled separately below. */
3553 // SCRATCH_GENERATION is scavenged in immobile space
3554 // because pinned objects will already have had their generation
3555 // number reassigned to that generation if applicable.
3556 scavenge_immobile_roots(generation+1, SCRATCH_GENERATION);
3558 // When collecting gen0, ordinarily the roots would be gen1 and higher,
3559 // but if gen0 is getting raised to 1 on this cycle, then we skip roots in gen1
3560 // because we'll eventually examine all of gen1 as part of newspace.
3561 // Similarly for higher generations. So if raising, the minimum root gen is
3562 // always the collected generation + 2, otherwise it's the collected + 1.
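// Worked example (for clarity): collecting generation 0 with raise=1 scans
// roots starting at generation 2, since generation 1 becomes newspace and is
// covered by scavenge_newspace(); with raise=0 the roots start at generation 1.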
3563 if (GC_LOGGING) fprintf(gc_activitylog(), "begin scavenge_root_gens\n");
3564 scavenge_root_gens(generation+1+raise);
3565 scavenge_pinned_ranges();
3566 /* The Lisp start function is stored in the core header, not a static
3567 * symbol. It is passed to gc_and_save() in this C variable */
3568 if (lisp_init_function) scavenge(&lisp_init_function, 1);
3569 if (lisp_package_vector) scavenge(&lisp_package_vector, 1);
3570 if (alloc_profile_data) scavenge(&alloc_profile_data, 1);
3572 /* If SB-SPROF was used, enliven all pages of code.
3573 * Note that some objects may have already been transported off the page.
3574 * Despite the extra scan, it is more efficient than scanning all trace buffers
3575 * and potentially updating them and/or invalidating hashes.
3576 * This really wants a better algorithm. Each code blob could have one byte
3577 * indicating whether it is present in any trace buffer; the SIGPROF handler
3578 * can update that byte. */
3579 if (sb_sprof_enabled) {
3580 page_index_t first = 0;
3581 while (first < next_free_page) {
3582 if (page_table[first].gen != from_space
3583 || !is_code(page_table[first].type)
3584 || !page_words_used(first)) {
3585 ++first;
3586 continue;
3588 page_index_t last = first;
3589 while (!page_ends_contiguous_block_p(last, from_space)) ++last;
3590 // [first,last] are inclusive bounds on a code range
3591 /* FIXME: should 'where' be initialized to page_scan_start()? I think so,
3592 * because ends_contiguous_block(page-1) does NOT imply
3593 * starts_contiguous_block(page). This is very unfortunate.
3594 * I've seen things such as the following:
3595 * page base: 0x20000 0x21000 0x22000
3596 * used: 1000 10 0
3597 * ss: 0x20000 0x20000 0x21010
3598 * where the first two pages were opened together and then closed
3599 * after consuming all of the first + 0x10 bytes more, and then the next
3600 * page extends the region (so not to waste the entire rest of the second
3601 * page), pointing its scan_start to the end of the range that was updated
3602 * into the page table. In that scenario, ends_p() is true of the page
3603 * based at 0x21000 but starts_p() is false of the next page,
3604 * because its scan start is an earlier page than itself.
3605 * How does this assertion NOT fail sometimes? Yet, it does not. */
3606 gc_assert(page_starts_contiguous_block_p(first));
3607 lispobj* where = (lispobj*)page_address(first);
3608 lispobj* limit = (lispobj*)page_address(last) + page_words_used(last);
3609 while (where < limit) {
3610 if (forwarding_pointer_p(where)) {
3611 // The codeblob already survived GC, so we just need to step over it.
3612 lispobj* copy = native_pointer(forwarding_pointer_value(where));
3613 // NOTE: it's OK to size the newspace copy rather than the original
3614 // because code size can't change.
3615 where += headerobj_size(copy);
3616 } else {
3617 // Compute 'nwords' before potentially moving the object
3618 // at 'where', because moving it stomps on the header word.
3619 sword_t nwords = headerobj_size(where);
3620 // If the object is not a filler and not a trampoline, then create
3621 // a pointer to it and enliven the pointee.
3622 if (widetag_of(where) == CODE_HEADER_WIDETAG
3623 && where[1] != 0 /* has at least one boxed word */
3624 && code_serialno((struct code*)where) != 0) {
3625 lispobj ptr = make_lispobj(where, OTHER_POINTER_LOWTAG);
3626 scavenge(&ptr, 1);
3628 where += nwords;
3631 first = last + 1;
3635 /* Finally scavenge the new_space generation. Keep going until no
3636 * more objects are moved into the new generation */
3637 scavenge_newspace(new_space);
3638 if (save_lisp_gc_iteration == 2) finish_code_metadata();
3640 scan_binding_stack();
3641 smash_weak_pointers();
3642 /* Return private-use pages to the general pool so that Lisp can have them */
3643 gc_dispose_private_pages();
3644 cull_weak_hash_tables(weak_ht_alivep_funs);
3645 scan_finalizers();
3647 obliterate_nonpinned_words();
3648 // Do this last, because until obliterate_nonpinned_words() happens,
3649 // not all page table entries have the 'gen' value updated,
3650 // which we need to correctly find all old->young pointers.
3651 sweep_immobile_space(raise);
3653 ASSERT_REGIONS_CLOSED();
3654 hopscotch_log_stats(&pinned_objects, "pins");
3656 free_oldspace();
3658 /* If this cycle was not a promotion cycle, change SCRATCH_GENERATION back
3659 * to its correct generation number */
3660 struct generation* g = &generations[generation];
3661 if (!raise) {
3662 page_index_t i;
3663 for (i = 0; i < next_free_page; i++)
3664 if (page_table[i].gen == SCRATCH_GENERATION) page_table[i].gen = generation;
3665 gc_assert(g->bytes_allocated == 0);
3666 g->bytes_allocated = generations[SCRATCH_GENERATION].bytes_allocated;
3667 generations[SCRATCH_GENERATION].bytes_allocated = 0;
3669 #ifdef COLLECT_GC_STATS
3670 if (show_gc_generation_throughput) {
3671 struct timespec t1;
3672 clock_gettime(CLOCK_MONOTONIC, &t1);
3673 long et_nsec = (t1.tv_sec - t0.tv_sec)*1000000000 + (t1.tv_nsec - t0.tv_nsec);
3674 sword_t bytes_retained, bytes_freed;
3675 if (raise) {
3676 bytes_retained = (generations[generation+1].bytes_allocated
3677 - higher_gen_usage_at_start);
3678 } else {
3679 bytes_retained = generations[generation].bytes_allocated;
3681 bytes_freed = gen_usage_at_start - bytes_retained;
3683 double pct_freed = gen_usage_at_start ? (double)bytes_freed / gen_usage_at_start : 0.0;
3684 double et_sec = (double)et_nsec / 1000000000.0;
3685 double speed = (double)(gc_copied_nwords << WORD_SHIFT) / 1024 / et_sec;
3686 char *units = "KiB";
3687 if (speed > 1024.0) speed /= 1024.0, units = "MiB";
3688 /* The pre-GC bytes allocated should sum to copied + pinned + freed, which it
3689 * more-or-less does, but there can be discrepancies because structure instances
3690 * can be extended with a stable-hash slot (which isn't accounted for at all),
3691 * vectors can be shrunk (part being "freed" and part being "copied", depending
3692 * on the size and partial pinning), and the finalizer hash-table can have cons
3693 * cells allocated to record the list of functions to call.
3694 * In particular, there could be 0 usage before, and some usage after due to
3695 * the finalizer table, which causes "freed" to be negative.
3696 * While those factors could be accounted for in the report, it would be needlessly
3697 * pedantic and confusing, and not really affect the big picture.
3698 * If the MiB per sec is low, it could be that not many bytes were copied.
3699 * Low speed + large count is bad though */
3700 char buffer[200];
3701 // can't use fprintf() inside GC because of malloc. snprintf() can deadlock too,
3702 // but seems to do so much less often.
3703 int n = snprintf(buffer, sizeof buffer,
3704 "gen%d: %ldw copied in %f sec (%.0f %s/sec), %ldw in-situ,"
3705 " %d pins (%ldw), %ldw freed (%.1f%%)\n",
3706 generation, gc_copied_nwords, et_sec, speed, units,
3707 gc_in_situ_live_nwords,
3708 gc_pin_count, gc_pinned_nwords,
3709 bytes_freed >> WORD_SHIFT, pct_freed*100.0);
3710 write(2, buffer, n);
3711 n = snprintf(buffer, sizeof buffer,
3712 "root word counts: %d + %d + %d\n", root_boxed_words_scanned,
3713 root_vector_words_scanned, root_mixed_words_scanned);
3714 write(2, buffer, n);
3716 gc_copied_nwords = gc_in_situ_live_nwords = gc_pinned_nwords = 0;
3717 root_boxed_words_scanned = root_vector_words_scanned = root_mixed_words_scanned = 0;
3718 #endif
3720 /* Reset the alloc_start_page for generation. */
3721 RESET_ALLOC_START_PAGES();
3723 /* Set the new gc trigger for the GCed generation. */
3724 g->gc_trigger = g->bytes_allocated + g->bytes_consed_between_gc;
3725 g->num_gc = raise ? 0 : (1 + g->num_gc);
3727 maybe_verify:
3728 // Have to kill this structure from its root, because any of the nodes would have
3729 // been on pages that got freed by free_oldspace.
3730 dynspace_codeblob_tree_snapshot = 0;
3731 if (generation >= verify_gens)
3732 hexdump_and_verify_heap(cur_thread_approx_stackptr,
3733 VERIFY_POST_GC | (generation<<1) | raise);
3735 extern int n_unboxed_instances;
3736 n_unboxed_instances = 0;
3737 gc_alloc_generation = original_alloc_generation;
3740 static page_index_t
3741 find_next_free_page(void)
3743 page_index_t last_page = -1, i;
3745 for (i = 0; i < next_free_page; i++)
3746 if (page_words_used(i) != 0)
3747 last_page = i;
3749 /* 1 page beyond the last used page is the next free page */
3750 return last_page + 1;
3753 generation_index_t small_generation_limit = 1;
3755 extern int finalizer_thread_runflag;
3757 /* GC all generations newer than last_gen, raising the objects in each
3758 * to the next older generation - we finish when all generations below
3759 * last_gen are empty. Then if last_gen is due for a GC, or if
3760 * last_gen==NUM_GENERATIONS (the scratch generation? eh?) we GC that
3761 * too. The valid range for last_gen is: 0,1,...,NUM_GENERATIONS.
3763 * We stop collecting at gencgc_oldest_gen_to_gc, even if this is less than
3764 * last_gen (oh, and note that by default it is NUM_GENERATIONS-1) */
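/* Illustrative calls (not compiled), based on the special cases handled in
 * the function body below; callers elsewhere in the runtime may differ. */
#if 0
collect_garbage(0);                            // collect just the nursery (plus any generations that are due)
collect_garbage(1 + PSEUDO_STATIC_GENERATION); // request the non-moving full mark/sweep pass
#endif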
3765 long tot_gc_nsec;
3766 void NO_SANITIZE_ADDRESS NO_SANITIZE_MEMORY
3767 collect_garbage(generation_index_t last_gen)
3769 ++n_lisp_gcs;
3770 THREAD_JIT_WP(0);
3771 generation_index_t gen = 0, i;
3772 bool gc_mark_only = 0;
3773 int raise, more = 0;
3774 int gen_to_wp;
3775 /* The largest value of next_free_page seen since the time
3776 * remap_free_pages was called. */
3777 static page_index_t high_water_mark = 0;
3779 #ifdef COLLECT_GC_STATS
3780 struct timespec t_gc_start;
3781 clock_gettime(CLOCK_MONOTONIC, &t_gc_start);
3782 #endif
3783 log_generation_stats(gc_logfile, "=== GC Start ===");
3785 gc_active_p = 1;
3787 if (last_gen == 1+PSEUDO_STATIC_GENERATION) {
3788 // Pseudostatic space undergoes a non-moving collection
3789 last_gen = PSEUDO_STATIC_GENERATION;
3790 gc_mark_only = 1;
3791 } else if (last_gen > 1+PSEUDO_STATIC_GENERATION) {
3792 // This is a completely non-obvious thing to do, but whatever...
3793 last_gen = 0;
3796 /* Flush the alloc regions updating the page table.
3798 * GC is single-threaded and all memory allocations during a collection
3799 * happen in the GC thread, so it is sufficient to update PTEs for the
3800 * per-thread regions exactly once at the beginning of a collection
3801 * and update only from the GC's regions thereafter during collection.
3803 * The GC's regions are probably empty already, except:
3804 * - The code region is shared across all threads
3805 * - The boxed region is used in lieu of thread-specific regions
3806 * in a unithread build.
3807 * So we need to close them for those two cases.
3809 struct thread *th;
3810 for_each_thread(th) gc_close_thread_regions(th, 0);
3811 ensure_region_closed(code_region, PAGE_TYPE_CODE);
3812 if (gencgc_verbose > 2) fprintf(stderr, "[%d] BEGIN gc(%d)\n", n_lisp_gcs, last_gen);
3814 #ifdef LISP_FEATURE_IMMOBILE_SPACE
3815 if (ENABLE_PAGE_PROTECTION) {
3816 // Unprotect the in-use ranges. Any page could be written during scavenge
3817 os_protect((os_vm_address_t)FIXEDOBJ_SPACE_START,
3818 (lispobj)fixedobj_free_pointer - FIXEDOBJ_SPACE_START,
3819 OS_VM_PROT_ALL);
3821 #endif
3823 lispobj* cur_thread_approx_stackptr =
3824 (lispobj*)ALIGN_DOWN((uword_t)&last_gen, N_WORD_BYTES);
3825 /* Verify the new objects created by Lisp code. */
3826 if (pre_verify_gen_0)
3827 hexdump_and_verify_heap(cur_thread_approx_stackptr, VERIFY_PRE_GC);
3829 if (gencgc_verbose > 1) {
3830 fprintf(stderr, "Pre-GC:\n");
3831 print_generation_stats();
3834 /* After a GC, pages of code are safe to linearly scan because
3835 * there won't be random junk on them below page_bytes_used.
3836 * But generation 0 pages are _not_ safe to linearly scan because they aren't
3837 * pre-zeroed. The SIGPROF handler could have a bad time if it were to misread
3838 * the header of an object mid-creation. Therefore, codeblobs newly made by Lisp
3839 * are kept in a lock-free and threadsafe datastructure. But we don't want to
3840 * enliven nodes of that structure for Lisp to see (absent any other references)
3841 * because the whole thing becomes garbage after this GC. So capture the tree
3842 * for GC's benefit, and delete the view of it from Lisp.
3843 * Incidentally, immobile text pages have their own tree, for other purposes
3844 * (among them being to find page scan start offsets) which is pruned as
3845 * needed by a finalizer. */
3846 dynspace_codeblob_tree_snapshot = SYMBOL(DYNSPACE_CODEBLOB_TREE)->value;
3847 SYMBOL(DYNSPACE_CODEBLOB_TREE)->value = NIL;
3849 page_index_t initial_nfp = next_free_page;
3850 if (gc_mark_only) {
3851 garbage_collect_generation(PSEUDO_STATIC_GENERATION, 0,
3852 cur_thread_approx_stackptr);
3853 goto finish;
3856 do {
3857 /* Collect the generation. */
3859 if (more || (gen >= gencgc_oldest_gen_to_gc)) {
3860 /* Never raise the oldest generation. Never raise the extra generation
3861 * collected due to more-flag. */
3862 raise = 0;
3863 more = 0;
3864 } else {
3865 raise =
3866 (gen < last_gen)
3867 || (generations[gen].num_gc >= generations[gen].number_of_gcs_before_promotion);
3868 /* If we would not normally raise this one, but we're
3869 * running low on space in comparison to the object-sizes
3870 * we've been seeing, raise it and collect the next one
3871 * too. */
3872 if (!raise && gen == last_gen) {
3873 more = (2*large_allocation) >= (dynamic_space_size - bytes_allocated);
3874 raise = more;
3878 /* If an older generation is being filled, then update its
3879 * memory age. */
3880 if (raise == 1) {
3881 generations[gen+1].cum_sum_bytes_allocated +=
3882 generations[gen+1].bytes_allocated;
3885 garbage_collect_generation(gen, raise, cur_thread_approx_stackptr);
3887 /* Reset the memory age cum_sum. */
3888 generations[gen].cum_sum_bytes_allocated = 0;
3890 if (gencgc_verbose > 1) {
3891 fprintf(stderr, "Post-GC(gen=%d):\n", gen);
3892 print_generation_stats();
3895 gen++;
3896 } while ((gen <= gencgc_oldest_gen_to_gc)
3897 && ((gen < last_gen)
3898 || more
3899 || (raise
3900 && (generations[gen].bytes_allocated
3901 > generations[gen].gc_trigger)
3902 && (generation_average_age(gen)
3903 > generations[gen].minimum_age_before_gc))));
3905 /* Now if gen-1 was raised all generations before gen are empty.
3906 * If it wasn't raised then all generations before gen-1 are empty.
3908 * Now objects within this gen's pages cannot point to younger
3909 * generations unless they are written to. This can be exploited
3910 * by write-protecting the pages of gen; then when younger
3911 * generations are GCed only the pages which have been written
3912 * need scanning. */
3913 if (raise)
3914 gen_to_wp = gen;
3915 else
3916 gen_to_wp = gen - 1;
3918 /* There's not much point in WPing pages in generation 0 as it is
3919 * never scavenged (except promoted pages). */
3920 if ((gen_to_wp > 0) && ENABLE_PAGE_PROTECTION) {
3921 /* Check that they are all empty. */
3922 for (i = 0; i < gen_to_wp; i++) {
3923 if (generations[i].bytes_allocated)
3924 lose("trying to write-protect gen. %d when gen. %d nonempty",
3925 gen_to_wp, i);
3927 write_protect_generation_pages(gen_to_wp);
3929 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
3931 // Turn sticky cards marks to the regular mark.
3932 page_index_t page;
3933 for (page=0; page<next_free_page; ++page) {
3934 long card = page_to_card_index(page);
3935 int j;
3936 for (j=0; j<CARDS_PER_PAGE; ++j, ++card)
3937 if (gc_card_mark[card] == STICKY_MARK) gc_card_mark[card] = CARD_MARKED;
3940 #endif
3942 /* Save the high-water mark before updating next_free_page */
3943 if (next_free_page > high_water_mark)
3944 high_water_mark = next_free_page;
3946 next_free_page = find_next_free_page();
3948 /* Update auto_gc_trigger. Make sure we trigger the next GC before
3949 * running out of heap! */
3950 if (bytes_consed_between_gcs <= (dynamic_space_size - bytes_allocated))
3951 auto_gc_trigger = bytes_allocated + bytes_consed_between_gcs;
3952 else
3953 auto_gc_trigger = bytes_allocated + (dynamic_space_size - bytes_allocated)/2;
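// Worked example (illustrative numbers): with dynamic_space_size = 1024 MiB,
// bytes_allocated = 924 MiB and bytes_consed_between_gcs = 200 MiB, only
// 100 MiB remain, so the else-branch above sets the trigger halfway into the
// remainder, at 924 + 50 = 974 MiB.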
3955 if(gencgc_verbose) {
3956 #define MESSAGE ("Next gc when %"OS_VM_SIZE_FMT" bytes have been consed\n")
3957 char buf[64];
3958 int n;
3959 // fprintf() can - and does - cause deadlock here.
3960 // snprintf() seems to work fine.
3961 n = snprintf(buf, sizeof buf, MESSAGE, (uintptr_t)auto_gc_trigger);
3962 ignore_value(write(2, buf, n));
3963 #undef MESSAGE
3966 /* If we did a big GC (arbitrarily defined as gen > 1), release memory
3967 * back to the OS.
3969 if (gen > small_generation_limit) {
3970 if (next_free_page > high_water_mark)
3971 high_water_mark = next_free_page;
3972 // BUG? high_water_mark is the highest value of next_free_page,
3973 // which means that page_table[high_water_mark] was actually NOT ever
3974 // used, because next_free_page is an exclusive bound on the range
3975 // of pages used. But remap_free_pages treats its 'to' argument as an *inclusive*
3976 // bound. The only reason it's not an array overrun error is that
3977 // the page_table has one more element than there are pages.
3978 remap_free_pages(0, high_water_mark);
3979 high_water_mark = 0;
3982 large_allocation = 0;
3983 finish:
3984 write_protect_immobile_space();
3985 gc_active_p = 0;
3987 #ifdef COLLECT_GC_STATS
3988 struct timespec t_gc_done;
3989 clock_gettime(CLOCK_MONOTONIC, &t_gc_done);
3990 long et_nsec = (t_gc_done.tv_sec - t_gc_start.tv_sec)*1000000000
3991 + (t_gc_done.tv_nsec - t_gc_start.tv_nsec);
3992 tot_gc_nsec += et_nsec;
3993 #endif
3995 log_generation_stats(gc_logfile, "=== GC End ===");
3996 // Increment the finalizer runflag. This acts as a count of the number
3997 // of GCs as well as a notification to wake the finalizer thread.
3998 if (finalizer_thread_runflag != 0) {
3999 int newval = 1 + finalizer_thread_runflag;
4000 // check if counter wrapped around. Don't store 0 as the new value,
4001 // as that causes the thread to exit.
4002 finalizer_thread_runflag = newval ? newval : 1;
4004 THREAD_JIT_WP(1);
4005 // Clear all pin bits for the next GC cycle.
4006 // This could be done in the background somehow maybe.
4007 page_index_t max_nfp = initial_nfp > next_free_page ? initial_nfp : next_free_page;
4008 memset(gc_page_pins, 0, max_nfp);
4011 /* Initialization of gencgc metadata is split into two steps:
4012 * 1. gc_init() - allocation of a fixed-address space via mmap(),
4013 * failing which there's no reason to go on. (safepoint only)
4014 * 2. gc_allocate_ptes() - page table entries
4016 void
4017 gc_init(void)
4019 hopscotch_create(&pinned_objects, HOPSCOTCH_HASH_FUN_DEFAULT, 0 /* hashset */,
4020 32 /* logical bin count */, 0 /* default range */);
4021 #ifdef LISP_FEATURE_WIN32
4022 InitializeCriticalSection(&free_pages_lock);
4023 #endif
4026 int gc_card_table_nbits;
4027 long gc_card_table_mask;
4030 /* alloc() and alloc_list() are external interfaces for memory allocation.
4031 * They allocate to generation 0 and are not called from within the garbage
4032 * collector, as only external uses need the check for heap
4033 * size (the GC trigger) and the disabling of interrupts (interrupts are
4034 * always disabled during a GC).
4036 * The vops that allocate assume that the returned space is zero-filled.
4037 * (E.g. the most significant word of a 2-word bignum in MOVE-FROM-UNSIGNED.)
4039 * The check for a GC trigger is only performed when the current
4040 * region is full, so in most cases it's not needed. */
4042 int gencgc_alloc_profiler;
4043 NO_SANITIZE_MEMORY lispobj*
4044 lisp_alloc(int flags, struct alloc_region *region, sword_t nbytes,
4045 int page_type, struct thread *thread)
4047 os_vm_size_t trigger_bytes = 0;
4049 gc_assert(nbytes > 0);
4051 /* Check for alignment allocation problems. */
4052 gc_assert((((uword_t)region->free_pointer & LOWTAG_MASK) == 0)
4053 && ((nbytes & LOWTAG_MASK) == 0));
4055 #define SYSTEM_ALLOCATION_FLAG 2
4056 #ifdef LISP_FEATURE_SYSTEM_TLABS
4057 lispobj* handle_arena_alloc(struct thread*, struct alloc_region *, int, sword_t);
4058 if (page_type != PAGE_TYPE_CODE && thread->arena && !(flags & SYSTEM_ALLOCATION_FLAG))
4059 return handle_arena_alloc(thread, region, page_type, nbytes);
4060 #endif
4062 ++thread->slow_path_allocs;
4063 if ((os_vm_size_t) nbytes > large_allocation)
4064 large_allocation = nbytes;
4066 /* maybe we can do this quickly ... */
4067 /* I'd really like this "quick" case to be more uniform in terms of whether
4068 * it's allowed to occur at all. Some of the inconsistencies are:
4069 * - 32-bit x86 will (or would, not sure any more) choose to use
4070 * out-of-line allocation if lexical policy favors space.
4071 * - PPC at git rev 28aaa39f4e had a subtle "but-not-wrong" bug at the edge
4072 * where it trapped to C if the new free pointer compared ':lge' instead of ':lgt'
4073 * against the region end, fixed in rev 05047647.
4074 * - other architectures may have similar issues.
4075 * For those reasons, even if we satisfy the allocation
4076 * from the TLAB it might be worth checking whether to refill
4077 * the TLAB now. */
4078 void *new_obj = region->free_pointer;
4079 char *new_free_pointer = (char*)new_obj + nbytes;
4080 if (new_free_pointer <= (char*)region->end_addr) {
4081 region->free_pointer = new_free_pointer;
4082 #if defined LISP_FEATURE_MIPS || defined LISP_FEATURE_PPC || \
4083 defined LISP_FEATURE_PPC64 || defined LISP_FEATURE_X86_64
4084 /* Most allocations should never get here, but two page types are special.
4085 * - CODE always comes through here.
4086 * - CONS can come through here because when overflow occurs in lisp,
4087 * the fallback logic will call lisp_alloc one or more times,
4088 * obtaining possibly discontiguous pages of conses */
4089 gc_assert(page_type == PAGE_TYPE_CONS || page_type == PAGE_TYPE_CODE);
4090 #endif
4091 return new_obj;
4094 /* We don't want to count nbytes against auto_gc_trigger unless we
4095 * have to: it speeds up the tenuring of objects and slows down
4096 * allocation. However, unless we do so when allocating _very_
4097 * large objects we are in danger of exhausting the heap without
4098 * running sufficient GCs.
4100 if ((os_vm_size_t) nbytes >= bytes_consed_between_gcs)
4101 trigger_bytes = nbytes;
4103 /* we have to go the long way around, it seems. Check whether we
4104 * should GC in the near future
4106 if (auto_gc_trigger && (bytes_allocated+trigger_bytes > auto_gc_trigger)) {
4107 /* Don't flood the system with interrupts if the need to gc is
4108 * already noted. This can happen for example when SUB-GC
4109 * allocates or after a gc triggered in a WITHOUT-GCING. */
4110 if (read_TLS(GC_PENDING,thread) == NIL) {
4111 /* set things up so that GC happens when we finish the PA
4112 * section */
4113 write_TLS(GC_PENDING, LISP_T, thread);
4114 if (read_TLS(GC_INHIBIT,thread) == NIL) {
4115 #ifdef LISP_FEATURE_SB_SAFEPOINT
4116 thread_register_gc_trigger();
4117 #else
4118 set_pseudo_atomic_interrupted(thread);
4119 maybe_save_gc_mask_and_block_deferrables
4120 # if HAVE_ALLOCATION_TRAP_CONTEXT
4121 (thread_interrupt_data(thread).allocation_trap_context);
4122 # else
4123 (0);
4124 # endif
4125 #endif
4130 /* For the architectures which do NOT use a trap instruction for allocation
4131 * overflow, record a backtrace now if statistical profiling is enabled.
4132 * The ones which use a trap will backtrace from the signal handler.
4133 * Code allocations are ignored, because every code allocation
4134 * comes through lisp_alloc() which makes this not a statistical
4135 * sample. Also the trapping ones don't trap for code.
4136 * #+win32 doesn't seem to work, but neither does CPU profiling */
4137 #if !(defined LISP_FEATURE_PPC || defined LISP_FEATURE_PPC64 \
4138 || defined LISP_FEATURE_SPARC || defined LISP_FEATURE_WIN32)
4139 extern void allocator_record_backtrace(void*, struct thread*);
4140 if (page_type != PAGE_TYPE_CODE && gencgc_alloc_profiler
4141 && thread->state_word.sprof_enable)
4142 allocator_record_backtrace(__builtin_frame_address(0), thread);
4143 #endif
4145 if (flags & 1) return gc_alloc_large(nbytes, page_type);
4147 int __attribute__((unused)) ret = mutex_acquire(&free_pages_lock);
4148 gc_assert(ret);
4149 ensure_region_closed(region, page_type);
4150 // hold the lock after alloc_new_region if a cons page
4151 int release = page_type != PAGE_TYPE_CONS;
4152 new_obj = gc_alloc_new_region(nbytes, page_type, region, release);
4153 region->free_pointer = (char*)new_obj + nbytes;
4154 // addr_diff asserts that 'end' >= 'free_pointer'
4155 int remaining = addr_diff(region->end_addr, region->free_pointer);
4157 // System TLABs are not important to refill right away (in the nearly-empty case)
4158 // so put a high-enough number in 'remaining' to suppress the grab-another-page test
4159 if (flags & SYSTEM_ALLOCATION_FLAG) remaining = 256;
4161 // Try to avoid the next Lisp -> C -> Lisp round-trip by possibly
4162 // requesting yet another region.
4163 if (page_type == PAGE_TYPE_CONS) {
4164 if (remaining <= CONS_SIZE * N_WORD_BYTES) { // Refill now if <= 1 more cons to go
4165 gc_close_region(region, page_type);
4166 // Request > 2 words, forcing a new page to be claimed.
4167 gc_alloc_new_region(4 * N_WORD_BYTES, page_type, region, 0); // don't release
4169 ret = mutex_release(&free_pages_lock);
4170 gc_assert(ret);
4171 } else if (remaining <= 4 * N_WORD_BYTES
4172 && TryEnterCriticalSection(&free_pages_lock)) {
4173 gc_close_region(region, page_type);
4174 // Request > 4 words, forcing a new page to be claimed.
4175 gc_alloc_new_region(6 * N_WORD_BYTES, page_type, region, 1); // do release
4178 return new_obj;
4181 #ifdef LISP_FEATURE_SPARC
4182 void mixed_region_rollback(sword_t size)
4184 struct alloc_region *region = main_thread_mixed_region;
4185 gc_assert(region->free_pointer > region->end_addr);
4186 region->free_pointer = (char*)region->free_pointer - size;
4187 gc_assert(region->free_pointer >= region->start_addr
4188 && region->free_pointer <= region->end_addr);
4190 #endif
4193 * shared support for the OS-dependent signal handlers which
4194 * catch GENCGC-related write-protect violations
4196 void unhandled_sigmemoryfault(void* addr);
4198 /* Depending on which OS we're running under, different signals might
4199 * be raised for a violation of write protection in the heap. This
4200 * function factors out the common generational GC magic which needs
4201 * to be invoked in this case, and should be called from whatever signal
4202 * handler is appropriate for the OS we're running under.
4204 * Return true if this signal is a normal generational GC thing that
4205 * we were able to handle, or false if it was abnormal and control
4206 * should fall through to the general SIGSEGV/SIGBUS/whatever logic.
4208 * We have two control flags for this: one causes us to ignore faults
4209 * on unprotected pages completely, and the second complains to stderr
4210 * but allows us to continue without losing.
4212 extern bool ignore_memoryfaults_on_unprotected_pages;
4213 bool ignore_memoryfaults_on_unprotected_pages = 0;
4215 extern bool continue_after_memoryfault_on_unprotected_pages;
4216 bool continue_after_memoryfault_on_unprotected_pages = 0;
4218 int gencgc_handle_wp_violation(__attribute__((unused)) void* context, void* fault_addr)
4220 page_index_t page_index = find_page_index(fault_addr);
4222 /* Check whether the fault is within the dynamic space. */
4223 if (page_index == (-1)) {
4224 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4225 extern int immobile_space_handle_wp_violation(void*);
4226 if (immobile_space_handle_wp_violation(fault_addr))
4227 return 1;
4228 #endif
4230 /* It can be helpful to be able to put a breakpoint on this
4231 * case to help diagnose low-level problems. */
4232 unhandled_sigmemoryfault(fault_addr);
4234 /* not within the dynamic space -- not our responsibility */
4235 return 0;
4238 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
4239 fake_foreign_function_call(context);
4240 lose("misuse of mprotect() on dynamic space @ %p", fault_addr);
4241 #else
4242 // Pages of code never have MMU-based protection, except on darwin,
4243 // where they do, but they are thread-locally un-protected when creating
4244 // objects on those pages.
4245 gc_assert(!is_code(page_table[page_index].type));
4247 // There can not be an open region. gc_close_region() does not attempt
4248 // to flip that bit atomically. (What does this mean?)
4249 gc_assert(!(page_table[page_index].type & OPEN_REGION_PAGE_FLAG));
4251 // The collector should almost never incur page faults, but I haven't
4252 // found all the trouble spots. It may or may not be worth doing.
4253 // See git rev 8a0af65bfd24
4254 // if (gc_active_p && compacting_p()) lose("unexpected WP fault @ %p during GC", fault_addr);
4256 // Because this signal handler can not be interrupted by STOP_FOR_GC,
4257 // the only possible state change between reading the mark and deciding how
4258 // to proceed is due to another thread also unprotecting the address.
4259 // That's fine; in fact it's OK to read a stale value here.
4260 // The only harmful case would be where the mark byte says it was
4261 // never protected, and the fault occurred nonetheless. That can't happen.
4262 unsigned char mark = gc_card_mark[addr_to_card_index(fault_addr)];
4263 switch (mark) {
4264 case CARD_UNMARKED:
4265 case WP_CLEARED_AND_MARKED: // possible data race
4266 unprotect_page(fault_addr, WP_CLEARED_AND_MARKED);
4267 break;
4268 default:
4269 if (!ignore_memoryfaults_on_unprotected_pages) {
4270 void lisp_backtrace(int frames);
4271 lisp_backtrace(10);
4272 fprintf(stderr,
4273 "Fault @ %p, PC=%p, page %"PAGE_INDEX_FMT" (~WP) mark=%#x gc_active=%d\n"
4274 " mixed_region=%p:%p\n"
4275 " page.scan_start: %p .words_used: %u .type: %d .gen: %d\n",
4276 fault_addr, (void*)(context?os_context_pc(context):(uword_t)-1), page_index,
4277 mark, gc_active_p,
4278 mixed_region->start_addr, mixed_region->end_addr,
4279 page_scan_start(page_index),
4280 page_words_used(page_index),
4281 page_table[page_index].type,
4282 page_table[page_index].gen);
4283 if (!continue_after_memoryfault_on_unprotected_pages) lose("Feh.");
4286 #endif
4287 return 1; // Handled
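/* Illustrative sketch only (not compiled): how an OS-specific fault handler
 * defers to gencgc_handle_wp_violation() before falling through to the generic
 * SIGSEGV/SIGBUS logic. The handler name is hypothetical; the real handlers
 * live in the OS-specific runtime files. */
#if 0
#include <signal.h>
static void sketch_memory_fault_handler(int sig, siginfo_t *info, void *context)
{
    if (gencgc_handle_wp_violation(context, info->si_addr))
        return;   /* a normal write-barrier fault: the card was unprotected and marked */
    /* ... otherwise hand the fault to the general error machinery ... */
}
#endif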
4289 /* This is to be called when we catch a SIGSEGV/SIGBUS, determine that
4290 * it's not just a case of the program hitting the write barrier, and
4291 * are about to let Lisp deal with it. It's basically just a
4292 * convenient place to set a gdb breakpoint. */
4293 void
4294 unhandled_sigmemoryfault(void __attribute__((unused)) *addr)
4297 void zero_all_free_ranges() /* called only by gc_and_save() */
4299 page_index_t i;
4300 for (i = 0; i < next_free_page; i++) {
4301 char* start = page_address(i);
4302 char* page_end = start + GENCGC_PAGE_BYTES;
4303 start += page_bytes_used(i);
4304 memset(start, 0, page_end-start);
4306 #ifndef LISP_FEATURE_SB_THREAD
4307 // zero the allocation regions at the start of static-space
4308 // This gets a spurious warning:
4309 // warning: 'memset' offset [0, 71] is out of the bounds [0, 0] [-Warray-bounds]
4310 // which 'volatile' works around.
4311 char * volatile region = (char*)STATIC_SPACE_START;
4312 bzero((char*)region, 3*sizeof (struct alloc_region));
4313 #endif
4316 generation_index_t gc_gen_of(lispobj obj, int defaultval) {
4317 int page = find_page_index((void*)obj);
4318 if (page >= 0) return page_table[page].gen;
4319 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4320 if (immobile_space_p(obj))
4321 return immobile_obj_generation(base_pointer(obj));
4322 #endif
4323 return defaultval;
4326 /* Return 1 if 'a' is strictly younger than 'b'.
4327 * This asserts that 'a' is pinned if in 'from_space' because it is
4328 * specifically a helper function for scav_code_blob(), where this is
4329 * called after scavenging the header. So if something didn't get moved
4330 * out of from_space, then it must have been pinned.
4331 * So don't call this for anything except that use-case. */
4332 static inline bool obj_gen_lessp(lispobj obj, generation_index_t b)
4334 generation_index_t a = gc_gen_of(obj, ARTIFICIALLY_HIGH_GEN);
4335 if (a == from_space) {
4336 gc_assert(pinned_p(obj, find_page_index((void*)obj)));
4337 a = new_space;
4339 return ((a==SCRATCH_GENERATION) ? from_space : a) < b;
4342 /* Loosely inspired by the code in 'purify' */
4343 #define LATERBLOCKSIZE 50000 // arbitrary
4344 static struct later {
4345 struct later *next;
4346 struct code *list[LATERBLOCKSIZE];
4347 int count;
4348 } *later_blocks = NULL;
4350 static void delay_code_metadata_scavenge(struct code* code)
4352 struct later* block = later_blocks;
4353 if (!block || block->count == LATERBLOCKSIZE) {
4354 block = calloc(1, sizeof (struct later));
4355 block->next = later_blocks;
4356 later_blocks = block;
4358 block->list[block->count] = code;
4359 ++block->count;
4362 static void finish_code_metadata()
4364 struct later *block = later_blocks;
4365 int i;
4366 save_lisp_gc_iteration = 3; // ensure no more delaying of metadata scavenge
4367 for ( ; block ; block = block->next ) {
4368 for (i = 0; i < block->count; ++i) {
4369 struct code*c = block->list[i];
4370 gc_assert(!forwarding_pointer_p((lispobj*)c));
4371 // first two words are non-pointer, then come debug_info and fixups
4372 // in whatever order they were defined in objdef.
4373 scavenge((lispobj*)c + 2, 2);
4374 CLEAR_WRITTEN_FLAG((lispobj*)c);
4377 scavenge_newspace(new_space);
4380 sword_t scav_code_blob(lispobj *object, lispobj header)
4382 struct code* code = (struct code*)object;
4383 int nboxed = code_header_words(code);
4384 if (!nboxed) goto done;
4386 int my_gen = gc_gen_of((lispobj)object, ARTIFICIALLY_HIGH_GEN);
4387 if (my_gen < ARTIFICIALLY_HIGH_GEN && ((my_gen & 7) == from_space)) {
4388 // Since 'from_space' objects are not directly scavenged - they can
4389 // only be scavenged after moving to newspace - this object
4390 // must be pinned. (It's logically in newspace.) Assert that.
4391 gc_assert(pinned_p(make_lispobj(object, OTHER_POINTER_LOWTAG),
4392 find_page_index(object)));
4393 my_gen = new_space;
4396 // If the header's 'written' flag is off and it was not copied by GC
4397 // into newspace, then the object should be ignored.
4399 // This test could stand to be tightened up: in a GC promotion cycle
4400 // (e.g. 0 becomes 1), we can't discern objects that got copied to newspace
4401 // from objects that started out there. Of the ones that were already there,
4402 // we need only scavenge those marked as written. All the copied ones
4403 // should always be scavenged. So really what we could do is mark anything
4404 // that got copied as written, which would allow dropping the second half
4405 // of the OR condition. As is, we scavenge "too much" of newspace which
4406 // is not an issue of correctness but rather efficiency.
4407 if (header_rememberedp(header) || (my_gen == new_space) ||
4408 #ifndef LISP_FEATURE_IMMOBILE_SPACE
4409 // if NO immobile-space, then text space is equivalent to static space
4410 ((uword_t)object >= TEXT_SPACE_START && object < text_space_highwatermark) ||
4411 #endif
4412 ((uword_t)object >= STATIC_SPACE_START && object < static_space_free_pointer)) {
4413 // FIXME: We sometimes scavenge protected pages.
4414 // This assertion fails, but things work nonetheless.
4415 // gc_assert(!card_protected_p(object));
4417 if (save_lisp_gc_iteration == 2) {
4418 // Attempt to place debug-info at end of the heap by not scavenging now
4419 scavenge(object + 4, nboxed - 4);
4420 delay_code_metadata_scavenge(code);
4421 } else {
4422 /* Scavenge the boxed section of the code data block. */
4423 scavenge(object + 2, nboxed - 2);
4426 #ifdef LISP_FEATURE_UNTAGGED_FDEFNS
4427 // Process each untagged fdefn pointer.
4428 // TODO: assert that the generation of any fdefn is older than that of 'code'.
4429 lispobj* fdefns = code->constants +
4430 code_n_funs(code) * CODE_SLOTS_PER_SIMPLE_FUN;
4431 int n_fdefns = code_n_named_calls(code);
4432 int i;
4433 for (i=0; i<n_fdefns; ++i) {
4434 lispobj word = fdefns[i];
4435 if (word) {
4436 gc_assert(!(word & LOWTAG_MASK)); // must not have OTHER_POINTER_LOWTAG
4437 lispobj tagged_word = word | OTHER_POINTER_LOWTAG;
4438 scavenge(&tagged_word, 1);
4439 if (tagged_word - OTHER_POINTER_LOWTAG != word) {
4440 fdefns[i] = tagged_word - OTHER_POINTER_LOWTAG;
4444 #endif
4446 // What does this have to do with DARWIN_JIT?
4447 #if defined LISP_FEATURE_64_BIT && !defined LISP_FEATURE_DARWIN_JIT
4448 /* If any function in this code object redirects to a function outside
4449 * the object, then scavenge all entry points. Otherwise there is no need,
4450 * as trans_code() made necessary adjustments to internal entry points.
4451 * This test is just an optimization to avoid some work */
4452 if (((*object >> 16) & 0xff) == CODE_IS_TRACED) {
4453 #else
4454 { /* Not enough spare bits in the header to hold random flags.
4455 * Just do the extra work always */
4456 #endif
4457 for_each_simple_fun(i, fun, code, 1, {
4458 if (simplefun_is_wrapped(fun)) {
4459 lispobj target_fun = fun_taggedptr_from_self(fun->self);
4460 lispobj new = target_fun;
4461 scavenge(&new, 1);
4462 if (new != target_fun) fun->self = fun_self_from_taggedptr(new);
4467 if (save_lisp_gc_iteration == 2) goto done;
4469 /* If my_gen is other than newspace, then scan for old->young
4470 * pointers. If my_gen is newspace, there can be no such pointers
4471 * because newspace is the lowest numbered generation post-GC
4472 * (regardless of whether this is a promotion cycle) */
4473 if (my_gen != new_space) {
4474 lispobj *where, *end = object + nboxed, ptr;
4475 for (where= object + 2; where < end; ++where)
4476 if (is_lisp_pointer(ptr = *where) && obj_gen_lessp(ptr, my_gen))
4477 goto done;
4479 CLEAR_WRITTEN_FLAG(object);
4481 done:
4482 return code_total_nwords(code);
4485 void really_note_transporting(lispobj old,void*new,sword_t nwords)
4487 page_index_t p = find_page_index((void*)old);
4488 __attribute__((unused)) uword_t page_usage_limit = (uword_t)((lispobj*)page_address(p) + page_words_used(p));
4489 gc_assert(old < (uword_t)page_usage_limit); // this helps find bogus pointers
4490 if (GC_LOGGING)
4491 fprintf(gc_activitylog(),
4492 listp(old)?"t %"OBJ_FMTX" %"OBJ_FMTX"\n":
4493 "t %"OBJ_FMTX" %"OBJ_FMTX" %x\n",
4494 old, (uword_t)new, (int)nwords);
4497 /** heap invariant checker **/
4499 static bool card_markedp(void* addr)
4501 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4502 if (immobile_space_p((lispobj)addr))
4503 return !immobile_card_protected_p(addr);
4504 #endif
4505 return gc_card_mark[addr_to_card_index(addr)] != CARD_UNMARKED;
4508 // Check a single pointer. Return 1 if we should stop verifying due to too many errors.
4509 // (Otherwise continue showing errors until then)
4510 // NOTE: This function can produce false failure indications,
4511 // usually related to dynamic space pointing to the stack of a
4512 // dead thread, but there may be other reasons as well.
4513 static void note_failure(lispobj thing, lispobj *where, struct verify_state *state,
4514 char *str)
4516 if (state->flags & VERIFY_PRINT_HEADER_ON_FAILURE) {
4517 if (state->flags & VERIFY_PRE_GC) fprintf(stderr, "pre-GC failure\n");
4518 if (state->flags & VERIFY_POST_GC) fprintf(stderr, "post-GC failure\n");
4519 state->flags &= ~VERIFY_PRINT_HEADER_ON_FAILURE;
4521 if (state->object_addr) {
4522 lispobj obj = compute_lispobj(state->object_addr);
4523 page_index_t pg = find_page_index(state->object_addr);
4524 fprintf(stderr, "Ptr %p @ %"OBJ_FMTX" (lispobj %"OBJ_FMTX",pg%d,h=%"OBJ_FMTX") sees %s\n",
4525 (void*)thing, (uword_t)where, obj, (int)pg, *native_pointer(obj), str);
4526 // Record this in state->err_objs if possible
4527 int i;
4528 for(i=0; i<MAX_ERR_OBJS; ++i)
4529 if (!state->err_objs[i]) {
4530 state->err_objs[i] = (uword_t)state->object_addr;
4531 break;
4533 } else {
4534 fprintf(stderr, "Ptr %p @ %"OBJ_FMTX" sees %s\n", (void*)thing, (uword_t)where, str);
4538 static int
4539 verify_pointer(lispobj thing, lispobj *where, struct verify_state *state)
4541 /* Strict containment: no pointer from a heap space may point
4542 * to anything outside of a heap space. */
4543 // bool strict_containment = state->flags & VERIFY_FINAL;
4545 #define FAIL_IF(cond, why) \
4546 if (cond) { if (++state->nerrors > 25) return 1; note_failure(thing,where,state,why); }
4548 if (!is_lisp_pointer(thing)) {
4549 FAIL_IF(!is_lisp_immediate(thing), "strange non-pointer");
4550 return 0;
4552 // if (strict_containment && !gc_managed_heap_space_p(thing)) GC_WARN("non-Lisp memory");
4553 page_index_t source_page_index = find_page_index(where);
4554 page_index_t target_page_index = find_page_index((void*)thing);
4555 int source_is_generational = source_page_index >= 0 || immobile_space_p((lispobj)where);
4556 if (!(target_page_index >= 0 || immobile_space_p(thing))) return 0; // can't do much with it
4557 if ((state->flags & VERIFY_TAGS) && target_page_index >= 0) {
4558 if (listp(thing)) {
4559 FAIL_IF(!(is_cons_half(CONS(thing)->car) && is_cons_half(CONS(thing)->cdr)),
4560 "non-cons");
4561 } else {
4562 FAIL_IF(LOWTAG_FOR_WIDETAG(widetag_of(native_pointer(thing))) != lowtag_of(thing),
4563 "incompatible widetag");
4566 generation_index_t to_gen =
4567 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4568 points_to_asm_code_p((uword_t)thing)?
4569 gc_gen_of(make_lispobj((void*)asm_routines_start,OTHER_POINTER_LOWTAG),0):
4570 #endif
4571 gc_gen_of(thing, ARTIFICIALLY_HIGH_GEN);
4572 if (to_gen < state->min_pointee_gen) state->min_pointee_gen = to_gen;
4573 if (state->flags & VERIFY_QUICK) return 0;
4574 if (target_page_index >= 0) {
4575 // If it's within the dynamic space it should point to a used page.
4576 FAIL_IF(page_free_p(target_page_index), "free page");
4577 FAIL_IF(!(page_table[target_page_index].type & OPEN_REGION_PAGE_FLAG)
4578 && (thing & (GENCGC_PAGE_BYTES-1)) >= page_bytes_used(target_page_index),
4579 "unallocated space");
4580 } else {
4581 // The object pointed to must not have been discarded as garbage.
4582 FAIL_IF(!other_immediate_lowtag_p(*native_pointer(thing)), "trashed object");
4584 // Must not point to a forwarding pointer
4585 FAIL_IF(*native_pointer(thing) == FORWARDING_HEADER, "forwarding ptr");
4586 // Forbid pointers from R/O space into a GCed space
4587 FAIL_IF((READ_ONLY_SPACE_START <= (uword_t)where && where < read_only_space_free_pointer),
4588 "dynamic space from RO space");
4589 // Card marking invariant check, but only if the source of the pointer is a heap object
4590 if (header_widetag(state->object_header) == CODE_HEADER_WIDETAG
4591 && ! is_in_static_space(state->object_addr)
4592 && to_gen < state->object_gen) {
4593 // two things must be true:
4594 // 1. the card containing the code must be marked
4595 FAIL_IF(!card_markedp(state->object_addr), "younger obj from WP'd code header page");
4596 // 2. the object header must be marked as written
4597 if (!header_rememberedp(state->object_header))
4598 lose("code @ %p (g%d). word @ %p -> %"OBJ_FMTX" (g%d)",
4599 state->object_addr, state->object_gen, where, thing, to_gen);
4600 } else if ((state->flags & VERIFYING_GENERATIONAL) && to_gen < state->object_gen
4601 && source_is_generational) {
4602 /* The WP criteria are:
4603 * - CONS marks the exact card since it can't span cards
4604 * - SIMPLE-VECTOR marks the card containing the cell with the old->young pointer.
4605 * - Everything else marks the object header -OR- the card with the pointer.
4606 * (either/or because Lisp marks the header card,
4607 * but the collector marks the cell's card.) */
4608 int marked = card_markedp(where)
4609 #ifdef LISP_FEATURE_SOFT_CARD_MARKS
4610 || (state->object_header
4611 && header_widetag(state->object_header) != SIMPLE_VECTOR_WIDETAG
4612 && card_markedp(state->object_addr))
4613 #elif defined LISP_FEATURE_EXECUTABLE_FUNINSTANCES
4614 /* #+(and (not soft-card-marks) executable-funinstances) could find the mark
4615 * for a page-spanning funinstance on the preceding page, because it uses
4616 * logical marking, not physical protection of the page holding the pointer */
4617 || (header_widetag(state->object_header) == FUNCALLABLE_INSTANCE_WIDETAG
4618 && card_markedp(state->object_addr))
4619 #endif
4621 FAIL_IF(!marked, "younger obj from WP page");
4623 int valid;
4624 if (state->flags & VERIFY_AGGRESSIVE) // Extreme paranoia mode
4625 valid = valid_tagged_pointer_p(thing);
4626 else {
4627 /* Efficiently decide whether 'thing' is plausible.
4628 * This MUST NOT use properly_tagged_descriptor_p() which
4629 * assumes a known good object base address, and would
4630 * "dangerously" scan a code component for embedded funs. */
4631 valid = plausible_tag_p(thing);
4633 /* If 'thing' points to a stack, we can only hope that the stack
4634 * frame is ok, or the object at 'where' is unreachable. */
4635 FAIL_IF(!valid && !is_in_stack_space(thing), "junk");
4636 return 0;
4638 #define CHECK(pointer, where) if (verify_pointer(pointer, where, state)) return 1
4640 /* Return 0 if good, 1 if bad.
4641 * Take extra pains to process weak SOLIST nodes - Finalizer list nodes weakly point
4642 * to a referent via an untagged pointer, so the GC doesn't even have to know that
4643 * the reference is weak - it is simply ignored as a non-pointer.
4644 * This makes invariant verification a little tricky. We want to restore the tagged
4645 * pointer, but only if the list is the finalizer list. */
4646 extern bool finalizer_list_node_p(struct instance*);
4647 static int verify_headered_object(lispobj* object, sword_t nwords,
4648 struct verify_state *state)
4650 long i;
4651 int widetag = widetag_of(object);
4652 if (instanceoid_widetag_p(widetag)) {
4653 lispobj layout = layout_of(object);
4654 if (layout) {
4655 CHECK(layout, object);
4656 struct bitmap bitmap = get_layout_bitmap(LAYOUT(layout));
4657 if (lockfree_list_node_layout_p(LAYOUT(layout))) {
4658 // These objects might have _two_ untagged references -
4659 // 1) the 'next' slot may or may not have tag bits
4660 // 2) finalizer list node always stores its referent as untagged
4661 struct list_node* node = (void*)object;
4662 lispobj next = node->_node_next;
4663 if (fixnump(next) && next)
4664 CHECK(next | INSTANCE_POINTER_LOWTAG, &node->_node_next);
4665 if (finalizer_node_layout_p(LAYOUT(layout))) {
4666 struct solist_node* node = (void*)object;
4667 // !fixnump(next) implies that this node is NOT deleted, nor in
4668 // the process of getting deleted by CANCEL-FINALIZATION
4669 if (node->so_key && !fixnump(next)) {
4670 gc_assert(fixnump(node->so_key));
4671 lispobj key = compute_lispobj((lispobj*)node->so_key);
4672 CHECK(key, &node->so_key);
4676 for (i=0; i<(nwords-1); ++i)
4677 if (bitmap_logbitp(i, bitmap)) CHECK(object[1+i], object+1+i);
4679 return 0;
4681 if (widetag == CODE_HEADER_WIDETAG) {
4682 struct code *code = (struct code *)object;
4683 gc_assert(fixnump(object[1])); // boxed size, needed for code_header_words()
4684 sword_t nheader_words = code_header_words(code);
4685 /* Verify the boxed section of the code data block */
4686 state->min_pointee_gen = ARTIFICIALLY_HIGH_GEN;
4687 #ifdef LISP_FEATURE_UNTAGGED_FDEFNS
4689 lispobj* pfdefn = code->constants + code_n_funs(code) * CODE_SLOTS_PER_SIMPLE_FUN;
4690 lispobj* end = pfdefn + code_n_named_calls(code);
4691 for ( ; pfdefn < end ; ++pfdefn)
4692 if (*pfdefn) CHECK(*pfdefn | OTHER_POINTER_LOWTAG, pfdefn);
4694 #endif
4695 for (i=2; i <nheader_words; ++i) CHECK(object[i], object+i);
4696 #ifndef NDEBUG // avoid "unused" warnings on auto vars of for_each_simple_fun()
4697 // Check the SIMPLE-FUN headers
4698 for_each_simple_fun(i, fheaderp, code, 1, {
4699 #if defined LISP_FEATURE_COMPACT_INSTANCE_HEADER
4700 lispobj __attribute__((unused)) layout = funinstance_layout((lispobj*)fheaderp);
4701 gc_assert(!layout || layout == LAYOUT_OF_FUNCTION);
4702 #elif defined LISP_FEATURE_64_BIT
4703 gc_assert((fheaderp->header >> 32) == 0);
4704 #endif
4706 #endif
4707 #if 0 // this looks redundant. It's checked with each pointer, no?
4708 bool rememberedp = header_rememberedp(code->header);
4709 /* The remembered set invariant is that an object is marked "written"
4710 * if and only if either it points to a younger object or is pointed
4711 * to by a register or stack. (The pointed-to case assumes that the
4712 * very next instruction on return from GC would store an old->young
4713 * pointer into that object). Non-compacting GC does not have the
4714 * "only if" part of that, nor does pre-GC verification because we
4715 * don't test the generation of the newval when storing into code. */
4716 if (is_in_static_space(object)) { }
4717 else if (compacting_p() && (state->flags & VERIFY_POST_GC) ?
4718 (state->min_pointee_gen < state->object_gen) != rememberedp :
4719 (state->min_pointee_gen < state->object_gen) && !rememberedp)
4720 lose("object @ %p is gen%d min_pointee=gen%d %s",
4721 (void*)state->tagged_object, state->object_gen, state->min_pointee_gen,
4722 rememberedp ? "written" : "not written");
4723 #endif
4724 return 0;
4726 if (widetag == SYMBOL_WIDETAG) {
4727 struct symbol* s = (void*)object;
4728 CHECK(s->value, &s->value);
4729 CHECK(s->fdefn, &s->fdefn);
4730 CHECK(s->info, &s->info);
4731 CHECK(decode_symbol_name(s->name), &s->name);
4732 return 0;
4734 if (widetag == FDEFN_WIDETAG) {
4735 struct fdefn* f = (void*)object;
4736 CHECK(f->name, &f->name);
4737 CHECK(f->fun, &f->fun);
4738 CHECK(decode_fdefn_rawfun(f), (lispobj*)&f->raw_addr);
4739 return 0;
4741 for (i=1; i<nwords; ++i) CHECK(object[i], object+i);
4742 return 0;
4745 static __attribute__((unused)) bool acceptable_filler_cons_p(lispobj* where)
4747 if (where[0] == 0 && where[1] == 0) return 1;
4748 // These "conses" can result from bignum multiplication-
4749 // trailing insignificant sign bits which get chopped.
4750 if (where[0] == (uword_t)-1 && where[1] == (uword_t)-1) return 1;
4751 if (where[0] == (uword_t)-1 && where[1] == 0) return 1;
4752 return 0;
4754 static int verify_range(lispobj* start, lispobj* end, struct verify_state* state)
4756 lispobj* where = start;
4757 if (state->flags & VERIFYING_GENERATIONAL && find_page_index(start)>=0) {
4758 page_index_t page = find_page_index(start);
4759 if (page_table[page].type == PAGE_TYPE_CONS)
4760 gc_assert(page_words_used(page) <= MAX_CONSES_PER_PAGE*CONS_SIZE);
4762 if ((state->flags & VERIFYING_UNFORMATTED)) {
4763 while (where < end) {
4764 if (*where != NO_TLS_VALUE_MARKER) {
4765 int result = verify_pointer(*where, where, state);
4766 if (result) return result;
4768 ++where;
4770 return 0;
4772 while (where < end) {
4773 int widetag = is_header(*where) ? header_widetag(*where) : LIST_POINTER_LOWTAG;
4774 /* Technically we should wait until after performing the widetag validity
4775 * tests before calling the sizer. Otherwise the lossage message would
4776 * not be as good as it could be. I guess that failure doesn't happen much */
4777 sword_t nwords = object_size(where);
4778 state->object_addr = where;
4779 state->object_header = is_cons_half(*where) ? 0 : *where;
4780 if (state->flags & VERIFYING_GENERATIONAL) {
4781 page_index_t pg = find_page_index(where);
4782 state->object_gen = pg >= 0 ? page_table[pg].gen :
4783 gc_gen_of((lispobj)where, ARTIFICIALLY_HIGH_GEN);
4784 #ifdef LISP_FEATURE_PPC64
4785 // Cons fillers (two words of all 1s) cause failure of
4786 // the default verification logic, so brute-force skip them
4787 // regardless of whether the page type is PAGE_TYPE_CONS.
4788 if (*where == (uword_t)-1 && where[1] == (uword_t)-1) {
4789 where +=2;
4790 continue;
4792 #endif
4793 if (widetag != FILLER_WIDETAG && pg >= 0) {
4794 // Assert proper page type
4795 if (state->object_header) // is not a cons
4796 gc_assert(page_table[pg].type != PAGE_TYPE_CONS);
4797 #ifdef LISP_FEATURE_USE_CONS_REGION
4798 else if (page_table[pg].type != PAGE_TYPE_CONS) {
4799 if (is_cons_half(where[0]))
4800 gc_assert(acceptable_filler_cons_p(where));
4802 #endif
4803 if (widetag == CODE_HEADER_WIDETAG) {
4804 if (!is_code(page_table[pg].type))
4805 lose("object @ %p is code on non-code page", where);
4806 } else if (widetag == FUNCALLABLE_INSTANCE_WIDETAG) {
4807 // where these reside depends on the architecture
4808 } else {
4809 if (is_code(page_table[pg].type))
4810 lose("object @ %p is non-code on code page", where);
4814 if (!state->object_header) {
4815 if (verify_pointer(where[0], where+0, state) ||
4816 verify_pointer(where[1], where+1, state)) break;
4817 } else if (widetag == FILLER_WIDETAG) { // do nothing
4818 } else if (!(other_immediate_lowtag_p(widetag) && LOWTAG_FOR_WIDETAG(widetag))) {
4819 lose("Unhandled widetag %"OBJ_FMTX" @ %p", *where, where);
4820 } else if (leaf_obj_widetag_p(widetag)) {
4821 #ifdef LISP_FEATURE_UBSAN
4822 if (specialized_vector_widetag_p(widetag)) {
4823 if (is_lisp_pointer(where[1])) {
4824 struct vector* bits = (void*)native_pointer(where[1]);
4825 if (header_widetag(bits->header) != SIMPLE_BIT_VECTOR_WIDETAG)
4826 lose("bad shadow bits for %p", where);
4827 gc_assert(header_widetag(bits->header) == SIMPLE_BIT_VECTOR_WIDETAG);
4828 gc_assert(vector_len(bits) >= vector_len((struct vector*)where));
4831 #endif
4832 bool strict_containment = state->flags & VERIFY_FINAL;
4833 if (strict_containment && gencgc_verbose && widetag == SAP_WIDETAG && where[1])
4834 fprintf(stderr, "\nStrange SAP %p -> %p\n", where, (void*)where[1]);
4835 } else {
4836 if (verify_headered_object(where, nwords, state)) break;
4838 where += nwords;
4840 return 0;
4843 static int verify(lispobj start, lispobj* end, struct verify_state* state, int flags)
4845 int savedflags = state->flags;
4846 state->flags |= flags;
4847 int result = verify_range((lispobj*)start, end, state);
4848 state->flags = savedflags;
4849 return result;
4852 extern void save_gc_crashdump(char *, lispobj*);
4853 /* Return the number of verification errors found.
4854 * You might want to use that as a deciding factor for dumping the heap
4855 * to a file (which takes time, and generally isn't needed).
4856 * But if a particular verification fails, then do dump it */
4857 int verify_heap(__attribute__((unused)) lispobj* cur_thread_approx_stackptr,
4858 int flags)
4860 int verbose = gencgc_verbose | ((flags & VERIFY_VERBOSE) != 0);
4862 struct verify_state state;
4863 memset(&state, 0, sizeof state);
4864 state.flags = flags;
4866 if (verbose)
4867 fprintf(stderr,
4868 flags & VERIFY_PRE_GC ? "Verify before GC" :
4869 flags & VERIFY_POST_GC ? "Verify after GC(%d,%d)" :
4870 "Heap check", // if called at a random time
4871 (flags >> 1) & 7, // generation number
4872 flags & 1); // 'raise'
4873 else
4874 state.flags |= VERIFY_PRINT_HEADER_ON_FAILURE;
4876 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4877 # ifdef __linux__
4878 // Try this verification if immobile-space was compiled with extra debugging.
4879 // But weak symbols don't work on macOS.
4880 extern void __attribute__((weak)) check_text_pages();
4881 if (&check_text_pages) check_text_pages();
4882 # endif
4883 if (verbose)
4884 fprintf(stderr, " [immobile]");
4885 if (verify(FIXEDOBJ_SPACE_START,
4886 fixedobj_free_pointer, &state,
4887 flags | VERIFYING_GENERATIONAL)) goto out;
4888 if (verify(TEXT_SPACE_START,
4889 text_space_highwatermark, &state,
4890 flags | VERIFYING_GENERATIONAL)) goto out;
4891 #endif
4892 struct thread *th;
4893 if (verbose)
4894 fprintf(stderr, " [threads]");
4895 state.object_addr = 0;
4896 state.object_gen = 0;
4897 for_each_thread(th) {
4898 if (verify((lispobj)th->binding_stack_start,
4899 (lispobj*)get_binding_stack_pointer(th), &state,
4900 VERIFYING_UNFORMATTED)) goto out;
4901 if (verify((lispobj)&th->lisp_thread,
4902 (lispobj*)(SymbolValue(FREE_TLS_INDEX,0) + (char*)th), &state,
4903 VERIFYING_UNFORMATTED))
4904 goto out;
4906 if (verbose)
4907 fprintf(stderr, " [RO]");
4908 if (verify(READ_ONLY_SPACE_START, read_only_space_free_pointer, &state, 0)) goto out;
4909 if (verbose)
4910 fprintf(stderr, " [static]");
4911 // Just don't worry about NIL, it's seldom the problem
4912 // if (verify(NIL_SYMBOL_SLOTS_START, (lispobj*)NIL_SYMBOL_SLOTS_END, &state, 0)) goto out;
4913 if (verify(STATIC_SPACE_OBJECTS_START, static_space_free_pointer, &state, 0)) goto out;
4914 if (verify(PERMGEN_SPACE_START, permgen_space_free_pointer, &state,0)) goto out;
4915 if (verbose)
4916 fprintf(stderr, " [dynamic]");
4917 state.flags |= VERIFYING_GENERATIONAL;
4918 walk_generation((uword_t(*)(lispobj*,lispobj*,uword_t))verify_range,
4919 -1, (uword_t)&state);
4920 if (verbose && state.nerrors==0) fprintf(stderr, " passed\n");
4921 out:
4922 if (state.nerrors && !(flags & VERIFY_DONT_LOSE)) {
4923 // dump_spaces(&state, "verify failed");
4924 lose("Verify failed: %d errors", state.nerrors);
4926 return state.nerrors;
4929 void gc_show_pte(lispobj obj)
4931 char marks[1+CARDS_PER_PAGE];
4932 page_index_t page = find_page_index((void*)obj);
4933 if (page>=0) {
4934 printf("page %"PAGE_INDEX_FMT" base %p gen %d type %x ss %p used %x",
4935 page, page_address(page), page_table[page].gen, page_table[page].type,
4936 page_scan_start(page), page_bytes_used(page));
4937 if (page_starts_contiguous_block_p(page)) printf(" startsblock");
4938 if (page_ends_contiguous_block_p(page, page_table[page].gen)) printf(" endsblock");
4939 printf(" (%s)\n", page_card_mark_string(page, marks));
4940 return;
4942 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4943 page = find_text_page_index((void*)obj);
4944 if (page>=0) {
4945 lispobj* text_page_scan_start(low_page_index_t page);
4946 int gens = text_page_genmask[page];
4947 char genstring[9];
4948 int i;
4949 for (i=0;i<8;++i) genstring[i] = (gens & (1<<i)) ? '0'+i : '-';
4950 genstring[8] = 0;
4951 printf("page %d (v) base %p gens %s ss=%p%s\n",
4952 (int)page, text_page_address(page), genstring,
4953 text_page_scan_start(page),
4954 card_markedp((void*)obj)?"":" WP");
4955 return;
4957 page = find_fixedobj_page_index((void*)obj);
4958 if (page>=0) {
4959 printf("page %d (f) align %d gens %x%s\n", (int)page,
4960 fixedobj_pages[page].attr.parts.obj_align,
4961 fixedobj_pages[page].attr.parts.gens_,
4962 card_markedp((void*)obj)?"": " WP");
4963 return;
4965 #endif
4966 printf("not in GC'ed space\n");
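/* Illustrative usage sketch only (not compiled): gc_show_pte() is handy from a
 * debugger, e.g. "call gc_show_pte(obj)" in gdb with some tagged pointer of
 * interest. The value below is purely hypothetical. */
#if 0
static void sketch_show_pte_usage(void)
{
    lispobj some_tagged_pointer = 0x1000280007; /* hypothetical tagged pointer */
    gc_show_pte(some_tagged_pointer);
}
#endif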
4969 static int count_immobile_objects(__attribute__((unused)) int gen, int res[4])
4971 #ifdef LISP_FEATURE_IMMOBILE_SPACE
4972 lispobj* where = (lispobj*)FIXEDOBJ_SPACE_START;
4973 lispobj* end = fixedobj_free_pointer;
4974 while (where < end) {
4975 if (immobile_obj_generation(where) == gen) {
4976 switch (widetag_of(where)) {
4977 case INSTANCE_WIDETAG: ++res[0]; break;
4978 case FDEFN_WIDETAG: ++res[1]; break;
4979 case SYMBOL_WIDETAG: ++res[2]; break;
4982 where += object_size(where);
4984 where = (lispobj*)TEXT_SPACE_START;
4985 end = text_space_highwatermark;
4986 while (where < end) {
4987 if (widetag_of(where) != FILLER_WIDETAG && immobile_obj_generation(where) == gen)
4988 ++res[3];
4989 where += object_size(where);
4991 #endif
4992 return (res[0] | res[1] | res[2] | res[3]) != 0;
4995 /* Count the number of pages in the given generation.
4996 * Additionally, if 'n_dirty' is non-NULL, then assign
4997 * into *n_dirty the count of marked pages.
4999 page_index_t
5000 count_generation_pages(generation_index_t generation, page_index_t* n_dirty)
5002 page_index_t i, total = 0, dirty = 0;
5003 int j;
5005 for (i = 0; i < next_free_page; i++)
5006 if (!page_free_p(i) && (page_table[i].gen == generation)) {
5007 total++;
5008 long card = page_to_card_index(i);
5009 for (j=0; j<CARDS_PER_PAGE; ++j, ++card)
5010 if (card_dirtyp(card)) ++dirty;
5012 // divide by cards per page rounding up
5013 if (n_dirty) *n_dirty = (dirty + (CARDS_PER_PAGE-1)) / CARDS_PER_PAGE;
5014 return total;
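/* Illustrative usage sketch only (not compiled): tally total and dirty pages
 * for every generation using count_generation_pages(). Uses only names defined
 * in this file. */
#if 0
static void sketch_count_all_generations(void)
{
    generation_index_t g;
    for (g = 0; g <= PSEUDO_STATIC_GENERATION; ++g) {
        page_index_t n_dirty;
        page_index_t total = count_generation_pages(g, &n_dirty);
        fprintf(stderr, "gen %d: %ld pages, %ld dirty\n",
                (int)g, (long)total, (long)n_dirty);
    }
}
#endif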
5017 // You can call this with 0 and NULL to perform its assertions silently
5018 void gc_gen_report_to_file(int filedes, FILE *file)
5020 #ifdef LISP_FEATURE_X86
5021 extern void fpu_save(void *), fpu_restore(void *);
5022 int fpu_state[27];
5024 /* Can end up here after calling alloc_tramp which doesn't prepare
5025 * the x87 state, and the C ABI uses a different mode */
5026 fpu_save(fpu_state);
5027 #endif
5029 #define OUTPUT(str, len) \
5030 {if (file) fwrite(str, 1, len, file); if (filedes>=0) ignore_value(write(filedes, str, len));}
5032 /* Print the heap stats. */
5033 char header1[] =
5034 " Immobile Object Counts\n";
5035 OUTPUT(header1, sizeof header1-1);
5036 char header2[] =
5037 " Gen layout fdefn symbol code Boxed Cons Raw Code SmMix Mixed LgRaw LgCode LgMix"
5038 " Waste% Alloc Trig Dirty GCs Mem-age\n";
5039 OUTPUT(header2, sizeof header2-1);
5041 generation_index_t gen_num, begin, end;
5042 int immobile_matrix[8][4], have_immobile_obj = 0;
5043 int immobile_totals[4];
5044 memset(immobile_matrix, 0, sizeof immobile_matrix);
5045 memset(immobile_totals, 0, sizeof immobile_totals);
5046 for (gen_num = 0; gen_num <= 6; ++gen_num) {
5047 if (count_immobile_objects(gen_num, immobile_matrix[gen_num]))
5048 have_immobile_obj |= 1 << gen_num;
5049 immobile_totals[0] += immobile_matrix[gen_num][0];
5050 immobile_totals[1] += immobile_matrix[gen_num][1];
5051 immobile_totals[2] += immobile_matrix[gen_num][2];
5052 immobile_totals[3] += immobile_matrix[gen_num][3];
5054 // Print from the lowest gen that has any allocated pages.
5055 for (begin = 0; begin <= PSEUDO_STATIC_GENERATION; ++begin)
5056 if ((have_immobile_obj>>begin)&1 || generations[begin].bytes_allocated) break;
5057 // Print up to and including the highest gen that has any allocated pages.
5058 for (end = SCRATCH_GENERATION; end >= 0; --end)
5059 if (generations[end].bytes_allocated) break;
5061 char linebuf[180];
5062 page_index_t coltot[9];
5063 uword_t eden_words_allocated = 0;
5064 page_index_t eden_pages = 0;
5065 memset(coltot, 0, sizeof coltot);
5066 for (gen_num = begin; gen_num <= end; gen_num++) {
5067 page_index_t page;
5068 page_index_t pagect[9];
5069 int *objct = immobile_matrix[gen_num];
5070 memset(pagect, 0, sizeof pagect);
5071 if (gen_num == 0) { // Count the eden pages
5072 for (page = 0; page < next_free_page; page++)
5073 if (page_table[page].gen == 0 && page_table[page].type & THREAD_PAGE_FLAG) {
5074 int column;
5075 switch (page_table[page].type & ~THREAD_PAGE_FLAG) {
5076 case PAGE_TYPE_BOXED: column = 0; break;
5077 case PAGE_TYPE_CONS: column = 1; break;
5078 case PAGE_TYPE_CODE: column = 3; break;
5079 case PAGE_TYPE_MIXED: column = 5; break;
5080 default: lose("Bad eden page subtype: %x\n", page_table[page].type);
5082 pagect[column]++;
5083 coltot[column]++;
5084 ++eden_pages;
5085 eden_words_allocated += page_words_used(page);
5087 uword_t waste = npage_bytes(eden_pages) - (eden_words_allocated<<WORD_SHIFT);
5088 double pct_waste = eden_pages > 0 ?
5089 (double)waste / (double)npage_bytes(eden_pages) * 100 : 0.0;
5090 if (eden_pages) {
5091 int linelen = snprintf(linebuf, sizeof linebuf,
5092 " E %6d %6d %6d %6d %7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%14"PAGE_INDEX_FMT
5093 "%14"PAGE_INDEX_FMT
5094 "%28.1f %11"OS_VM_SIZE_FMT"\n",
5095 objct[0], objct[1], objct[2], objct[3],
5096 pagect[0], pagect[1], pagect[3], pagect[5],
5097 pct_waste, eden_words_allocated<<WORD_SHIFT);
5098 OUTPUT(linebuf, linelen);
5100 memset(pagect, 0, sizeof pagect);
5102 uword_t words_allocated = 0;
5103 page_index_t tot_pages = 0;
5104 for (page = 0; page < next_free_page; page++)
5105 if (!page_free_p(page) && page_table[page].gen == gen_num
5106 && !(page_table[page].type & THREAD_PAGE_FLAG)) {
5107 int column;
5108 switch (page_table[page].type & (SINGLE_OBJECT_FLAG|PAGE_TYPE_MASK)) {
5109 case PAGE_TYPE_BOXED: column = 0; break;
5110 case PAGE_TYPE_CONS: column = 1; break;
5111 case PAGE_TYPE_UNBOXED: column = 2; break;
5112 case PAGE_TYPE_CODE: column = 3; break;
5113 case PAGE_TYPE_SMALL_MIXED: column = 4; break;
5114 case PAGE_TYPE_MIXED: column = 5; break;
5115 case SINGLE_OBJECT_FLAG|PAGE_TYPE_UNBOXED: column = 6; break;
5116 case SINGLE_OBJECT_FLAG|PAGE_TYPE_CODE: column = 7; break;
5117 case SINGLE_OBJECT_FLAG|PAGE_TYPE_MIXED: column = 8; break;
5118 default: lose("Invalid page type %#x (p%"PAGE_INDEX_FMT")", page_table[page].type, page);
5120 pagect[column]++;
5121 coltot[column]++;
5122 ++tot_pages;
5123 words_allocated += page_words_used(page);
5125 struct generation* gen = &generations[gen_num];
5126 if (gen_num == 0)
5127 gc_assert(gen->bytes_allocated ==
5128 (words_allocated+eden_words_allocated) << WORD_SHIFT);
5129 /* private-cons.inc doesn't update bytes_allocated */
5131 else {
5132 gc_assert(gen->bytes_allocated == words_allocated << WORD_SHIFT);
5135 page_index_t n_dirty;
5136 count_generation_pages(gen_num, &n_dirty);
5137 uword_t waste = npage_bytes(tot_pages) - (words_allocated<<WORD_SHIFT);
5138 double pct_waste = tot_pages > 0 ?
5139 (double)waste / (double)npage_bytes(tot_pages) * 100 : 0.0;
5140 int linelen =
5141 snprintf(linebuf, sizeof linebuf,
5142 " %d %6d %6d %6d %6d"
5143 "%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT
5144 "%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT
5145 "%7"PAGE_INDEX_FMT" %6.1f %11"OS_VM_SIZE_FMT" %11"OS_VM_SIZE_FMT,
5146 gen_num, objct[0], objct[1], objct[2], objct[3],
5147 pagect[0], pagect[1], pagect[2], pagect[3], pagect[4], pagect[5],
5148 pagect[6], pagect[7], pagect[8],
5149 pct_waste, words_allocated<<WORD_SHIFT,
5150 (uintptr_t)gen->gc_trigger);
5151 // gen0 pages are never WPed
5152 linelen += snprintf(linebuf+linelen, sizeof linebuf-linelen,
5153 gen_num==0?" -" : " %7"PAGE_INDEX_FMT, n_dirty);
5154 linelen += snprintf(linebuf+linelen, sizeof linebuf-linelen,
5155 " %3d %7.4f\n", gen->num_gc, generation_average_age(gen_num));
5156 OUTPUT(linebuf, linelen);
5158 page_index_t tot_pages = coltot[0] + coltot[1] + coltot[2] + coltot[3] + coltot[4] +
5159 coltot[5] + coltot[6] + coltot[7] + coltot[8];
5160 uword_t waste = npage_bytes(tot_pages) - bytes_allocated;
5161 double pct_waste = (double)waste / (double)npage_bytes(tot_pages) * 100;
5162 double heap_use_frac = 100 * (double)bytes_allocated / (double)dynamic_space_size;
5163 int *objct = immobile_totals;
5164 int linelen =
5165 snprintf(linebuf, sizeof linebuf,
5166 "Tot %6d %6d %6d %6d"
5167 "%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT
5168 "%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT"%7"PAGE_INDEX_FMT
5169 "%7"PAGE_INDEX_FMT" %6.1f%12"OS_VM_SIZE_FMT
5170 " [%.1f%% of %"OS_VM_SIZE_FMT" max]\n",
5171 objct[0], objct[1], objct[2], objct[3],
5172 coltot[0], coltot[1], coltot[2], coltot[3], coltot[4], coltot[5], coltot[6],
5173 coltot[7], coltot[8], pct_waste,
5174 (uintptr_t)bytes_allocated, heap_use_frac, (uintptr_t)dynamic_space_size);
5175 OUTPUT(linebuf, linelen);
5176 #undef OUTPUT
5178 #ifdef LISP_FEATURE_X86
5179 fpu_restore(fpu_state);
5180 #endif
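/* Illustrative usage sketch only (not compiled): dump the generation report to
 * stderr while troubleshooting. gc_gen_report_to_file() writes to a file
 * descriptor and/or a FILE*; here only the descriptor (2 = stderr) is used. */
#if 0
static void sketch_report_heap(void) { gc_gen_report_to_file(2, NULL); }
#endif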