From e2caf0e77d5adc5ddec07633ec1c9ceaae464acf Mon Sep 17 00:00:00 2001
From: Matthew Dillon
Date: Wed, 8 Nov 2017 10:56:06 -0800
Subject: [PATCH] libc and pthreads - Fix atfork issues with nmalloc, update dmalloc

* Implement atfork handling for nmalloc.  As part of this, refactor some
  of nmalloc.

* Remove ZERO_LENGTH_PTR from nmalloc.  Instead, force 0-byte allocations
  to allocate 1 byte.  The standard requires unique pointers to be
  returned.

* For now go back to a single depot lock instead of a per-zone lock.
  It is unclear whether multi-threaded performance will suffer or not,
  but it's the only way to implement atfork handling.

* Implement proper atfork interlocks for nmalloc via pthreads to avoid
  corruption when heavily threaded programs call fork().

* Bring dmalloc up to date in various ways, including properly
  implementing a minimum 16-byte alignment for allocations >= 16 bytes,
  and atfork handling.  Also use a global depot lock for the same reason
  we use it in nmalloc, and implement a front-end magazine shortcut for
  any allocations <= 2MB.

Reported-by: mneumann
---
 lib/libc/include/libc_private.h       |   3 +
 lib/libc/stdlib/Symbol.map            |   3 +
 lib/libc/stdlib/dmalloc.c             | 365 ++++++++++++++++++++++++----------
 lib/libc/stdlib/nmalloc.c             |  62 ++++--
 lib/libthread_xu/thread/Makefile.inc  |   1 +
 lib/libthread_xu/thread/thr_init.c    |   1 +
 lib/libthread_xu/thread/thr_malloc.c  |  54 +++++
 lib/libthread_xu/thread/thr_private.h |   1 +
 8 files changed, 368 insertions(+), 122 deletions(-)
 create mode 100644 lib/libthread_xu/thread/thr_malloc.c

diff --git a/lib/libc/include/libc_private.h b/lib/libc/include/libc_private.h
index efcf0ab623..70152b477d 100644
--- a/lib/libc/include/libc_private.h
+++ b/lib/libc/include/libc_private.h
@@ -92,6 +92,9 @@ extern void (*__cleanup)(void);
 /* execve() with PATH processing to implement posix_spawnp() */
 int _execvpe(const char *, char * const *, char * const *);
 void _nmalloc_thr_init(void);
+void _nmalloc_thr_prepfork(void);
+void _nmalloc_thr_parentfork(void);
+void _nmalloc_thr_childfork(void);
 
 struct dl_phdr_info;
 int __elf_phdr_match_addr(struct dl_phdr_info *, void *);
diff --git a/lib/libc/stdlib/Symbol.map b/lib/libc/stdlib/Symbol.map
index 526e8ac8be..f4f453adb6 100644
--- a/lib/libc/stdlib/Symbol.map
+++ b/lib/libc/stdlib/Symbol.map
@@ -3,6 +3,9 @@ DF404.0 {
 	__cxa_finalize;
 	_Exit;
 	_nmalloc_thr_init;
+	_nmalloc_thr_prepfork;
+	_nmalloc_thr_parentfork;
+	_nmalloc_thr_childfork;
 	_system;
 	a64l;
 	abort;
diff --git a/lib/libc/stdlib/dmalloc.c b/lib/libc/stdlib/dmalloc.c
index 5976336638..f142a98f25 100644
--- a/lib/libc/stdlib/dmalloc.c
+++ b/lib/libc/stdlib/dmalloc.c
@@ -1,7 +1,7 @@
 /*
  * DMALLOC.C	- Dillon's malloc
  *
- * Copyright (c) 2011 The DragonFly Project. All rights reserved.
+ * Copyright (c) 2011,2017 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon .
@@ -34,16 +34,11 @@
 * SUCH DAMAGE.
 */
 /*
- * This module implements a modified slab allocator drop-in replacement for
- * the libc malloc().  The slab algorithm has been adjusted to support dynamic
- * sizing of slabs which effectively allows slabs to be used for allocations of
- * any size.  Because of this we neither have a small-block allocator or a
- * big-block allocator and the code paths are simplified to the point where
- * allocations, caching, and freeing, is screaming fast.
- *
- * There is very little interaction between threads.  A global depot accessed
- * via atomic cmpxchg instructions (only! no spinlocks!)
- * is used as a catch-all and to deal with thread exits and such.
+ * This module implements a modified slab allocator as a drop-in replacement
+ * for the libc malloc().  The slab algorithm has been adjusted to support
+ * dynamic sizing of slabs which effectively allows slabs to be used for
+ * allocations of any size.  Because of this we neither have a small-block
+ * allocator or a big-block allocator and the code paths are simplified.
 *
 * To support dynamic slab sizing available user virtual memory is broken
 * down into ~1024 regions.  Each region has fixed slab size whos value is
@@ -66,7 +61,7 @@
 *	8192-16383	1024	8
 *	16384-32767	2048	8
 *	32768-65535	4096	8
- *	... continues unlimited ...	4 zones
+ *	... continues forever ...	4 zones
 *
 *	For a 2^63 memory space each doubling >= 64K is broken down into
 *	4 chunking zones, so we support 88 + (48 * 4) = 280 zones.
@@ -82,6 +77,9 @@
 * + realloc will reuse the passed pointer if possible, within the
 *   limitations of the zone chunking.
 *
+ * On top of the slab allocator we also implement a 16-entry-per-thread
+ * magazine cache for allocations <= NOMSLABSIZE.
+ *
 * FUTURE FEATURES
 *
 * + [better] garbage collection
@@ -234,24 +232,31 @@ struct slab {
 	int	flags;
 	region_t region;		/* related region */
 	char	*chunks;		/* chunk base */
-	slglobaldata_t slgd;
+	slglobaldata_t slgd;		/* localized to thread else NULL */
 };
 
 /*
- * per-thread data
+ * per-thread data + global depot
+ *
+ * NOTE: The magazine shortcut is only used for per-thread data.
 */
+#define NMAGSHORTCUT	16
+
 struct slglobaldata {
+	spinlock_t	lock;		/* only used by slglobaldepot */
 	struct zoneinfo {
 		slab_t	avail_base;
 		slab_t	empty_base;
 		int	best_region;
+		int	mag_index;
+		int	avail_count;
 		int	empty_count;
+		void	*mag_shortcut[NMAGSHORTCUT];
 	} zone[NZONES];
-	struct slab_list full_zones;		/* via entry */
+	struct slab_list full_zones;	/* via entry */
 	int	masked;
 	int	biggest_index;
 	size_t	nslabs;
-	slglobaldata_t sldepot;
 };
 
 #define SLAB_ZEROD		0x0001
@@ -288,14 +293,22 @@
 				#exp, __func__);	\
 	} while (0)
 
-/* With this attribute set, do not require a function call for accessing
- * this variable when the code is compiled -fPIC */
+/*
+ * With this attribute set, do not require a function call for accessing
+ * this variable when the code is compiled -fPIC.
+ *
+ * Must be empty for libc_rtld (similar to __thread)
+ */
+#if defined(__LIBC_RTLD)
+#define TLS_ATTRIBUTE
+#else
 #define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")));
+#endif
 
 static __thread struct slglobaldata slglobal TLS_ATTRIBUTE;
 static pthread_key_t thread_malloc_key;
 static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT;
-static struct slglobaldata sldepots[NDEPOTS];
+static struct slglobaldata slglobaldepot;
 
 static int opt_madvise = 0;
 static int opt_free = 0;
@@ -303,7 +316,6 @@ static int opt_cache = 4;
 static int opt_utrace = 0;
 static int g_malloc_flags = 0;
 static int malloc_panic;
-static int malloc_started = 0;
 
 static const int32_t weirdary[16] = {
 	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
@@ -355,18 +367,8 @@ static void
 malloc_init(void)
 {
 	const char *p = NULL;
-	static spinlock_t malloc_init_lock;
-
-	if (malloc_started)
-		return;
-	if (__isthreaded) {
-		_SPINLOCK(&malloc_init_lock);
-		if (malloc_started) {
-			_SPINUNLOCK(&malloc_init_lock);
-			return;
-		}
-	}
+	TAILQ_INIT(&slglobal.full_zones);
 
 	Regions[0].mask = -1;		/* disallow activity in lowest region */
@@ -412,11 +414,6 @@ malloc_init(void)
 	}
 
 	UTRACE((void *) -1, 0, NULL);
-	_nmalloc_thr_init();
-	malloc_started = 1;
-
-	if (__isthreaded)
-		_SPINUNLOCK(&malloc_init_lock);
 }
 
 /*
@@ -434,13 +431,8 @@ void
 _nmalloc_thr_init(void)
 {
 	static int did_init;
-	static int SLGI;
-	int slgi;
 
-	slgi = SLGI++;
-	cpu_ccfence();
 	TAILQ_INIT(&slglobal.full_zones);
-	slglobal.sldepot = &sldepots[slgi & (NDEPOTS - 1)];
 
 	if (slglobal.masked)
 		return;
@@ -454,6 +446,27 @@ _nmalloc_thr_init(void)
 	slglobal.masked = 0;
 }
 
+void
+_nmalloc_thr_prepfork(void)
+{
+	if (__isthreaded)
+		_SPINLOCK(&slglobaldepot.lock);
+}
+
+void
+_nmalloc_thr_parentfork(void)
+{
+	if (__isthreaded)
+		_SPINUNLOCK(&slglobaldepot.lock);
+}
+
+void
+_nmalloc_thr_childfork(void)
+{
+	if (__isthreaded)
+		_SPINUNLOCK(&slglobaldepot.lock);
+}
+
 /*
 * Called just once
 */
@@ -476,19 +489,34 @@ static void
 _nmalloc_thr_destructor(void *thrp)
 {
 	slglobaldata_t slgd = thrp;
+	struct zoneinfo *zinfo;
 	slab_t slab;
+	void *ptr;
 	int i;
+	int j;
 
 	slgd->masked = 1;
 
 	for (i = 0; i <= slgd->biggest_index; i++) {
-		while ((slab = slgd->zone[i].empty_base) != NULL) {
-			slgd->zone[i].empty_base = slab->next;
+		zinfo = &slgd->zone[i];
+
+		while ((j = zinfo->mag_index) > 0) {
+			--j;
+			ptr = zinfo->mag_shortcut[j];
+			zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
+			zinfo->mag_index = j;
+			memfree(ptr, 0);
+		}
+
+		while ((slab = zinfo->empty_base) != NULL) {
+			zinfo->empty_base = slab->next;
+			--zinfo->empty_count;
 			slabterm(slgd, slab);
 		}
 
-		while ((slab = slgd->zone[i].avail_base) != NULL) {
-			slgd->zone[i].avail_base = slab->next;
+		while ((slab = zinfo->avail_base) != NULL) {
+			zinfo->avail_base = slab->next;
+			--zinfo->avail_count;
 			slabterm(slgd, slab);
 		}
 
@@ -502,6 +530,9 @@ _nmalloc_thr_destructor(void *thrp)
 /*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
+ *
+ * Minimum alignment is 16 bytes for allocations >= 16 bytes to conform
+ * with malloc requirements for intel/amd.
 */
 static __inline int
 zoneindex(size_t *bytes, size_t *chunking)
@@ -512,9 +543,15 @@ zoneindex(size_t *bytes, size_t *chunking)
 	int i;
 
 	if (n < 128) {
-		*bytes = n = (n + 7) & ~7;
-		*chunking = 8;
-		return(n / 8);		/* 8 byte chunks, 16 zones */
+		if (n < 16) {
+			*bytes = n = (n + 7) & ~7;
+			*chunking = 8;
+			return(n / 8 - 1);	/* 8 byte chunks, 2 zones */
+		} else {
+			*bytes = n = (n + 15) & ~15;
+			*chunking = 16;
+			return(n / 16 + 2);	/* 16 byte chunks, 8 zones */
+		}
 	}
 	if (n < 4096) {
 		x = 256;
@@ -607,7 +644,7 @@
 * malloc() - call internal slab allocator
 */
 void *
-malloc(size_t size)
+__malloc(size_t size)
 {
 	void *ptr;
@@ -623,7 +660,7 @@
 * calloc() - call internal slab allocator
 */
 void *
-calloc(size_t number, size_t size)
+__calloc(size_t number, size_t size)
 {
 	void *ptr;
@@ -643,7 +680,7 @@
 * zone.
 */
 void *
-realloc(void *ptr, size_t size)
+__realloc(void *ptr, size_t size)
 {
 	void *ret;
@@ -669,7 +706,7 @@
 * matching the requirements.
 */
 int
-posix_memalign(void **memptr, size_t alignment, size_t size)
+__posix_memalign(void **memptr, size_t alignment, size_t size)
 {
 	/*
 	 * OpenGroup spec issue 6 checks
@@ -700,7 +737,7 @@
 * free() (SLAB ALLOCATOR) - do the obvious
 */
 void
-free(void *ptr)
+__free(void *ptr)
 {
 	if (ptr) {
 		UTRACE(ptr, 0, 0);
@@ -727,12 +764,9 @@ memalloc(size_t size, int flags)
 #ifdef INVARIANTS
 	int i;
 #endif
-	size_t off;
+	int j;
 	char *obj;
 
-	if (!malloc_started)
-		malloc_init();
-
 	/*
 	 * If 0 bytes is requested we have to return a unique pointer, allocate
 	 * at least one byte.
@@ -750,13 +784,26 @@
 		return(NULL);
 
 	/*
+	 * Try magazine shortcut first
+	 */
+	slgd = &slglobal;
+	zinfo = &slgd->zone[zi];
+
+	if ((j = zinfo->mag_index) != 0) {
+		zinfo->mag_index = --j;
+		obj = zinfo->mag_shortcut[j];
+		zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
+		if (flags & SAFLAG_ZERO)
+			bzero(obj, size);
+		return obj;
+	}
+
+	/*
	 * Locate a slab with available space.  If no slabs are available
	 * back-off to the empty list and if we still come up dry allocate
	 * a new slab (which will try the depot first).
	 */
 retry:
-	slgd = &slglobal;
-	zinfo = &slgd->zone[zi];
 	if ((slab = zinfo->avail_base) == NULL) {
 		if ((slab = zinfo->empty_base) == NULL) {
 			/*
@@ -767,6 +814,7 @@ retry:
 				return(NULL);
 			slab->next = zinfo->avail_base;
 			zinfo->avail_base = slab;
+			++zinfo->avail_count;
 			slab->state = AVAIL;
 			if (slgd->biggest_index < zi)
 				slgd->biggest_index = zi;
@@ -778,6 +826,7 @@
 			zinfo->empty_base = slab->next;
 			slab->next = zinfo->avail_base;
 			zinfo->avail_base = slab;
+			++zinfo->avail_count;
 			slab->state = AVAIL;
 			--zinfo->empty_count;
 		}
@@ -809,6 +858,7 @@
	 */
 	if (slab->navail == 0) {
 		zinfo->avail_base = slab->next;
+		--zinfo->avail_count;
 		slab->state = FULL;
 		TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry);
 		goto retry;
@@ -920,9 +970,9 @@ memfree(void *ptr, int flags)
 	slab_t slab;
 	slab_t stmp;
 	slab_t *slabp;
-	char *obj;
 	int bmi;
 	int bno;
+	int j;
 	u_long *bmp;
 
 	/*
@@ -948,7 +998,28 @@
 	else
 		bcopy(weirdary, ptr, sizeof(weirdary));
 #endif
+	slgd = &slglobal;
+
+	/*
+	 * Use mag_shortcut[] when possible
+	 */
+	if (slgd->masked == 0 && slab->chunk_size <= NOMSLABSIZE) {
+		struct zoneinfo *zinfo;
+		zinfo = &slgd->zone[slab->zone_index];
+		j = zinfo->mag_index;
+		if (j < NMAGSHORTCUT) {
+			zinfo->mag_shortcut[j] = ptr;
+			zinfo->mag_index = j + 1;
+			return;
+		}
+	}
+
+	/*
+	 * Free to slab and increment navail.  We can delay incrementing
+	 * navail to prevent the slab from being destroyed out from under
+	 * us while we do other optimizations.
+	 */
 	bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;
 	bmi = bno >> LONG_BITS_SHIFT;
 	bno &= (LONG_BITS - 1);
@@ -957,15 +1028,16 @@
 	MASSERT(bmi >= 0 && bmi < slab->nmax);
 	MASSERT((*bmp & (1LU << bno)) == 0);
 	atomic_set_long(bmp, 1LU << bno);
-	atomic_add_int(&slab->navail, 1);
 
-	/*
-	 * We can only do the following if we own the slab
-	 */
-	slgd = &slglobal;
 	if (slab->slgd == slgd) {
+		/*
+		 * We can only do the following if we own the slab.  Note
+		 * that navail can be incremented by any thread even if
+		 * we own the slab.
+		 */
 		struct zoneinfo *zinfo;
 
+		atomic_add_int(&slab->navail, 1);
 		if (slab->free_index > bmi) {
 			slab->free_index = bmi;
 			slab->free_bit = bno;
@@ -976,22 +1048,26 @@
 		zinfo = &slgd->zone[slab->zone_index];
 
 		/*
-		 * Freeing an object from a full slab will move it to the
-		 * available list.  If the available list already has a
-		 * slab we terminate the full slab instead, moving it to
-		 * the depot.
+		 * Freeing an object from a full slab makes it less than
+		 * full.  The slab must be moved to the available list.
+		 *
+		 * If the available list has too many slabs, release some
+		 * to the depot.
		 */
 		if (slab->state == FULL) {
 			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
-			if (zinfo->avail_base == NULL) {
-				slab->state = AVAIL;
-				stmp = zinfo->avail_base;
-				slab->next = stmp;
-				zinfo->avail_base = slab;
-			} else {
+			slab->state = AVAIL;
+			stmp = zinfo->avail_base;
+			slab->next = stmp;
+			zinfo->avail_base = slab;
+			++zinfo->avail_count;
+			while (zinfo->avail_count > opt_cache) {
+				slab = zinfo->avail_base;
+				zinfo->avail_base = slab->next;
+				--zinfo->avail_count;
 				slabterm(slgd, slab);
-				goto done;
 			}
+			goto done;
 		}
 
 		/*
@@ -1007,6 +1083,7 @@ memfree(void *ptr, int flags)
 			while ((stmp = *slabp) != slab)
 				slabp = &stmp->next;
 			*slabp = slab->next;
+			--zinfo->avail_count;
 
 			if (opt_free || opt_cache == 0) {
 				/*
@@ -1044,6 +1121,63 @@ memfree(void *ptr, int flags)
 				zinfo->empty_base = slab;
 			}
 		}
+	} else if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
+		slglobaldata_t sldepot;
+
+		/*
+		 * If freeing to a slab owned by the global depot, and
+		 * the slab becomes completely EMPTY, try to move it to
+		 * the correct list.
+		 */
+		sldepot = &slglobaldepot;
+		if (__isthreaded)
+			_SPINLOCK(&sldepot->lock);
+		if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
+			struct zoneinfo *zinfo;
+
+			/*
+			 * Move the slab to the empty list
+			 */
+			MASSERT(slab->state == AVAIL);
+			atomic_add_int(&slab->navail, 1);
+			zinfo = &sldepot->zone[slab->zone_index];
+			slabp = &zinfo->avail_base;
+			while (slab != *slabp)
+				slabp = &(*slabp)->next;
+			*slabp = slab->next;
+			--zinfo->avail_count;
+
+			/*
+			 * Clean out excessive empty entries from the
+			 * depot.
+			 */
+			slab->state = EMPTY;
+			slab->next = zinfo->empty_base;
+			zinfo->empty_base = slab;
+			++zinfo->empty_count;
+			while (zinfo->empty_count > opt_cache) {
+				slab = zinfo->empty_base;
+				zinfo->empty_base = slab->next;
+				--zinfo->empty_count;
+				slab->state = UNKNOWN;
+				if (__isthreaded)
+					_SPINUNLOCK(&sldepot->lock);
+				slabfree(slab);
+				if (__isthreaded)
+					_SPINLOCK(&sldepot->lock);
+			}
+		} else {
+			atomic_add_int(&slab->navail, 1);
+		}
+		if (__isthreaded)
+			_SPINUNLOCK(&sldepot->lock);
+	} else {
+		/*
+		 * We can't act on the slab other than by adjusting navail
+		 * (and the bitmap which we did in the common code at the
+		 * top).
+		 */
+		atomic_add_int(&slab->navail, 1);
 	}
 done:
 	;
@@ -1061,7 +1195,6 @@ slaballoc(int zi, size_t chunking, size_t chunk_size)
 	region_t region;
 	void *save;
 	slab_t slab;
-	slab_t stmp;
 	size_t slab_desire;
 	size_t slab_size;
 	size_t region_mask;
@@ -1083,20 +1216,24 @@
	 * resulting in a large VSZ.
	 */
 	slgd = &slglobal;
-	sldepot = slgd->sldepot;
+	sldepot = &slglobaldepot;
 	zinfo = &sldepot->zone[zi];
-	while ((slab = zinfo->avail_base) != NULL) {
-		if ((void *)slab == LOCKEDPTR) {
-			cpu_pause();
-			continue;
-		}
-		if (atomic_cmpset_ptr(&zinfo->avail_base, slab, LOCKEDPTR)) {
+
+	if (zinfo->avail_base) {
+		if (__isthreaded)
+			_SPINLOCK(&sldepot->lock);
+		slab = zinfo->avail_base;
+		if (slab) {
 			MASSERT(slab->slgd == NULL);
 			slab->slgd = slgd;
 			zinfo->avail_base = slab->next;
-			return(slab);
+			--zinfo->avail_count;
+			if (__isthreaded)
+				_SPINUNLOCK(&sldepot->lock);
+			return slab;
 		}
+		if (__isthreaded)
+			_SPINUNLOCK(&sldepot->lock);
 	}
 
@@ -1107,7 +1244,9 @@
 	/*
	 * Calculate the start of the data chunks relative to the start
-	 * of the slab.
+	 * of the slab.  If chunk_size is a power of 2 we guarantee
+	 * power of 2 alignment.  If it is not we guarantee alignment
+	 * to the chunk size.
	 */
 	if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) {
 		ispower2 = 1;
@@ -1410,40 +1549,48 @@ slabfree(slab_t slab)
 static void
 slabterm(slglobaldata_t slgd, slab_t slab)
 {
-	slglobaldata_t sldepot = slgd->sldepot;
+	slglobaldata_t sldepot;
 	struct zoneinfo *zinfo;
-	slab_t dnext;
 	int zi = slab->zone_index;
 
 	slab->slgd = NULL;
 	--slgd->nslabs;
+	sldepot = &slglobaldepot;
 	zinfo = &sldepot->zone[zi];
 
 	/*
-	 * If the slab can be freed and the depot is either locked or not
-	 * empty, then free the slab.
+	 * Move the slab to the avail list or the empty list.
	 */
-	if (slab->navail == slab->nmax && zinfo->avail_base) {
-		slab->state = UNKNOWN;
-		slabfree(slab);
-		return;
+	if (__isthreaded)
+		_SPINLOCK(&sldepot->lock);
+	if (slab->navail == slab->nmax) {
+		slab->state = EMPTY;
+		slab->next = zinfo->empty_base;
+		zinfo->empty_base = slab;
+		++zinfo->empty_count;
+	} else {
+		slab->state = AVAIL;
+		slab->next = zinfo->avail_base;
+		zinfo->avail_base = slab;
+		++zinfo->avail_count;
 	}
-	slab->state = AVAIL;
 
 	/*
-	 * Link the slab into the depot
+	 * Clean extra slabs out of the empty list
	 */
-	for (;;) {
-		dnext = zinfo->avail_base;
-		cpu_ccfence();
-		if ((void *)dnext == LOCKEDPTR) {
-			cpu_pause();
-			continue;
-		}
-		slab->next = dnext;
-		if (atomic_cmpset_ptr(&zinfo->avail_base, dnext, slab))
-			break;
+	while (zinfo->empty_count > opt_cache) {
+		slab = zinfo->empty_base;
+		zinfo->empty_base = slab->next;
+		--zinfo->empty_count;
+		slab->state = UNKNOWN;
+		if (__isthreaded)
+			_SPINUNLOCK(&sldepot->lock);
+		slabfree(slab);
+		if (__isthreaded)
+			_SPINLOCK(&sldepot->lock);
 	}
+	if (__isthreaded)
+		_SPINUNLOCK(&sldepot->lock);
 }
 
 /*
@@ -1541,3 +1688,9 @@ _mpanic(const char *ctl, ...)
 	}
 	abort();
 }
+
+__weak_reference(__malloc, malloc);
+__weak_reference(__calloc, calloc);
+__weak_reference(__posix_memalign, posix_memalign);
+__weak_reference(__realloc, realloc);
+__weak_reference(__free, free);
diff --git a/lib/libc/stdlib/nmalloc.c b/lib/libc/stdlib/nmalloc.c
index 75b7a00e2e..59b0789850 100644
--- a/lib/libc/stdlib/nmalloc.c
+++ b/lib/libc/stdlib/nmalloc.c
@@ -213,8 +213,6 @@ typedef struct slglobaldata {
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 * not be larger then 64.
 */
-#define ZERO_LENGTH_PTR	((void *)&malloc_dummy_pointer)
-
 #define BIGHSHIFT	10			/* bigalloc hash table */
 #define BIGHSIZE	(1 << BIGHSHIFT)
 #define BIGHMASK	(BIGHSIZE - 1)
@@ -276,6 +274,7 @@ struct magazine {
 SLIST_HEAD(magazinelist, magazine);
 
 static spinlock_t zone_mag_lock;
+static spinlock_t depot_spinlock;
 static struct magazine zone_magazine = {
 	.flags = M_BURST | M_BURST_EARLY,
 	.capacity = M_ZONE_ROUNDS,
@@ -314,8 +313,9 @@ typedef struct thr_mags {
 
 /*
 * With this attribute set, do not require a function call for accessing
- * this variable when the code is compiled -fPIC.  Empty for libc_rtld
- * (like __thread).
+ * this variable when the code is compiled -fPIC.
+ *
+ * Must be empty for libc_rtld (similar to __thread).
 */
 #ifdef __LIBC_RTLD
 #define TLS_ATTRIBUTE
@@ -323,7 +323,6 @@ typedef struct thr_mags {
 #define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")))
 #endif
 
-static int mtmagazine_free_live;
 static __thread thr_mags thread_mags TLS_ATTRIBUTE;
 static pthread_key_t thread_mags_key;
 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
@@ -347,7 +346,6 @@ static volatile void *bigcache_array[BIGCACHE];	/* atomic swap */
 static volatile size_t bigcache_size_array[BIGCACHE];	/* SMP races ok */
 static volatile int bigcache_index;			/* SMP races ok */
 static int malloc_panic;
-static int malloc_dummy_pointer;
 static size_t excess_alloc;			/* excess big allocs */
 
 static void *_slaballoc(size_t size, int flags);
@@ -417,6 +415,7 @@ malloc_init(void)
 void
 _nmalloc_thr_init(void)
 {
+	static int init_once;
 	thr_mags *tp;
 
 	/*
@@ -426,14 +425,41 @@ _nmalloc_thr_init(void)
 	tp = &thread_mags;
 	tp->init = -1;
 
-	if (mtmagazine_free_live == 0) {
-		mtmagazine_free_live = 1;
+	if (init_once == 0) {
+		init_once = 1;
 		pthread_once(&thread_mags_once, mtmagazine_init);
 	}
 	pthread_setspecific(thread_mags_key, tp);
 	tp->init = 1;
 }
 
+void
+_nmalloc_thr_prepfork(void)
+{
+	if (__isthreaded) {
+		_SPINLOCK(&zone_mag_lock);
+		_SPINLOCK(&depot_spinlock);
+	}
+}
+
+void
+_nmalloc_thr_parentfork(void)
+{
+	if (__isthreaded) {
+		_SPINUNLOCK(&depot_spinlock);
+		_SPINUNLOCK(&zone_mag_lock);
+	}
+}
+
+void
+_nmalloc_thr_childfork(void)
+{
+	if (__isthreaded) {
+		_SPINUNLOCK(&depot_spinlock);
+		_SPINUNLOCK(&zone_mag_lock);
+	}
+}
+
 /*
 * Thread locks.
 */
@@ -455,14 +481,22 @@ static __inline void
 depot_lock(magazine_depot *dp)
 {
 	if (__isthreaded)
+		_SPINLOCK(&depot_spinlock);
+#if 0
+	if (__isthreaded)
 		_SPINLOCK(&dp->lock);
+#endif
 }
 
 static __inline void
 depot_unlock(magazine_depot *dp)
 {
 	if (__isthreaded)
+		_SPINUNLOCK(&depot_spinlock);
+#if 0
+	if (__isthreaded)
 		_SPINUNLOCK(&dp->lock);
+#endif
 }
 
 static __inline void
@@ -922,7 +956,7 @@ _slaballoc(size_t size, int flags)
	 * also realloc() later on.  Joy.
	 */
 	if (size == 0)
-		return(ZERO_LENGTH_PTR);
+		size = 1;
 
 	/* Capture global flags */
 	flags |= g_malloc_flags;
@@ -1134,14 +1168,12 @@ _slabrealloc(void *ptr, size_t size)
 	slzone_t z;
 	size_t chunking;
 
-	if (ptr == NULL || ptr == ZERO_LENGTH_PTR) {
+	if (ptr == NULL) {
 		return(_slaballoc(size, 0));
 	}
 
-	if (size == 0) {
-		free(ptr);
-		return(ZERO_LENGTH_PTR);
-	}
+	if (size == 0)
+		size = 1;
 
 	/*
	 * Handle oversized allocations.
@@ -1320,8 +1352,6 @@ _slabfree(void *ptr, int flags, bigalloc_t *rbigp)
	 */
 	if (ptr == NULL)
 		return;
-	if (ptr == ZERO_LENGTH_PTR)
-		return;
 
 	/*
	 * Handle oversized allocations.
diff --git a/lib/libthread_xu/thread/Makefile.inc b/lib/libthread_xu/thread/Makefile.inc
index 2bf5b7030d..19b4d23bb3 100644
--- a/lib/libthread_xu/thread/Makefile.inc
+++ b/lib/libthread_xu/thread/Makefile.inc
@@ -38,6 +38,7 @@ SRCS+= \
 	thr_pspinlock.c \
 	thr_resume_np.c \
 	thr_rtld.c \
+	thr_malloc.c \
 	thr_rwlock.c \
 	thr_rwlockattr.c \
 	thr_self.c \
diff --git a/lib/libthread_xu/thread/thr_init.c b/lib/libthread_xu/thread/thr_init.c
index 6bb6730b7d..6a3b3b3821 100644
--- a/lib/libthread_xu/thread/thr_init.c
+++ b/lib/libthread_xu/thread/thr_init.c
@@ -287,6 +287,7 @@ _libpthread_init(struct pthread *curthread)
 		if (td_eventismember(&_thread_event_mask, TD_CREATE))
 			_thr_report_creation(curthread, curthread);
 		_thr_rtld_init();
+		_thr_malloc_init();
 		_thr_sem_init();
 	}
 }
diff --git a/lib/libthread_xu/thread/thr_malloc.c b/lib/libthread_xu/thread/thr_malloc.c
new file mode 100644
index 0000000000..cb7786f3a7
--- /dev/null
+++ b/lib/libthread_xu/thread/thr_malloc.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017 The DragonFly Project.  All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "libc_private.h"
+#include "namespace.h"
+#include
+#include
+#include
+#include
+
+#include "un-namespace.h"
+#include "rtld_lock.h"
+#include "thr_private.h"
+
+void
+_thr_malloc_init(void)
+{
+	static int once = 0;
+
+	if (once == 0) {
+		once = 1;
+		_thr_atfork_kern(_nmalloc_thr_prepfork,
+				 _nmalloc_thr_parentfork,
+				 _nmalloc_thr_childfork);
+	}
+}
diff --git a/lib/libthread_xu/thread/thr_private.h b/lib/libthread_xu/thread/thr_private.h
index e47acaee64..64e5487b7a 100644
--- a/lib/libthread_xu/thread/thr_private.h
+++ b/lib/libthread_xu/thread/thr_private.h
@@ -676,6 +676,7 @@ int	_thr_ref_add(struct pthread *, struct pthread *, int);
 void	_thr_ref_delete(struct pthread *, struct pthread *);
 void	_thr_ref_delete_unlocked(struct pthread *, struct pthread *);
 int	_thr_find_thread(struct pthread *, struct pthread *, int);
+void	_thr_malloc_init(void);
 void	_thr_rtld_init(void);
 void	_thr_rtld_fini(void);
 int	_thr_stack_alloc(struct pthread_attr *);
-- 
2.11.4.GIT
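
The note and sketch below are not part of the patch; they restate, for readers unfamiliar with it, the prepare/parent/child interlock the patch wires into libthread_xu. The patch registers _nmalloc_thr_prepfork/_parentfork/_childfork through the DragonFly-internal _thr_atfork_kern(); this standalone sketch uses the portable pthread_atfork() interface instead, and a plain pthread mutex stands in for the allocator's depot spinlock. All names in it (depot_lock, churn, prepfork, ...) are illustrative, not DragonFly APIs.

/*
 * Illustration only -- not part of the patch.  Shows the atfork
 * prepare/parent/child pattern: take the lock before fork() so no other
 * thread can hold it at the instant the address space is copied, then
 * release it in both the parent and the child.
 */
#include <pthread.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

static pthread_mutex_t depot_lock = PTHREAD_MUTEX_INITIALIZER;

/* prepare: runs in the parent just before fork(), serializes against users */
static void prepfork(void)   { pthread_mutex_lock(&depot_lock); }
/* parent and child: runs after fork(), releases the (consistent) lock */
static void parentfork(void) { pthread_mutex_unlock(&depot_lock); }
static void childfork(void)  { pthread_mutex_unlock(&depot_lock); }

/* A worker that constantly holds the lock, standing in for allocator work */
static void *churn(void *arg)
{
	for (;;) {
		pthread_mutex_lock(&depot_lock);
		pthread_mutex_unlock(&depot_lock);
	}
	return arg;
}

int main(void)
{
	pthread_t td;

	pthread_atfork(prepfork, parentfork, childfork);
	pthread_create(&td, NULL, churn, NULL);

	for (int i = 0; i < 100; ++i) {
		pid_t pid = fork();
		if (pid == 0) {
			/* Because prepfork() held the lock across fork(),
			 * the child's copy is never stuck in a locked
			 * state owned by a thread that no longer exists. */
			pthread_mutex_lock(&depot_lock);
			pthread_mutex_unlock(&depot_lock);
			_exit(0);
		}
		waitpid(pid, NULL, 0);
	}
	printf("survived %d forks\n", 100);
	return 0;
}

Built with -lpthread, the fork loop runs to completion; the point of the pattern is that fork() can never observe the lock held by the churn thread, so the state inherited by the child is always consistent -- which is exactly what the patch arranges for nmalloc's zone_mag_lock/depot_spinlock and dmalloc's depot lock.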
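
A second illustration, also not part of the patch: the dmalloc change adds a 16-entry per-thread mag_shortcut[] in front of the slab lists so that most frees and same-zone allocations never touch shared state. The sketch restates that idea with a thread-local LIFO; mag_alloc()/mag_free() are hypothetical names, and plain malloc()/free() merely stand in for the slab backend (the real code only does this for chunk sizes up to NOMSLABSIZE and keeps one magazine per zone).

/*
 * Illustration only -- not part of the patch.  A minimal per-thread
 * "magazine shortcut": frees push pointers onto a small thread-local LIFO
 * and later allocations of the same size class pop from it.
 */
#include <stdio.h>
#include <stdlib.h>

#define NMAGSHORTCUT	16

struct magshortcut {
	int	index;			/* number of cached pointers */
	void	*shortcut[NMAGSHORTCUT];
};

static __thread struct magshortcut mag;	/* one per thread (and per zone) */

static void *mag_alloc(size_t size)
{
	if (mag.index > 0)
		return mag.shortcut[--mag.index];  /* hot path, no locking */
	return malloc(size);		/* miss: fall back to the backend */
}

static void mag_free(void *ptr)
{
	if (mag.index < NMAGSHORTCUT) {
		mag.shortcut[mag.index++] = ptr;   /* cache for reuse */
		return;
	}
	free(ptr);			/* magazine full: return to backend */
}

int main(void)
{
	void *p[4];
	int i;

	for (i = 0; i < 4; ++i)
		p[i] = mag_alloc(64);
	for (i = 0; i < 4; ++i)
		mag_free(p[i]);
	/* this allocation is served straight from the per-thread magazine */
	printf("reused: %p\n", mag_alloc(64));
	return 0;
}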
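
A final illustration, not part of the patch: the reworked zoneindex() splits the old 8-byte chunking for requests under 128 bytes so that anything of 16 bytes or more rounds up to a 16-byte chunk, giving the minimum 16-byte alignment the commit message mentions. The sketch restates just that small-size mapping (small_zoneindex() is a made-up name) and checks the rounding and alignment properties.

/*
 * Illustration only -- not part of the patch.  Requests below 16 bytes use
 * 8-byte chunks (zones 0..1); requests of 16..127 bytes use 16-byte chunks
 * (zones 3..10), so every allocation >= 16 bytes is 16-byte aligned.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

static int small_zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = *bytes;

	if (n < 16) {
		*bytes = n = (n + 7) & ~(size_t)7;
		*chunking = 8;
		return (int)(n / 8 - 1);	/* 8-byte chunks, zones 0..1 */
	} else {
		*bytes = n = (n + 15) & ~(size_t)15;
		*chunking = 16;
		return (int)(n / 16 + 2);	/* 16-byte chunks, zones 3..10 */
	}
}

int main(void)
{
	static const size_t samples[] = { 1, 8, 9, 15, 16, 17, 32, 100, 127 };
	size_t req, bytes, chunking;
	unsigned i;

	/* Every request rounds up to a multiple of its chunking, and
	 * anything >= 16 bytes lands on a 16-byte boundary. */
	for (req = 1; req < 128; ++req) {
		bytes = req;
		(void)small_zoneindex(&bytes, &chunking);
		assert(bytes >= req && bytes % chunking == 0);
		assert(req < 16 || chunking == 16);
	}

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i) {
		bytes = samples[i];
		int zi = small_zoneindex(&bytes, &chunking);
		printf("request %3zu -> zone %2d, chunk size %3zu\n",
		       samples[i], zi, bytes);
	}
	return 0;
}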