// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// See malloc.h for overview.

// TODO(rsc): double-check stats.
#include "go-alloc.h"
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "interface.h"
#include "go-type.h"
#include "race.h"

MHeap runtime_mheap;
extern MStats mstats;	// defined in zruntime_def_$GOOS_$GOARCH.go

extern volatile intgo runtime_MemProfileRate
  __asm__ ("runtime.MemProfileRate");
// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
void*
runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
	if(g->status == Gsyscall)
		dogc = 0;
	if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && g->status != Gsyscall) {
		runtime_gosched();
		m = runtime_m();
	}
	if(m->mallocing)
		runtime_throw("malloc/free - deadlock");
	m->mallocing = 1;
	if(size == 0)
		size = 1;

	if(DebugTypeAtBlockEnd)
		size += sizeof(uintptr);
	c = m->mcache;
	c->local_nmalloc++;
	if(size <= MaxSmallSize) {
		// Allocate from mcache free lists.
		sizeclass = runtime_SizeToClass(size);
		size = runtime_class_to_size[sizeclass];
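		// For example, with the usual size-class tables a 24-byte
		// request is rounded up to the 32-byte class, so all blocks
		// in a span share one size.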
		v = runtime_MCache_Alloc(c, sizeclass, size, zeroed);
		if(v == nil)
			runtime_throw("out of memory");
		c->local_alloc += size;
		c->local_total_alloc += size;
		c->local_by_size[sizeclass].nmalloc++;
	} else {
		// TODO(rsc): Report tracebacks for very large allocations.
		// Allocate directly from heap.
		npages = size >> PageShift;
		if((size & PageMask) != 0)
			npages++;
		s = runtime_MHeap_Alloc(&runtime_mheap, npages, 0, 1, zeroed);
		if(s == nil)
			runtime_throw("out of memory");
		size = npages<<PageShift;
		c->local_alloc += size;
		c->local_total_alloc += size;
		v = (void*)(s->start << PageShift);

		// setup for mark sweep
		runtime_markspan(v, 0, 0, true);
	}
	if (sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
		// purge cache stats to prevent overflow
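		// (On 32-bit these counters are 32-bit values and would wrap
		// at 4 GB; flushing them into the global stats at 1 GB keeps
		// plenty of headroom.)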
		runtime_lock(&runtime_mheap);
		runtime_purgecachedstats(c);
		runtime_unlock(&runtime_mheap);
	}

	if(!(flag & FlagNoGC))
		runtime_markallocated(v, size, (flag&FlagNoPointers) != 0);

	if(DebugTypeAtBlockEnd)
		*(uintptr*)((uintptr)v+size-sizeof(uintptr)) = 0;
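	// The extra word reserved above is cleared here; runtime_settype
	// (below) later stores the block's type in it.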
	m->mallocing = 0;

	if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
		if(size >= (uint32) rate)
			goto profile;
		if((uint32) m->mcache->next_sample > size)
			m->mcache->next_sample -= size;
		else {
			// pick next profile time
			// If you change this, also change allocmcache.
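			// Drawing next_sample uniformly from [0, 2*rate) makes
			// the expected distance between samples rate bytes.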
			if(rate > 0x3fffffff)	// make 2*rate not overflow
				rate = 0x3fffffff;
			m->mcache->next_sample = runtime_fastrand1() % (2*rate);
		profile:
			runtime_setblockspecial(v, true);
			runtime_MProf_Malloc(v, size);
		}
	}
	if(dogc && mstats.heap_alloc >= mstats.next_gc)
		runtime_gc(0);

	if(raceenabled)
		runtime_racemalloc(v, size, m->racepc);
	return v;
}
void*
__go_alloc(uintptr size)
{
	return runtime_mallocgc(size, 0, 0, 1);
}
// Free the object whose base pointer is v.
void
__go_free(void *v)
{
	if(v == nil)
		runtime_throw("free nil");

	// If you change this also change mgc0.c:/^sweep,
	// which has a copy of the guts of free.

	m = runtime_m();
	if(m->mallocing)
		runtime_throw("malloc/free - deadlock");
	m->mallocing = 1;

	if(!runtime_mlookup(v, nil, nil, &s)) {
		runtime_printf("free %p: not an allocated block\n", v);
		runtime_throw("free runtime_mlookup");
	}
	prof = runtime_blockspecial(v);
	if(raceenabled)
		runtime_racefree(v);

	// Find size class for v.
	sizeclass = s->sizeclass;
	c = m->mcache;
	if(sizeclass == 0) {
		// Large object.
		size = s->npages<<PageShift;
		*(uintptr*)(s->start<<PageShift) = 1;	// mark as "needs to be zeroed"
		// Must mark v freed before calling unmarkspan and MHeap_Free:
		// they might coalesce v into other spans and change the bitmap further.
		runtime_markfreed(v, size);
		runtime_unmarkspan(v, 1<<PageShift);
		runtime_MHeap_Free(&runtime_mheap, s, 1);
	} else {
		// Small object.
		size = runtime_class_to_size[sizeclass];
		if(size > sizeof(uintptr))
			((uintptr*)v)[1] = 1;	// mark as "needs to be zeroed"
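		// (Word 0 holds the free-list link once the block is freed,
		// which is presumably why the marker goes in word 1.)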
		// Must mark v freed before calling MCache_Free:
		// it might coalesce v and other blocks into a bigger span
		// and change the bitmap further.
		runtime_markfreed(v, size);
		c->local_by_size[sizeclass].nfree++;
		runtime_MCache_Free(c, v, sizeclass, size);
	}
	c->local_nfree++;
	c->local_alloc -= size;
	if(prof)
		runtime_MProf_Free(v, size);
	m->mallocing = 0;
}
int32
runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
{
	m = runtime_m();
	m->mcache->local_nlookup++;
	if (sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) {
		// purge cache stats to prevent overflow
		runtime_lock(&runtime_mheap);
		runtime_purgecachedstats(m->mcache);
		runtime_unlock(&runtime_mheap);
	}
	s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
	if(sp)
		*sp = s;
	if(s == nil) {
		runtime_checkfreed(v, 1);
		if(base)
			*base = 0;
		if(size)
			*size = 0;
		return 0;
	}

	p = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass == 0) {
		// Large object.
		if(base)
			*base = p;
		if(size)
			*size = s->npages<<PageShift;
		return 1;
	}
	if((byte*)v >= (byte*)s->limit) {
		// pointers past the last block do not count as pointers.
		return 0;
	}
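	// For an interior pointer, find the index of the containing
	// block; the block's base address is then p + i*n.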
	n = s->elemsize;
	if(base) {
		i = ((byte*)v - p)/n;
		*base = p + i*n;
	}
	if(size)
		*size = n;

	return 1;
}
MCache*
runtime_allocmcache(void)
{
	intgo rate;
	MCache *c;

	runtime_lock(&runtime_mheap);
	c = runtime_FixAlloc_Alloc(&runtime_mheap.cachealloc);
	mstats.mcache_inuse = runtime_mheap.cachealloc.inuse;
	mstats.mcache_sys = runtime_mheap.cachealloc.sys;
	runtime_unlock(&runtime_mheap);
	runtime_memclr((byte*)c, sizeof(*c));

	// Set first allocation sample size.
	rate = runtime_MemProfileRate;
	if(rate > 0x3fffffff)	// make 2*rate not overflow
		rate = 0x3fffffff;
	if(rate != 0)
		c->next_sample = runtime_fastrand1() % (2*rate);

	return c;
}
void
runtime_freemcache(MCache *c)
{
	runtime_MCache_ReleaseAll(c);
	runtime_lock(&runtime_mheap);
	runtime_purgecachedstats(c);
	runtime_FixAlloc_Free(&runtime_mheap.cachealloc, c);
	runtime_unlock(&runtime_mheap);
}
void
runtime_purgecachedstats(MCache *c)
{
	// Protected by either heap or GC lock.
	mstats.heap_alloc += c->local_cachealloc;
	c->local_cachealloc = 0;
	mstats.heap_objects += c->local_objects;
	c->local_objects = 0;
	mstats.nmalloc += c->local_nmalloc;
	c->local_nmalloc = 0;
	mstats.nfree += c->local_nfree;
	c->local_nfree = 0;
	mstats.nlookup += c->local_nlookup;
	c->local_nlookup = 0;
	mstats.alloc += c->local_alloc;
	c->local_alloc = 0;
	mstats.total_alloc += c->local_total_alloc;
	c->local_total_alloc = 0;
}
extern uintptr runtime_sizeof_C_MStats
  __asm__ ("runtime.Sizeof_C_MStats");

#define MaxArena32 (2U<<30)
void
runtime_mallocinit(void)
{
	byte *p;
	uintptr arena_size, bitmap_size;
	extern byte end[];
	byte *want;
	uintptr limit;

	runtime_sizeof_C_MStats = sizeof(MStats);

	limit = runtime_memlimit();
	// Set up the allocation arena, a contiguous area of memory where
	// allocated data will be found. The arena begins with a bitmap large
	// enough to hold 4 bits per allocated word.
	if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
		// On a 64-bit machine, allocate from a single contiguous reservation.
		// 128 GB (MaxMem) should be big enough for now.
		//
		// The code will work with the reservation at any address, but ask
		// SysReserve to use 0x000000c000000000 if possible.
		// Allocating a 128 GB region takes away 37 bits, and the amd64
		// doesn't let us choose the top 17 bits, so that leaves the 11 bits
		// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
		// UTF-8 sequences, and they are otherwise as far away from
		// ff (likely a common byte) as possible. An earlier attempt to use 0x11f8
		// caused out of memory errors on OS X during thread allocations.
		// These choices are both for debuggability and to reduce the
		// odds of the conservative garbage collector not collecting memory
		// because some non-pointer block of memory had a bit pattern
		// that matched a memory address.
		//
		// Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
		// but it hardly matters: e0 00 is not valid UTF-8 either.
		//
		// If this fails we fall back to the 32 bit memory mechanism.
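		// With 4 bitmap bits per 8-byte word the bitmap is 1/16 the
		// arena size: 128 GB / 16 = 8 GB, giving the 136 GB total
		// mentioned above.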
		arena_size = MaxMem;
		bitmap_size = arena_size / (sizeof(void*)*8/4);
		p = runtime_SysReserve((void*)(0x00c0ULL<<32), bitmap_size + arena_size);
	}
	if (p == nil) {
		// On a 32-bit machine, we can't typically get away
		// with a giant virtual address space reservation.
		// Instead we map the memory information bitmap
		// immediately after the data segment, large enough
		// to handle another 2GB of mappings (256 MB),
		// along with a reservation for another 512 MB of memory.
		// When that gets used up, we'll start asking the kernel
		// for any memory anywhere and hope it's in the 2GB
		// following the bitmap (presumably the executable begins
		// near the bottom of memory, so we'll have to use up
		// most of memory before the kernel resorts to giving out
		// memory before the beginning of the text segment).
		//
		// Alternatively we could reserve 512 MB bitmap, enough
		// for 4GB of mappings, and then accept any memory the
		// kernel threw at us, but normally that's a waste of 512 MB
		// of address space, which is probably too much in a 32-bit world.
		bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
		arena_size = 512<<20;
		if(limit > 0 && arena_size+bitmap_size > limit) {
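			// Words are 4 bytes here, so the bitmap is 1/8 the arena
			// size and arena+bitmap together are 9x the bitmap size;
			// limit/9, rounded down to a page, keeps both within the
			// limit.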
			bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
			arena_size = bitmap_size * 8;
		}
		// SysReserve treats the address we ask for, end, as a hint,
		// not as an absolute requirement. If we ask for the end
		// of the data segment but the operating system requires
		// a little more space before we can start allocating, it will
		// give out a slightly higher pointer. Except QEMU, which
		// is buggy, as usual: it won't adjust the pointer upward.
		// So adjust it upward a little bit ourselves: 1/4 MB to get
		// away from the running binary image and then round up
		// to a MB boundary.
		want = (byte*)(((uintptr)end + (1<<18) + (1<<20) - 1)&~((1<<20)-1));
		if(0xffffffff - (uintptr)want <= bitmap_size + arena_size)
			want = 0;
		p = runtime_SysReserve(want, bitmap_size + arena_size);
		if(p == nil)
			runtime_throw("runtime: cannot reserve arena virtual address space");
		if((uintptr)p & (((uintptr)1<<PageShift)-1))
			runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p",
				p, bitmap_size+arena_size);
	}
	if((uintptr)p & (((uintptr)1<<PageShift)-1))
		runtime_throw("runtime: SysReserve returned unaligned address");
	runtime_mheap.bitmap = p;
	runtime_mheap.arena_start = p + bitmap_size;
	runtime_mheap.arena_used = runtime_mheap.arena_start;
	runtime_mheap.arena_end = runtime_mheap.arena_start + arena_size;

	// Initialize the rest of the allocator.
	runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
	runtime_m()->mcache = runtime_allocmcache();

	// See if it works.
	runtime_free(runtime_malloc(1));
}
void*
runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
{
	byte *p;
	if(n > (uintptr)(h->arena_end - h->arena_used)) {
		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
		// Reserve some more space.
		byte *new_end;
		uintptr needed;

		needed = (uintptr)h->arena_used + n - (uintptr)h->arena_end;
		// Round wanted arena size to a multiple of 256MB.
		needed = (needed + (256<<20) - 1) & ~((256<<20)-1);
		new_end = h->arena_end + needed;
		if(new_end <= h->arena_start + MaxArena32) {
			p = runtime_SysReserve(h->arena_end, new_end - h->arena_end);
			if(p == h->arena_end)
				h->arena_end = new_end;
		}
	}
	if(n <= (uintptr)(h->arena_end - h->arena_used)) {
		// Keep taking from our reservation.
		p = h->arena_used;
		runtime_SysMap(p, n);
		h->arena_used += n;
		runtime_MHeap_MapBits(h);
		if(raceenabled)
			runtime_racemapshadow(p, n);
		return p;
	}
	// If using 64-bit, our reservation is all we have.
	if(sizeof(void*) == 8 && (uintptr)h->bitmap >= 0xffffffffU)
		return nil;
	// On 32-bit, once the reservation is gone we can
	// try to get memory at a location chosen by the OS
	// and hope that it is in the range we allocated bitmap for.
	p = runtime_SysAlloc(n);
	if(p == nil)
		return nil;

	if(p < h->arena_start || (uintptr)(p+n - h->arena_start) >= MaxArena32) {
		runtime_printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
			p, h->arena_start, h->arena_start+MaxArena32);
		runtime_SysFree(p, n);
		return nil;
	}
	if(p+n > h->arena_used) {
		h->arena_used = p+n;
		if(h->arena_used > h->arena_end)
			h->arena_end = h->arena_used;
		runtime_MHeap_MapBits(h);
		if(raceenabled)
			runtime_racemapshadow(p, n);
	}

	return p;
}
static Lock settype_lock;

void
runtime_settype_flush(M *mp, bool sysalloc)
{
	uintptr *buf, *endbuf;
	uintptr size, ofs, j, t;
	uintptr ntypes, nbytes2, nbytes3;
	byte *data2, *data3;
	bool sysalloc3;
	MSpan *s;
	uintptr typ, p;
	void *v;

	buf = mp->settype_buf;
	endbuf = buf + mp->settype_bufsize;

	runtime_lock(&settype_lock);
	while(buf < endbuf) {
		v = (void*)*buf;
		*buf = 0;
		buf++;
		typ = *buf;
		buf++;

		// (Manually inlined copy of runtime_MHeap_Lookup)
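		// The span map is indexed by page number; on 64-bit the page
		// number is taken relative to arena_start, since the map only
		// covers the arena.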
		p = (uintptr)v>>PageShift;
		if(sizeof(void*) == 8)
			p -= (uintptr)runtime_mheap.arena_start >> PageShift;
		s = runtime_mheap.map[p];
		if(s->sizeclass == 0) {
			s->types.compression = MTypes_Single;
			s->types.data = typ;
			continue;
		}

		size = s->elemsize;
		ofs = ((uintptr)v - (s->start<<PageShift)) / size;
		switch(s->types.compression) {
		case MTypes_Empty:
			ntypes = (s->npages << PageShift) / size;
			nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
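			// MTypes_Bytes layout: the first 8 uintptr slots form a
			// table of distinct type words (slot 0 stays zero, meaning
			// "no type"), followed by one byte per block holding an
			// index into that table.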
			if(!sysalloc)
				data3 = runtime_mallocgc(nbytes3, FlagNoPointers, 0, 1);
			else {
				data3 = runtime_SysAlloc(nbytes3);
				if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3);
			}

			s->types.compression = MTypes_Bytes;
			s->types.sysalloc = sysalloc;
			s->types.data = (uintptr)data3;

			((uintptr*)data3)[1] = typ;
			data3[8*sizeof(uintptr) + ofs] = 1;
			break;

		case MTypes_Words:
			((uintptr*)s->types.data)[ofs] = typ;
			break;

		case MTypes_Bytes:
			data3 = (byte*)s->types.data;
			for(j=1; j<8; j++) {
				if(((uintptr*)data3)[j] == typ) {
					break;
				}
				if(((uintptr*)data3)[j] == 0) {
					((uintptr*)data3)[j] = typ;
					break;
				}
			}
			if(j < 8) {
				data3[8*sizeof(uintptr) + ofs] = j;
			} else {
				ntypes = (s->npages << PageShift) / size;
				nbytes2 = ntypes * sizeof(uintptr);

				if(!sysalloc)
					data2 = runtime_mallocgc(nbytes2, FlagNoPointers, 0, 1);
				else {
					data2 = runtime_SysAlloc(nbytes2);
					if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2);
				}
				sysalloc3 = s->types.sysalloc;

				s->types.compression = MTypes_Words;
				s->types.sysalloc = sysalloc;
				s->types.data = (uintptr)data2;
				// Move the contents of data3 to data2. Then deallocate data3.
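				// Each index byte selects one of the 8 table slots;
				// translating it through the table yields the full
				// type word for block j.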
				for(j=0; j<ntypes; j++) {
					t = data3[8*sizeof(uintptr) + j];
					t = ((uintptr*)data3)[t];
					((uintptr*)data2)[j] = t;
				}
				if(sysalloc3) {
					nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
					if(0) runtime_printf("settype.(3->2): SysFree(%p,%x)\n", data3, (uint32)nbytes3);
					runtime_SysFree(data3, nbytes3);
				}

				((uintptr*)data2)[ofs] = typ;
			}
			break;
		}
	}
	runtime_unlock(&settype_lock);

	mp->settype_bufsize = 0;
}
// It is forbidden to use this function if it is possible that
// explicit deallocation via calling runtime_free(v) may happen.
void
runtime_settype(void *v, uintptr t)
{
	M *mp;
	uintptr *buf;
	uintptr i;
	MSpan *s;

	if(t == 0)
		runtime_throw("settype: zero type");
	mp = runtime_m();
	buf = mp->settype_buf;
	i = mp->settype_bufsize;
	buf[i+0] = (uintptr)v;
	buf[i+1] = t;
	i += 2;
	mp->settype_bufsize = i;

	if(i == nelem(mp->settype_buf)) {
		runtime_settype_flush(mp, false);
	}
	if(DebugTypeAtBlockEnd) {
		s = runtime_MHeap_Lookup(&runtime_mheap, v);
		*(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t;
	}
}
void
runtime_settype_sysfree(MSpan *s)
{
	uintptr ntypes, nbytes;

	if(!s->types.sysalloc)
		return;

	nbytes = (uintptr)-1;
	switch (s->types.compression) {
	case MTypes_Words:
		ntypes = (s->npages << PageShift) / s->elemsize;
		nbytes = ntypes * sizeof(uintptr);
		break;
	case MTypes_Bytes:
		ntypes = (s->npages << PageShift) / s->elemsize;
		nbytes = 8*sizeof(uintptr) + 1*ntypes;
		break;
	}
	if(nbytes != (uintptr)-1) {
		if(0) runtime_printf("settype: SysFree(%p,%x)\n", (void*)s->types.data, (uint32)nbytes);
		runtime_SysFree((void*)s->types.data, nbytes);
	}
}
uintptr
runtime_gettype(void *v)
{
	MSpan *s;
	uintptr t, ofs;
	byte *data;

	s = runtime_MHeap_LookupMaybe(&runtime_mheap, v);
	if(s != nil) {
		t = 0;
		switch(s->types.compression) {
		case MTypes_Empty:
			break;
		case MTypes_Single:
			t = s->types.data;
			break;
		case MTypes_Words:
			ofs = (uintptr)v - (s->start<<PageShift);
			t = ((uintptr*)s->types.data)[ofs/s->elemsize];
			break;
		case MTypes_Bytes:
			ofs = (uintptr)v - (s->start<<PageShift);
			data = (byte*)s->types.data;
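			// Two-step decode: fetch the block's index byte, then look
			// it up in the 8-entry type table at the front of the data.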
			t = data[8*sizeof(uintptr) + ofs/s->elemsize];
			t = ((uintptr*)data)[t];
			break;
		default:
			runtime_throw("runtime_gettype: invalid compression kind");
		}
		if(0) {
			runtime_lock(&settype_lock);
			runtime_printf("%p -> %d,%X\n", v, (int32)s->types.compression, (int64)t);
			runtime_unlock(&settype_lock);
		}
		return t;
	}
	return 0;
}
void*
runtime_mal(uintptr n)
{
	return runtime_mallocgc(n, 0, 1, 1);
}
void*
runtime_new(const Type *typ)
{
	void *ret;
	uint32 flag;

	if(raceenabled)
		runtime_m()->racepc = runtime_getcallerpc(&typ);
	flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
	ret = runtime_mallocgc(typ->__size, flag, 1, 1);

	if(UseSpanType && !flag) {
		if(0) {
			runtime_printf("new %S: %p\n", *typ->__reflection, ret);
		}
		runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
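		// Type descriptors are word-aligned, so the low bits of the
		// stored word are free to carry the TypeInfo kind (here,
		// TypeInfo_SingleObject).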
	}

	return ret;
}

func SetFinalizer(obj Eface, finalizer Eface) {
	byte *base;
	uintptr size;
	const FuncType *ft;

	if(obj.__type_descriptor == nil) {
		runtime_printf("runtime.SetFinalizer: first argument is nil interface\n");
		goto throw;
	}
	if(obj.__type_descriptor->__code != GO_PTR) {
		runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.__type_descriptor->__reflection);
		goto throw;
	}
	if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) {
		runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
		goto throw;
	}
	if(finalizer.__type_descriptor != nil) {
		if(finalizer.__type_descriptor->__code != GO_FUNC)
			goto badfunc;
		ft = (const FuncType*)finalizer.__type_descriptor;
		if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj.__type_descriptor))
			goto badfunc;
	}

	if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft)) {
		runtime_printf("runtime.SetFinalizer: finalizer already set\n");
		goto throw;
	}
	return;

badfunc:
	runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.__type_descriptor->__reflection, *obj.__type_descriptor->__reflection);
throw:
	runtime_throw("runtime.SetFinalizer");
}