// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// See malloc.h for overview.

// TODO(rsc): double-check stats.
package runtime
#include <stddef.h>
#include <errno.h>
#include <stdlib.h>
#include "go-alloc.h"
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "interface.h"
#include "go-type.h"
#include "typekind.h"
#include "race.h"
int32	runtime_checking;

extern MStats mstats;	// defined in zruntime_def_$GOOS_$GOARCH.go

extern volatile intgo runtime_MemProfileRate
  __asm__ (GOSYM_PREFIX "runtime.MemProfileRate");

// Allocate an object of at least size bytes.
// Small objects are allocated from the per-thread cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
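// For example, a 100-byte request is rounded up to its size class and
// handed out from the mcache free lists, while a 40 kB request exceeds
// MaxSmallSize and is carved as whole pages directly from the heap.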
void*
runtime_mallocgc(uintptr size, uint32 flag, int32 dogc, int32 zeroed)
{
	int32 sizeclass;
	intgo rate;
	MCache *c;
	uintptr npages;
	MSpan *s;
	void *v;
	bool incallback;

	incallback = false;
	if(m->mcache == nil && g->ncgo > 0) {
		// For gccgo this case can occur when a cgo or SWIG function
		// has an interface return type and the function
		// returns a non-pointer, so memory allocation occurs
		// after syscall.Cgocall but before syscall.CgocallDone.
		// We treat it as a callback.
		runtime_exitsyscall();
		incallback = true;
		dogc = false;
	}
	if(runtime_gcwaiting && g != m->g0 && m->locks == 0 && dogc) {
		runtime_gosched();
	}
	if(m->mallocing)
		runtime_throw("malloc/free - deadlock");
	m->mallocing = 1;

	if(DebugTypeAtBlockEnd)
		size += sizeof(uintptr);

	c = m->mcache;
	if(size <= MaxSmallSize) {
		// Allocate from mcache free lists.
		sizeclass = runtime_SizeToClass(size);
		size = runtime_class_to_size[sizeclass];
		v = runtime_MCache_Alloc(c, sizeclass, size, zeroed);
		if(v == nil)
			runtime_throw("out of memory");
		c->local_alloc += size;
		c->local_total_alloc += size;
		c->local_by_size[sizeclass].nmalloc++;
	} else {
		// TODO(rsc): Report tracebacks for very large allocations.

		// Allocate directly from heap.
		npages = size >> PageShift;
		if((size & PageMask) != 0)
			npages++;
		s = runtime_MHeap_Alloc(runtime_mheap, npages, 0, 1, zeroed);
		if(s == nil)
			runtime_throw("out of memory");
		size = npages<<PageShift;
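		// Example, assuming the usual 4 kB pages (PageShift == 12):
		// a 33000-byte request gives npages = 33000>>12 = 8; since
		// 33000 & PageMask is nonzero, npages becomes 9, so the block
		// is rounded up to 9<<12 = 36864 bytes.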
		c->local_alloc += size;
		c->local_total_alloc += size;
		v = (void*)(s->start << PageShift);

		// setup for mark sweep
		runtime_markspan(v, 0, 0, true);
	}
	if(sizeof(void*) == 4 && c->local_total_alloc >= (1<<30)) {
		// purge cache stats to prevent overflow
		runtime_lock(runtime_mheap);
		runtime_purgecachedstats(c);
		runtime_unlock(runtime_mheap);
	}

	if(!(flag & FlagNoGC))
		runtime_markallocated(v, size, (flag&FlagNoPointers) != 0);

	if(DebugTypeAtBlockEnd)
		*(uintptr*)((uintptr)v+size-sizeof(uintptr)) = 0;
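	// The trailing word zeroed above is reserved for the type pointer
	// that runtime_settype stores at the end of the block when
	// DebugTypeAtBlockEnd is enabled.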
	m->mallocing = 0;

	if(!(flag & FlagNoProfiling) && (rate = runtime_MemProfileRate) > 0) {
		if(size >= (uint32) rate)
			goto profile;
		if((uint32) m->mcache->next_sample > size)
			m->mcache->next_sample -= size;
		else {
			// pick next profile time
			// If you change this, also change allocmcache.
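			// Drawing next_sample uniformly from [0, 2*rate) makes
			// the expected distance between samples rate bytes of
			// allocation.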
			if(rate > 0x3fffffff)	// make 2*rate not overflow
				rate = 0x3fffffff;
			m->mcache->next_sample = runtime_fastrand1() % (2*rate);
		profile:
			runtime_setblockspecial(v, true);
			runtime_MProf_Malloc(v, size);
		}
	}
	if(dogc && mstats.heap_alloc >= mstats.next_gc)
		runtime_gc(0);

	if(raceenabled)
		runtime_racemalloc(v, size, m->racepc);

	if(incallback)
		runtime_entersyscall();

	return v;
}
void*
__go_alloc(uintptr size)
{
	return runtime_mallocgc(size, 0, 0, 1);
}
// Free the object whose base pointer is v.
void
__go_free(void *v)
{
	int32 sizeclass;
	MSpan *s;
	MCache *c;
	uint32 prof;
	uintptr size;

	if(v == nil)
		return;

	// If you change this also change mgc0.c:/^sweep,
	// which has a copy of the guts of free.
	if(m->mallocing)
		runtime_throw("malloc/free - deadlock");
	m->mallocing = 1;

	if(!runtime_mlookup(v, nil, nil, &s)) {
		runtime_printf("free %p: not an allocated block\n", v);
		runtime_throw("free runtime_mlookup");
	}
	prof = runtime_blockspecial(v);

	if(raceenabled)
		runtime_racefree(v);
	// Find size class for v.
	sizeclass = s->sizeclass;
	c = m->mcache;
	if(sizeclass == 0) {
		// Large object.
		size = s->npages<<PageShift;
		*(uintptr*)(s->start<<PageShift) = (uintptr)0xfeedfeedfeedfeedll;	// mark as "needs to be zeroed"
		// Must mark v freed before calling unmarkspan and MHeap_Free:
		// they might coalesce v into other spans and change the bitmap further.
		runtime_markfreed(v, size);
		runtime_unmarkspan(v, 1<<PageShift);
		runtime_MHeap_Free(runtime_mheap, s, 1);
	} else {
		// Small object.
		size = runtime_class_to_size[sizeclass];
		if(size > sizeof(uintptr))
			((uintptr*)v)[1] = (uintptr)0xfeedfeedfeedfeedll;	// mark as "needs to be zeroed"
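		// The mark goes in the second word because the first word of
		// a freed block is reused as the free-list link once the block
		// goes back on the mcache free list.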
		// Must mark v freed before calling MCache_Free:
		// it might coalesce v and other blocks into a bigger span
		// and change the bitmap further.
		runtime_markfreed(v, size);
		c->local_by_size[sizeclass].nfree++;
		runtime_MCache_Free(c, v, sizeclass, size);
	}
	c->local_alloc -= size;
	if(prof)
		runtime_MProf_Free(v, size);
	m->mallocing = 0;
}
int32
runtime_mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
{
	uintptr n, i;
	byte *p;
	MSpan *s;

	m->mcache->local_nlookup++;
	if(sizeof(void*) == 4 && m->mcache->local_nlookup >= (1<<30)) {
		// purge cache stats to prevent overflow
		runtime_lock(runtime_mheap);
		runtime_purgecachedstats(m->mcache);
		runtime_unlock(runtime_mheap);
	}
	s = runtime_MHeap_LookupMaybe(runtime_mheap, v);
	if(sp)
		*sp = s;
	if(s == nil) {
		runtime_checkfreed(v, 1);
		if(base)
			*base = nil;
		if(size)
			*size = 0;
		return 0;
	}

	p = (byte*)((uintptr)s->start<<PageShift);
	if(s->sizeclass == 0) {
		// Large object.
		if(base)
			*base = p;
		if(size)
			*size = s->npages<<PageShift;
		return 1;
	}
	if((byte*)v >= (byte*)s->limit) {
		// pointers past the last block do not count as pointers.
		return 0;
	}

	n = s->elemsize;
	i = ((byte*)v - p)/n;
	if(base)
		*base = p + i*n;
	if(size)
		*size = n;

	return 1;
}
MCache*
runtime_allocmcache(void)
{
	intgo rate;
	MCache *c;

	runtime_lock(runtime_mheap);
	c = runtime_FixAlloc_Alloc(&runtime_mheap->cachealloc);
	mstats.mcache_inuse = runtime_mheap->cachealloc.inuse;
	mstats.mcache_sys = runtime_mheap->cachealloc.sys;
	runtime_unlock(runtime_mheap);
	runtime_memclr((byte*)c, sizeof(*c));

	// Set first allocation sample size.
	rate = runtime_MemProfileRate;
	if(rate > 0x3fffffff)	// make 2*rate not overflow
		rate = 0x3fffffff;
	if(rate != 0)
		c->next_sample = runtime_fastrand1() % (2*rate);

	return c;
}
void
runtime_freemcache(MCache *c)
{
	runtime_MCache_ReleaseAll(c);
	runtime_lock(runtime_mheap);
	runtime_purgecachedstats(c);
	runtime_FixAlloc_Free(&runtime_mheap->cachealloc, c);
	runtime_unlock(runtime_mheap);
}
void
runtime_purgecachedstats(MCache *c)
{
	// Protected by either heap or GC lock.
	mstats.heap_alloc += c->local_cachealloc;
	c->local_cachealloc = 0;
	mstats.heap_objects += c->local_objects;
	c->local_objects = 0;
	mstats.nmalloc += c->local_nmalloc;
	c->local_nmalloc = 0;
	mstats.nfree += c->local_nfree;
	c->local_nfree = 0;
	mstats.nlookup += c->local_nlookup;
	c->local_nlookup = 0;
	mstats.alloc += c->local_alloc;
	c->local_alloc = 0;
	mstats.total_alloc += c->local_total_alloc;
	c->local_total_alloc = 0;
}
extern uintptr runtime_sizeof_C_MStats
  __asm__ (GOSYM_PREFIX "runtime.Sizeof_C_MStats");

#define MaxArena32 (2U<<30)
void
runtime_mallocinit(void)
{
	byte *p;
	uintptr arena_size, bitmap_size;
	extern byte _end[];
	byte *want;
	uintptr limit;

	runtime_sizeof_C_MStats = sizeof(MStats);

	p = nil;
	arena_size = 0;
	bitmap_size = 0;

	// for 64-bit build
	USED(p);
	USED(arena_size);
	USED(bitmap_size);

	if((runtime_mheap = runtime_SysAlloc(sizeof(*runtime_mheap))) == nil)
		runtime_throw("runtime: cannot allocate heap metadata");

	runtime_InitSizes();

	// limit = runtime_memlimit();
	// See https://code.google.com/p/go/issues/detail?id=5049
	// TODO(rsc): Fix after 1.1.
	limit = 0;
	// Set up the allocation arena, a contiguous area of memory where
	// allocated data will be found. The arena begins with a bitmap large
	// enough to hold 4 bits per allocated word.
	if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
		// On a 64-bit machine, allocate from a single contiguous reservation.
		// 128 GB (MaxMem) should be big enough for now.
		//
		// The code will work with the reservation at any address, but ask
		// SysReserve to use 0x000000c000000000 if possible.
		// Allocating a 128 GB region takes away 37 bits, and the amd64
		// doesn't let us choose the top 17 bits, so that leaves the 11 bits
		// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
		// UTF-8 sequences, and they are otherwise as far away from
		// ff (likely a common byte) as possible. An earlier attempt to use 0x11f8
		// caused out of memory errors on OS X during thread allocations.
		// These choices are both for debuggability and to reduce the
		// odds of the conservative garbage collector not collecting memory
		// because some non-pointer block of memory had a bit pattern
		// that matched a memory address.
		//
		// Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
		// but it hardly matters: e0 00 is not valid UTF-8 either.
		//
		// If this fails we fall back to the 32 bit memory mechanism.
		arena_size = MaxMem;
		bitmap_size = arena_size / (sizeof(void*)*8/4);
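		// The bitmap needs 4 bits per word: on 64-bit,
		// sizeof(void*)*8/4 == 16, so the bitmap is 1/16 the arena.
		// A 128 GB arena thus implies the 8 GB bitmap noted above,
		// 136 GB reserved in total.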
		p = runtime_SysReserve((void*)(0x00c0ULL<<32), bitmap_size + arena_size);
	}
	if(p == nil) {
		// On a 32-bit machine, we can't typically get away
		// with a giant virtual address space reservation.
		// Instead we map the memory information bitmap
		// immediately after the data segment, large enough
		// to handle another 2GB of mappings (256 MB),
		// along with a reservation for another 512 MB of memory.
		// When that gets used up, we'll start asking the kernel
		// for any memory anywhere and hope it's in the 2GB
		// following the bitmap (presumably the executable begins
		// near the bottom of memory, so we'll have to use up
		// most of memory before the kernel resorts to giving out
		// memory before the beginning of the text segment).
		//
		// Alternatively we could reserve 512 MB bitmap, enough
		// for 4GB of mappings, and then accept any memory the
		// kernel threw at us, but normally that's a waste of 512 MB
		// of address space, which is probably too much in a 32-bit world.
		bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
		arena_size = 512<<20;
		if(limit > 0 && arena_size+bitmap_size > limit) {
			bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
			arena_size = bitmap_size * 8;
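			// This keeps the same 8:1 arena:bitmap ratio by splitting
			// the budget into nine parts, one (page-aligned) for the
			// bitmap and eight for the arena; e.g. limit = 512 MB
			// gives roughly a 56 MB bitmap and a 455 MB arena.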
		}

		// SysReserve treats the address we ask for, end, as a hint,
		// not as an absolute requirement. If we ask for the end
		// of the data segment but the operating system requires
		// a little more space before we can start allocating, it will
		// give out a slightly higher pointer. Except QEMU, which
		// is buggy, as usual: it won't adjust the pointer upward.
		// So adjust it upward a little bit ourselves: 1/4 MB to get
		// away from the running binary image and then round up
		// to a MB boundary.
		want = (byte*)(((uintptr)_end + (1<<18) + (1<<20) - 1)&~((1<<20)-1));
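		// (1<<18) is the 1/4 MB cushion; adding (1<<20)-1 and masking
		// with ~((1<<20)-1) rounds the sum up to the next 1 MB
		// boundary.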
		if(0xffffffff - (uintptr)want <= bitmap_size + arena_size)
			want = 0;
		p = runtime_SysReserve(want, bitmap_size + arena_size);
		if(p == nil)
			runtime_throw("runtime: cannot reserve arena virtual address space");
		if((uintptr)p & (((uintptr)1<<PageShift)-1))
			runtime_printf("runtime: SysReserve returned unaligned address %p; asked for %p", p, bitmap_size+arena_size);
	}
	if((uintptr)p & (((uintptr)1<<PageShift)-1))
		runtime_throw("runtime: SysReserve returned unaligned address");
	runtime_mheap->bitmap = p;
	runtime_mheap->arena_start = p + bitmap_size;
	runtime_mheap->arena_used = runtime_mheap->arena_start;
	runtime_mheap->arena_end = runtime_mheap->arena_start + arena_size;

	// Initialize the rest of the allocator.
	runtime_MHeap_Init(runtime_mheap, runtime_SysAlloc);
	runtime_m()->mcache = runtime_allocmcache();

	// See if it works.
	runtime_free(runtime_malloc(1));
}
void*
runtime_MHeap_SysAlloc(MHeap *h, uintptr n)
{
	byte *p;

	if(n > (uintptr)(h->arena_end - h->arena_used)) {
		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
		// Reserve some more space.
		byte *new_end;
		uintptr needed;

		needed = (uintptr)h->arena_used + n - (uintptr)h->arena_end;
		// Round wanted arena size to a multiple of 256MB.
		needed = (needed + (256<<20) - 1) & ~((256<<20)-1);
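		// Same round-up idiom as in runtime_mallocinit: e.g. a needed
		// of 300 MB becomes 512 MB, the next multiple of 256 MB.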
		new_end = h->arena_end + needed;
		if(new_end <= h->arena_start + MaxArena32) {
			p = runtime_SysReserve(h->arena_end, new_end - h->arena_end);
			if(p == h->arena_end)
				h->arena_end = new_end;
		}
	}
	if(n <= (uintptr)(h->arena_end - h->arena_used)) {
		// Keep taking from our reservation.
		p = h->arena_used;
		runtime_SysMap(p, n);
		h->arena_used += n;
		runtime_MHeap_MapBits(h);
		if(raceenabled)
			runtime_racemapshadow(p, n);
		return p;
	}
	// If using 64-bit, our reservation is all we have.
	if(sizeof(void*) == 8 && (uintptr)h->bitmap >= 0xffffffffU)
		return nil;

	// On 32-bit, once the reservation is gone we can
	// try to get memory at a location chosen by the OS
	// and hope that it is in the range we allocated bitmap for.
	p = runtime_SysAlloc(n);
	if(p == nil)
		return nil;
	if(p < h->arena_start || (uintptr)(p+n - h->arena_start) >= MaxArena32) {
		runtime_printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
			p, h->arena_start, h->arena_start+MaxArena32);
		runtime_SysFree(p, n);
		return nil;
	}
	if(p+n > h->arena_used) {
		h->arena_used = p+n;
		if(h->arena_used > h->arena_end)
			h->arena_end = h->arena_used;
		runtime_MHeap_MapBits(h);
		if(raceenabled)
			runtime_racemapshadow(p, n);
	}

	return p;
}
static Lock settype_lock;

void
runtime_settype_flush(M *mp, bool sysalloc)
{
	uintptr *buf, *endbuf;
	uintptr size, ofs, j, t;
	uintptr ntypes, nbytes2, nbytes3;
	uintptr typ, p;
	MSpan *s;
	uintptr *data2;
	byte *data3;
	bool sysalloc3;
	void *v;

	buf = mp->settype_buf;
	endbuf = buf + mp->settype_bufsize;

	runtime_lock(&settype_lock);
	while(buf < endbuf) {
		v = (void*)*buf;
		*buf = 0;
		buf++;
		typ = *buf;
		buf++;

		// (Manually inlined copy of runtime_MHeap_Lookup)
		p = (uintptr)v>>PageShift;
		p -= (uintptr)runtime_mheap->arena_start >> PageShift;
		s = runtime_mheap->map[p];

		if(s->sizeclass == 0) {
			s->types.compression = MTypes_Single;
			s->types.data = typ;
			continue;
		}

		size = runtime_class_to_size[s->sizeclass];
		ofs = ((uintptr)v - (s->start<<PageShift)) / size;
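		// ofs is the object's index within its span: the byte offset
		// from the span base divided by the span's uniform object size.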
		switch(s->types.compression) {
		case MTypes_Empty:
			ntypes = (s->npages << PageShift) / size;
			nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
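			// MTypes_Bytes layout: an 8-entry table of type words at
			// the front, then one byte per object indexing into that
			// table, hence 8*sizeof(uintptr) + 1*ntypes bytes.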
			if(!sysalloc) {
				data3 = runtime_mallocgc(nbytes3, FlagNoProfiling|FlagNoPointers, 0, 1);
			} else {
				data3 = runtime_SysAlloc(nbytes3);
				if(data3 == nil)
					runtime_throw("runtime: cannot allocate memory");
				if(0) runtime_printf("settype(0->3): SysAlloc(%x) --> %p\n", (uint32)nbytes3, data3);
			}

			s->types.compression = MTypes_Bytes;
			s->types.sysalloc = sysalloc;
			s->types.data = (uintptr)data3;

			((uintptr*)data3)[1] = typ;
			data3[8*sizeof(uintptr) + ofs] = 1;
			break;
		case MTypes_Words:
			((uintptr*)s->types.data)[ofs] = typ;
			break;
		case MTypes_Bytes:
			data3 = (byte*)s->types.data;
			for(j=1; j<8; j++) {
				if(((uintptr*)data3)[j] == typ) {
					break;
				}
				if(((uintptr*)data3)[j] == 0) {
					((uintptr*)data3)[j] = typ;
					break;
				}
			}
			if(j < 8) {
				data3[8*sizeof(uintptr) + ofs] = j;
			} else {
				ntypes = (s->npages << PageShift) / size;
				nbytes2 = ntypes * sizeof(uintptr);

				if(!sysalloc) {
					data2 = runtime_mallocgc(nbytes2, FlagNoProfiling|FlagNoPointers, 0, 1);
				} else {
					data2 = runtime_SysAlloc(nbytes2);
					if(data2 == nil)
						runtime_throw("runtime: cannot allocate memory");
					if(0) runtime_printf("settype.(3->2): SysAlloc(%x) --> %p\n", (uint32)nbytes2, data2);
				}
				sysalloc3 = s->types.sysalloc;

				s->types.compression = MTypes_Words;
				s->types.sysalloc = sysalloc;
				s->types.data = (uintptr)data2;
				// Move the contents of data3 to data2. Then deallocate data3.
				for(j=0; j<ntypes; j++) {
					t = data3[8*sizeof(uintptr) + j];
					t = ((uintptr*)data3)[t];
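					// t now holds the real type word: the per-object
					// byte selected an entry in data3's 8-word table.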
					data2[j] = t;
				}
				if(sysalloc3) {
					nbytes3 = 8*sizeof(uintptr) + 1*ntypes;
					if(0) runtime_printf("settype.(3->2): SysFree(%p,%x)\n", data3, (uint32)nbytes3);
					runtime_SysFree(data3, nbytes3);
				}

				data2[ofs] = typ;
			}
			break;
		}
	}
	runtime_unlock(&settype_lock);

	mp->settype_bufsize = 0;
}
// It is forbidden to use this function if it is possible that
// explicit deallocation via calling runtime_free(v) may happen.
void
runtime_settype(void *v, uintptr t)
{
	M *mp;
	uintptr *buf;
	uintptr i;
	MSpan *s;

	if(t == 0)
		runtime_throw("settype: zero type");

	mp = runtime_m();
	buf = mp->settype_buf;
	i = mp->settype_bufsize;
	buf[i+0] = (uintptr)v;
	buf[i+1] = t;
	i += 2;
	mp->settype_bufsize = i;

	if(i == nelem(mp->settype_buf)) {
		runtime_settype_flush(mp, false);
	}

	if(DebugTypeAtBlockEnd) {
		s = runtime_MHeap_Lookup(runtime_mheap, v);
		*(uintptr*)((uintptr)v+s->elemsize-sizeof(uintptr)) = t;
	}
}
void
runtime_settype_sysfree(MSpan *s)
{
	uintptr ntypes, nbytes;

	if(!s->types.sysalloc)
		return;

	nbytes = (uintptr)-1;

	switch(s->types.compression) {
	case MTypes_Words:
		ntypes = (s->npages << PageShift) / s->elemsize;
		nbytes = ntypes * sizeof(uintptr);
		break;
	case MTypes_Bytes:
		ntypes = (s->npages << PageShift) / s->elemsize;
		nbytes = 8*sizeof(uintptr) + 1*ntypes;
		break;
	}

	if(nbytes != (uintptr)-1) {
		if(0) runtime_printf("settype: SysFree(%p,%x)\n", (void*)s->types.data, (uint32)nbytes);
		runtime_SysFree((void*)s->types.data, nbytes);
	}
}
uintptr
runtime_gettype(void *v)
{
	MSpan *s;
	uintptr t, ofs;
	byte *data;

	t = 0;
	s = runtime_MHeap_LookupMaybe(runtime_mheap, v);
	if(s != nil) {
		switch(s->types.compression) {
		case MTypes_Empty:
			break;
		case MTypes_Single:
			t = s->types.data;
			break;
		case MTypes_Words:
			ofs = (uintptr)v - (s->start<<PageShift);
			t = ((uintptr*)s->types.data)[ofs/s->elemsize];
			break;
		case MTypes_Bytes:
			ofs = (uintptr)v - (s->start<<PageShift);
			data = (byte*)s->types.data;
			t = data[8*sizeof(uintptr) + ofs/s->elemsize];
			t = ((uintptr*)data)[t];
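			// Two-step decode, the inverse of settype_flush: the
			// per-object byte indexes the 8-entry type-word table at
			// the front of the data block.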
721 runtime_throw("runtime_gettype: invalid compression kind");
724 runtime_lock(&settype_lock);
725 runtime_printf("%p -> %d,%X\n", v, (int32)s->types.compression, (int64)t);
726 runtime_unlock(&settype_lock);
void*
runtime_mal(uintptr n)
{
	return runtime_mallocgc(n, 0, 1, 1);
}
void*
runtime_new(const Type *typ)
{
	void *ret;
	uint32 flag;

	if(raceenabled)
		runtime_m()->racepc = runtime_getcallerpc(&typ);

	if(typ->__size == 0) {
		// All 0-length allocations use this pointer.
		// The language does not require the allocations to
		// have distinct values.
		ret = (uint8*)&runtime_zerobase;
	} else {
		flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
		ret = runtime_mallocgc(typ->__size, flag, 1, 1);

		if(UseSpanType && !flag) {
			if(0)
				runtime_printf("new %S: %p\n", *typ->__reflection, ret);
			runtime_settype(ret, (uintptr)typ | TypeInfo_SingleObject);
		}
	}

	return ret;
}
static void*
cnew(const Type *typ, intgo n, int32 objtyp)
{
	uint32 flag;
	void *ret;

	if((objtyp&(PtrSize-1)) != objtyp)
		runtime_throw("runtime: invalid objtyp");
	if(n < 0 || (typ->__size > 0 && (uintptr)n > (MaxMem/typ->__size)))
		runtime_panicstring("runtime: allocation size out of range");
	if(typ->__size == 0 || n == 0) {
		// All 0-length allocations use this pointer.
		// The language does not require the allocations to
		// have distinct values.
		return &runtime_zerobase;
	}
	flag = typ->__code&GO_NO_POINTERS ? FlagNoPointers : 0;
	ret = runtime_mallocgc(typ->__size*n, flag, 1, 1);
	if(UseSpanType && !flag) {
		if(0)
			runtime_printf("cnew [%D]%S: %p\n", (int64)n, *typ->__reflection, ret);
		runtime_settype(ret, (uintptr)typ | objtyp);
	}
	return ret;
}
// same as runtime_new, but callable from C
void*
runtime_cnew(const Type *typ)
{
	return cnew(typ, 1, TypeInfo_SingleObject);
}

void*
runtime_cnewarray(const Type *typ, intgo n)
{
	return cnew(typ, n, TypeInfo_Array);
}
func SetFinalizer(obj Eface, finalizer Eface) {
	byte *base;
	uintptr size;
	const FuncType *ft;

	if(obj.__type_descriptor == nil) {
		runtime_printf("runtime.SetFinalizer: first argument is nil interface\n");
		goto throw;
	}
	if(obj.__type_descriptor->__code != GO_PTR) {
		runtime_printf("runtime.SetFinalizer: first argument is %S, not pointer\n", *obj.__type_descriptor->__reflection);
		goto throw;
	}
	if(!runtime_mlookup(obj.__object, &base, &size, nil) || obj.__object != base) {
		runtime_printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
		goto throw;
	}

	ft = nil;
	if(finalizer.__type_descriptor != nil) {
		if(finalizer.__type_descriptor->__code != GO_FUNC)
			goto badfunc;
		ft = (const FuncType*)finalizer.__type_descriptor;
		if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj.__type_descriptor))
			goto badfunc;
	}

	if(!runtime_addfinalizer(obj.__object, finalizer.__type_descriptor != nil ? *(void**)finalizer.__object : nil, ft)) {
		runtime_printf("runtime.SetFinalizer: finalizer already set\n");
		goto throw;
	}
	return;

badfunc:
	runtime_printf("runtime.SetFinalizer: second argument is %S, not func(%S)\n", *finalizer.__type_descriptor->__reflection, *obj.__type_descriptor->__reflection);
throw:
	runtime_throw("runtime.SetFinalizer");
}