// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The gccgo version of mem_*.go.
package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)
// Functions called by C code.
//go:linkname sysAlloc runtime.sysAlloc
//extern mmap
func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer

//extern munmap
func munmap(addr unsafe.Pointer, length uintptr) int32

//extern mincore
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

//extern madvise
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32
var mmapFD = int32(-1)

var devZero = []byte("/dev/zero\x00")
func init() {
	if _MAP_ANON == 0 {
		// No anonymous mappings on this system; map /dev/zero instead.
		mmapFD = open(&devZero[0], 0 /* O_RDONLY */, 0)
		if mmapFD < 0 {
			println("open /dev/zero: errno=", errno())
			exit(2)
		}
	}
}
// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte
func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	for off := uintptr(0); off < n; off += physPageSize {
		// Use a length of 1 byte, which the kernel will round
		// up to one physical page regardless of the true
		// physical page size.
		errval := 0
		if mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) < 0 {
			errval = errno()
		}
		if errval == _ENOSYS {
			// mincore is not available on this system.
			// Assume the address is available.
			return true
		}
		if errval == _EINVAL {
			// Address is not a multiple of the physical
			// page size. Shouldn't happen, but just ignore it.
			continue
		}
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != _ENOMEM {
			return false
		}
	}
	return true
}
func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) unsafe.Pointer {
	p := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
		if uintptr(p) != _MAP_FAILED {
			munmap(p, n)
		}
		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p
}
// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
	if uintptr(p) == _MAP_FAILED {
		errval := errno()
		if errval == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if errval == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}
func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page to 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
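	//
	// (Illustrative arithmetic, not taken from the original comment:
	// with the default vm.max_map_count of 65530 and one 2MB huge
	// page per VMA, 65530 * 2MB is roughly 128GB, which is where the
	// 128GB figure above comes from.)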
	if sys.HugePageSize != 0 && _MADV_NOHUGEPAGE != 0 {
		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}
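
		// Illustrative example of the rounding (not in the original
		// source): with s = 0x200000 (2MB) and v = 0x1234567,
		// uintptr(v) &^ (s-1) is 0x1200000, the start of the huge
		// page containing v.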

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+sys.HugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}
	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	if _MADV_DONTNEED != 0 {
		madvise(v, n, _MADV_DONTNEED)
	} else if _MADV_FREE != 0 {
		madvise(v, n, _MADV_FREE)
	}
}
func sysUsed(v unsafe.Pointer, n uintptr) {
	if sys.HugePageSize != 0 && _MADV_HUGEPAGE != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = sys.HugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)
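
		// Illustrative example (not in the original source): with
		// s = 2MB, v = 0x1234567, and n = 0x600000, beg rounds up to
		// 0x1400000 and end rounds down to 0x1800000, so exactly the
		// two whole huge pages inside [v, v+n) have huge pages
		// re-enabled.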

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}
// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0)
}
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
		if p != v {
			if uintptr(p) != _MAP_FAILED {
				munmap(p, 64<<10)
			}
			return nil
		}
		// The 64K probe succeeded at the requested address; release it
		// and report the region as unreserved.
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
	if uintptr(p) == _MAP_FAILED {
		return nil
	}
	*reserved = true
	return p
}
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
	if !reserved {
		flags := int32(_MAP_ANON | _MAP_PRIVATE)
		if GOOS == "dragonfly" {
			// TODO(jsing): For some reason DragonFly seems to return
			// memory at a different address than we requested, even when
			// there should be no reason for it to do so. This can be
			// avoided by using MAP_FIXED, but I'm not sure we should need
			// to do this - we do not on other platforms.
			flags |= _MAP_FIXED
		}
		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0)
		if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v {
			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
			throw("runtime: address space conflict")
		}
		return
	}

	if GOOS == "aix" {
		// AIX does not allow mapping a range that is already mapped.
		// So always unmap first even if it is already unmapped.
		munmap(v, n)
	}
	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0)
	if uintptr(p) == _MAP_FAILED && errno() == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v {
		throw("runtime: cannot map pages in arena address space")
	}
}