// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The gccgo version of mem_*.go.

package runtime

import (
	"runtime/internal/sys"
	"unsafe"
)

// Functions called by C code.
//go:linkname sysAlloc runtime.sysAlloc
//go:linkname sysFree runtime.sysFree

//extern mmap
func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) unsafe.Pointer

//extern munmap
func munmap(addr unsafe.Pointer, length uintptr) int32

//extern mincore
func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32

//extern madvise
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32

var mmapFD = int32(-1)

var devZero = []byte("/dev/zero\x00")

func init() {
	if _MAP_ANON == 0 {
		// Systems without an anonymous-mapping flag map /dev/zero instead.
		mmapFD = open(&devZero[0], 0 /* O_RDONLY */, 0)
		if mmapFD < 0 {
			println("open /dev/zero: errno=", errno())
			exit(2)
		}
	}
}
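
// mmap calls the C mmap via sysMmap and converts a _MAP_FAILED result into a
// Go-style (pointer, errno) pair; err == 0 means success.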
func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uintptr) (unsafe.Pointer, int) {
	p := sysMmap(addr, n, prot, flags, fd, off)
	if uintptr(p) == _MAP_FAILED {
		return nil, errno()
	}
	return p, 0
}

// NOTE: vec must be just 1 byte long here.
// Mincore returns ENOMEM if any of the pages are unmapped,
// but we want to know that all of the pages are unmapped.
// To make these the same, we can only ask about one page
// at a time. See golang.org/issue/7476.
var addrspace_vec [1]byte
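
// addrspace_free reports whether the n bytes of address space starting at v
// are unmapped, probing one physical page at a time (see the note above).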
func addrspace_free(v unsafe.Pointer, n uintptr) bool {
	for off := uintptr(0); off < n; off += physPageSize {
		// Use a length of 1 byte, which the kernel will round
		// up to one physical page regardless of the true
		// physical page size.
		errval := 0
		if mincore(unsafe.Pointer(uintptr(v)+off), 1, &addrspace_vec[0]) < 0 {
			errval = errno()
		}
		if errval == _ENOSYS {
			// mincore is not available on this system.
			// Assume the address is available.
			return true
		}
		if errval == _EINVAL {
			// Address is not a multiple of the physical
			// page size. Shouldn't happen, but just ignore it.
			continue
		}
		// ENOMEM means unmapped, which is what we want.
		// Anything else we assume means the pages are mapped.
		if errval != _ENOMEM {
			return false
		}
	}
	return true
}
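
// mmap_fixed tries to map n bytes at address v. If the kernel places the
// mapping elsewhere but the requested range appears free, it retries with
// MAP_FIXED.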
func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uintptr) (unsafe.Pointer, int) {
	p, err := mmap(v, n, prot, flags, fd, offset)
	// On some systems, mmap ignores v without
	// MAP_FIXED, so retry if the address space is free.
	if p != v && addrspace_free(v, n) {
		if err == 0 {
			munmap(p, n)
		}
		p, err = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
	}
	return p, err
}

// Don't split the stack as this method may be invoked without a valid G, which
// prevents us from allocating more stack.
//go:nosplit
func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
	p, err := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
	if err != 0 {
		if err == _EACCES {
			print("runtime: mmap: access denied\n")
			exit(2)
		}
		if err == _EAGAIN {
			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
			exit(2)
		}
		return nil
	}
	mSysStatInc(sysStat, n)
	return p
}

func sysUnused(v unsafe.Pointer, n uintptr) {
	// By default, Linux's "transparent huge page" support will
	// merge pages into a huge page if there's even a single
	// present regular page, undoing the effects of the DONTNEED
	// below. On amd64, that means khugepaged can turn a single
	// 4KB page to 2MB, bloating the process's RSS by as much as
	// 512X. (See issue #8832 and Linux kernel bug
	// https://bugzilla.kernel.org/show_bug.cgi?id=93111)
	//
	// To work around this, we explicitly disable transparent huge
	// pages when we release pages of the heap. However, we have
	// to do this carefully because changing this flag tends to
	// split the VMA (memory mapping) containing v into three
	// VMAs in order to track the different values of the
	// MADV_NOHUGEPAGE flag in the different regions. There's a
	// default limit of 65530 VMAs per address space (sysctl
	// vm.max_map_count), so we must be careful not to create too
	// many VMAs (see issue #12233).
	//
	// Since huge pages are huge, there's little use in adjusting
	// the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
	// exploding the number of VMAs by only adjusting the
	// MADV_NOHUGEPAGE flag on a large granularity. This still
	// gets most of the benefit of huge pages while keeping the
	// number of VMAs under control. With hugePageSize = 2MB, even
	// a pessimal heap can reach 128GB before running out of VMAs.
	if sys.HugePageSize != 0 && _MADV_NOHUGEPAGE != 0 {
		var s uintptr = sys.HugePageSize // division by constant 0 is a compile-time error :(

		// If it's a large allocation, we want to leave huge
		// pages enabled. Hence, we only adjust the huge page
		// flag on the huge pages containing v and v+n-1, and
		// only if those aren't aligned.
		var head, tail uintptr
		if uintptr(v)%s != 0 {
			// Compute huge page containing v.
			head = uintptr(v) &^ (s - 1)
		}
		if (uintptr(v)+n)%s != 0 {
			// Compute huge page containing v+n-1.
			tail = (uintptr(v) + n - 1) &^ (s - 1)
		}
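
		// For example, with s = 2MB (0x200000), v = 0x5ff000 and n = 0x4000
		// (illustrative values): head = 0x400000 and tail = 0x600000, the two
		// adjacent huge pages straddled by [v, v+n), so the single 4MB madvise
		// below covers both.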

		// Note that madvise will return EINVAL if the flag is
		// already set, which is quite likely. We ignore
		// errors.
		if head != 0 && head+sys.HugePageSize == tail {
			// head and tail are different but adjacent,
			// so do this in one call.
			madvise(unsafe.Pointer(head), 2*sys.HugePageSize, _MADV_NOHUGEPAGE)
		} else {
			// Advise the huge pages containing v and v+n-1.
			if head != 0 {
				madvise(unsafe.Pointer(head), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
			if tail != 0 && tail != head {
				madvise(unsafe.Pointer(tail), sys.HugePageSize, _MADV_NOHUGEPAGE)
			}
		}
	}

	if uintptr(v)&(physPageSize-1) != 0 || n&(physPageSize-1) != 0 {
		// madvise will round this to any physical page
		// *covered* by this range, so an unaligned madvise
		// will release more memory than intended.
		throw("unaligned sysUnused")
	}

	if _MADV_DONTNEED != 0 {
		madvise(v, n, _MADV_DONTNEED)
	} else if _MADV_FREE != 0 {
		madvise(v, n, _MADV_FREE)
	}
}

func sysUsed(v unsafe.Pointer, n uintptr) {
	if sys.HugePageSize != 0 && _MADV_HUGEPAGE != 0 {
		// Partially undo the NOHUGEPAGE marks from sysUnused
		// for whole huge pages between v and v+n. This may
		// leave huge pages off at the end points v and v+n
		// even though allocations may cover these entire huge
		// pages. We could detect this and undo NOHUGEPAGE on
		// the end points as well, but it's probably not worth
		// the cost because when neighboring allocations are
		// freed sysUnused will just set NOHUGEPAGE again.
		var s uintptr = sys.HugePageSize

		// Round v up to a huge page boundary.
		beg := (uintptr(v) + (s - 1)) &^ (s - 1)
		// Round v+n down to a huge page boundary.
		end := (uintptr(v) + n) &^ (s - 1)
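
		// For example, with s = 2MB (0x200000), v = 0x401000 and n = 0x600000
		// (illustrative values): beg = 0x600000 and end = 0xa00000, so only the
		// two huge pages lying wholly inside [v, v+n) get MADV_HUGEPAGE back.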

		if beg < end {
			madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
		}
	}
}

// Don't split the stack as this function may be invoked without a valid G,
// which prevents us from allocating more stack.
//go:nosplit
func sysFree(v unsafe.Pointer, n uintptr, sysStat *uint64) {
	mSysStatDec(sysStat, n)
	munmap(v, n)
}
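
// sysFault remaps the range as PROT_NONE so that any access to it faults.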
func sysFault(v unsafe.Pointer, n uintptr) {
	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, mmapFD, 0)
}
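
// sysReserve reserves address space without committing memory. It sets
// *reserved to report whether the returned range is actually mapped
// (PROT_NONE) or only probed for availability.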
func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
	// On 64-bit, people with ulimit -v set complain if we reserve too
	// much address space. Instead, assume that the reservation is okay
	// if we can reserve at least 64K and check the assumption in SysMap.
	// Only user-mode Linux (UML) rejects these requests.
	if sys.PtrSize == 8 && uint64(n) > 1<<32 {
		p, err := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
		if p != v || err != 0 {
			if err == 0 {
				munmap(p, 64<<10)
			}
			return nil
		}
		munmap(p, 64<<10)
		*reserved = false
		return v
	}

	p, err := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, mmapFD, 0)
	if err != 0 {
		return nil
	}
	*reserved = true
	return p
}
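
// sysMap makes a region previously obtained from sysReserve readable and
// writable, mapping it for real if it was only probed.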
func sysMap(v unsafe.Pointer, n uintptr, reserved bool, sysStat *uint64) {
	mSysStatInc(sysStat, n)

	// On 64-bit, we don't actually have v reserved, so tread carefully.
	if !reserved {
		flags := int32(_MAP_ANON | _MAP_PRIVATE)
		if GOOS == "dragonfly" {
			// TODO(jsing): For some reason DragonFly seems to return
			// memory at a different address than we requested, even when
			// there should be no reason for it to do so. This can be
			// avoided by using MAP_FIXED, but I'm not sure we should need
			// to do this - we do not on other platforms.
			flags |= _MAP_FIXED
		}
		p, err := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, flags, mmapFD, 0)
		if err == _ENOMEM {
			throw("runtime: out of memory")
		}
		if p != v || err != 0 {
			print("runtime: address space conflict: map(", v, ") = ", p, " (err ", err, ")\n")
			throw("runtime: address space conflict")
		}
		return
	}

	if GOOS == "aix" {
		// AIX does not allow mapping a range that is already mapped.
		// So always unmap first even if it is already unmapped.
		munmap(v, n)
	}
	p, err := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, mmapFD, 0)
	if err == _ENOMEM {
		throw("runtime: out of memory")
	}
	if p != v || err != 0 {
		throw("runtime: cannot map pages in arena address space")
	}
}