/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
16 #include "hphp/util/alloc.h"
27 #include <sys/sysctl.h>
30 #include <folly/portability/SysMman.h>
31 #include <folly/portability/SysResource.h>
33 #include "hphp/util/address-range.h"
34 #include "hphp/util/bump-mapper.h"
35 #include "hphp/util/extent-hooks.h"
36 #include "hphp/util/hugetlb.h"
37 #include "hphp/util/kernel-version.h"
38 #include "hphp/util/managed-arena.h"
39 #include "hphp/util/numa.h"
40 #include "hphp/util/slab-manager.h"
///////////////////////////////////////////////////////////////////////////////
void flush_thread_caches() {
#ifdef USE_JEMALLOC
  mallctlCall<true>("thread.tcache.flush");
#if USE_JEMALLOC_EXTENT_HOOKS
  arenas_thread_flush();
#endif
#endif
}
__thread int32_t s_numaNode;

__thread uintptr_t s_stackLimit;
__thread size_t s_stackSize;
const size_t s_pageSize = sysconf(_SC_PAGESIZE);

__thread MemBlock s_tlSpace;
__thread MemBlock s_hugeRange;

__thread TLStaticArena* tl_static_arena;
bool s_enable_static_arena = false;
static NEVER_INLINE uintptr_t get_stack_top() {
  using ActRec = char; // only needed so DECLARE_FRAME_POINTER has a type to use
  DECLARE_FRAME_POINTER(fp);
  return uintptr_t(fp) - s_pageSize;
}
void init_stack_limits(pthread_attr_t* attr) {
  size_t stacksize, guardsize;
  void* stackaddr;

#ifndef __APPLE__
  if (pthread_attr_getstack(attr, &stackaddr, &stacksize) != 0) {
    always_assert(false);
  }
#else
  // We must use the following (undocumented) APIs because pthread_attr_getstack
  // returns incorrect values on OSX.
  pthread_t self = pthread_self();
  stackaddr = pthread_get_stackaddr_np(self);
  stacksize = pthread_get_stacksize_np(self);

  // On OSX 10.9, we are lied to about the main thread's stack size. Set it to
  // the minimum stack size, which is set earlier by execute_program_impl.
  if (pthread_main_np() == 1) {
    if (s_stackSize < kStackSizeMinimum) {
      char osRelease[256]; // buffer size chosen generously
      size_t osReleaseSize = sizeof(osRelease);
      if (sysctlbyname("kern.osrelease", osRelease, &osReleaseSize,
                       nullptr, 0) == 0) {
        if (atoi(osRelease) >= 13) {
          stacksize = kStackSizeMinimum;
        }
      }
    }
  }

  // stackaddr is not base, but top of the stack. Yes, really.
  stackaddr = ((char*) stackaddr) - stacksize;
#endif

  // Get the guard page's size, because the stack address returned
  // above starts at the guard page, so the thread's stack limit is
  // stackaddr + guardsize.
  if (pthread_attr_getguardsize(attr, &guardsize) != 0) {
    guardsize = 0;
  }

  assert(stackaddr != nullptr);
  assert(stacksize >= PTHREAD_STACK_MIN);
  s_stackLimit = uintptr_t(stackaddr) + guardsize;
  s_stackSize = stacksize - guardsize;

  // The main thread's native stack may be larger than desired if
  // set_stack_size() failed. Make sure that even if the native stack is
  // extremely large (in which case anonymous mmap() could map some of the
  // "stack space"), we can differentiate between the part of the native stack
  // that could conceivably be used in practice and all anonymous mmap() memory.
  struct rlimit rlim;
  if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur == RLIM_INFINITY &&
      s_stackSize > kStackSizeMinimum) {
    s_stackLimit += s_stackSize - kStackSizeMinimum;
    s_stackSize = kStackSizeMinimum;
  }
}
void flush_thread_stack() {
  uintptr_t top = get_stack_top() & ~(s_pageSize - 1);
  auto const hugeBase = reinterpret_cast<uintptr_t>(s_hugeRange.ptr);
  if (top > hugeBase) top = hugeBase;
  if (top <= s_stackLimit) return;
  size_t len = top - s_stackLimit;
  if (madvise((void*)s_stackLimit, len, MADV_DONTNEED) != 0 &&
      errno != EAGAIN) {          // EAGAIN is assumed benign here
    fprintf(stderr, "%s failed to madvise with error %d\n", __func__, errno);
  }
}
ssize_t purgeable_bytes() {
  return s_pageSize * mallctl_all_pdirty();
}
#if !defined USE_JEMALLOC || !defined HAVE_NUMA
void set_numa_binding(int node) {}
void* mallocx_on_node(size_t size, int node, size_t align) {
  void* ret = nullptr;
  posix_memalign(&ret, align, size);
  return ret;
}
#endif
unsigned low_arena = 0;
unsigned lower_arena = 0;
unsigned low_cold_arena = 0;
unsigned high_arena = 0;
unsigned high_cold_arena = 0;
__thread unsigned local_arena = 0;

int low_arena_flags = 0;
int lower_arena_flags = 0;
int low_cold_arena_flags = 0;
int high_cold_arena_flags = 0;
__thread int high_arena_flags = 0;
__thread int local_arena_flags = 0;
#if USE_JEMALLOC_EXTENT_HOOKS
// Keep track of the size of recently freed memory that might be in the high1g
// arena when it is disabled, so that we know when to reenable it.
std::atomic_uint g_highArenaRecentlyFreed;

alloc::BumpFileMapper* cold_file_mapper = nullptr;

// Customized hooks to use 1g pages for jemalloc metadata.
static extent_hooks_t huge_page_metadata_hooks;
static extent_alloc_t* orig_alloc = nullptr;

static bool enableArenaMetadata1GPage = false;
static bool enableNumaArenaMetadata1GPage = false;
// jemalloc metadata is allocated through the internal base allocator, which
// expands memory with an increasingly larger sequence. The default reserved
// space (216MB) is a sum of the sequence, from 2MB to 40MB.
static size_t a0MetadataReservedSize = 0;
static std::atomic<bool> jemallocMetadataCanUseHuge(false);
static void* a0ReservedBase = nullptr;
static std::atomic<size_t> a0ReservedLeft(0);

// Explicit per-thread tcaches for the arenas that need them.
// In jemalloc/include/jemalloc/jemalloc_macros.h.in, we have
// #define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1)
__thread int high_arena_tcache = -1;
__thread int local_arena_tcache = -1;
#endif

static unsigned base_arena;
#if defined USE_JEMALLOC && defined HAVE_NUMA
void set_numa_binding(int node) {
  if (node < 0) return;                 // thread not created from JobQueue
  s_numaNode = node;
  unsigned arena = base_arena + node;
  mallctlWrite("thread.arena", arena);

  numa_sched_setaffinity(0, node_to_cpu_mask[node]);
  numa_set_interleave_mask(numa_no_nodes_ptr);
  bitmask* nodes = numa_allocate_nodemask();
  numa_bitmask_setbit(nodes, node);
  numa_set_membind(nodes);
  numa_bitmask_free(nodes);
}

void* mallocx_on_node(size_t size, int node, size_t align) {
  assert((align & (align - 1)) == 0);
  int flags = MALLOCX_ALIGN(align);
  if (node < 0) return mallocx(size, flags);
  int arena = base_arena + node;
  flags |= MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE;
  return mallocx(size, flags);
}
#endif
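
// A minimal usage sketch (hypothetical caller, not part of this file): a
// worker pinned to NUMA node 1 could request a node-local, cache-line-aligned
// buffer and release it with free(), since jemalloc's mallocx() memory is
// free()-able:
//
//   void* buf = mallocx_on_node(4096 /* bytes */, 1 /* node */, 64 /* align */);
//   // ... use buf ...
//   free(buf);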
#if USE_JEMALLOC_EXTENT_HOOKS
using namespace alloc;

RangeMapper* getMapperChain(RangeState& range, unsigned n1GPages,
                            bool use2MPages, unsigned n2MPages,
                            bool useNormalPages,
                            int numaMask, short nextNode) {
  RangeMapper* head = nullptr;
  RangeMapper** ptail = &head;
  if (n1GPages) {
    RangeMapper::append(ptail,
                        new Bump1GMapper(range, n1GPages, numaMask, nextNode));
  }
  if (use2MPages) {
    RangeMapper::append(ptail, new Bump2MMapper(range, n2MPages, numaMask));
  }
  if (useNormalPages) {
    RangeMapper::append(ptail, new BumpNormalMapper(range, 0, numaMask));
  }
  return head;
}
// Find the first 2M mapper for the range, and grant it some 2M page budget.
// Return the actual number of pages granted. The actual number can be
// different from the input, because some part of the range may have already
// been mapped in.
unsigned allocate2MPagesToRange(AddrRangeClass c, unsigned pages) {
  auto& range = getRange(c);
  auto mapper = range.getLowMapper();
  if (!mapper) return 0;
  // Search for the first 2M mapper.
  do {
    if (auto mapper2m = dynamic_cast<Bump2MMapper*>(mapper)) {
      const unsigned maxPages = (range.capacity() - range.mapped()) / size2m;
      auto const assigned = std::min(pages, maxPages);
      mapper2m->setMaxPages(assigned);
      return assigned;
    }
    mapper = mapper->next();
  } while (mapper);
  return 0;
}
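
// Usage sketch (hypothetical numbers): grant up to 128 2M pages (256MB) of
// huge-page budget to the Low range; the return value is the number of pages
// actually granted, which may be smaller if part of the range is already
// mapped:
//
//   unsigned granted = allocate2MPagesToRange(AddrRangeClass::Low, 128);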
void setup_low_arena(PageSpec s) {
  auto const lowArenaStart = lowArenaMinAddr();
  assert(reinterpret_cast<uintptr_t>(sbrk(0)) <= lowArenaStart);
  always_assert_flog(lowArenaStart <= (2ull << 30),
                     "low arena min addr ({}) must be <= 2GB",
                     lowArenaStart);
  // Initialize mappers for the VeryLow and Low address ranges.
  auto& veryLowRange = getRange(AddrRangeClass::VeryLow);
  auto& lowRange = getRange(AddrRangeClass::Low);
  auto& emergencyRange = getRange(AddrRangeClass::LowEmergency);
  auto veryLowMapper =
    getMapperChain(veryLowRange,
                   (s.n1GPages != 0) ? 1 : 0,
                   true, s.n2MPages,  // 2M
                   true,              // 4K pages as fallback (assumed)
                   numa_node_set, 0); // NUMA mask / next node (assumed)
  auto lowMapper =
    getMapperChain(lowRange,
                   (s.n1GPages > 1) ? (s.n1GPages - 1) : 0,
                   true, 0,           // 2M pages can be added later (assumed)
                   true,              // 4K pages as fallback (assumed)
                   numa_node_set, 1); // NUMA mask / next node (assumed)
  auto emergencyMapper =
    new BumpEmergencyMapper([]{kill(getpid(), SIGTERM);}, emergencyRange);
  veryLowRange.setLowMapper(veryLowMapper);
  lowRange.setLowMapper(lowMapper);
  emergencyRange.setLowMapper(emergencyMapper);

  auto veryLowColdMapper =
    new BumpNormalMapper<Direction::HighToLow>(veryLowRange, 0, numa_node_set);
  auto lowColdMapper =
    new BumpNormalMapper<Direction::HighToLow>(lowRange, 0, numa_node_set);
  veryLowRange.setHighMapper(veryLowColdMapper);
  lowRange.setHighMapper(lowColdMapper);

  auto ma = LowArena::CreateAt(&g_lowArena);
  ma->appendMapper(lowMapper);
  ma->appendMapper(veryLowMapper);
  ma->appendMapper(emergencyMapper);
  low_arena = ma->id();
  low_arena_flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;

  ma = LowArena::CreateAt(&g_lowerArena);
  ma->appendMapper(veryLowMapper);
  ma->appendMapper(lowMapper);
  ma->appendMapper(emergencyMapper);
  lower_arena = ma->id();
  lower_arena_flags = MALLOCX_ARENA(lower_arena) | MALLOCX_TCACHE_NONE;

  ma = LowArena::CreateAt(&g_lowColdArena);
  ma->appendMapper(lowColdMapper);
  ma->appendMapper(veryLowColdMapper);
  ma->appendMapper(emergencyMapper);
  low_cold_arena = ma->id();
  low_cold_arena_flags = MALLOCX_ARENA(low_cold_arena) | MALLOCX_TCACHE_NONE;
}
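
// Sketch of how the computed flags are meant to be consumed (hypothetical
// caller): once setup_low_arena() has run, an allocation below 4GB can be made
// directly through jemalloc with the arena-selecting flags; tcache is bypassed
// by design for these arenas:
//
//   void* p = mallocx(64, low_arena_flags);
//   dallocx(p, low_arena_flags);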
void setup_high_arena(PageSpec s) {
  auto& range = getRange(AddrRangeClass::Uncounted);
  auto mapper = getMapperChain(range, s.n1GPages,
                               true, s.n2MPages, // 2M pages can be added later
                               true, // use normal pages
                               numa_node_set,
                               num_numa_nodes() / 2 + 1);
  range.setLowMapper(mapper);

  auto arena = HighArena::CreateAt(&g_highArena);
  arena->appendMapper(range.getLowMapper());
  high_arena = arena->id();

  auto& fileRange = getRange(AddrRangeClass::UncountedCold);
  cold_file_mapper = new BumpFileMapper(fileRange);
  fileRange.setLowMapper(cold_file_mapper);
  auto coldMapper =
    new BumpNormalMapper<Direction::HighToLow>(range, 0, numa_node_set);
  range.setHighMapper(coldMapper);
  auto coldArena = HighArena::CreateAt(&g_coldArena);
  coldArena->appendMapper(cold_file_mapper);
  coldArena->appendMapper(coldMapper);
  high_cold_arena = coldArena->id();
  high_cold_arena_flags = MALLOCX_ARENA(high_cold_arena) | MALLOCX_TCACHE_NONE;
}
void setup_arena0(PageSpec s) {
  size_t size = size1g * s.n1GPages + size2m * s.n2MPages;
  if (size == 0) return;
  // Give arena 0 some huge pages, starting at 2TB.
  auto ret = mmap(reinterpret_cast<void*>(kArena0Base),
                  size + size1g, PROT_NONE,
                  MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                  -1, 0);
  auto base = reinterpret_cast<uintptr_t>(ret);
  if (auto r = base % size1g) { // align to 1G boundary
    base = base + size1g - r;
  }
  assertx(base % size1g == 0);

  auto a0 = PreMappedArena::AttachTo(low_malloc(sizeof(PreMappedArena)), 0,
                                     base, base + size, Reserved{});
  auto mapper = getMapperChain(*a0, s.n1GPages,
                               s.n2MPages, s.n2MPages,
                               false,             // no normal pages (assumed)
                               numa_node_set, 0); // NUMA mask / next node (assumed)
  a0->setLowMapper(mapper);
}
// Set up extra arenas for use in non-VM threads, when we have short bursts of
// worker threads running, e.g., during deserialization of profile data.
static std::vector<std::pair<std::vector<DefaultArena*>,
                             std::atomic_uint*>> s_extra_arenas;
static unsigned s_extra_arena_per_node;
bool setup_extra_arenas(unsigned count) {
  if (count == 0) return false;
  // This may be called when we have many other threads running. So hold the
  // lock while making changes.
  static std::mutex lock;
  std::lock_guard<std::mutex> g(lock);
  // Only the first call allocates the arenas.
  if (!s_extra_arenas.empty()) {
    return count <= s_extra_arenas.size() * s_extra_arenas[0].first.size();
  }
  // `count` needs to be a multiple of `num_numa_nodes()`; if it isn't, we
  // round it up to make it easy to balance across nodes.
  auto const nNodes = std::max(1u, num_numa_nodes());
  s_extra_arena_per_node = (count + nNodes - 1) / nNodes;
  assert(s_extra_arena_per_node >= 1);
  s_extra_arenas.resize(nNodes);
  for (unsigned n = 0; n < nNodes; ++n) {
    s_extra_arenas[n].first.resize(s_extra_arena_per_node);
    auto constexpr kArenaSize =
      (sizeof(DefaultArena) + alignof(DefaultArena) - 1)
      / alignof(DefaultArena) * alignof(DefaultArena);
    auto const allocSize = kArenaSize * s_extra_arena_per_node
                           + sizeof(std::atomic_uint);
    void* addr = mallocx_on_node(allocSize, n, alignof(DefaultArena));
    memset(addr, 0, allocSize);
    for (unsigned i = 0; i < s_extra_arena_per_node; ++i) {
      s_extra_arenas[n].first[i] = DefaultArena::CreateAt(addr);
      addr = (char*)addr + kArenaSize;
    }
    s_extra_arenas[n].second = static_cast<std::atomic_uint*>(addr);
  }
  return true;
}
DefaultArena* next_extra_arena(int node) {
  if (s_extra_arena_per_node == 0) return nullptr;
  if (node >= s_extra_arenas.size()) return nullptr;
  if (node < 0) node = 0;
  auto const n = static_cast<unsigned>(node);
  auto counter = s_extra_arenas[n].second;
  auto const next = counter->fetch_add(1, std::memory_order_relaxed);
  return s_extra_arenas[n].first[next % s_extra_arena_per_node];
}
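
// Usage sketch (hypothetical caller): a short-lived worker thread can spread
// its allocations across the extra arenas in round-robin fashion, feeding the
// arena id back to jemalloc via MALLOCX_ARENA (an id() accessor is assumed to
// exist on DefaultArena, as it does on the other arena types used above):
//
//   if (auto a = next_extra_arena(s_numaNode)) {
//     int flags = MALLOCX_ARENA(a->id()) | MALLOCX_TCACHE_NONE;
//     void* p = mallocx(1024, flags);
//     // ... use p ...
//     dallocx(p, flags);
//   }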
void* huge_page_extent_alloc(extent_hooks_t* extent_hooks, void* addr,
                             size_t size, size_t alignment, bool* zero,
                             bool* commit, unsigned arena_ind) {
  // This is used for arena 0's extent_alloc. No malloc / free allowed within
  // this function since reentrancy is not supported for a0's extent hooks.

  // Note that only metadata will use 2M alignment (size will be a multiple of
  // 2M as well). Aligned allocation doesn't require alignment by default,
  // because of the way virtual memory is expanded with opt.retain (which is
  // the default). The current extent hook API has no other way to tell if the
  // allocation is for metadata. The next major jemalloc release will include
  // this information in the API.
  if (!jemallocMetadataCanUseHuge.load() || alignment != size2m) {
    goto default_alloc;
  }

  assert(a0ReservedBase != nullptr && (size & (size2m - 1)) == 0);
  if (arena_ind == 0) {
    size_t oldValue;
    while (size <= (oldValue = a0ReservedLeft.load())) {
      // Try placing a0 metadata on 1G huge pages.
      if (a0ReservedLeft.compare_exchange_weak(oldValue, oldValue - size)) {
        assert((oldValue & (size2m - 1)) == 0);
        return
          reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(a0ReservedBase) +
                                  (a0MetadataReservedSize - oldValue));
      }
    }
  } else if (auto ma = alloc::highArena()) {
    // For non arena 0: malloc / free allowed in this branch.
    void* ret = ma->extent_alloc(extent_hooks, addr, size, alignment, zero,
                                 commit, high_arena);
    if (ret != nullptr) return ret;
  }

default_alloc:
  return orig_alloc(extent_hooks, addr, size, alignment, zero,
                    commit, arena_ind);
}
/*
 * Customize arena 0's extent hook to use 1g pages for metadata.
 */
void setup_jemalloc_metadata_extent_hook(bool enable, bool enable_numa_arena,
                                         size_t reserved) {
#if !JEMALLOC_METADATA_1G_PAGES
  return;
#endif
  assert(!jemallocMetadataCanUseHuge.load());
  enableArenaMetadata1GPage = enable;
  enableNumaArenaMetadata1GPage = enable_numa_arena;
  a0MetadataReservedSize = reserved;

  auto ma = alloc::highArena();
  if (!ma) return;

  bool retain_enabled = false;
  mallctlRead("opt.retain", &retain_enabled);
  if (!enableArenaMetadata1GPage || !retain_enabled) return;

  bool zero = true, commit = true;
  void* ret = ma->extent_alloc(nullptr, nullptr, a0MetadataReservedSize, size2m,
                               &zero, &commit, high_arena);
  if (ret == nullptr) return;

  a0ReservedBase = ret;
  a0ReservedLeft.store(a0MetadataReservedSize);

  extent_hooks_t* orig_hooks;
  int err = mallctlRead<extent_hooks_t*, true>("arena.0.extent_hooks",
                                               &orig_hooks);
  if (err) return;

  orig_alloc = orig_hooks->alloc;
  huge_page_metadata_hooks = *orig_hooks;
  huge_page_metadata_hooks.alloc = &huge_page_extent_alloc;

  err = mallctlWrite<extent_hooks_t*, true>("arena.0.extent_hooks",
                                            &huge_page_metadata_hooks);
  if (err) return;

  jemallocMetadataCanUseHuge.store(true);
}
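
// Usage sketch (hypothetical values): reserve 256MB of huge-page-backed space
// for arena 0 metadata early in process startup, before heavy allocation:
//
//   setup_jemalloc_metadata_extent_hook(true  /* enable */,
//                                       true  /* enable_numa_arena */,
//                                       256u << 20 /* reserved bytes */);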
void arenas_thread_init() {
  if (high_arena_tcache == -1) {
    mallctlRead<int, true>("tcache.create", &high_arena_tcache);
    high_arena_flags =
      MALLOCX_ARENA(high_arena) | MALLOCX_TCACHE(high_arena_tcache);
  }
  if (local_arena_tcache == -1) {
    local_arena = get_local_arena(s_numaNode);
    if (local_arena) {
      mallctlRead<int, true>("tcache.create", &local_arena_tcache);
      local_arena_flags =
        MALLOCX_ARENA(local_arena) | MALLOCX_TCACHE(local_arena_tcache);
    }
  }
  if (s_enable_static_arena) {
    assertx(!tl_static_arena);
    constexpr size_t kStaticArenaChunkSize = 256 * 1024;
    static TaggedSlabList s_static_pool;
    tl_static_arena = new TLStaticArena(kStaticArenaChunkSize, &s_static_pool);
  }
}
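
// Usage sketch (hypothetical caller): after arenas_thread_init() has run on a
// thread, uncounted allocations can be routed to the high arena with a
// per-thread tcache via the flags computed above:
//
//   void* p = mallocx(4096, high_arena_flags);
//   dallocx(p, high_arena_flags);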
void arenas_thread_flush() {
  // It is OK if flushing fails.
  if (high_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.flush", high_arena_tcache);
  }
  if (local_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.flush", local_arena_tcache);
  }
}
void arenas_thread_exit() {
  if (high_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.destroy", high_arena_tcache);
    high_arena_tcache = -1;
    // Ideally we shouldn't read high_arena_flags any more, but just in case.
    high_arena_flags = MALLOCX_ARENA(high_arena) | MALLOCX_TCACHE_NONE;
  }
  if (local_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.destroy", local_arena_tcache);
    local_arena_tcache = -1;
    // Ideally we shouldn't read local_arena_flags any more, but just in case.
    local_arena_flags = MALLOCX_ARENA(local_arena) | MALLOCX_TCACHE_NONE;
  }
  if (tl_static_arena) {
    delete tl_static_arena;
    tl_static_arena = nullptr;
  }
}

#endif // USE_JEMALLOC_EXTENT_HOOKS
std::vector<SlabManager*> s_slab_managers;

void setup_local_arenas(PageSpec spec, unsigned slabs) {
  s_slab_managers.reserve(num_numa_nodes());
  slabs /= num_numa_nodes();

  mallctlRead<unsigned>("arenas.narenas", &base_arena); // throw upon failure
  // The default arenas, one per NUMA node.
  for (int i = 0; i < num_numa_nodes(); i++) {
    unsigned arena = 0;
    mallctlRead<unsigned>("arenas.create", &arena);
    always_assert(arena == base_arena + i);
    if (slabs) {
      auto mem = low_malloc(sizeof(SlabManager));
      s_slab_managers.push_back(new (mem) SlabManager);
    } else {
      s_slab_managers.push_back(nullptr);
    }
  }
#if USE_JEMALLOC_EXTENT_HOOKS
  spec.n1GPages = std::min(spec.n1GPages, get_huge1g_info().nr_hugepages);
  spec.n1GPages /= num_numa_nodes();
  spec.n2MPages = std::min(spec.n2MPages, get_huge2m_info().nr_hugepages);
  spec.n2MPages /= num_numa_nodes();
  const size_t reserveSize =
    spec.n1GPages * size1g + spec.n2MPages * size2m;
  if (reserveSize == 0) return;

  g_local_arenas.resize(num_numa_nodes(), 0);
  for (unsigned i = 0; i < num_numa_nodes(); ++i) {
    static_assert(kLocalArenaMinAddr % size1g == 0, "");
    auto const desiredBase = kLocalArenaMinAddr + i * kLocalArenaSizeLimit;
    // Try to get the desired address range, but don't use MAP_FIXED.
    auto ret = mmap(reinterpret_cast<void*>(desiredBase),
                    reserveSize + size1g, PROT_NONE,
                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                    -1, 0);
    if (ret == MAP_FAILED) {
      throw std::runtime_error{"mmap() failed to reserve address range"};
    }
    auto base = reinterpret_cast<uintptr_t>(ret);
    if (base % size1g) { // adjust to start at 1GB boundary
      auto const newBase = (base + size1g - 1) & ~(size1g - 1);
      munmap(reinterpret_cast<void*>(base), newBase - base);
      base = newBase;
    }
    assert(base % size1g == 0);
    auto arena = PreMappedArena::CreateAt(low_malloc(sizeof(PreMappedArena)),
                                          base, base + reserveSize, Reserved{});
    auto mapper = getMapperChain(*arena,
                                 spec.n1GPages,
                                 true,     // allow 2M mappings (assumed)
                                 spec.n2MPages,
                                 false,    // don't use normal pages
                                 1u << i,  // per-node NUMA mask (assumed)
                                 i);       // next node (assumed)
    // Allocate some slabs first, which are not given to the arena, but managed
    // separately by the slab manager.
    auto const totalSlabSize = std::min(slabs * kSlabSize, reserveSize);
    if (totalSlabSize) {
      auto slabRange = mapper->alloc(totalSlabSize, kSlabAlign);
      if (slabRange.ptr) {
        s_slab_managers[i]->addRange<true>(slabRange, totalSlabSize);
      }
    }
    if (totalSlabSize == reserveSize) continue;
    arena->setLowMapper(mapper);
    g_local_arenas[i] = arena;
  }
#endif
}
unsigned get_local_arena(uint32_t node) {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (node >= g_local_arenas.size()) return 0;
  auto const arena = g_local_arenas[node];
  if (arena == nullptr) return 0;
  return arena->id();
#else
  return 0;
#endif
}
SlabManager* get_local_slab_manager(uint32_t node) {
  if (node >= s_slab_managers.size()) return nullptr;
  return s_slab_managers[node];
}

void shutdown_slab_managers() {
  for (auto slab_manager : s_slab_managers) {
    if (slab_manager) slab_manager->shutdown();
  }
}

#endif // USE_JEMALLOC
ssize_t get_free_slab_bytes() {
  ssize_t bytes = 0;
#ifdef USE_JEMALLOC
  for (auto const slabManager : s_slab_managers) {
    if (slabManager) {
      bytes += slabManager->bytes();
    }
  }
#endif // USE_JEMALLOC
  return bytes;
}
struct JEMallocInitializer {
  JEMallocInitializer() {
    // The following comes from malloc_extension.cc in google-perftools.
#ifdef __GLIBC__
    // GNU libc++ versions 3.3 and 3.4 obey the environment variables
    // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting
    // one of these variables forces the STL default allocator to call
    // new() or delete() for each allocation or deletion. Otherwise
    // the STL allocator tries to avoid the high cost of doing
    // allocations by pooling memory internally.
    setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite */);
    setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite */);

    // Now we need to make the setenv 'stick', which it may not do since
    // the env is flaky before main() is called. But luckily stl only
    // looks at this env var the first time it tries to do an alloc, and
    // caches what it finds. So we just cause an stl alloc here.
    std::string dummy("I need to be allocated");
    dummy += "!"; // so the definition of dummy isn't optimized out
#endif /* __GLIBC__ */

    // Enable backtracing through PHP frames (t9814472).
    setenv("UNW_RBP_ALWAYS_VALID", "1", false);
#if !USE_JEMALLOC_EXTENT_HOOKS
    // Create the legacy low arena that uses brk() instead of mmap(). When
    // using newer versions of jemalloc, we use extent hooks to get more
    // control. If the mallctl fails, it will always_assert in mallctlHelper.
    if (mallctlRead<unsigned, true>("arenas.create", &low_arena)) {
      return;
    }
    char buf[32];
    snprintf(buf, sizeof(buf), "arena.%u.dss", low_arena);
    if (mallctlWrite<const char*, true>(buf, "primary") != 0) {
      return;
    }
    low_arena_flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;
    lower_arena = low_arena;
    lower_arena_flags = low_arena_flags;
    low_cold_arena = low_arena;
    low_cold_arena_flags = low_arena_flags;

    // We normally maintain the invariant that the region surrounding the
    // current brk is mapped huge, but we don't know yet whether huge pages
    // are enabled for low memory. Round up to the start of a huge page,
    // and set the high water mark to one below.
    constexpr size_t kHugePageSize = size2m;
    constexpr size_t kHugePageMask = kHugePageSize - 1;
    unsigned leftInPage = kHugePageSize - (uintptr_t(sbrk(0)) & kHugePageMask);
    (void) sbrk(leftInPage);
    assert((uintptr_t(sbrk(0)) & kHugePageMask) == 0);
#else // USE_JEMALLOC_EXTENT_HOOKS
    unsigned low_1g_pages = 0;
    if (char* buffer = getenv("HHVM_LOW_1G_PAGE")) {
      if (!sscanf(buffer, "%u", &low_1g_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_LOW_1G_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned high_1g_pages = 0;
    if (char* buffer = getenv("HHVM_HIGH_1G_PAGE")) {
      if (!sscanf(buffer, "%u", &high_1g_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_HIGH_1G_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned low_2m_pages = 0;
    if (char* buffer = getenv("HHVM_LOW_2M_PAGE")) {
      if (!sscanf(buffer, "%u", &low_2m_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_LOW_2M_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned high_2m_pages = 0;
    if (char* buffer = getenv("HHVM_HIGH_2M_PAGE")) {
      if (!sscanf(buffer, "%u", &high_2m_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_HIGH_2M_PAGE: %s\n", buffer);
        abort();
      }
    }
    HugePageInfo info = get_huge1g_info();
    unsigned remaining = static_cast<unsigned>(info.nr_hugepages);
    if (remaining == 0) {
      low_1g_pages = high_1g_pages = 0;
    } else if (low_1g_pages > 0 || high_1g_pages > 0) {
      KernelVersion version;
      if (version.m_major < 3 ||
          (version.m_major == 3 && version.m_minor < 9)) {
        // Older kernels need an explicit hugetlbfs mount point.
        find_hugetlbfs_path() || auto_mount_hugetlbfs();
      }
    }
    // Do some allocation between low and high 1G arenas. We use at most 2 1G
    // pages for the low 1G arena; usually 1 is good enough.
    auto const origLow1G = low_1g_pages;
    auto const origHigh1G = high_1g_pages;
    if (low_1g_pages > 0) {
      if (low_1g_pages > 2) {
        low_1g_pages = 2;
      }
      if (low_1g_pages + high_1g_pages > remaining) {
        low_1g_pages = 1;
      }
      assert(remaining >= low_1g_pages);
      remaining -= low_1g_pages;
    }
    if (origLow1G) {
      fprintf(stderr,
              "using %u (specified %u) 1G huge pages for low arena\n",
              low_1g_pages, origLow1G);
    }
    setup_low_arena({low_1g_pages, low_2m_pages});

    if (high_1g_pages > remaining) {
      high_1g_pages = remaining;
    }
    if (origHigh1G) {
      fprintf(stderr,
              "using %u (specified %u) 1G huge pages for high arena\n",
              high_1g_pages, origHigh1G);
    }
    setup_high_arena({high_1g_pages, high_2m_pages});
    // Make sure high/low arenas are available to the current thread.
    arenas_thread_init();
#endif
    // Initialize global mibs.
    init_mallctl_mibs();
  }
};
#if defined(__GNUC__) && !defined(__APPLE__)
// Construct this object before any others.
// 101 is the highest priority allowed by the init_priority attribute.
// http://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/C_002b_002b-Attributes.html
#define MAX_CONSTRUCTOR_PRIORITY __attribute__((__init_priority__(101)))
#else
// init_priority is a gcc extension, so we can't use it on other compilers.
// However, since constructor ordering is only known to be an issue with
// GNU libc++, we're probably OK on other compilers, so let the situation pass
// silently instead of issuing a warning.
#define MAX_CONSTRUCTOR_PRIORITY
#endif

static JEMallocInitializer initJEMalloc MAX_CONSTRUCTOR_PRIORITY;
void low_2m_pages(uint32_t pages) {
#if USE_JEMALLOC_EXTENT_HOOKS
  pages -= allocate2MPagesToRange(AddrRangeClass::VeryLow, pages);
  allocate2MPagesToRange(AddrRangeClass::Low, pages);
#endif
}
void high_2m_pages(uint32_t pages) {
#if USE_JEMALLOC_EXTENT_HOOKS
  allocate2MPagesToRange(AddrRangeClass::Uncounted, pages);
#endif
}
void enable_high_cold_file() {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (cold_file_mapper) {
    cold_file_mapper->enable();
  }
#endif
}
void set_cold_file_dir(const char* dir) {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (cold_file_mapper) {
    cold_file_mapper->setDirectory(dir);
  }
#endif
}
///////////////////////////////////////////////////////////////////////////////
const char* malloc_conf = "narenas:1,lg_tcache_max:16"
#if (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR >= 1) || \
    (JEMALLOC_VERSION_MAJOR > 5) // requires jemalloc >= 5.1
    ",metadata_thp:disabled"
#endif
    ",prof:true,prof_active:false,prof_thread_active_init:false"