/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/util/alloc.h"

#include <atomic>
#include <mutex>

#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>

#ifdef __APPLE__
#include <sys/sysctl.h>
#endif

#include <folly/portability/SysMman.h>
#include <folly/portability/SysResource.h>

#include "hphp/util/address-range.h"
#include "hphp/util/bump-mapper.h"
#include "hphp/util/extent-hooks.h"
#include "hphp/util/hugetlb.h"
#include "hphp/util/kernel-version.h"
#include "hphp/util/managed-arena.h"
#include "hphp/util/numa.h"
#include "hphp/util/slab-manager.h"

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

void flush_thread_caches() {
#ifdef USE_JEMALLOC
  mallctlCall<true>("thread.tcache.flush");
#if USE_JEMALLOC_EXTENT_HOOKS
  arenas_thread_flush();
#endif
#endif
}

__thread int32_t s_numaNode;

__thread uintptr_t s_stackLimit;
__thread size_t s_stackSize;
const size_t s_pageSize = sysconf(_SC_PAGESIZE);

__thread MemBlock s_tlSpace;
__thread MemBlock s_hugeRange;

__thread TLStaticArena* tl_static_arena;
bool s_enable_static_arena = false;
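
// Return the current frame pointer backed off by one page; everything between
// the stack limit and this address is treated as unused stack that is safe to
// discard (see flush_thread_stack() below).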
static NEVER_INLINE uintptr_t get_stack_top() {
  using ActRec = char;
  DECLARE_FRAME_POINTER(fp);
  return uintptr_t(fp) - s_pageSize;
}

void init_stack_limits(pthread_attr_t* attr) {
  size_t stacksize, guardsize;
  void *stackaddr;
  struct rlimit rlim;

#ifndef __APPLE__
  if (pthread_attr_getstack(attr, &stackaddr, &stacksize) != 0) {
    always_assert(false);
  }
#else
  // We must use the following (undocumented) APIs because pthread_attr_getstack
  // returns incorrect values on OSX.
  pthread_t self = pthread_self();
  stackaddr = pthread_get_stackaddr_np(self);
  stacksize = pthread_get_stacksize_np(self);

  // On OSX 10.9, we are lied to about the main thread's stack size.  Set it to
  // the minimum stack size, which is set earlier by execute_program_impl.
  if (pthread_main_np() == 1) {
    if (s_stackSize < kStackSizeMinimum) {
      char osRelease[256];
      size_t osReleaseSize = sizeof(osRelease);
      if (sysctlbyname("kern.osrelease", osRelease, &osReleaseSize,
                       nullptr, 0) == 0) {
        if (atoi(osRelease) >= 13) {
          stacksize = kStackSizeMinimum;
        }
      }
    }
  }

  // stackaddr is not base, but top of the stack. Yes, really.
  stackaddr = ((char*) stackaddr) - stacksize;
#endif

  // Get the guard page's size, because the stack address returned
  // above starts at the guard page, so the thread's stack limit is
  // stackaddr + guardsize.
  if (pthread_attr_getguardsize(attr, &guardsize) != 0) {
    guardsize = 0;
  }

  assert(stackaddr != nullptr);
  assert(stacksize >= PTHREAD_STACK_MIN);
  s_stackLimit = uintptr_t(stackaddr) + guardsize;
  s_stackSize = stacksize - guardsize;

  // The main thread's native stack may be larger than desired if
  // set_stack_size() failed.  Make sure that even if the native stack is
  // extremely large (in which case anonymous mmap() could map some of the
  // "stack space"), we can differentiate between the part of the native stack
  // that could conceivably be used in practice and all anonymous mmap() memory.
  if (getrlimit(RLIMIT_STACK, &rlim) == 0 && rlim.rlim_cur == RLIM_INFINITY &&
      s_stackSize > kStackSizeMinimum) {
    s_stackLimit += s_stackSize - kStackSizeMinimum;
    s_stackSize = kStackSizeMinimum;
  }
}

void flush_thread_stack() {
  uintptr_t top = get_stack_top() & ~(s_pageSize - 1);
  auto const hugeBase = reinterpret_cast<uintptr_t>(s_hugeRange.ptr);
  if (top > hugeBase) top = hugeBase;
  if (top <= s_stackLimit) return;
  size_t len = top - s_stackLimit;
  if (madvise((void*)s_stackLimit, len, MADV_DONTNEED) != 0 &&
      errno != EAGAIN) {
    fprintf(stderr, "%s failed to madvise with error %d\n", __func__, errno);
  }
}

ssize_t purgeable_bytes() {
#ifdef USE_JEMALLOC
  return s_pageSize * mallctl_all_pdirty();
#else
  return 0;
#endif
}

#if !defined USE_JEMALLOC || !defined HAVE_NUMA
void set_numa_binding(int node) {}
void* mallocx_on_node(size_t size, int node, size_t align) {
  void* ret = nullptr;
  posix_memalign(&ret, align, size);
  return ret;
}
#endif

#ifdef USE_JEMALLOC
unsigned low_arena = 0;
unsigned lower_arena = 0;
unsigned low_cold_arena = 0;
unsigned high_arena = 0;
unsigned high_cold_arena = 0;
__thread unsigned local_arena = 0;

int low_arena_flags = 0;
int lower_arena_flags = 0;
int low_cold_arena_flags = 0;
int high_cold_arena_flags = 0;
__thread int high_arena_flags = 0;
__thread int local_arena_flags = 0;

#if USE_JEMALLOC_EXTENT_HOOKS
// Keep track of the size of recently freed memory that might be in the high1g
// arena when it is disabled, so that we know when to reenable it.
std::atomic_uint g_highArenaRecentlyFreed;

alloc::BumpFileMapper* cold_file_mapper = nullptr;

// Customized hooks to use 1g pages for jemalloc metadata.
static extent_hooks_t huge_page_metadata_hooks;
static extent_alloc_t* orig_alloc = nullptr;

static bool enableArenaMetadata1GPage = false;
static bool enableNumaArenaMetadata1GPage = false;
// jemalloc metadata is allocated through the internal base allocator, which
// expands memory with an increasingly larger sequence.  The default reserved
// space (216MB) is a sum of the sequence, from 2MB to 40MB.
static size_t a0MetadataReservedSize = 0;
static std::atomic<bool> jemallocMetadataCanUseHuge(false);
static void* a0ReservedBase = nullptr;
static std::atomic<size_t> a0ReservedLeft(0);

// Explicit per-thread tcache for arenas needing it.
// In jemalloc/include/jemalloc/jemalloc_macros.h.in, we have
// #define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1)
__thread int high_arena_tcache = -1;
__thread int local_arena_tcache = -1;
#endif

static unsigned base_arena;

#ifdef HAVE_NUMA
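
// Bind the calling thread to the given NUMA node: route its default jemalloc
// allocations to that node's arena (base_arena + node) and, when NUMA is in
// use, restrict the thread's CPU affinity and memory policy to the node.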
void set_numa_binding(int node) {
  if (node < 0) return;                 // thread not created from JobQueue
  s_numaNode = node;
  unsigned arena = base_arena + node;
  mallctlWrite("thread.arena", arena);

  if (use_numa) {
    numa_sched_setaffinity(0, node_to_cpu_mask[node]);
    numa_set_interleave_mask(numa_no_nodes_ptr);
    bitmask* nodes = numa_allocate_nodemask();
    numa_bitmask_setbit(nodes, node);
    numa_set_membind(nodes);
    numa_bitmask_free(nodes);
  }
}

void* mallocx_on_node(size_t size, int node, size_t align) {
  assert((align & (align - 1)) == 0);
  int flags = MALLOCX_ALIGN(align);
  if (node < 0) return mallocx(size, flags);
  int arena = base_arena + node;
  flags |= MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE;
  return mallocx(size, flags);
}

#endif // HAVE_NUMA

#if USE_JEMALLOC_EXTENT_HOOKS
using namespace alloc;
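
// Build a chain of mappers for the given range, in decreasing page-size
// order: 1G huge pages (if requested), then 2M huge pages, then normal 4K
// pages, so that the smaller page sizes serve as fallbacks once the bigger
// page budgets are exhausted.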
static NEVER_INLINE
RangeMapper* getMapperChain(RangeState& range, unsigned n1GPages,
                            bool use2MPages, unsigned n2MPages,
                            bool useNormalPages,
                            int numaMask, short nextNode) {
  RangeMapper* head = nullptr;
  RangeMapper** ptail = &head;
  if (n1GPages) {
    RangeMapper::append(ptail,
                        new Bump1GMapper(range, n1GPages, numaMask, nextNode));
  }
  if (use2MPages) {
    RangeMapper::append(ptail, new Bump2MMapper(range, n2MPages, numaMask));
  }
  if (useNormalPages) {
    RangeMapper::append(ptail, new BumpNormalMapper(range, 0, numaMask));
  }
  assertx(head);
  return head;
}

// Find the first 2M mapper for the range, and grant it some 2M page budget.
// Return the actual number of pages granted. The actual number can be different
// from the input, because some part of the range may have already been mapped
// in.
unsigned allocate2MPagesToRange(AddrRangeClass c, unsigned pages) {
  auto& range = getRange(c);
  auto mapper = range.getLowMapper();
  if (!mapper) return 0;
  // Search for the first 2M mapper.
  do {
    if (auto mapper2m = dynamic_cast<Bump2MMapper*>(mapper)) {
      const unsigned maxPages = (range.capacity() - range.mapped()) / size2m;
      auto const assigned = std::min(pages, maxPages);
      mapper2m->setMaxPages(assigned);
      return assigned;
    }
    mapper = mapper->next();
  } while (mapper);
  return 0;
}
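
// Set up the arenas that serve low memory.  The VeryLow and Low ranges each
// get a bottom-up mapper chain for hot data and a top-down normal-page mapper
// for cold data; the LowEmergency range is a last resort whose mapper is
// constructed with a callback that SIGTERMs the process.  Three arenas are
// layered over these ranges (low, lower, low_cold), differing in which
// mappers they use and in what order they try them.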
void setup_low_arena(PageSpec s) {
  auto const lowArenaStart = lowArenaMinAddr();
  assert(reinterpret_cast<uintptr_t>(sbrk(0)) <= lowArenaStart);
  always_assert_flog(lowArenaStart <= (2ull << 30),
                     "low arena min addr ({}) must be <= 2GB",
                     lowArenaStart);
  // Initialize mappers for the VeryLow and Low address ranges.
  auto& veryLowRange = getRange(AddrRangeClass::VeryLow);
  auto& lowRange = getRange(AddrRangeClass::Low);
  auto& emergencyRange = getRange(AddrRangeClass::LowEmergency);
  auto veryLowMapper =
    getMapperChain(veryLowRange,
                   (s.n1GPages != 0) ? 1 : 0,
                   true, s.n2MPages,            // 2M
                   true,                        // 4K
                   numa_node_set, 0);
  auto lowMapper =
    getMapperChain(lowRange,
                   (s.n1GPages > 1) ? (s.n1GPages - 1) : 0,
                   true, 0,                     // 2M
                   true,                        // 4K
                   numa_node_set, 1);
  auto emergencyMapper =
    new BumpEmergencyMapper([]{kill(getpid(), SIGTERM);}, emergencyRange);
  veryLowRange.setLowMapper(veryLowMapper);
  lowRange.setLowMapper(lowMapper);
  emergencyRange.setLowMapper(emergencyMapper);

  auto veryLowColdMapper =
    new BumpNormalMapper<Direction::HighToLow>(veryLowRange, 0, numa_node_set);
  auto lowColdMapper =
    new BumpNormalMapper<Direction::HighToLow>(lowRange, 0, numa_node_set);
  veryLowRange.setHighMapper(veryLowColdMapper);
  lowRange.setHighMapper(lowColdMapper);

  auto ma = LowArena::CreateAt(&g_lowArena);
  ma->appendMapper(lowMapper);
  ma->appendMapper(veryLowMapper);
  ma->appendMapper(emergencyMapper);
  low_arena = ma->id();
  low_arena_flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;

  ma = LowArena::CreateAt(&g_lowerArena);
  ma->appendMapper(veryLowMapper);
  ma->appendMapper(lowMapper);
  ma->appendMapper(emergencyMapper);
  lower_arena = ma->id();
  lower_arena_flags = MALLOCX_ARENA(lower_arena) | MALLOCX_TCACHE_NONE;

  ma = LowArena::CreateAt(&g_lowColdArena);
  ma->appendMapper(lowColdMapper);
  ma->appendMapper(veryLowColdMapper);
  ma->appendMapper(emergencyMapper);
  low_cold_arena = ma->id();
  low_cold_arena_flags = MALLOCX_ARENA(low_cold_arena) | MALLOCX_TCACHE_NONE;
}
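
// Set up the arenas above the low ranges.  The Uncounted range gets a
// bottom-up mapper chain for the main high arena, plus a top-down normal-page
// mapper and a file-backed mapper (cold_file_mapper) that together back the
// high cold arena.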
void setup_high_arena(PageSpec s) {
  auto& range = getRange(AddrRangeClass::Uncounted);
  auto mapper = getMapperChain(range, s.n1GPages,
                               true, s.n2MPages, // 2M pages can be added later
                               true,             // use normal pages
                               numa_node_set,
                               num_numa_nodes() / 2 + 1);
  range.setLowMapper(mapper);

  auto arena = HighArena::CreateAt(&g_highArena);
  arena->appendMapper(range.getLowMapper());
  high_arena = arena->id();

  auto& fileRange = getRange(AddrRangeClass::UncountedCold);
  cold_file_mapper = new BumpFileMapper(fileRange);
  fileRange.setLowMapper(cold_file_mapper);
  auto coldMapper =
    new BumpNormalMapper<Direction::HighToLow>(range, 0, numa_node_set);
  range.setHighMapper(coldMapper);
  auto coldArena = HighArena::CreateAt(&g_coldArena);
  coldArena->appendMapper(cold_file_mapper);
  coldArena->appendMapper(coldMapper);
  high_cold_arena = coldArena->id();
  high_cold_arena_flags = MALLOCX_ARENA(high_cold_arena) | MALLOCX_TCACHE_NONE;
}

void setup_arena0(PageSpec s) {
  size_t size = size1g * s.n1GPages + size2m * s.n2MPages;
  if (size == 0) return;
  // Give arena 0 some huge pages, starting at 2TB.
  auto ret = mmap(reinterpret_cast<void*>(kArena0Base),
                  size + size1g, PROT_NONE,
                  MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                  -1, 0);
  auto base = reinterpret_cast<uintptr_t>(ret);
  if (auto r = base % size1g) {         // align to 1G boundary
    base = base + size1g - r;
  }
  assertx(base % size1g == 0);

  auto a0 = PreMappedArena::AttachTo(low_malloc(sizeof(PreMappedArena)), 0,
                                     base, base + size, Reserved{});
  auto mapper = getMapperChain(*a0, s.n1GPages,
                               s.n2MPages, s.n2MPages,
                               false,
                               numa_node_set, 0);
  a0->setLowMapper(mapper);
  g_arena0 = a0;
}

// Set up extra arenas for use in non-VM threads, when we have short bursts of
// worker threads running, e.g., during deserialization of profile data.
static std::vector<std::pair<std::vector<DefaultArena*>,
                             std::atomic_uint*>> s_extra_arenas;
static unsigned s_extra_arena_per_node;
bool setup_extra_arenas(unsigned count) {
  if (count == 0) return false;
  // This may be called when we have many other threads running.  So hold the
  // lock while making changes.
  static std::mutex lock;
  std::lock_guard<std::mutex> g(lock);
  // Only the first call allocates the arenas.
  if (!s_extra_arenas.empty()) {
    return count <= s_extra_arenas.size() * s_extra_arenas[0].first.size();
  }
  // `count` needs to be a multiple of `num_numa_nodes()`; if it isn't, we
  // round it up to make it easy to balance across nodes.
  auto const nNodes = std::max(1u, num_numa_nodes());
  s_extra_arena_per_node = (count + nNodes - 1) / nNodes;
  assert(s_extra_arena_per_node >= 1);
  s_extra_arenas.resize(nNodes);
  for (unsigned n = 0; n < nNodes; ++n) {
    s_extra_arenas[n].first.resize(s_extra_arena_per_node);
    auto constexpr kArenaSize =
      (sizeof(DefaultArena) + alignof(DefaultArena) - 1)
      / alignof(DefaultArena) * alignof(DefaultArena);
    auto const allocSize = kArenaSize * s_extra_arena_per_node
                           + sizeof(std::atomic_uint);
    void* addr = mallocx_on_node(allocSize, n, alignof(DefaultArena));
    memset(addr, 0, allocSize);
    for (unsigned i = 0; i < s_extra_arena_per_node; ++i) {
      s_extra_arenas[n].first[i] = DefaultArena::CreateAt(addr);
      addr = (char*)addr + kArenaSize;
    }
    s_extra_arenas[n].second = static_cast<std::atomic_uint*>(addr);
  }
  return true;
}
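
// Hand out one of the extra arenas for the given NUMA node in round-robin
// order, using the per-node atomic counter stored right after that node's
// array of arenas.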
DefaultArena* next_extra_arena(int node) {
  if (s_extra_arena_per_node == 0) return nullptr;
  if (node >= s_extra_arenas.size()) return nullptr;
  if (node < 0) node = 0;
  auto const n = static_cast<unsigned>(node);
  auto counter = s_extra_arenas[n].second;
  auto const next = counter->fetch_add(1, std::memory_order_relaxed);
  return s_extra_arenas[n].first[next % s_extra_arena_per_node];
}
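
// Custom extent_alloc hook installed on arena 0 (see
// setup_jemalloc_metadata_extent_hook() below).  2M-aligned requests, which
// with opt.retain only come from metadata allocation, are carved out of the
// pre-reserved huge-page region with a lock-free CAS loop on a0ReservedLeft;
// everything else falls through to the original hook.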
void* huge_page_extent_alloc(extent_hooks_t* extent_hooks, void* addr,
                             size_t size, size_t alignment, bool* zero,
                             bool* commit, unsigned arena_ind) {
  // This is used for arena 0's extent_alloc.  No malloc / free allowed within
  // this function since reentrancy is not supported for a0's extent hooks.

  // Note that only metadata will use 2M alignment (size will be a multiple of
  // 2M as well). Aligned allocation doesn't require alignment by default,
  // because of the way virtual memory is expanded with opt.retain (which is
  // the default).  The current extent hook API has no other way to tell if the
  // allocation is for metadata.  The next major jemalloc release will include
  // this information in the API.
  if (!jemallocMetadataCanUseHuge.load() || alignment != size2m) {
    goto default_alloc;
  }

  assert(a0ReservedBase != nullptr && (size & (size2m - 1)) == 0);
  if (arena_ind == 0) {
    size_t oldValue;
    while (size <= (oldValue = a0ReservedLeft.load())) {
      // Try placing a0 metadata on 1G huge pages.
      if (a0ReservedLeft.compare_exchange_weak(oldValue, oldValue - size)) {
        assert((oldValue & (size2m - 1)) == 0);
        return
          reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(a0ReservedBase) +
                                  (a0MetadataReservedSize - oldValue));
      }
    }
  } else if (auto ma = alloc::highArena()) {
    // For non arena 0: malloc / free allowed in this branch.
    void* ret = ma->extent_alloc(extent_hooks, addr, size, alignment, zero,
                                 commit, high_arena);
    if (ret != nullptr) return ret;
  }
default_alloc:
  return orig_alloc(extent_hooks, addr, size, alignment, zero,
                    commit, arena_ind);
}

/*
 * Customize arena 0's extent hook to use 1g pages for metadata.
 */
void setup_jemalloc_metadata_extent_hook(bool enable, bool enable_numa_arena,
                                         size_t reserved) {
#if !JEMALLOC_METADATA_1G_PAGES
  return;
#endif
  assert(!jemallocMetadataCanUseHuge.load());
  enableArenaMetadata1GPage = enable;
  enableNumaArenaMetadata1GPage = enable_numa_arena;
  a0MetadataReservedSize = reserved;

  auto ma = alloc::highArena();
  if (!ma) return;
  bool retain_enabled = false;
  mallctlRead("opt.retain", &retain_enabled);
  if (!enableArenaMetadata1GPage || !retain_enabled) return;

  bool zero = true, commit = true;
  void* ret = ma->extent_alloc(nullptr, nullptr, a0MetadataReservedSize, size2m,
                               &zero, &commit, high_arena);
  if (!ret) return;

  a0ReservedBase = ret;
  a0ReservedLeft.store(a0MetadataReservedSize);

  extent_hooks_t* orig_hooks;
  int err = mallctlRead<extent_hooks_t*, true>("arena.0.extent_hooks",
                                               &orig_hooks);
  if (err) return;

  orig_alloc = orig_hooks->alloc;
  huge_page_metadata_hooks = *orig_hooks;
  huge_page_metadata_hooks.alloc = &huge_page_extent_alloc;

  err = mallctlWrite<extent_hooks_t*, true>("arena.0.extent_hooks",
                                            &huge_page_metadata_hooks);
  if (err) return;

  jemallocMetadataCanUseHuge.store(true);
}
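
// Per-thread arena initialization: create explicit tcaches for the high arena
// and, if this NUMA node has one, the local arena; record them in the
// per-thread mallocx() flags, and optionally set up the thread-local static
// arena.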
void arenas_thread_init() {
  if (high_arena_tcache == -1) {
    mallctlRead<int, true>("tcache.create", &high_arena_tcache);
    high_arena_flags =
      MALLOCX_ARENA(high_arena) | MALLOCX_TCACHE(high_arena_tcache);
  }
  if (local_arena_tcache == -1) {
    local_arena = get_local_arena(s_numaNode);
    if (local_arena) {
      mallctlRead<int, true>("tcache.create", &local_arena_tcache);
      local_arena_flags =
        MALLOCX_ARENA(local_arena) | MALLOCX_TCACHE(local_arena_tcache);
    }
  }
  if (s_enable_static_arena) {
    assertx(!tl_static_arena);
    constexpr size_t kStaticArenaChunkSize = 256 * 1024;
    static TaggedSlabList s_static_pool;
    tl_static_arena = new TLStaticArena(kStaticArenaChunkSize, &s_static_pool);
  }
}

void arenas_thread_flush() {
  // It is OK if flushing fails
  if (high_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.flush", high_arena_tcache);
  }
  if (local_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.flush", local_arena_tcache);
  }
}

void arenas_thread_exit() {
  if (high_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.destroy", high_arena_tcache);
    high_arena_tcache = -1;
    // Ideally we shouldn't read high_arena_flags any more, but just in case.
    high_arena_flags = MALLOCX_ARENA(high_arena) | MALLOCX_TCACHE_NONE;
  }
  if (local_arena_tcache != -1) {
    mallctlWrite<int, true>("tcache.destroy", local_arena_tcache);
    local_arena_tcache = -1;
    // Ideally we shouldn't read local_arena_flags any more, but just in case.
    local_arena_flags = MALLOCX_ARENA(local_arena) | MALLOCX_TCACHE_NONE;
  }
  if (tl_static_arena) {
    delete tl_static_arena;
    tl_static_arena = nullptr;
  }
}

#endif // USE_JEMALLOC_EXTENT_HOOKS

std::vector<SlabManager*> s_slab_managers;

void setup_local_arenas(PageSpec spec, unsigned slabs) {
  s_slab_managers.reserve(num_numa_nodes());
  slabs /= num_numa_nodes();

  mallctlRead<unsigned>("arenas.narenas", &base_arena); // throw upon failure
  // The default one per node.
  for (int i = 0; i < num_numa_nodes(); i++) {
    unsigned arena = 0;
    mallctlRead<unsigned>("arenas.create", &arena);
    always_assert(arena == base_arena + i);
    if (slabs) {
      auto mem = low_malloc(sizeof(SlabManager));
      s_slab_managers.push_back(new (mem) SlabManager);
    } else {
      s_slab_managers.push_back(nullptr);
    }
  }

#if USE_JEMALLOC_EXTENT_HOOKS
  spec.n1GPages = std::min(spec.n1GPages, get_huge1g_info().nr_hugepages);
  spec.n1GPages /= num_numa_nodes();
  spec.n2MPages = std::min(spec.n2MPages, get_huge2m_info().nr_hugepages);
  spec.n2MPages /= num_numa_nodes();
  const size_t reserveSize =
    spec.n1GPages * size1g + spec.n2MPages * size2m;
  if (reserveSize == 0) return;

  g_local_arenas.resize(num_numa_nodes(), 0);
  for (unsigned i = 0; i < num_numa_nodes(); ++i) {
    static_assert(kLocalArenaMinAddr % size1g == 0, "");
    auto const desiredBase = kLocalArenaMinAddr + i * kLocalArenaSizeLimit;
    // Try to get the desired address range, but don't use MAP_FIXED.
    auto ret = mmap(reinterpret_cast<void*>(desiredBase),
                    reserveSize + size1g, PROT_NONE,
                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
                    -1, 0);
    if (ret == MAP_FAILED) {
      throw std::runtime_error{"mmap() failed to reserve address range"};
    }
    auto base = reinterpret_cast<uintptr_t>(ret);
    if (base % size1g) {                // adjust to start at 1GB boundary
      auto const newBase = (base + size1g - 1) & ~(size1g - 1);
      munmap(reinterpret_cast<void*>(base), newBase - base);
      base = newBase;
    }
    assert(base % size1g == 0);
    auto arena = PreMappedArena::CreateAt(low_malloc(sizeof(PreMappedArena)),
                                          base, base + reserveSize, Reserved{});
    auto mapper = getMapperChain(*arena,
                                 spec.n1GPages,
                                 (bool)spec.n2MPages,
                                 spec.n2MPages,
                                 false,         // don't use normal pages
                                 1u << i,
                                 i);
    // Allocate some slabs first, which are not given to the arena, but managed
    // separately by the slab manager.
    auto const totalSlabSize = std::min(slabs * kSlabSize, reserveSize);
    if (totalSlabSize) {
      auto slabRange = mapper->alloc(totalSlabSize, kSlabAlign);
      if (slabRange) {
        s_slab_managers[i]->addRange<true>(slabRange, totalSlabSize);
      }
    }
    if (totalSlabSize == reserveSize) continue;
    arena->setLowMapper(mapper);
    g_local_arenas[i] = arena;
  }
#endif
}

unsigned get_local_arena(uint32_t node) {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (node >= g_local_arenas.size()) return 0;
  auto const arena = g_local_arenas[node];
  if (arena == nullptr) return 0;
  return arena->id();
#else
  return 0;
#endif
}

SlabManager* get_local_slab_manager(uint32_t node) {
  if (node >= s_slab_managers.size()) return nullptr;
  return s_slab_managers[node];
}

void shutdown_slab_managers() {
  for (auto slab_manager : s_slab_managers) {
    if (slab_manager) slab_manager->shutdown();
  }
}

#endif // USE_JEMALLOC

ssize_t get_free_slab_bytes() {
  ssize_t bytes = 0;
#ifdef USE_JEMALLOC
  for (auto const slabManager : s_slab_managers) {
    if (slabManager) {
      bytes += slabManager->bytes();
    }
  }
#endif // USE_JEMALLOC
  return bytes;
}
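
// Runs at the highest constructor priority (see MAX_CONSTRUCTOR_PRIORITY
// below) so that the environment tweaks and jemalloc arena setup happen
// before other static initializers start allocating.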
struct JEMallocInitializer {
  JEMallocInitializer() {
    // The following comes from malloc_extension.cc in google-perftools
#ifdef __GLIBC__
    // GNU libc++ versions 3.3 and 3.4 obey the environment variables
    // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively.  Setting
    // one of these variables forces the STL default allocator to call
    // new() or delete() for each allocation or deletion.  Otherwise
    // the STL allocator tries to avoid the high cost of doing
    // allocations by pooling memory internally.
    setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/);
    setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/);

    // Now we need to make the setenv 'stick', which it may not do since
    // the env is flakey before main() is called.  But luckily stl only
    // looks at this env var the first time it tries to do an alloc, and
    // caches what it finds.  So we just cause an stl alloc here.
    std::string dummy("I need to be allocated");
    dummy += "!";         // so the definition of dummy isn't optimized out
#endif  /* __GLIBC__ */

    // Enable backtracing through PHP frames (t9814472).
    setenv("UNW_RBP_ALWAYS_VALID", "1", false);

    init_numa();
#ifdef USE_JEMALLOC
#if !USE_JEMALLOC_EXTENT_HOOKS
    // Create the legacy low arena that uses brk() instead of mmap().  When
    // using newer versions of jemalloc, we use extent hooks to get more
    // control.  If the mallctl fails, it will always_assert in mallctlHelper.
    if (mallctlRead<unsigned, true>("arenas.create", &low_arena)) {
      return;
    }
    char buf[32];
    snprintf(buf, sizeof(buf), "arena.%u.dss", low_arena);
    if (mallctlWrite<const char*, true>(buf, "primary") != 0) {
      // Error; bail out.
      return;
    }
    low_arena_flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;
    lower_arena = low_arena;
    lower_arena_flags = low_arena_flags;
    low_cold_arena = low_arena;
    low_cold_arena_flags = low_arena_flags;

    // We normally maintain the invariant that the region surrounding the
    // current brk is mapped huge, but we don't know yet whether huge pages
    // are enabled for low memory.  Round up to the start of a huge page,
    // and set the high water mark to one below.
    constexpr size_t kHugePageSize = size2m;
    constexpr size_t kHugePageMask = kHugePageSize - 1;
    unsigned leftInPage = kHugePageSize - (uintptr_t(sbrk(0)) & kHugePageMask);
    (void) sbrk(leftInPage);
    assert((uintptr_t(sbrk(0)) & kHugePageMask) == 0);

#else // USE_JEMALLOC_EXTENT_HOOKS
    unsigned low_1g_pages = 0;
    if (char* buffer = getenv("HHVM_LOW_1G_PAGE")) {
      if (!sscanf(buffer, "%u", &low_1g_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_LOW_1G_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned high_1g_pages = 0;
    if (char* buffer = getenv("HHVM_HIGH_1G_PAGE")) {
      if (!sscanf(buffer, "%u", &high_1g_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_HIGH_1G_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned low_2m_pages = 0;
    if (char* buffer = getenv("HHVM_LOW_2M_PAGE")) {
      if (!sscanf(buffer, "%u", &low_2m_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_LOW_2M_PAGE: %s\n", buffer);
        abort();
      }
    }
    unsigned high_2m_pages = 0;
    if (char* buffer = getenv("HHVM_HIGH_2M_PAGE")) {
      if (!sscanf(buffer, "%u", &high_2m_pages)) {
        fprintf(stderr,
                "Bad environment variable HHVM_HIGH_2M_PAGE: %s\n", buffer);
        abort();
      }
    }

    HugePageInfo info = get_huge1g_info();
    unsigned remaining = static_cast<unsigned>(info.nr_hugepages);
    if (remaining == 0) {
      low_1g_pages = high_1g_pages = 0;
    } else if (low_1g_pages > 0 || high_1g_pages > 0) {
      KernelVersion version;
      if (version.m_major < 3 ||
          (version.m_major == 3 && version.m_minor < 9)) {
        // Older kernels need an explicit hugetlbfs mount point.
        find_hugetlbfs_path() || auto_mount_hugetlbfs();
      }
    }

    // Do some allocation between low and high 1G arenas.  We use at most 2 1G
    // pages for the low 1G arena; usually 1 is good enough.
    auto const origLow1G = low_1g_pages;
    auto const origHigh1G = high_1g_pages;
    if (low_1g_pages > 0) {
      if (low_1g_pages > 2) {
        low_1g_pages = 2;
      }
      if (low_1g_pages + high_1g_pages > remaining) {
        low_1g_pages = 1;
      }
      assert(remaining >= low_1g_pages);
      remaining -= low_1g_pages;
    }
    if (origLow1G) {
      fprintf(stderr,
              "using %u (specified %u) 1G huge pages for low arena\n",
              low_1g_pages, origLow1G);
    }
    setup_low_arena({low_1g_pages, low_2m_pages});

    if (high_1g_pages > remaining) {
      high_1g_pages = remaining;
    }
    if (origHigh1G) {
      fprintf(stderr,
              "using %u (specified %u) 1G huge pages for high arena\n",
              high_1g_pages, origHigh1G);
    }
    setup_high_arena({high_1g_pages, high_2m_pages});
    // Make sure high/low arenas are available to the current thread.
    arenas_thread_init();
#endif
    // Initialize global mibs
    init_mallctl_mibs();
#endif
  }
};

#if defined(__GNUC__) && !defined(__APPLE__)
// Construct this object before any others.
// 101 is the highest priority allowed by the init_priority attribute.
// http://gcc.gnu.org/onlinedocs/gcc-4.0.4/gcc/C_002b_002b-Attributes.html
#define MAX_CONSTRUCTOR_PRIORITY __attribute__((__init_priority__(101)))
#else
// init_priority is a gcc extension, so we can't use it on other compilers.
// However, since constructor ordering is only known to be an issue with
// GNU libc++ we're probably OK on other compilers so let the situation pass
// silently instead of issuing a warning.
#define MAX_CONSTRUCTOR_PRIORITY
#endif

static JEMallocInitializer initJEMalloc MAX_CONSTRUCTOR_PRIORITY;

void low_2m_pages(uint32_t pages) {
#if USE_JEMALLOC_EXTENT_HOOKS
  pages -= allocate2MPagesToRange(AddrRangeClass::VeryLow, pages);
  allocate2MPagesToRange(AddrRangeClass::Low, pages);
#endif
}

void high_2m_pages(uint32_t pages) {
#if USE_JEMALLOC_EXTENT_HOOKS
  allocate2MPagesToRange(AddrRangeClass::Uncounted, pages);
#endif
}

void enable_high_cold_file() {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (cold_file_mapper) {
    cold_file_mapper->enable();
  }
#endif
}

void set_cold_file_dir(const char* dir) {
#if USE_JEMALLOC_EXTENT_HOOKS
  if (cold_file_mapper) {
    cold_file_mapper->setDirectory(dir);
  }
#endif
}

///////////////////////////////////////////////////////////////////////////////
}
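
// Default jemalloc options, read by jemalloc at load time: one automatic
// arena (narenas:1), thread caches covering size classes up to 64KiB
// (lg_tcache_max:16), and, on jemalloc >= 5.1, no transparent huge pages for
// jemalloc metadata.  When HHProf is enabled, heap profiling support is
// compiled in but starts out inactive.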
extern "C" {
  const char* malloc_conf = "narenas:1,lg_tcache_max:16"
#if (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR >= 1) || \
    (JEMALLOC_VERSION_MAJOR > 5) // requires jemalloc >= 5.1
    ",metadata_thp:disabled"
#endif
#ifdef ENABLE_HHPROF
    ",prof:true,prof_active:false,prof_thread_active_init:false"
#endif