/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#ifndef incl_HPHP_UTIL_ALLOC_H_
#define incl_HPHP_UTIL_ALLOC_H_

#include <array>
#include <atomic>

#include <stdint.h>

#include <folly/CPortability.h>
#include <folly/Portability.h>
#include <folly/portability/PThread.h>

#include "hphp/util/address-range.h"
#include "hphp/util/alloc-defs.h"
#include "hphp/util/assertions.h"
#include "hphp/util/exception.h"
#include "hphp/util/jemalloc-util.h"
#include "hphp/util/low-ptr-def.h"
#include "hphp/util/read-only-arena.h"
#include "hphp/util/slab-manager.h"

enum class NotNull {};

/*
 * The placement-new provided by the standard library is required by the
 * C++ specification to perform a null check, because it is marked with
 * noexcept or throw() depending on the compiler version. This override of
 * placement new doesn't use either of these, so it is allowed to omit the
 * null check.
 */
inline void* operator new(size_t, NotNull, void* location) {
  assert(location);
  return location;
}
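
// Illustrative usage (the buffer and type are hypothetical): construct an
// object in storage already known to be non-null, skipping the null check the
// standard placement-new would perform:
//
//   alignas(Foo) char buffer[sizeof(Foo)];
//   Foo* f = new (NotNull{}, buffer) Foo();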

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

struct OutOfMemoryException : Exception {
  explicit OutOfMemoryException(size_t size)
    : Exception("Unable to allocate %zu bytes of memory", size) {}
  EXCEPTION_COMMON_IMPL(OutOfMemoryException);
};

///////////////////////////////////////////////////////////////////////////////

#ifdef USE_JEMALLOC

// When jemalloc 5 or above is used, we use the extent hooks to create the
// following arenas, to gain detailed control over address space, huge page
// mapping, and data layout.
//
// - The low arena, lower arena, and low cold arena try to give out addresses
//   that fit in 32 bits. Use the lower arena when a 31-bit address is
//   preferred, and when we want to make full use of the huge pages there (if
//   present). The low and low cold arenas prefer addresses between 2G and 4G,
//   to conserve space in the lower range. These are just preferences: all of
//   these arenas are able to use spare space in the 1G to 4G region when the
//   preferred range is used up. In LOWPTR builds, running out of space in any
//   of the low arenas will cause a crash (we hope).
//
// - The high arena and high cold arena span addresses from 4G to
//   kHighArenaMaxAddr. The high arena is currently used for some VM metadata
//   and APC (the table, and all uncounted data). high_cold_arena can be used
//   for global cold data. We don't expect to run out of memory in the high
//   arenas.
//
// - The local arena only exists in some threads, mostly for data that is not
//   accessed by other threads. In the other threads, local_arena is 0, and the
//   automatic arena is used instead.
//
// A cold arena shares an address range with its hotter counterparts, but tries
// to hand out addresses from a separate portion of it. This is done by
// allocating from higher addresses downwards, while the hotter arenas go from
// lower addresses upwards.
//
// Some prior experiments showed that high_arena needs a tcache, due to
// spikiness in APC-related memory allocation and deallocation behavior. The
// other arenas shouldn't need a tcache.
//
// With earlier jemalloc versions, only the lower arena exists (using dss), and
// the low arena and low cold arena alias to the lower arena. Allocations in
// the high arenas are served using default malloc(), and no assumption about
// the resulting address range can be made.

extern unsigned low_arena;
extern unsigned lower_arena;
extern unsigned low_cold_arena;
extern unsigned high_arena;
extern unsigned high_cold_arena;
extern __thread unsigned local_arena;

extern int low_arena_flags;
extern int lower_arena_flags;
extern int low_cold_arena_flags;
extern int high_cold_arena_flags;
extern __thread int high_arena_flags;
extern __thread int local_arena_flags;
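
// A minimal sketch (not the definitive initialization): each *_flags value is
// a jemalloc flags word meant to be passed to the *allocx() family, typically
// selecting the arena (e.g. via MALLOCX_ARENA()) and a tcache setting:
//
//   void* p = mallocx(size, low_arena_flags);  // allocate in the low arena
//   dallocx(p, low_arena_flags);               // release back to that arena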

struct PageSpec {
  unsigned n1GPages{0};
  unsigned n2MPages{0};
};

void setup_local_arenas(PageSpec, unsigned slabs);
unsigned get_local_arena(uint32_t node);
SlabManager* get_local_slab_manager(uint32_t node);
void shutdown_slab_managers();

void setup_arena0(PageSpec);

#if USE_JEMALLOC_EXTENT_HOOKS

// Explicit per-thread tcache for the high arena.
extern __thread int high_arena_tcache;

/* Set up extent hooks to use 1G pages for jemalloc metadata. */
void setup_jemalloc_metadata_extent_hook(bool enable, bool enable_numa_arena,
                                         size_t reserved);

// Functions to run upon thread creation/flush/exit.
void arenas_thread_init();
void arenas_thread_flush();
void arenas_thread_exit();

#endif // USE_JEMALLOC_EXTENT_HOOKS

#endif // USE_JEMALLOC

/*
 * Get the number of bytes held by the slab managers that are free for request
 * use.
 *
 * The value is calculated using relaxed atomic adds and subs, and may
 * momentarily become negative due to the relaxed memory ordering.
 */
ssize_t get_free_slab_bytes();

void low_2m_pages(uint32_t pages);
void high_2m_pages(uint32_t pages);

void set_cold_file_dir(const char* dir);
void enable_high_cold_file();

/*
 * Safe memory allocation.
 */
inline void* safe_malloc(size_t size) {
  void* p = malloc(size);
  if (!p) throw OutOfMemoryException(size);
  return p;
}

inline void* safe_calloc(size_t count, size_t size) {
  void* p = calloc(count, size);
  if (!p) throw OutOfMemoryException(size);
  return p;
}

inline void* safe_realloc(void* ptr, size_t size) {
  ptr = realloc(ptr, size);
  if (!ptr && size > 0) throw OutOfMemoryException(size);
  return ptr;
}

inline void safe_free(void* ptr) {
  return free(ptr);
}

inline void* safe_aligned_alloc(size_t align, size_t size) {
  auto p = aligned_alloc(align, size);
  if (!p) throw OutOfMemoryException(size);
  return p;
}
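
// Illustrative usage (the buffer size is hypothetical): the safe_* wrappers
// throw OutOfMemoryException instead of returning nullptr, so callers need no
// result check:
//
//   void* buf = safe_malloc(4096);
//   // ... use buf ...
//   safe_free(buf);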

/*
 * Instruct the low-level memory allocator to free memory back to the system.
 * Called when the thread has been idle and is predicted to remain idle for a
 * while.
 */
void flush_thread_caches();

/*
 * Get the number of bytes that could be purged via `purge_all()`.
 * jemalloc holds pages in three states:
 *  - active: in use by the application
 *  - dirty: held by jemalloc for future allocations
 *  - muzzy: madvise(FREE)'d but not madvise(DONTNEED)'d, so the mapping may
 *    still exist, but the kernel could reclaim the pages if necessary
 * By default pages spend 10s in the dirty state after being freed up, and then
 * move to the muzzy state for an additional 10s prior to being
 * `madvise(DONTNEED)`'d. This function reports the number of bytes that are in
 * the dirty state. These are bytes unusable by the kernel, but also unused by
 * the application. A forced purge will make jemalloc `madvise(DONTNEED)` these
 * pages immediately.
 */
ssize_t purgeable_bytes();

/*
 * Instruct the kernel to free parts of the unused stack back to the system.
 * Like flush_thread_caches, this is called when the thread has been idle and
 * is predicted to remain idle for a while.
 */
void flush_thread_stack();

/*
 * Like scoped_ptr, but calls free() on destruction.
 */
struct ScopedMem {
 private:
  ScopedMem(const ScopedMem&); // disable copying
  ScopedMem& operator=(const ScopedMem&);
 public:
  ScopedMem() : m_ptr(0) {}
  explicit ScopedMem(void* ptr) : m_ptr(ptr) {}
  ~ScopedMem() { free(m_ptr); }
  ScopedMem& operator=(void* ptr) {
    assert(!m_ptr);
    m_ptr = ptr;
    return *this;
  }
 private:
  void* m_ptr;
};
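
// Illustrative usage (the size is hypothetical): keep using the raw pointer,
// and let ScopedMem guarantee the free() when it goes out of scope, including
// on exception paths:
//
//   void* buf = safe_malloc(256);
//   ScopedMem holder(buf);
//   // ... use buf; no explicit free needed ...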

// POD type for tracking arbitrary memory ranges
template<class T> struct MemRange {
  T ptr;
  size_t size; // bytes
};

using MemBlock = MemRange<void*>;

extern __thread uintptr_t s_stackLimit;
extern __thread size_t s_stackSize;
void init_stack_limits(pthread_attr_t* attr);

/*
 * The NUMA node this thread is bound to.
 */
extern __thread int32_t s_numaNode;
/*
 * The optional preallocated space co-located with the thread stack.
 */
extern __thread MemBlock s_tlSpace;
/*
 * The part of the thread stack and s_tlSpace that lives on huge pages. It
 * could be empty if huge pages aren't used for this thread.
 */
extern __thread MemBlock s_hugeRange;

/*
 * Set the thread affinity and the jemalloc arena for the current thread.
 * Also initializes s_numaNode.
 */
void set_numa_binding(int node);
/*
 * Allocate on a specific NUMA node, with an alignment requirement.
 */
void* mallocx_on_node(size_t size, int node, size_t align);
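
// Illustrative usage (size and alignment are hypothetical): allocate a
// 64-byte-aligned block on the NUMA node this thread is bound to:
//
//   void* p = mallocx_on_node(4096, s_numaNode, 64);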

///////////////////////////////////////////////////////////////////////////////

// Helpers (malloc, free, sized_free) to allocate/deallocate on a specific
// arena, given the flags. When not using extent hooks, the fallback version is
// used. `fallback` can be empty, in which case the generic malloc/free will be
// used when not using extent hooks. These functions will crash with 0-sized
// allocs/deallocs.
#if USE_JEMALLOC_EXTENT_HOOKS
#define DEF_ALLOC_FUNCS(prefix, flag, fallback)                 \
  inline void* prefix##_malloc(size_t size) {                   \
    assert(size != 0);                                          \
    return mallocx(size, flag);                                 \
  }                                                             \
  inline void prefix##_free(void* ptr) {                        \
    assert(ptr != nullptr);                                     \
    return dallocx(ptr, flag);                                  \
  }                                                             \
  inline void* prefix##_realloc(void* ptr, size_t size) {       \
    assert(size != 0);                                          \
    return rallocx(ptr, size, flag);                            \
  }                                                             \
  inline void prefix##_sized_free(void* ptr, size_t size) {     \
    assert(ptr != nullptr);                                     \
    assert(sallocx(ptr, flag) == nallocx(size, flag));          \
    return sdallocx(ptr, size, flag);                           \
  }
#else
#define DEF_ALLOC_FUNCS(prefix, flag, fallback)                 \
  inline void* prefix##_malloc(size_t size) {                   \
    return fallback##malloc(size);                              \
  }                                                             \
  inline void prefix##_free(void* ptr) {                        \
    return fallback##free(ptr);                                 \
  }                                                             \
  inline void* prefix##_realloc(void* ptr, size_t size) {       \
    assert(size != 0);                                          \
    return fallback##realloc(ptr, size);                        \
  }                                                             \
  inline void prefix##_sized_free(void* ptr, size_t size) {     \
    return fallback##free(ptr);                                 \
  }
#endif

DEF_ALLOC_FUNCS(vm, high_arena_flags, )
DEF_ALLOC_FUNCS(vm_cold, high_cold_arena_flags, )

// Allocations that are guaranteed to live below kUncountedMaxAddr when
// USE_JEMALLOC_EXTENT_HOOKS. This provides a new way to check for countedness
// for arrays and strings.
DEF_ALLOC_FUNCS(uncounted, high_arena_flags, )

// Allocations for APC that do not necessarily live below kUncountedMaxAddr,
// e.g., APCObject, or the hash table. Currently they live below
// kUncountedMaxAddr anyway, but this may change later.
DEF_ALLOC_FUNCS(apc, high_arena_flags, )

// Thread-local allocations that are not accessed outside the thread.
DEF_ALLOC_FUNCS(local, local_arena_flags, )
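
// Illustrative usage (the size is hypothetical): each DEF_ALLOC_FUNCS
// invocation above defines <prefix>_malloc/_free/_realloc/_sized_free, e.g.
// for the `vm` prefix:
//
//   void* p = vm_malloc(64);   // allocate 64 bytes in the high arena
//   vm_sized_free(p, 64);      // free, supplying the known size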

// The low arena is always present when jemalloc is used, even when extent
// hooks are not used.
inline void* low_malloc(size_t size) {
#ifndef USE_JEMALLOC
  return malloc(size);
#else
  assert(size);
  auto ptr = mallocx(size, low_arena_flags);
#ifndef USE_LOWPTR
  if (ptr == nullptr) ptr = uncounted_malloc(size);
#endif
  return ptr;
#endif
}

inline void low_free(void* ptr) {
#ifndef USE_JEMALLOC
  free(ptr);
#else
  assert(ptr);
  dallocx(ptr, low_arena_flags);
#endif
}

inline void* low_realloc(void* ptr, size_t size) {
#ifndef USE_JEMALLOC
  return realloc(ptr, size);
#else
  assert(ptr);
  return rallocx(ptr, size, low_arena_flags);
#endif
}

inline void low_sized_free(void* ptr, size_t size) {
#ifndef USE_JEMALLOC
  free(ptr);
#else
  assert(ptr);
  sdallocx(ptr, size, low_arena_flags);
#endif
}

// The lower arena and the low_cold arena alias the low arena when extent hooks
// are not used.
DEF_ALLOC_FUNCS(lower, lower_arena_flags, low_)
DEF_ALLOC_FUNCS(low_cold, low_cold_arena_flags, low_)

#undef DEF_ALLOC_FUNCS

// General purpose adaptor that wraps an allocation function and a sized
// deallocation function into an allocator that works with STL-style
// containers.
template <void* (*AF)(size_t), void (*DF)(void*, size_t), typename T>
struct WrapAllocator {
  using value_type = T;
  using reference = T&;
  using const_reference = const T&;
  using pointer = T*;
  using const_pointer = const T*;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  template <class U> struct rebind { using other = WrapAllocator<AF, DF, U>; };

  WrapAllocator() noexcept {}
  template<class U>
  explicit WrapAllocator(const WrapAllocator<AF, DF, U>&) noexcept {}
  ~WrapAllocator() noexcept {}

  pointer allocate(size_t num) {
    if (num == 0) return nullptr;
    return (pointer)AF(num * sizeof(T));
  }
  void deallocate(pointer p, size_t num) {
    if (p == nullptr) return;
    DF((void*)p, num * sizeof(T));
  }
  template<class U, class... Args>
  void construct(U* p, Args&&... args) {
    ::new ((void*)p) U(std::forward<Args>(args)...);
  }
  void destroy(pointer p) {
    p->~T();
  }
  template<class U> bool operator==(const WrapAllocator<AF, DF, U>&) const {
    return true;
  }
  template<class U> bool operator!=(const WrapAllocator<AF, DF, U>&) const {
    return false;
  }
};

template<typename T> using LowAllocator =
  WrapAllocator<low_malloc, low_sized_free, T>;
template<typename T> using LowerAllocator =
  WrapAllocator<lower_malloc, lower_sized_free, T>;
template<typename T> using LowColdAllocator =
  WrapAllocator<low_cold_malloc, low_cold_sized_free, T>;
template<typename T> using VMAllocator =
  WrapAllocator<vm_malloc, vm_sized_free, T>;
template<typename T> using VMColdAllocator =
  WrapAllocator<vm_cold_malloc, vm_cold_sized_free, T>;
template<typename T> using APCAllocator =
  WrapAllocator<apc_malloc, apc_sized_free, T>;
template<typename T> using LocalAllocator =
  WrapAllocator<local_malloc, local_sized_free, T>;
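
// Illustrative usage (the element type is chosen for the example): a standard
// container whose storage is obtained from the low arena:
//
//   std::vector<int, LowAllocator<int>> v;
//   v.push_back(42);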

// Per-thread buffer for global data, using a bump allocator.
using TLStaticArena = ReadOnlyArena<LowerAllocator<char>, true, 8>;
extern __thread TLStaticArena* tl_static_arena;
extern bool s_enable_static_arena;

inline void* static_alloc(size_t size) {
  if (tl_static_arena) return tl_static_arena->allocate(size);
  return lower_malloc(size);
}

// This can only free memory allocated using static_alloc(), immediately after
// the allocation, and it must happen in the same thread where the allocation
// happened.
inline void static_try_free(void* ptr, size_t size) {
  if (tl_static_arena) return tl_static_arena->deallocate(ptr, size);
  return lower_sized_free(ptr, size);
}
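
// Illustrative usage (the size is hypothetical): undo an allocation that
// turned out to be unnecessary, in the same thread, right after making it:
//
//   void* p = static_alloc(128);
//   // ... the buffer turns out not to be needed ...
//   static_try_free(p, 128);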

///////////////////////////////////////////////////////////////////////////////
}

#endif // incl_HPHP_UTIL_ALLOC_H_