2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #ifndef incl_HPHP_UTIL_ALLOC_H_
18 #define incl_HPHP_UTIL_ALLOC_H_
25 #include <folly/CPortability.h>
26 #include <folly/Portability.h>
27 #include <folly/portability/PThread.h>
29 #include "hphp/util/address-range.h"
30 #include "hphp/util/alloc-defs.h"
31 #include "hphp/util/assertions.h"
32 #include "hphp/util/exception.h"
33 #include "hphp/util/jemalloc-util.h"
34 #include "hphp/util/low-ptr-def.h"
35 #include "hphp/util/read-only-arena.h"
36 #include "hphp/util/slab-manager.h"
/*
 * Tag type used only to select the placement-new overload below.
 */
enum class NotNull {};

/*
 * The placement-new provided by the standard library is required by the
 * C++ specification to perform a null check because it is marked with noexcept
 * or throw() depending on the compiler version. This override of placement
 * new doesn't use either of these, so it is allowed to omit the null check.
 *
 * Returns `location` unchanged; the caller must pass a non-null pointer
 * (checked only by assert).
 */
inline void* operator new(size_t, NotNull, void* location) {
  assert(location);
  return location;
}
52 ///////////////////////////////////////////////////////////////////////////////
54 struct OutOfMemoryException
: Exception
{
55 explicit OutOfMemoryException(size_t size
)
56 : Exception("Unable to allocate %zu bytes of memory", size
) {}
57 EXCEPTION_COMMON_IMPL(OutOfMemoryException
);
60 ///////////////////////////////////////////////////////////////////////////////
65 // When jemalloc 5 and above is used, we use the extent hooks to create the
66 // following arenas, to gain detailed control over address space, huge page
67 // mapping, and data layout.
69 // - low arena, lower arena, and low cold arena try to give addresses that fit
70 // in 32 bits. Use lower arena when 31-bit address is preferred, and when we
71 // want to make full use of the huge pages there (if present). low and low
72 // cold arenas prefer addresses between 2G and 4G, to conserve space in the
73 // lower range. These are just preferences, all these arenas are able to use
74 // spare space in the 1G to 4G region, when the preferred range is used up. In
75 // LOWPTR builds, running out of space in any of the low arenas will cause a
78 // - high arena and high cold arena span addresses from 4G to kHighArenaMaxAddr.
79 // It is currently used for some VM metadata and APC (the table, and all
80 // uncounted data). high_cold_arena can be used for global cold data. We don't
81 // expect to run out of memory in the high arenas.
83 // - local arena only exists in some threads, mostly for data that is not
84 // accessed by other threads. In some threads, local arena is 0, and the
85 // automatic arena is used in that case.
87 // A cold arena shares an address range with its hotter counterparts, but
88 // tries to give separate address ranges. This is done by allocating from higher
89 // address downwards, while the hotter ones go from lower address upwards.
91 // Some prior experiments showed that high_arena needs tcache, due to spikiness
92 // in APC-related memory allocation and deallocation behaviors. Other arenas
93 // shouldn't need tcache.
95 // With earlier jemalloc versions, only the lower arena exists (using dss), and
96 // low arena and low cold arena alias to lower arena. Allocations in the high
97 // arenas are served using default malloc(), and no assumption about the
98 // resulting address range can be made.
100 extern unsigned low_arena
;
101 extern unsigned lower_arena
;
102 extern unsigned low_cold_arena
;
103 extern unsigned high_arena
;
104 extern unsigned high_cold_arena
;
105 extern __thread
unsigned local_arena
;
107 extern int low_arena_flags
;
108 extern int lower_arena_flags
;
109 extern int low_cold_arena_flags
;
110 extern int high_cold_arena_flags
;
111 extern __thread
int high_arena_flags
;
112 extern __thread
int local_arena_flags
;
/*
 * Page counts passed to setup_local_arenas() and setup_arena0().
 * Both counters default to zero.
 * NOTE(review): presumably the number of 1G and 2M huge pages to use;
 * confirm against the implementation.
 */
struct PageSpec {
  unsigned n1GPages{0};
  unsigned n2MPages{0};
};
119 void setup_local_arenas(PageSpec
, unsigned slabs
);
120 unsigned get_local_arena(uint32_t node
);
121 SlabManager
* get_local_slab_manager(uint32_t node
);
122 void shutdown_slab_managers();
124 void setup_arena0(PageSpec
);
126 #if USE_JEMALLOC_EXTENT_HOOKS
128 // Explicit per-thread tcache for high arena.
129 extern __thread
int high_arena_tcache
;
131 /* Set up extent hooks to use 1g pages for jemalloc metadata. */
132 void setup_jemalloc_metadata_extent_hook(bool enable
, bool enable_numa_arena
,
135 // Functions to run upon thread creation/flush/exit.
136 void arenas_thread_init();
137 void arenas_thread_flush();
138 void arenas_thread_exit();
140 #endif // USE_JEMALLOC_EXTENT_HOOKS
142 #endif // USE_JEMALLOC
145 * Get the number of bytes held by the slab managers, but are free for request
148 * The value is calculated using relaxed atomic adds and subs, and may become
149 * negative at moments due to the unpredictable memory ordering.
151 ssize_t
get_free_slab_bytes();
153 void low_2m_pages(uint32_t pages
);
154 void high_2m_pages(uint32_t pages
);
156 void set_cold_file_dir(const char* dir
);
157 void enable_high_cold_file();
160 * Safe memory allocation.
162 inline void* safe_malloc(size_t size
) {
163 void* p
= malloc(size
);
164 if (!p
) throw OutOfMemoryException(size
);
168 inline void* safe_calloc(size_t count
, size_t size
) {
169 void* p
= calloc(count
, size
);
170 if (!p
) throw OutOfMemoryException(size
);
174 inline void* safe_realloc(void* ptr
, size_t size
) {
175 ptr
= realloc(ptr
, size
);
176 if (!ptr
&& size
> 0) throw OutOfMemoryException(size
);
/*
 * Counterpart of the safe_* allocation wrappers: releases `ptr` with free().
 */
inline void safe_free(void* ptr) {
  free(ptr);
}
184 inline void* safe_aligned_alloc(size_t align
, size_t size
) {
185 auto p
= aligned_alloc(align
, size
);
186 if (!p
) throw OutOfMemoryException(size
);
191 * Instruct low level memory allocator to free memory back to system. Called
192 * when thread's been idle and predicted to continue to be idle for a while.
194 void flush_thread_caches();
197 * Get the number of bytes that could be purged via `purge_all()`.
198 * JEMalloc holds pages in three states:
199 * - active: In use by the application
200 * - dirty: Held by JEMalloc for future allocations
201 * - muzzy: madvise(FREE) but not madvised(DONTNEED), so mapping may still
202 * exist, but kernel could reclaim if necessary
203 * By default pages spend 10s in dirty state after being freed up, and then
204 * move to muzzy state for an additional 10s prior to being
205 * `madvise(DONTNEED)`. This function reports the number of bytes that are in
206 * the dirty state. These are bytes unusable by the kernel, but also unused by
207 * the application. A force purge will make JEMalloc `madvise(DONTNEED)` these
210 ssize_t
purgeable_bytes();
213 * Instruct the kernel to free parts of the unused stack back to the system.
214 * Like flush_thread_caches, this is called when the thread has been idle
215 * and predicted to continue to be idle for a while.
217 void flush_thread_stack();
/*
 * Like scoped_ptr, but calls free() on destruct.
 *
 * Owns at most one malloc-family block; copying is disabled.
 */
struct ScopedMem {
  ScopedMem(const ScopedMem&) = delete; // disable copying
  ScopedMem& operator=(const ScopedMem&) = delete;

  ScopedMem() : m_ptr(nullptr) {}
  explicit ScopedMem(void* ptr) : m_ptr(ptr) {}
  ~ScopedMem() { free(m_ptr); }

  // Take ownership of `ptr`.
  // NOTE(review): body reconstructed; assumes the object holds nothing yet
  // (otherwise the previous block would leak) -- confirm against upstream.
  ScopedMem& operator=(void* ptr) {
    assert(!m_ptr);
    m_ptr = ptr;
    return *this;
  }

 private:
  void* m_ptr;
};
// POD type for tracking arbitrary memory ranges: a start pointer of type T
// and a length in bytes.
// NOTE(review): the name of the pointer member (`ptr`) is reconstructed --
// confirm against the users of MemBlock.
template<class T> struct MemRange {
  T ptr;
  size_t size; // bytes
};

// Untyped memory range.
using MemBlock = MemRange<void*>;
247 extern __thread
uintptr_t s_stackLimit
;
248 extern __thread
size_t s_stackSize
;
249 void init_stack_limits(pthread_attr_t
* attr
);
252 * The numa node this thread is bound to
254 extern __thread
int32_t s_numaNode
;
256 * The optional preallocated space collocated with thread stack.
258 extern __thread MemBlock s_tlSpace
;
260 * The part of thread stack and s_tlSpace that lives on huge pages. It could be
261 * empty if huge page isn't used for this thread.
263 extern __thread MemBlock s_hugeRange
;
266 * Set the thread affinity, and the jemalloc arena for the current
268 * Also initializes s_numaNode
270 void set_numa_binding(int node
);
272 * Allocate on a specific NUMA node, with alignment requirement.
274 void* mallocx_on_node(size_t size
, int node
, size_t align
);
276 ///////////////////////////////////////////////////////////////////////////////
// Helpers (malloc, free, realloc, sized_free) to allocate/deallocate on a
// specific arena given flags. DEF_ALLOC_FUNCS(prefix, flag, fallback) defines
// prefix_malloc, prefix_free, prefix_realloc and prefix_sized_free.
//
// With extent hooks, the functions forward to the jemalloc *allocx API using
// `flag`. When not using extent hooks, the fallback version is used: each
// function forwards to fallback##malloc / free / realloc. `fallback` can be
// empty, in which case generic malloc/free will be used. These functions will
// crash with 0-sized alloc/deallocs.
#if USE_JEMALLOC_EXTENT_HOOKS
#define DEF_ALLOC_FUNCS(prefix, flag, fallback)                 \
  inline void* prefix##_malloc(size_t size) {                   \
    assert(size != 0);                                          \
    return mallocx(size, flag);                                 \
  }                                                             \
  inline void prefix##_free(void* ptr) {                        \
    assert(ptr != nullptr);                                     \
    return dallocx(ptr, flag);                                  \
  }                                                             \
  inline void* prefix##_realloc(void* ptr, size_t size) {       \
    assert(size != 0);                                          \
    return rallocx(ptr, size, flag);                            \
  }                                                             \
  inline void prefix##_sized_free(void* ptr, size_t size) {     \
    assert(ptr != nullptr);                                     \
    assert(sallocx(ptr, flag) == nallocx(size, flag));          \
    return sdallocx(ptr, size, flag);                           \
  }
#else
#define DEF_ALLOC_FUNCS(prefix, flag, fallback)                 \
  inline void* prefix##_malloc(size_t size) {                   \
    return fallback##malloc(size);                              \
  }                                                             \
  inline void prefix##_free(void* ptr) {                        \
    return fallback##free(ptr);                                 \
  }                                                             \
  inline void* prefix##_realloc(void* ptr, size_t size) {       \
    assert(size != 0);                                          \
    return fallback##realloc(ptr, size);                        \
  }                                                             \
  inline void prefix##_sized_free(void* ptr, size_t size) {     \
    return fallback##free(ptr);                                 \
  }
#endif
318 DEF_ALLOC_FUNCS(vm
, high_arena_flags
, )
319 DEF_ALLOC_FUNCS(vm_cold
, high_cold_arena_flags
, )
321 // Allocations that are guaranteed to live below kUncountedMaxAddr when
322 // USE_JEMALLOC_EXTENT_HOOKS. This provides a new way to check for countedness
323 // for arrays and strings.
324 DEF_ALLOC_FUNCS(uncounted
, high_arena_flags
, )
326 // Allocations for the APC but do not necessarily live below kUncountedMaxAddr,
327 // e.g., APCObject, or the hash table. Currently they live below
328 // kUncountedMaxAddr anyway, but this may change later.
329 DEF_ALLOC_FUNCS(apc
, high_arena_flags
, )
331 // Thread-local allocations that are not accessed outside the thread.
332 DEF_ALLOC_FUNCS(local
, local_arena_flags
, )
334 // Low arena is always present when jemalloc is used, even when arena hooks are
336 inline void* low_malloc(size_t size
) {
341 auto ptr
= mallocx(size
, low_arena_flags
);
343 if (ptr
== nullptr) ptr
= uncounted_malloc(size
);
349 inline void low_free(void* ptr
) {
354 dallocx(ptr
, low_arena_flags
);
358 inline void* low_realloc(void* ptr
, size_t size
) {
360 return realloc(ptr
, size
);
363 return rallocx(ptr
, size
, low_arena_flags
);
367 inline void low_sized_free(void* ptr
, size_t size
) {
372 sdallocx(ptr
, size
, low_arena_flags
);
376 // lower arena and low_cold arena alias low arena when extent hooks are not
378 DEF_ALLOC_FUNCS(lower
, lower_arena_flags
, low_
)
379 DEF_ALLOC_FUNCS(low_cold
, low_cold_arena_flags
, low_
)
381 #undef DEF_ALLOC_FUNCS
// General purpose adaptor that wraps an allocation function and a sized
// deallocation function into an allocator that works with STL-style
// containers.
//
//   AF - allocation function, called with the number of bytes to allocate
//   DF - sized deallocation function, called with the pointer and the same
//        byte count that was passed to AF
//   T  - element type
//
// The allocator is stateless: all instances compare equal, regardless of the
// element type.
template <void* (*AF)(size_t), void (*DF)(void*, size_t), typename T>
struct WrapAllocator {
  using value_type = T;
  using reference = T&;
  using const_reference = const T&;
  using pointer = T*;
  using const_pointer = const T*;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  template <class U> struct rebind { using other = WrapAllocator<AF, DF, U>; };

  WrapAllocator() noexcept {}
  template<class U>
  explicit WrapAllocator(const WrapAllocator<AF, DF, U>&) noexcept {}
  ~WrapAllocator() noexcept {}

  // Allocate storage for `num` objects of type T. A request for zero objects
  // returns nullptr without calling AF.
  pointer allocate(size_t num) {
    if (num == 0) return nullptr;
    return static_cast<pointer>(AF(num * sizeof(T)));
  }

  // Release storage obtained from allocate(num). Null pointers are ignored,
  // which matches the nullptr returned by allocate(0).
  void deallocate(pointer p, size_t num) {
    if (p == nullptr) return;
    DF(static_cast<void*>(p), num * sizeof(T));
  }

  // Construct a U in place at `p`, forwarding `args` to its constructor.
  template<class U, class... Args>
  void construct(U* p, Args&&... args) {
    ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
  }

  // Run the destructor of the object at `p` without releasing its storage.
  void destroy(pointer p) {
    p->~T();
  }

  template<class U> bool operator==(const WrapAllocator<AF, DF, U>&) const {
    return true;
  }
  template<class U> bool operator!=(const WrapAllocator<AF, DF, U>&) const {
    return false;
  }
};
425 template<typename T
> using LowAllocator
=
426 WrapAllocator
<low_malloc
, low_sized_free
, T
>;
427 template<typename T
> using LowerAllocator
=
428 WrapAllocator
<lower_malloc
, lower_sized_free
, T
>;
429 template<typename T
> using LowColdAllocator
=
430 WrapAllocator
<low_cold_malloc
, low_cold_sized_free
, T
>;
431 template<typename T
> using VMAllocator
=
432 WrapAllocator
<vm_malloc
, vm_sized_free
, T
>;
433 template<typename T
> using VMColdAllocator
=
434 WrapAllocator
<vm_cold_malloc
, vm_cold_sized_free
, T
>;
435 template<typename T
> using APCAllocator
=
436 WrapAllocator
<apc_malloc
, apc_sized_free
, T
>;
437 template<typename T
> using LocalAllocator
=
438 WrapAllocator
<local_malloc
, local_sized_free
, T
>;
440 // Per-thread buffer for global data, using a bump allocator.
441 using TLStaticArena
= ReadOnlyArena
<LowerAllocator
<char>, true, 8>;
442 extern __thread TLStaticArena
* tl_static_arena
;
443 extern bool s_enable_static_arena
;
445 inline void* static_alloc(size_t size
) {
446 if (tl_static_arena
) return tl_static_arena
->allocate(size
);
447 return lower_malloc(size
);
450 // This can only free the memory allocated using static_alloc(), immediately
451 // after allocation, and it must happen in the same thread where allocation
453 inline void static_try_free(void* ptr
, size_t size
) {
454 if (tl_static_arena
) return tl_static_arena
->deallocate(ptr
, size
);
455 return lower_sized_free(ptr
, size
);
458 ///////////////////////////////////////////////////////////////////////////////
461 #endif // incl_HPHP_UTIL_ALLOC_H_