Track free bytes held in slab allocators
[hiphop-php.git] / hphp/util/alloc.h
/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com)  |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,     |
   | that is bundled with this package in the file LICENSE, and is       |
   | available through the world-wide-web at the following url:          |
   | http://www.php.net/license/3_01.txt                                 |
   | If you did not receive a copy of the PHP license and are unable to  |
   | obtain it through the world-wide-web, please send a note to         |
   | license@php.net so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_UTIL_ALLOC_H_
#define incl_HPHP_UTIL_ALLOC_H_

#include <array>
#include <atomic>

#include <stdint.h>

#include <folly/CPortability.h>
#include <folly/Portability.h>
#include <folly/portability/PThread.h>

#include "hphp/util/address-range.h"
#include "hphp/util/alloc-defs.h"
#include "hphp/util/assertions.h"
#include "hphp/util/exception.h"
#include "hphp/util/jemalloc-util.h"
#include "hphp/util/low-ptr-def.h"
#include "hphp/util/slab-manager.h"
enum class NotNull {};

/*
 * The placement-new provided by the standard library is required by the
 * C++ specification to perform a null check because it is marked with noexcept
 * or throw() depending on the compiler version. This override of placement
 * new doesn't use either of these, so it is allowed to omit the null check.
 */
inline void* operator new(size_t, NotNull, void* location) {
  assert(location);
  return location;
}
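
// Illustrative usage sketch (not part of this header): passing NotNull{}
// selects the overload above, so the compiler may omit the null check that
// ordinary placement-new would perform.
//
//   alignas(int) char buf[sizeof(int)];
//   int* p = new (NotNull{}, buf) int(7);  // constructs in `buf`, no null check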
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

struct OutOfMemoryException : Exception {
  explicit OutOfMemoryException(size_t size)
    : Exception("Unable to allocate %zu bytes of memory", size) {}
  EXCEPTION_COMMON_IMPL(OutOfMemoryException);
};

///////////////////////////////////////////////////////////////////////////////
#ifdef USE_JEMALLOC

// When jemalloc 5 and above is used, we use the extent hooks to create the
// following arenas, to gain detailed control over address space, huge page
// mapping, and data layout.
//
// - low arena, lower arena, and low cold arena try to give addresses that fit
// in 32 bits. Use lower arena when a 31-bit address is preferred, and when we
// want to make full use of the huge pages there (if present). low and low
// cold arenas prefer addresses between 2G and 4G, to conserve space in the
// lower range. These are just preferences; all these arenas are able to use
// spare space in the 1G to 4G region, when the preferred range is used up. In
// LOWPTR builds, running out of space in any of the low arenas will cause a
// crash (we hope).
//
// - high arena and high cold arena span addresses from 4G to kHighArenaMaxAddr.
// It is currently used for some VM metadata and APC (the table, and all
// uncounted data). high_cold_arena can be used for global cold data. We don't
// expect to run out of memory in the high arenas.
//
// - local arena only exists in some threads, mostly for data that is not
// accessed by other threads. In some threads, local arena is 0, and the
// automatic arena is used in that case.
//
// A cold arena shares an address range with its hotter counterparts, but
// tries to give separate address ranges. This is done by allocating from
// higher addresses downwards, while the hotter ones go from lower addresses
// upwards.
//
// Some prior experiments showed that high_arena needs tcache, due to spikiness
// in APC-related memory allocation and deallocation behaviors. Other arenas
// shouldn't need tcache.
//
// With earlier jemalloc versions, only the lower arena exists (using dss), and
// low arena and low cold arena alias to lower arena. Allocations in the high
// arenas are served using default malloc(), and no assumption about the
// resulting address range can be made.
extern unsigned low_arena;
extern unsigned lower_arena;
extern unsigned low_cold_arena;
extern unsigned high_arena;
extern unsigned high_cold_arena;
extern __thread unsigned local_arena;

extern int low_arena_flags;
extern int lower_arena_flags;
extern int low_cold_arena_flags;
extern int high_cold_arena_flags;
extern __thread int high_arena_flags;
extern __thread int local_arena_flags;
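
// Illustrative sketch (not part of this header): the *_flags values above are
// jemalloc MALLOCX_* flag words; it is assumed here that alloc.cpp builds them
// from the arena indices roughly along these lines, and callers pass them
// straight to mallocx()/dallocx():
//
//   int flags = MALLOCX_ARENA(low_arena) | MALLOCX_TCACHE_NONE;  // assumption
//   void* p = mallocx(128, flags);
//   dallocx(p, flags);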
struct PageSpec {
  unsigned n1GPages{0};
  unsigned n2MPages{0};
};

void setup_local_arenas(PageSpec, unsigned slabs);
unsigned get_local_arena(uint32_t node);
SlabManager* get_local_slab_manager(uint32_t node);

void setup_arena0(PageSpec);
#if USE_JEMALLOC_EXTENT_HOOKS

// Explicit per-thread tcache for high arena.
extern __thread int high_arena_tcache;

/* Set up extent hooks to use 1g pages for jemalloc metadata. */
void setup_jemalloc_metadata_extent_hook(bool enable, bool enable_numa_arena,
                                         size_t reserved);

// Functions to run upon thread creation/flush/exit.
void arenas_thread_init();
void arenas_thread_flush();
void arenas_thread_exit();

#endif // USE_JEMALLOC_EXTENT_HOOKS

#endif // USE_JEMALLOC
/*
 * Get the number of bytes that are held by the slab managers but are free for
 * request use.
 *
 * The value is calculated using relaxed atomic adds and subs, and may become
 * negative at moments due to the unpredictable memory ordering.
 */
ssize_t get_free_slab_bytes();
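
// Illustrative sketch (not part of this header): because the counter is
// maintained with relaxed atomics, a caller that exports the value (e.g. to a
// stats endpoint) may want to clamp transient negative readings. The helper
// name below is hypothetical.
//
//   size_t reported_free_slab_bytes() {
//     auto const n = get_free_slab_bytes();
//     return n > 0 ? static_cast<size_t>(n) : 0;
//   }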
void low_2m_pages(uint32_t pages);
void high_2m_pages(uint32_t pages);
/*
 * Safe memory allocation.
 */
inline void* safe_malloc(size_t size) {
  void* p = malloc(size);
  if (!p) throw OutOfMemoryException(size);
  return p;
}

inline void* safe_calloc(size_t count, size_t size) {
  void* p = calloc(count, size);
  if (!p) throw OutOfMemoryException(size);
  return p;
}

inline void* safe_realloc(void* ptr, size_t size) {
  ptr = realloc(ptr, size);
  if (!ptr && size > 0) throw OutOfMemoryException(size);
  return ptr;
}

inline void safe_free(void* ptr) {
  return free(ptr);
}

inline void* safe_aligned_alloc(size_t align, size_t size) {
  auto p = aligned_alloc(align, size);
  if (!p) throw OutOfMemoryException(size);
  return p;
}
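
// Illustrative usage sketch (not part of this header): the safe_* wrappers
// throw OutOfMemoryException instead of returning nullptr, so callers can
// drop the explicit null check and handle (or propagate) the exception.
//
//   try {
//     void* buf = safe_malloc(1 << 20);   // 1 MiB; throws on failure
//     // ... use buf ...
//     safe_free(buf);
//   } catch (const OutOfMemoryException& e) {
//     // allocation failed; nothing was allocated
//   }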
/*
 * Instruct the low-level memory allocator to free memory back to the system.
 * Called when the thread has been idle and is predicted to continue to be
 * idle for a while.
 */
void flush_thread_caches();

/*
 * Instruct the kernel to free parts of the unused stack back to the system.
 * Like flush_thread_caches, this is called when the thread has been idle
 * and predicted to continue to be idle for a while.
 */
void flush_thread_stack();
/*
 * Like scoped_ptr, but calls free() on destruct.
 */
struct ScopedMem {
 private:
  ScopedMem(const ScopedMem&); // disable copying
  ScopedMem& operator=(const ScopedMem&);
 public:
  ScopedMem() : m_ptr(0) {}
  explicit ScopedMem(void* ptr) : m_ptr(ptr) {}
  ~ScopedMem() { free(m_ptr); }
  ScopedMem& operator=(void* ptr) {
    assert(!m_ptr);
    m_ptr = ptr;
    return *this;
  }
 private:
  void* m_ptr;
};
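
// Illustrative usage sketch (not part of this header): ScopedMem frees the
// held pointer when it goes out of scope, which pairs naturally with the
// safe_* helpers above.
//
//   void* raw = safe_malloc(256);
//   ScopedMem guard(raw);   // free(raw) runs when `guard` is destroyed
//   // ... use raw ...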
// POD type for tracking arbitrary memory ranges
template<class T> struct MemRange {
  T ptr;
  size_t size; // bytes
};

using MemBlock = MemRange<void*>;

extern __thread uintptr_t s_stackLimit;
extern __thread size_t s_stackSize;
void init_stack_limits(pthread_attr_t* attr);
/*
 * The numa node this thread is bound to.
 */
extern __thread int32_t s_numaNode;

/*
 * The optional preallocated space collocated with the thread stack.
 */
extern __thread MemBlock s_tlSpace;

/*
 * The part of the thread stack and s_tlSpace that lives on huge pages. It
 * could be empty if huge pages aren't used for this thread.
 */
extern __thread MemBlock s_hugeRange;

/*
 * Set the thread affinity, and the jemalloc arena for the current thread.
 * Also initializes s_numaNode.
 */
void set_numa_binding(int node);

/*
 * Allocate on a specific NUMA node, with an alignment requirement.
 */
void* mallocx_on_node(size_t size, int node, size_t align);
///////////////////////////////////////////////////////////////////////////////

// Helpers (malloc, free, sized_free) to allocate/deallocate on a specific
// arena, given flags. When extent hooks are not in use, the fallback version
// is used. `fallback` can be empty, in which case generic malloc/free will be
// used when not using extent hooks. These functions will crash with 0-sized
// alloc/deallocs.
#if USE_JEMALLOC_EXTENT_HOOKS
#define DEF_ALLOC_FUNCS(prefix, flag, fallback) \
  inline void* prefix##_malloc(size_t size) { \
    assert(size != 0); \
    return mallocx(size, flag); \
  } \
  inline void prefix##_free(void* ptr) { \
    assert(ptr != nullptr); \
    return dallocx(ptr, flag); \
  } \
  inline void* prefix##_realloc(void* ptr, size_t size) { \
    assert(size != 0); \
    return rallocx(ptr, size, flag); \
  } \
  inline void prefix##_sized_free(void* ptr, size_t size) { \
    assert(ptr != nullptr); \
    assert(sallocx(ptr, flag) == nallocx(size, flag)); \
    return sdallocx(ptr, size, flag); \
  }
#else
#define DEF_ALLOC_FUNCS(prefix, flag, fallback) \
  inline void* prefix##_malloc(size_t size) { \
    return fallback##malloc(size); \
  } \
  inline void prefix##_free(void* ptr) { \
    return fallback##free(ptr); \
  } \
  inline void* prefix##_realloc(void* ptr, size_t size) { \
    assert(size != 0); \
    return fallback##realloc(ptr, size); \
  } \
  inline void prefix##_sized_free(void* ptr, size_t size) { \
    return fallback##free(ptr); \
  }
#endif
DEF_ALLOC_FUNCS(vm, high_arena_flags, )
DEF_ALLOC_FUNCS(vm_cold, high_cold_arena_flags, )

// Allocations that are guaranteed to live below kUncountedMaxAddr when
// USE_JEMALLOC_EXTENT_HOOKS. This provides a new way to check for countedness
// for arrays and strings.
DEF_ALLOC_FUNCS(uncounted, high_arena_flags, )

// Allocations for APC that do not necessarily live below kUncountedMaxAddr,
// e.g., APCObject, or the hash table. Currently they live below
// kUncountedMaxAddr anyway, but this may change later.
DEF_ALLOC_FUNCS(apc, high_arena_flags, )

// Thread-local allocations that are not accessed outside the thread.
DEF_ALLOC_FUNCS(local, local_arena_flags, )
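
// Illustrative usage sketch (not part of this header): each DEF_ALLOC_FUNCS
// invocation above defines prefix##_malloc, prefix##_free, prefix##_realloc
// and prefix##_sized_free. For instance, DEF_ALLOC_FUNCS(vm, ...) yields a
// matched pair used like this:
//
//   auto p = vm_malloc(64);       // served from the high arena (or fallback)
//   // ... use p ...
//   vm_sized_free(p, 64);         // size must match the original request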
// Low arena is always present when jemalloc is used, even when arena hooks are
// not used.
inline void* low_malloc(size_t size) {
#ifndef USE_JEMALLOC
  return malloc(size);
#else
  assert(size);
  auto ptr = mallocx(size, low_arena_flags);
#ifndef USE_LOWPTR
  if (ptr == nullptr) ptr = uncounted_malloc(size);
#endif
  return ptr;
#endif
}

inline void low_free(void* ptr) {
#ifndef USE_JEMALLOC
  free(ptr);
#else
  assert(ptr);
  dallocx(ptr, low_arena_flags);
#endif
}

inline void* low_realloc(void* ptr, size_t size) {
#ifndef USE_JEMALLOC
  return realloc(ptr, size);
#else
  assert(ptr);
  return rallocx(ptr, size, low_arena_flags);
#endif
}

inline void low_sized_free(void* ptr, size_t size) {
#ifndef USE_JEMALLOC
  free(ptr);
#else
  assert(ptr);
  sdallocx(ptr, size, low_arena_flags);
#endif
}
// lower arena and low_cold arena alias low arena when extent hooks are not
// used.
DEF_ALLOC_FUNCS(lower, lower_arena_flags, low_)
DEF_ALLOC_FUNCS(low_cold, low_cold_arena_flags, low_)

#undef DEF_ALLOC_FUNCS
// General purpose adaptor that wraps an allocation and a sized deallocation
// function into an allocator that works with STL-style containers.
template <void* (*AF)(size_t), void (*DF)(void*, size_t), typename T>
struct WrapAllocator {
  using value_type = T;
  using reference = T&;
  using const_reference = const T&;
  using pointer = T*;
  using const_pointer = const T*;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;

  template <class U> struct rebind { using other = WrapAllocator<AF, DF, U>; };

  WrapAllocator() noexcept {}
  template<class U>
  explicit WrapAllocator(const WrapAllocator<AF, DF, U>&) noexcept {}
  ~WrapAllocator() noexcept {}

  pointer allocate(size_t num) {
    if (num == 0) return nullptr;
    return (pointer)AF(num * sizeof(T));
  }
  void deallocate(pointer p, size_t num) {
    if (p == nullptr) return;
    DF((void*)p, num * sizeof(T));
  }
  template<class U, class... Args>
  void construct(U* p, Args&&... args) {
    ::new ((void*)p) U(std::forward<Args>(args)...);
  }
  void destroy(pointer p) {
    p->~T();
  }
  template<class U> bool operator==(const WrapAllocator<AF, DF, U>&) const {
    return true;
  }
  template<class U> bool operator!=(const WrapAllocator<AF, DF, U>&) const {
    return false;
  }
};
template<typename T> using LowAllocator =
  WrapAllocator<low_malloc, low_sized_free, T>;
template<typename T> using LowerAllocator =
  WrapAllocator<lower_malloc, lower_sized_free, T>;
template<typename T> using LowColdAllocator =
  WrapAllocator<low_cold_malloc, low_cold_sized_free, T>;
template<typename T> using VMAllocator =
  WrapAllocator<vm_malloc, vm_sized_free, T>;
template<typename T> using VMColdAllocator =
  WrapAllocator<vm_cold_malloc, vm_cold_sized_free, T>;
template<typename T> using APCAllocator =
  WrapAllocator<apc_malloc, apc_sized_free, T>;
template<typename T> using LocalAllocator =
  WrapAllocator<local_malloc, local_sized_free, T>;
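
// Illustrative usage sketch (not part of this header): the aliases above plug
// an arena-specific malloc/sized_free pair into standard containers.
//
//   #include <vector>
//
//   std::vector<int, LowAllocator<int>> v;
//   v.reserve(16);     // backing storage comes from low_malloc
//   v.push_back(42);   // released via low_sized_free when `v` is destroyed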
///////////////////////////////////////////////////////////////////////////////
}

#endif // incl_HPHP_UTIL_ALLOC_H_