//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {
// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 8 - 128 (size = class_id * 8).
// Next 8 classes: 128 + i * 16 (i = 1 to 8).
// Next 8 classes: 256 + i * 32 (i = 1 to 8).
// ...
// Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
//   - Efficient table-free class-to-size and size-to-class functions.
//   - The difference between two consecutive size classes is between 6% and 12%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per thread:
//  - kMaxNumCached is the maximal number of chunks per size class.
//  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
// c01 => s: 8 diff: +8 00% l 3 cached: 256 2048; id 1
// c02 => s: 16 diff: +8 100% l 4 cached: 256 4096; id 2
// ...
// c07 => s: 56 diff: +8 16% l 5 cached: 256 14336; id 7
//
// c08 => s: 64 diff: +8 14% l 6 cached: 256 16384; id 8
// ...
// c15 => s: 120 diff: +8 07% l 6 cached: 256 30720; id 15
//
// c16 => s: 128 diff: +8 06% l 7 cached: 256 32768; id 16
// c17 => s: 144 diff: +16 12% l 7 cached: 227 32688; id 17
// ...
// c23 => s: 240 diff: +16 07% l 7 cached: 136 32640; id 23
//
// c24 => s: 256 diff: +16 06% l 8 cached: 128 32768; id 24
// c25 => s: 288 diff: +32 12% l 8 cached: 113 32544; id 25
// ...
// c31 => s: 480 diff: +32 07% l 8 cached: 68 32640; id 31
//
// c32 => s: 512 diff: +32 06% l 9 cached: 64 32768; id 32
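//
// A worked example (a sketch, assuming the DefaultSizeClassMap parameters
// defined below: kMidSize = 128, kMidClass = 16, S = 3, kMidSizeLog = 7):
//   ClassID(300): l = 8, hbits = (300 >> 5) & 7 = 1, lbits = 300 & 31 = 12,
//     giving 16 + ((8 - 7) << 3) + 1 + 1 = 26.
//   Size(26): t = 128 << ((26 - 16) >> 3) = 256,
//     giving 256 + (256 >> 3) * ((26 - 16) & 7) = 320,
//   i.e. the smallest class size >= 300.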
template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
          uptr kMinBatchClassT>
class SizeClassMap {
  static const uptr kMinSizeLog = 3;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 3;
  static const uptr M = (1 << S) - 1;

 public:
  static const uptr kMaxNumCached = kMaxNumCachedT;
  struct TransferBatch {
    TransferBatch *next;
    uptr count;
    void *batch[kMaxNumCached];
  };
  static const uptr kMinBatchClass = kMinBatchClassT;
  static const uptr kMaxSize = 1 << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32  ? 32 :
      kNumClasses <= 64  ? 64 :
      kNumClasses <= 128 ? 128 : 256;
  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);
  }
  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }
  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max<uptr>(1, Min(kMaxNumCached, n));
  }
  static void Print() {
    uptr prev_s = 0;
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      uptr s = Size(i);
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
        Printf("\n");
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(s);
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
    Printf("Total cached: %zd\n", total_cached);
  }
  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      uptr s = Size(c);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
      if (c)
        CHECK_GT(Size(c), Size(c-1));
    }
    CHECK_EQ(ClassID(kMaxSize + 1), 0);

    for (uptr s = 1; s <= kMaxSize; s++) {
      uptr c = ClassID(s);
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
      if (c > 0)
        CHECK_LT(Size(c-1), s);
    }
    // TransferBatch for kMinBatchClass must fit into the block itself.
    const uptr batch_size = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass));
    CHECK_LE(batch_size, Size(kMinBatchClass));
    // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
    const uptr batch_size1 = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
    CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
  }
};

typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(33, 36)>
    DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(25, 28)>
    CompactSizeClassMap;
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
// Memory allocator statistics
enum AllocatorStat {
  AllocatorStatMalloced,
  AllocatorStatFreed,
  AllocatorStatMmapped,
  AllocatorStatUnmapped,
  AllocatorStatCount
};

typedef u64 AllocatorStatCounters[AllocatorStatCount];
// Per-thread stats, live in per-thread cache.
class AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
  }

  void Add(AllocatorStat i, u64 v) {
    v += atomic_load(&stats_[i], memory_order_relaxed);
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }

  void Set(AllocatorStat i, u64 v) {
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }

  u64 Get(AllocatorStat i) const {
    return atomic_load(&stats_[i], memory_order_relaxed);
  }

 private:
  friend class AllocatorGlobalStats;
  AllocatorStats *next_;
  AllocatorStats *prev_;
  atomic_uint64_t stats_[AllocatorStatCount];
};
// Global stats, used for aggregation and querying.
class AllocatorGlobalStats : public AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    next_ = this;
    prev_ = this;
  }

  void Register(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->next_ = next_;
    s->prev_ = this;
    next_->prev_ = s;
    next_ = s;
  }

  void Unregister(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->prev_->next_ = s->next_;
    s->next_->prev_ = s->prev_;
    for (int i = 0; i < AllocatorStatCount; i++)
      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
  }

  void Get(AllocatorStatCounters s) const {
    internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
    SpinMutexLock l(&mu_);
    const AllocatorStats *stats = this;
    for (;;) {
      for (int i = 0; i < AllocatorStatCount; i++)
        s[i] += stats->Get(AllocatorStat(i));
      stats = stats->next_;
      if (stats == this)
        break;
    }
  }

 private:
  mutable SpinMutex mu_;
};
// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }
};
// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
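//
// For example (a sketch, not a prescribed configuration): with
// kSpaceSize = 2^40 and kNumClassesRounded = 128, each Region spans
// kRegionSize = 2^33 bytes, and the chunk containing p belongs to size class
// (p / kRegionSize) % kNumClassesRounded, as computed in GetSizeClass().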
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
  void Init() {
    CHECK_EQ(kSpaceBeg,
             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());
  }
  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);
  }
  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }
  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
        alignment <= SizeClassMap::kMaxSize;
  }
  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();
    if (b == 0)
      b = PopulateFreeList(stat, c, class_id, region);
    region->n_allocated += b->count;
    return b;
  }
  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    region->free_list.Push(b);
    region->n_freed += b->count;
  }
  static bool PointerIsMine(void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
  }
  static uptr GetSizeClass(void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
  }
  void *GetBlockBegin(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);
    return 0;
  }
  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
  void *GetMetaData(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);
  }
  uptr TotalMemoryUsed() {
    uptr res = 0;
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;
    return res;
  }
  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
  }
  void PrintStats() {
    uptr total_mapped = 0;
    uptr n_allocated = 0;
    uptr n_freed = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;
    }
    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           "remains %zd\n",
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             class_id,
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated,
             region->n_allocated - region->n_freed);
    }
  }
  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

 private:
  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 16;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;
  struct RegionInfo {
    BlockingMutex mutex;
    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.
  };
  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());
  }
  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];
  }
  static uptr GetChunkIdx(uptr chunk, uptr size) {
    u32 offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
    // We save 2x by using 32-bit div, but may need to use a 256-way switch.
    return offset / (u32)size;
  }
  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                                   uptr class_id, RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();
    if (b)
      return b;
    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      stat->Add(AllocatorStatMmapped, map_size);
      region->mapped_user += map_size;
    }
    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;
    }
    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->allocated_user + region->allocated_meta > kRegionSize) {
      Printf("Out of memory. Dying.\n");
      Printf("The process has exhausted %zuMB for size class %zu.\n",
             kRegionSize / 1024 / 1024, size);
      Die();
    }
    for (;;) {
      if (class_id < SizeClassMap::kMinBatchClass)
        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
      else
        b = (Batch*)(region_beg + beg_idx);
      b->count = count;
      for (uptr i = 0; i < count; i++)
        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
      region->allocated_user += count * size;
      CHECK_LE(region->allocated_user, region->mapped_user);
      beg_idx += count * size;
      if (beg_idx + count * size + size > region->mapped_user)
        break;
      region->free_list.Push(b);
    }
    return b;
  }
};
// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on 64-bit arch, but there it is
// less efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// Region: a result of a single call to MmapAlignedOrDie(kRegionSize,
// kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
// 0 size class means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
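//
// For example (a sketch, using the 32-bit defaults below, kRegionSizeLog =
// 20): a pointer p lies in region p >> 20, and possible_regions[p >> 20]
// holds either its size class or 0 if that 1 MB region does not belong to us.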
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
  void Init() {
    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
  }
  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);
    return res;
  }
  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }
  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
        alignment <= SizeClassMap::kMaxSize;
  }
  void *GetMetaData(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);
  }
  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(stat, c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();
    return b;
  }
  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    sci->free_list.push_front(b);
  }
  bool PointerIsMine(void *p) {
    return GetSizeClass(p) != 0;
  }

  uptr GetSizeClass(void *p) {
    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
  }
  void *GetBlockBegin(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);
  }
  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
  uptr TotalMemoryUsed() {
    // No need to lock here.
    uptr res = 0;
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        res += kRegionSize;
    return res;
  }
  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);
    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
  }
  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

 private:
  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
  struct SizeClassInfo {
    SpinMutex mutex;
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
  };
  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);
    return res;
  }

  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);
  }
  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    stat->Add(AllocatorStatMmapped, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
    state_->possible_regions[ComputeRegionId(res)] = class_id;
    return res;
  }
  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &state_->size_class_info_array[class_id];
  }
  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                        SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(stat, class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    Batch *b = 0;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (b == 0) {
        if (class_id < SizeClassMap::kMinBatchClass)
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
        else
          b = (Batch*)i;
        b->count = 0;
      }
      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        sci->free_list.push_back(b);
        b = 0;
      }
    }
    if (b)
      sci->free_list.push_back(b);
  }
  struct State {
    u8 possible_regions[kNumPossibleRegions];
    SizeClassInfo size_class_info_array[kNumClasses];
  };
  State *state_;
};
// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
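//
// A typical (hypothetical) use, with `allocator` being an instance of the
// matching SizeClassAllocator type:
//   static THREADLOCAL AllocatorCache cache;  // POD, so it may live in TLS.
//   void *p = cache.Allocate(&allocator, allocator.ClassID(size));
//   cache.Deallocate(&allocator, allocator.ClassID(size), p);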
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
  void Init(AllocatorGlobalStats *s) {
    stats_.Init();
    if (s)
      s->Register(&stats_);
  }
  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
    Drain(allocator);
    if (s)
      s->Unregister(&stats_);
  }
  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);
    return res;
  }
  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;
  }
  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];
      while (c->count > 0)
        Drain(allocator, class_id);
    }
  }
  // private:
  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;
  struct PerClass {
    uptr count;
    uptr max_count;
    void *batch[2 * SizeClassMap::kMaxNumCached];
  };
  PerClass per_class_[kNumClasses];
  AllocatorStats stats_;
  void InitCache() {
    if (per_class_[0].max_count)
      return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);
    }
  }
  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
    CHECK_GT(b->count, 0);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];
    c->count = b->count;
    if (class_id < SizeClassMap::kMinBatchClass)
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
  }
  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b;
    if (class_id < SizeClassMap::kMinBatchClass)
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
    else
      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];
    }
    c->count -= cnt;
    b->count = cnt;
    allocator->DeallocateBatch(&stats_, class_id, b);
  }
};
// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
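// Each chunk is preceded by a page-sized Header holding the chunk's size and
// the bounds of the underlying mapping, so a request for `size` bytes maps
// roughly RoundUpTo(size, page_size) + page_size bytes (plus up to
// `alignment` extra bytes for over-aligned requests); see Allocate() below.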
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    page_size_ = GetPageSizeCached();
  }
  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;
    if (map_size < size) return 0;  // Overflow.
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK_EQ(0, res & (alignment - 1));
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->size = size;
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = SANITIZER_WORDSIZE - __builtin_clzl(map_size) - 1;
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
    {
      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      CHECK_LT(idx, kMaxNumChunks);
      h->chunk_idx = idx;
      chunks_[idx] = h;
      stats.n_allocs++;
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
      stat->Add(AllocatorStatMalloced, map_size);
      stat->Add(AllocatorStatMmapped, map_size);
    }
    return reinterpret_cast<void*>(res);
  }
  void Deallocate(AllocatorStats *stat, void *p) {
    Header *h = GetHeader(p);
    {
      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      n_chunks_--;
      stats.n_frees++;
      stats.currently_allocated -= h->map_size;
      stat->Add(AllocatorStatFreed, h->map_size);
      stat->Add(AllocatorStatUnmapped, h->map_size);
    }
    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
  }
  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);
    }
    return res;
  }
  bool PointerIsMine(void *p) {
    return GetBlockBegin(p) != 0;
  }

  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);
  }
  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
    return GetHeader(p) + 1;
  }
  void *GetBlockBegin(void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk, skip it.
      if (p - ch < p - nearest_chunk)
        nearest_chunk = ch;
    }
    if (!nearest_chunk)
      return 0;
    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size < p)
      return 0;
    return GetUser(h);
  }
  void PrintStats() {
    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      if (!c) continue;
      Printf("%zd:%zd; ", i, c);
    }
    Printf("\n");
  }
 private:
  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
  struct Header {
    uptr map_beg;
    uptr map_size;
    uptr size;
    uptr chunk_idx;
  };
  Header *GetHeader(uptr p) {
    CHECK_EQ(p % page_size_, 0);
    return reinterpret_cast<Header*>(p - page_size_);
  }
  Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }
  void *GetUser(Header *h) {
    CHECK_EQ((uptr)h % page_size_, 0);
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
  }

  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;
  }
  uptr page_size_;
  Header *chunks_[kMaxNumChunks];
  uptr n_chunks_;
  struct Stats {
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  } stats;
  SpinMutex mutex_;
};
// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
// When allocating 2^x bytes it should return 2^x aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
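//
// A hypothetical instantiation (a sketch; the actual parameters are chosen by
// each tool, not by this header):
//   typedef SizeClassAllocator64<0x700000000000ULL, 0x010000000000ULL, 16,
//       DefaultSizeClassMap> PrimaryAllocator;
//   typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
//   typedef LargeMmapAllocator<> SecondaryAllocator;
//   typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
//       SecondaryAllocator> Allocator;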
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
 public:
  void Init() {
    primary_.Init();
    secondary_.Init();
    stats_.Init();
  }
  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size == 0)
      size = 1;
    if (size + alignment < size)
      return 0;
    if (alignment > 8)
      size = RoundUpTo(size, alignment);
    void *res;
    if (primary_.CanAllocate(size, alignment))
      res = cache->Allocate(&primary_, primary_.ClassID(size));
    else
      res = secondary_.Allocate(&stats_, size, alignment);
    if (alignment > 8)
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res)
      internal_memset(res, 0, size);
    return res;
  }
  void Deallocate(AllocatorCache *cache, void *p) {
    if (!p) return;
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
    else
      secondary_.Deallocate(&stats_, p);
  }
  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
                   uptr alignment) {
    if (!p)
      return Allocate(cache, new_size, alignment);
    if (!new_size) {
      Deallocate(cache, p);
      return 0;
    }
    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);
    if (new_p)
      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);
    return new_p;
  }
  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))
      return true;
    return secondary_.PointerIsMine(p);
  }

  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);
  }
  void *GetMetaData(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);
  }

  void *GetBlockBegin(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);
  }
  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);
  }

  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
  }

  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
  void InitCache(AllocatorCache *cache) {
    cache->Init(&stats_);
  }

  void DestroyCache(AllocatorCache *cache) {
    cache->Destroy(&primary_, &stats_);
  }

  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);
  }
  void GetStats(AllocatorStatCounters s) const {
    stats_.Get(s);
  }

  void PrintStats() {
    primary_.PrintStats();
    secondary_.PrintStats();
  }

 private:
  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
  AllocatorGlobalStats stats_;
};
// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
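// (For example, on a 32-bit target this should return true for
// size = n = 0x10000, since 0x10000 * 0x10000 == 2^32 does not fit in uptr.)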
}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H