//===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ALLOCATOR_H
#define SANITIZER_ALLOCATOR_H

#include "sanitizer_internal_defs.h"
#include "sanitizer_common.h"
#include "sanitizer_libc.h"
#include "sanitizer_list.h"
#include "sanitizer_mutex.h"
#include "sanitizer_lfstack.h"

namespace __sanitizer {

// SizeClassMap maps allocation sizes into size classes and back.
// Class 0 corresponds to size 0.
// Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
// Next 8 classes: 256 + i * 32 (i = 1 to 8).
// Next 8 classes: 512 + i * 64 (i = 1 to 8).
// ...
// Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
// Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
//
// This structure of the size class map gives us:
// - Efficient table-free class-to-size and size-to-class functions.
// - The difference between two consecutive size classes is between 6% and 12%.
//
// This class also gives a hint to a thread-caching allocator about the number
// of chunks that need to be cached per-thread:
// - kMaxNumCached is the maximal number of chunks per size class.
// - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
//
// Part of output of SizeClassMap::Print():
// c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
// c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
// c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
// c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
// c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
// c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
// c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
// c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
//
// c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
// c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
// c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
// c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
// c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
// c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
// c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
// c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
//
// c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
// c17 => s: 288 diff: +32 12% l 8 cached: 227 65376; id 17
// c18 => s: 320 diff: +32 11% l 8 cached: 204 65280; id 18
// c19 => s: 352 diff: +32 10% l 8 cached: 186 65472; id 19
// c20 => s: 384 diff: +32 09% l 8 cached: 170 65280; id 20
// c21 => s: 416 diff: +32 08% l 8 cached: 157 65312; id 21
// c22 => s: 448 diff: +32 07% l 8 cached: 146 65408; id 22
// c23 => s: 480 diff: +32 07% l 8 cached: 136 65280; id 23
//
// c24 => s: 512 diff: +32 06% l 9 cached: 128 65536; id 24
// c25 => s: 576 diff: +64 12% l 9 cached: 113 65088; id 25
// c26 => s: 640 diff: +64 11% l 9 cached: 102 65280; id 26
// c27 => s: 704 diff: +64 10% l 9 cached: 93 65472; id 27
// c28 => s: 768 diff: +64 09% l 9 cached: 85 65280; id 28
// c29 => s: 832 diff: +64 08% l 9 cached: 78 64896; id 29
// c30 => s: 896 diff: +64 07% l 9 cached: 73 65408; id 30
// c31 => s: 960 diff: +64 07% l 9 cached: 68 65280; id 31
//
// c32 => s: 1024 diff: +64 06% l 10 cached: 64 65536; id 32
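//
// Worked example (illustrative, derived from the formulas below): for a
// 300-byte request, l = 8, hbits = (300 >> 5) & 7 = 1, lbits = 300 & 31 = 12,
// so ClassID(300) = 16 + (0 << 3) + 1 + (lbits > 0) = 18, and Size(18) = 320
// (matching c18 above), i.e. the chunk wastes 20 bytes.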

template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
          uptr kMinBatchClassT>
class SizeClassMap {
  static const uptr kMinSizeLog = 4;
  static const uptr kMidSizeLog = kMinSizeLog + 4;
  static const uptr kMinSize = 1 << kMinSizeLog;
  static const uptr kMidSize = 1 << kMidSizeLog;
  static const uptr kMidClass = kMidSize / kMinSize;
  static const uptr S = 3;
  static const uptr M = (1 << S) - 1;

 public:
  static const uptr kMaxNumCached = kMaxNumCachedT;
  struct TransferBatch {
    TransferBatch *next;
    uptr count;
    void *batch[kMaxNumCached];
  };

  static const uptr kMinBatchClass = kMinBatchClassT;
  static const uptr kMaxSize = 1 << kMaxSizeLog;
  static const uptr kNumClasses =
      kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
  COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
  static const uptr kNumClassesRounded =
      kNumClasses == 32  ? 32 :
      kNumClasses <= 64  ? 64 :
      kNumClasses <= 128 ? 128 : 256;

  static uptr Size(uptr class_id) {
    if (class_id <= kMidClass)
      return kMinSize * class_id;
    class_id -= kMidClass;
    uptr t = kMidSize << (class_id >> S);
    return t + (t >> S) * (class_id & M);
  }

  static uptr ClassID(uptr size) {
    if (size <= kMidSize)
      return (size + kMinSize - 1) >> kMinSizeLog;
    if (size > kMaxSize) return 0;
    uptr l = MostSignificantSetBitIndex(size);
    uptr hbits = (size >> (l - S)) & M;
    uptr lbits = size & ((1 << (l - S)) - 1);
    uptr l1 = l - kMidSizeLog;
    return kMidClass + (l1 << S) + hbits + (lbits > 0);
  }

  static uptr MaxCached(uptr class_id) {
    if (class_id == 0) return 0;
    uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
    return Max<uptr>(1, Min(kMaxNumCached, n));
  }

  static void Print() {
    uptr prev_s = 0;
    uptr total_cached = 0;
    for (uptr i = 0; i < kNumClasses; i++) {
      uptr s = Size(i);
      if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
        Printf("\n");
      uptr d = s - prev_s;
      uptr p = prev_s ? (d * 100 / prev_s) : 0;
      uptr l = MostSignificantSetBitIndex(s);
      uptr cached = MaxCached(i) * s;
      Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
             "cached: %zd %zd; id %zd\n",
             i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
      total_cached += cached;
      prev_s = s;
    }
    Printf("Total cached: %zd\n", total_cached);
  }

  static void Validate() {
    for (uptr c = 1; c < kNumClasses; c++) {
      // Printf("Validate: c%zd\n", c);
      uptr s = Size(c);
      CHECK_EQ(ClassID(s), c);
      if (c != kNumClasses - 1)
        CHECK_EQ(ClassID(s + 1), c + 1);
      CHECK_EQ(ClassID(s - 1), c);
      if (c)
        CHECK_GT(Size(c), Size(c - 1));
    }
    CHECK_EQ(ClassID(kMaxSize + 1), 0);

    for (uptr s = 1; s <= kMaxSize; s++) {
      uptr c = ClassID(s);
      // Printf("s%zd => c%zd\n", s, c);
      CHECK_LT(c, kNumClasses);
      CHECK_GE(Size(c), s);
      if (c > 0)
        CHECK_LT(Size(c - 1), s);
    }

    // TransferBatch for kMinBatchClass must fit into the block itself.
    const uptr batch_size = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass));
    CHECK_LE(batch_size, Size(kMinBatchClass));
    // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
    const uptr batch_size1 = sizeof(TransferBatch)
        - sizeof(void*)  // NOLINT
            * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
    CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
  }
};

typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(25, 28)>
    DefaultSizeClassMap;
typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(17, 20)>
    CompactSizeClassMap;
template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;

// Memory allocator statistics
enum AllocatorStat {
  AllocatorStatMalloced,
  AllocatorStatFreed,
  AllocatorStatMmapped,
  AllocatorStatUnmapped,
  AllocatorStatCount
};

typedef u64 AllocatorStatCounters[AllocatorStatCount];

// Per-thread stats, live in per-thread cache.
class AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
  }

  void Add(AllocatorStat i, u64 v) {
    v += atomic_load(&stats_[i], memory_order_relaxed);
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }

  void Set(AllocatorStat i, u64 v) {
    atomic_store(&stats_[i], v, memory_order_relaxed);
  }

  u64 Get(AllocatorStat i) const {
    return atomic_load(&stats_[i], memory_order_relaxed);
  }

 private:
  friend class AllocatorGlobalStats;
  AllocatorStats *next_;
  AllocatorStats *prev_;
  atomic_uint64_t stats_[AllocatorStatCount];
};

// Global stats, used for aggregation and querying.
class AllocatorGlobalStats : public AllocatorStats {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    next_ = this;
    prev_ = this;
  }

  void Register(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->next_ = next_;
    s->prev_ = this;
    next_->prev_ = s;
    next_ = s;
  }

  void Unregister(AllocatorStats *s) {
    SpinMutexLock l(&mu_);
    s->prev_->next_ = s->next_;
    s->next_->prev_ = s->prev_;
    for (int i = 0; i < AllocatorStatCount; i++)
      Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
  }

  void Get(AllocatorStatCounters s) const {
    internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
    SpinMutexLock l(&mu_);
    const AllocatorStats *stats = this;
    for (;;) {
      for (int i = 0; i < AllocatorStatCount; i++)
        s[i] += stats->Get(AllocatorStat(i));
      stats = stats->next_;
      if (stats == this)
        break;
    }
  }

 private:
  mutable SpinMutex mu_;
};
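
// Reading the aggregate (an illustrative sketch, not part of the original
// header; "global_stats" stands for whatever AllocatorGlobalStats instance a
// tool keeps):
//
//   AllocatorStatCounters counters;
//   global_stats.Get(counters);  // Sums this object and all registered ones.
//   u64 live = counters[AllocatorStatMalloced] - counters[AllocatorStatFreed];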

// Allocators call these callbacks on mmap/munmap.
struct NoOpMapUnmapCallback {
  void OnMap(uptr p, uptr size) const { }
  void OnUnmap(uptr p, uptr size) const { }
};

// SizeClassAllocator64 -- allocator for 64-bit address space.
//
// Space: a portion of address space of kSpaceSize bytes starting at
// a fixed address (kSpaceBeg). Both constants are powers of two and
// kSpaceBeg is kSpaceSize-aligned.
// At the beginning the entire space is mprotect-ed, then small parts of it
// are mapped on demand.
//
// Region: a part of Space dedicated to a single size class.
// There are kNumClasses Regions of equal size.
//
// UserChunk: a piece of memory returned to user.
// MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
//
// A Region looks like this:
// UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
template <const uptr kSpaceBeg, const uptr kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator64 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    CHECK_EQ(kSpaceBeg,
             reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
    MapWithCallback(kSpaceEnd, AdditionalSize());
  }

  void MapWithCallback(uptr beg, uptr size) {
    CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
    MapUnmapCallback().OnMap(beg, size);
  }

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;
  }

  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *region = GetRegionInfo(class_id);
    Batch *b = region->free_list.Pop();
    if (b == 0)
      b = PopulateFreeList(stat, c, class_id, region);
    region->n_allocated += b->count;
    return b;
  }

  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    RegionInfo *region = GetRegionInfo(class_id);
    region->free_list.Push(b);
    region->n_freed += b->count;
  }

  static bool PointerIsMine(void *p) {
    return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
  }

  static uptr GetSizeClass(void *p) {
    return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
  }

  void *GetBlockBegin(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx((uptr)p, size);
    uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
    uptr beg = chunk_idx * size;
    uptr next_beg = beg + size;
    RegionInfo *region = GetRegionInfo(class_id);
    if (region->mapped_user >= next_beg)
      return reinterpret_cast<void*>(reg_beg + beg);
    return 0;
  }

  static uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  void *GetMetaData(void *p) {
    uptr class_id = GetSizeClass(p);
    uptr size = SizeClassMap::Size(class_id);
    uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
    return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
                                   (1 + chunk_idx) * kMetadataSize);
  }

  uptr TotalMemoryUsed() {
    uptr res = 0;
    for (uptr i = 0; i < kNumClasses; i++)
      res += GetRegionInfo(i)->allocated_user;
    return res;
  }

  void TestOnlyUnmap() {
    UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
  }

  void PrintStats() {
    uptr total_mapped = 0;
    uptr n_allocated = 0;
    uptr n_freed = 0;
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      total_mapped += region->mapped_user;
      n_allocated += region->n_allocated;
      n_freed += region->n_freed;
    }
    Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
           "remains %zd\n",
           total_mapped >> 20, n_allocated, n_allocated - n_freed);
    for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
      RegionInfo *region = GetRegionInfo(class_id);
      if (region->mapped_user == 0) continue;
      Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
             class_id,
             SizeClassMap::Size(class_id),
             region->mapped_user >> 10,
             region->n_allocated,
             region->n_allocated - region->n_freed);
    }
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    for (uptr i = 0; i < kNumClasses; i++) {
      GetRegionInfo(i)->mutex.Lock();
    }
  }

  void ForceUnlock() {
    for (int i = (int)kNumClasses - 1; i >= 0; i--) {
      GetRegionInfo(i)->mutex.Unlock();
    }
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;
  static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;

 private:
  static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
  static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
  COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
  // kRegionSize must be >= 2^32.
  COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
  // Populate the free list with at most this number of bytes at once
  // or with one element if its size is greater.
  static const uptr kPopulateSize = 1 << 14;
  // Call mmap for user memory with at least this size.
  static const uptr kUserMapSize = 1 << 16;
  // Call mmap for metadata memory with at least this size.
  static const uptr kMetaMapSize = 1 << 16;

  struct RegionInfo {
    BlockingMutex mutex;
    LFStack<Batch> free_list;
    uptr allocated_user;  // Bytes allocated for user memory.
    uptr allocated_meta;  // Bytes allocated for metadata.
    uptr mapped_user;  // Bytes mapped for user memory.
    uptr mapped_meta;  // Bytes mapped for metadata.
    uptr n_allocated, n_freed;  // Just stats.
  };
  COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);

  static uptr AdditionalSize() {
    return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
                     GetPageSizeCached());
  }

  RegionInfo *GetRegionInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
    return &regions[class_id];
  }

  static uptr GetChunkIdx(uptr chunk, uptr size) {
    u32 offset = chunk % kRegionSize;
    // Here we divide by a non-constant. This is costly.
    // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
    // We save 2x by using 32-bit div, but may need to use a 256-way switch.
    return offset / (u32)size;
  }

  NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                                   uptr class_id, RegionInfo *region) {
    BlockingMutexLock l(&region->mutex);
    Batch *b = region->free_list.Pop();
    if (b)
      return b;
    uptr size = SizeClassMap::Size(class_id);
    uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
    uptr beg_idx = region->allocated_user;
    uptr end_idx = beg_idx + count * size;
    uptr region_beg = kSpaceBeg + kRegionSize * class_id;
    if (end_idx + size > region->mapped_user) {
      // Do the mmap for the user memory.
      uptr map_size = kUserMapSize;
      while (end_idx + size > region->mapped_user + map_size)
        map_size += kUserMapSize;
      CHECK_GE(region->mapped_user + map_size, end_idx);
      MapWithCallback(region_beg + region->mapped_user, map_size);
      stat->Add(AllocatorStatMmapped, map_size);
      region->mapped_user += map_size;
    }
    uptr total_count = (region->mapped_user - beg_idx - size)
        / size / count * count;
    region->allocated_meta += total_count * kMetadataSize;
    if (region->allocated_meta > region->mapped_meta) {
      uptr map_size = kMetaMapSize;
      while (region->allocated_meta > region->mapped_meta + map_size)
        map_size += kMetaMapSize;
      // Do the mmap for the metadata.
      CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
      MapWithCallback(region_beg + kRegionSize -
                      region->mapped_meta - map_size, map_size);
      region->mapped_meta += map_size;
    }
    CHECK_LE(region->allocated_meta, region->mapped_meta);
    if (region->allocated_user + region->allocated_meta > kRegionSize) {
      Printf("Out of memory. Dying.\n");
      Printf("The process has exhausted %zuMB for size class %zu.\n",
             kRegionSize / 1024 / 1024, size);
      Die();
    }
    for (;;) {
      if (class_id < SizeClassMap::kMinBatchClass)
        b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
      else
        b = (Batch*)(region_beg + beg_idx);
      b->count = count;
      for (uptr i = 0; i < count; i++)
        b->batch[i] = (void*)(region_beg + beg_idx + i * size);
      region->allocated_user += count * size;
      CHECK_LE(region->allocated_user, region->mapped_user);
      beg_idx += count * size;
      if (beg_idx + count * size + size > region->mapped_user)
        break;
      region->free_list.Push(b);
    }
    return b;
  }
};

// SizeClassAllocator32 -- allocator for 32-bit address space.
// This allocator can theoretically be used on 64-bit arch, but there it is
// less efficient than SizeClassAllocator64.
//
// [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
// be returned by MmapOrDie().
//
// Region:
//   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
// Since the regions are aligned by kRegionSize, there are exactly
// kNumPossibleRegions possible regions in the address space and so we keep
// a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
// 0 size class means the region is not used by the allocator.
//
// One Region is used to allocate chunks of a single size class.
// A Region looks like this:
// UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
//
// In order to avoid false sharing the objects of this class should be
// cache-line aligned.
template <const uptr kSpaceBeg, const u64 kSpaceSize,
          const uptr kMetadataSize, class SizeClassMap,
          class MapUnmapCallback = NoOpMapUnmapCallback>
class SizeClassAllocator32 {
 public:
  typedef typename SizeClassMap::TransferBatch Batch;
  typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
      SizeClassMap, MapUnmapCallback> ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;

  void Init() {
    state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
  }

  void *MapWithCallback(uptr size) {
    size = RoundUpTo(size, GetPageSizeCached());
    void *res = MmapOrDie(size, "SizeClassAllocator32");
    MapUnmapCallback().OnMap((uptr)res, size);
    return res;
  }

  void UnmapWithCallback(uptr beg, uptr size) {
    MapUnmapCallback().OnUnmap(beg, size);
    UnmapOrDie(reinterpret_cast<void *>(beg), size);
  }

  static bool CanAllocate(uptr size, uptr alignment) {
    return size <= SizeClassMap::kMaxSize &&
      alignment <= SizeClassMap::kMaxSize;
  }

  void *GetMetaData(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    uptr n = offset / (u32)size;  // 32-bit division
    uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
    return reinterpret_cast<void*>(meta);
  }

  NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
                                uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    if (sci->free_list.empty())
      PopulateFreeList(stat, c, sci, class_id);
    CHECK(!sci->free_list.empty());
    Batch *b = sci->free_list.front();
    sci->free_list.pop_front();
    return b;
  }

  NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
    CHECK_LT(class_id, kNumClasses);
    SizeClassInfo *sci = GetSizeClassInfo(class_id);
    SpinMutexLock l(&sci->mutex);
    sci->free_list.push_front(b);
  }

  bool PointerIsMine(void *p) {
    return GetSizeClass(p) != 0;
  }

  uptr GetSizeClass(void *p) {
    return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
  }

  void *GetBlockBegin(void *p) {
    CHECK(PointerIsMine(p));
    uptr mem = reinterpret_cast<uptr>(p);
    uptr beg = ComputeRegionBeg(mem);
    uptr size = SizeClassMap::Size(GetSizeClass(p));
    u32 offset = mem - beg;
    u32 n = offset / (u32)size;  // 32-bit division
    uptr res = beg + (n * (u32)size);
    return reinterpret_cast<void*>(res);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    CHECK(PointerIsMine(p));
    return SizeClassMap::Size(GetSizeClass(p));
  }

  uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }

  uptr TotalMemoryUsed() {
    // No need to lock here.
    uptr res = 0;
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        res += kRegionSize;
    return res;
  }

  void TestOnlyUnmap() {
    for (uptr i = 0; i < kNumPossibleRegions; i++)
      if (state_->possible_regions[i])
        UnmapWithCallback((i * kRegionSize), kRegionSize);
    UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    for (uptr i = 0; i < kNumClasses; i++) {
      GetSizeClassInfo(i)->mutex.Lock();
    }
  }

  void ForceUnlock() {
    for (int i = kNumClasses - 1; i >= 0; i--) {
      GetSizeClassInfo(i)->mutex.Unlock();
    }
  }

  void PrintStats() {
  }

  typedef SizeClassMap SizeClassMapT;
  static const uptr kNumClasses = SizeClassMap::kNumClasses;

 private:
  static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
  static const uptr kRegionSize = 1 << kRegionSizeLog;
  static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;

  struct SizeClassInfo {
    SpinMutex mutex;
    IntrusiveList<Batch> free_list;
    char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
  };
  COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);

  uptr ComputeRegionId(uptr mem) {
    uptr res = mem >> kRegionSizeLog;
    CHECK_LT(res, kNumPossibleRegions);
    return res;
  }

  uptr ComputeRegionBeg(uptr mem) {
    return mem & ~(kRegionSize - 1);
  }

  uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
                                      "SizeClassAllocator32"));
    MapUnmapCallback().OnMap(res, kRegionSize);
    stat->Add(AllocatorStatMmapped, kRegionSize);
    CHECK_EQ(0U, (res & (kRegionSize - 1)));
    CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
    state_->possible_regions[ComputeRegionId(res)] = class_id;
    return res;
  }

  SizeClassInfo *GetSizeClassInfo(uptr class_id) {
    CHECK_LT(class_id, kNumClasses);
    return &state_->size_class_info_array[class_id];
  }

  void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
                        SizeClassInfo *sci, uptr class_id) {
    uptr size = SizeClassMap::Size(class_id);
    uptr reg = AllocateRegion(stat, class_id);
    uptr n_chunks = kRegionSize / (size + kMetadataSize);
    uptr max_count = SizeClassMap::MaxCached(class_id);
    Batch *b = 0;
    for (uptr i = reg; i < reg + n_chunks * size; i += size) {
      if (b == 0) {
        if (class_id < SizeClassMap::kMinBatchClass)
          b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
        else
          b = (Batch*)i;
        b->count = 0;
      }
      b->batch[b->count++] = (void*)i;
      if (b->count == max_count) {
        sci->free_list.push_back(b);
        b = 0;
      }
    }
    if (b)
      sci->free_list.push_back(b);
  }

  struct State {
    u8 possible_regions[kNumPossibleRegions];
    SizeClassInfo size_class_info_array[kNumClasses];
  };
  State *state_;
};

// Objects of this type should be used as local caches for SizeClassAllocator64
// or SizeClassAllocator32. Since the typical use of this class is to have one
// object per thread in TLS, it has to be POD.
template<class SizeClassAllocator>
struct SizeClassAllocatorLocalCache {
  typedef SizeClassAllocator Allocator;
  static const uptr kNumClasses = SizeClassAllocator::kNumClasses;

  void Init(AllocatorGlobalStats *s) {
    stats_.Init();
    if (s)
      s->Register(&stats_);
  }

  void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
    Drain(allocator);
    if (s)
      s->Unregister(&stats_);
  }

  void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == 0))
      Refill(allocator, class_id);
    void *res = c->batch[--c->count];
    PREFETCH(c->batch[c->count - 1]);
    return res;
  }

  void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
    CHECK_NE(class_id, 0UL);
    CHECK_LT(class_id, kNumClasses);
    stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
    PerClass *c = &per_class_[class_id];
    if (UNLIKELY(c->count == c->max_count))
      Drain(allocator, class_id);
    c->batch[c->count++] = p;
  }

  void Drain(SizeClassAllocator *allocator) {
    for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
      PerClass *c = &per_class_[class_id];
      while (c->count > 0)
        Drain(allocator, class_id);
    }
  }

  // private:
  typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
  typedef typename SizeClassMap::TransferBatch Batch;
  struct PerClass {
    uptr count;
    uptr max_count;
    void *batch[2 * SizeClassMap::kMaxNumCached];
  };
  PerClass per_class_[kNumClasses];
  AllocatorStats stats_;

  void InitCache() {
    if (per_class_[0].max_count)
      return;
    for (uptr i = 0; i < kNumClasses; i++) {
      PerClass *c = &per_class_[i];
      c->max_count = 2 * SizeClassMap::MaxCached(i);
    }
  }

  NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
    CHECK_GT(b->count, 0);
    for (uptr i = 0; i < b->count; i++)
      c->batch[i] = b->batch[i];
    c->count = b->count;
    if (class_id < SizeClassMap::kMinBatchClass)
      Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
  }

  NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
    InitCache();
    PerClass *c = &per_class_[class_id];
    Batch *b;
    if (class_id < SizeClassMap::kMinBatchClass)
      b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
    else
      b = (Batch*)c->batch[0];
    uptr cnt = Min(c->max_count / 2, c->count);
    for (uptr i = 0; i < cnt; i++) {
      b->batch[i] = c->batch[i];
      c->batch[i] = c->batch[i + c->max_count / 2];
    }
    b->count = cnt;
    c->count -= cnt;
    allocator->DeallocateBatch(&stats_, class_id, b);
  }
};
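
// Typical per-thread use (a sketch, not part of the original header;
// THREADLOCAL comes from sanitizer_internal_defs.h, and "allocator" stands
// for a process-wide SizeClassAllocator64/32 instance):
//
//   static THREADLOCAL SizeClassAllocatorLocalCache<PrimaryAllocator64> cache;
//   ...
//   uptr class_id = allocator.ClassID(size);
//   void *p = cache.Allocate(&allocator, class_id);
//   cache.Deallocate(&allocator, class_id, p);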

// This class can (de)allocate only large chunks of memory using mmap/unmap.
// The main purpose of this allocator is to cover large and rare allocation
// sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
template <class MapUnmapCallback = NoOpMapUnmapCallback>
class LargeMmapAllocator {
 public:
  void Init() {
    internal_memset(this, 0, sizeof(*this));
    page_size_ = GetPageSizeCached();
  }

  void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
    CHECK(IsPowerOfTwo(alignment));
    uptr map_size = RoundUpMapSize(size);
    if (alignment > page_size_)
      map_size += alignment;
    if (map_size < size) return 0;  // Overflow.
    uptr map_beg = reinterpret_cast<uptr>(
        MmapOrDie(map_size, "LargeMmapAllocator"));
    MapUnmapCallback().OnMap(map_beg, map_size);
    uptr map_end = map_beg + map_size;
    uptr res = map_beg + page_size_;
    if (res & (alignment - 1))  // Align.
      res += alignment - (res & (alignment - 1));
    CHECK_EQ(0, res & (alignment - 1));
    CHECK_LE(res + size, map_end);
    Header *h = GetHeader(res);
    h->size = size;
    h->map_beg = map_beg;
    h->map_size = map_size;
    uptr size_log = MostSignificantSetBitIndex(map_size);
    CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
    {
      SpinMutexLock l(&mutex_);
      uptr idx = n_chunks_++;
      CHECK_LT(idx, kMaxNumChunks);
      h->chunk_idx = idx;
      chunks_[idx] = h;
      stats.n_allocs++;
      stats.currently_allocated += map_size;
      stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
      stats.by_size_log[size_log]++;
      stat->Add(AllocatorStatMalloced, map_size);
      stat->Add(AllocatorStatMmapped, map_size);
    }
    return reinterpret_cast<void*>(res);
  }

  void Deallocate(AllocatorStats *stat, void *p) {
    Header *h = GetHeader(p);
    {
      SpinMutexLock l(&mutex_);
      uptr idx = h->chunk_idx;
      CHECK_EQ(chunks_[idx], h);
      CHECK_LT(idx, n_chunks_);
      chunks_[idx] = chunks_[n_chunks_ - 1];
      chunks_[idx]->chunk_idx = idx;
      n_chunks_--;
      stats.n_frees++;
      stats.currently_allocated -= h->map_size;
      stat->Add(AllocatorStatFreed, h->map_size);
      stat->Add(AllocatorStatUnmapped, h->map_size);
    }
    MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
    UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
  }

  uptr TotalMemoryUsed() {
    SpinMutexLock l(&mutex_);
    uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = chunks_[i];
      CHECK_EQ(h->chunk_idx, i);
      res += RoundUpMapSize(h->size);
    }
    return res;
  }

  bool PointerIsMine(void *p) {
    return GetBlockBegin(p) != 0;
  }

  uptr GetActuallyAllocatedSize(void *p) {
    return RoundUpTo(GetHeader(p)->size, page_size_);
  }

  // At least page_size_/2 metadata bytes are available.
  void *GetMetaData(void *p) {
    // Too slow: CHECK_EQ(p, GetBlockBegin(p));
    CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
    return GetHeader(p) + 1;
  }

  void *GetBlockBegin(void *ptr) {
    uptr p = reinterpret_cast<uptr>(ptr);
    SpinMutexLock l(&mutex_);
    uptr nearest_chunk = 0;
    // Cache-friendly linear search.
    for (uptr i = 0; i < n_chunks_; i++) {
      uptr ch = reinterpret_cast<uptr>(chunks_[i]);
      if (p < ch) continue;  // p is to the left of this chunk, skip it.
      if (p - ch < p - nearest_chunk)
        nearest_chunk = ch;
    }
    if (!nearest_chunk)
      return 0;
    Header *h = reinterpret_cast<Header *>(nearest_chunk);
    CHECK_GE(nearest_chunk, h->map_beg);
    CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
    CHECK_LE(nearest_chunk, p);
    if (h->map_beg + h->map_size < p)
      return 0;
    return GetUser(h);
  }

  void PrintStats() {
    Printf("Stats: LargeMmapAllocator: allocated %zd times, "
           "remains %zd (%zd K) max %zd M; by size logs: ",
           stats.n_allocs, stats.n_allocs - stats.n_frees,
           stats.currently_allocated >> 10, stats.max_allocated >> 20);
    for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
      uptr c = stats.by_size_log[i];
      if (!c) continue;
      Printf("%zd:%zd; ", i, c);
    }
    Printf("\n");
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    mutex_.Lock();
  }

  void ForceUnlock() {
    mutex_.Unlock();
  }

 private:
  static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
  struct Header {
    uptr map_beg;
    uptr map_size;
    uptr size;
    uptr chunk_idx;
  };

  Header *GetHeader(uptr p) {
    CHECK_EQ(p % page_size_, 0);
    return reinterpret_cast<Header*>(p - page_size_);
  }
  Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }

  void *GetUser(Header *h) {
    CHECK_EQ((uptr)h % page_size_, 0);
    return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
  }

  uptr RoundUpMapSize(uptr size) {
    return RoundUpTo(size, page_size_) + page_size_;
  }

  uptr page_size_;
  Header *chunks_[kMaxNumChunks];
  uptr n_chunks_;
  struct Stats {
    uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
  } stats;
  SpinMutex mutex_;
};

// This class implements a complete memory allocator by using two
// internal allocators:
// PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
// When allocating 2^x bytes it should return a 2^x-aligned chunk.
// PrimaryAllocator is used via a local AllocatorCache.
// SecondaryAllocator can allocate anything, but is not efficient.
template <class PrimaryAllocator, class AllocatorCache,
          class SecondaryAllocator>  // NOLINT
class CombinedAllocator {
 public:
  void Init() {
    primary_.Init();
    secondary_.Init();
    stats_.Init();
  }

  void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
                 bool cleared = false) {
    // Returning 0 on malloc(0) may break a lot of code.
    if (size == 0)
      size = 1;
    if (size + alignment < size)
      return 0;
    if (alignment > 8)
      size = RoundUpTo(size, alignment);
    void *res;
    if (primary_.CanAllocate(size, alignment))
      res = cache->Allocate(&primary_, primary_.ClassID(size));
    else
      res = secondary_.Allocate(&stats_, size, alignment);
    if (alignment > 8)
      CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
    if (cleared && res)
      internal_memset(res, 0, size);
    return res;
  }

  void Deallocate(AllocatorCache *cache, void *p) {
    if (!p) return;
    if (primary_.PointerIsMine(p))
      cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
    else
      secondary_.Deallocate(&stats_, p);
  }

  void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
                   uptr alignment) {
    if (!p)
      return Allocate(cache, new_size, alignment);
    if (!new_size) {
      Deallocate(cache, p);
      return 0;
    }
    CHECK(PointerIsMine(p));
    uptr old_size = GetActuallyAllocatedSize(p);
    uptr memcpy_size = Min(new_size, old_size);
    void *new_p = Allocate(cache, new_size, alignment);
    if (new_p)
      internal_memcpy(new_p, p, memcpy_size);
    Deallocate(cache, p);
    return new_p;
  }

  bool PointerIsMine(void *p) {
    if (primary_.PointerIsMine(p))
      return true;
    return secondary_.PointerIsMine(p);
  }

  bool FromPrimary(void *p) {
    return primary_.PointerIsMine(p);
  }

  void *GetMetaData(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetMetaData(p);
    return secondary_.GetMetaData(p);
  }

  void *GetBlockBegin(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetBlockBegin(p);
    return secondary_.GetBlockBegin(p);
  }

  uptr GetActuallyAllocatedSize(void *p) {
    if (primary_.PointerIsMine(p))
      return primary_.GetActuallyAllocatedSize(p);
    return secondary_.GetActuallyAllocatedSize(p);
  }

  uptr TotalMemoryUsed() {
    return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
  }

  void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }

  void InitCache(AllocatorCache *cache) {
    cache->Init(&stats_);
  }

  void DestroyCache(AllocatorCache *cache) {
    cache->Destroy(&primary_, &stats_);
  }

  void SwallowCache(AllocatorCache *cache) {
    cache->Drain(&primary_);
  }

  void GetStats(AllocatorStatCounters s) const {
    stats_.Get(s);
  }

  void PrintStats() {
    primary_.PrintStats();
    secondary_.PrintStats();
  }

  // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
  // introspection API.
  void ForceLock() {
    primary_.ForceLock();
    secondary_.ForceLock();
  }

  void ForceUnlock() {
    secondary_.ForceUnlock();
    primary_.ForceUnlock();
  }

 private:
  PrimaryAllocator primary_;
  SecondaryAllocator secondary_;
  AllocatorGlobalStats stats_;
};
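
// Putting it together (a sketch, not part of the original header; reuses the
// hypothetical PrimaryAllocator64 from the sketch above):
//
//   typedef SizeClassAllocatorLocalCache<PrimaryAllocator64> AllocatorCache;
//   typedef LargeMmapAllocator<> SecondaryAllocator;
//   typedef CombinedAllocator<PrimaryAllocator64, AllocatorCache,
//       SecondaryAllocator> Allocator;
//
//   static Allocator allocator;               // allocator.Init() at startup.
//   static THREADLOCAL AllocatorCache cache;  // allocator.InitCache(&cache).
//
//   void *p = allocator.Allocate(&cache, size, /*alignment=*/8);
//   allocator.Deallocate(&cache, p);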

// Returns true if calloc(size, n) should return 0 due to overflow in size*n.
bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
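
// For example, a tool's calloc interceptor would do (illustrative sketch):
//
//   if (CallocShouldReturnNullDueToOverflow(size, nmemb))
//     return 0;
//   void *p = allocator.Allocate(&cache, size * nmemb, 8, /*cleared=*/true);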

}  // namespace __sanitizer

#endif  // SANITIZER_ALLOCATOR_H