Use the correct order of NOINLINE vs ret type to fix Windows build
[blocksruntime.git] lib/sanitizer_common/sanitizer_allocator.h (blob 9d881210620587df7b88cd946e9d2e9ca829c717)
1 //===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
12 //===----------------------------------------------------------------------===//
14 #ifndef SANITIZER_ALLOCATOR_H
15 #define SANITIZER_ALLOCATOR_H
17 #include "sanitizer_internal_defs.h"
18 #include "sanitizer_common.h"
19 #include "sanitizer_libc.h"
20 #include "sanitizer_list.h"
21 #include "sanitizer_mutex.h"
22 #include "sanitizer_lfstack.h"
24 namespace __sanitizer {
26 // SizeClassMap maps allocation sizes into size classes and back.
27 // Class 0 corresponds to size 0.
28 // Classes 1 - 16 correspond to sizes 8 - 128 (size = class_id * 8).
29 // Next 8 classes: 128 + i * 16 (i = 1 to 8).
30 // Next 8 classes: 256 + i * 32 (i = 1 to 8).
31 // ...
32 // Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
33 // Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
35 // This structure of the size class map gives us:
36 // - Efficient table-free class-to-size and size-to-class functions.
37 // - Difference between two consecutive size classes is between 12% and 6%.
39 // This class also gives a hint to a thread-caching allocator about the number
40 // of chunks that need to be cached per-thread:
41 // - kMaxNumCached is the maximal number of chunks per size class.
42 // - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
44 // Part of output of SizeClassMap::Print():
45 // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
46 // c01 => s: 8 diff: +8 00% l 3 cached: 256 2048; id 1
47 // c02 => s: 16 diff: +8 100% l 4 cached: 256 4096; id 2
48 // ...
49 // c07 => s: 56 diff: +8 16% l 5 cached: 256 14336; id 7
51 // c08 => s: 64 diff: +8 14% l 6 cached: 256 16384; id 8
52 // ...
53 // c15 => s: 120 diff: +8 07% l 6 cached: 256 30720; id 15
55 // c16 => s: 128 diff: +8 06% l 7 cached: 256 32768; id 16
56 // c17 => s: 144 diff: +16 12% l 7 cached: 227 32688; id 17
57 // ...
58 // c23 => s: 240 diff: +16 07% l 7 cached: 136 32640; id 23
60 // c24 => s: 256 diff: +16 06% l 8 cached: 128 32768; id 24
61 // c25 => s: 288 diff: +32 12% l 8 cached: 113 32544; id 25
62 // ...
63 // c31 => s: 480 diff: +32 07% l 8 cached: 68 32640; id 31
65 // c32 => s: 512 diff: +32 06% l 9 cached: 64 32768; id 32
68 template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
69 uptr kMinBatchClassT>
70 class SizeClassMap {
71 static const uptr kMinSizeLog = 3;
72 static const uptr kMidSizeLog = kMinSizeLog + 4;
73 static const uptr kMinSize = 1 << kMinSizeLog;
74 static const uptr kMidSize = 1 << kMidSizeLog;
75 static const uptr kMidClass = kMidSize / kMinSize;
76 static const uptr S = 3;
77 static const uptr M = (1 << S) - 1;
79 public:
80 static const uptr kMaxNumCached = kMaxNumCachedT;
81 struct TransferBatch {
82 TransferBatch *next;
83 uptr count;
84 void *batch[kMaxNumCached];
87 static const uptr kMinBatchClass = kMinBatchClassT;
88 static const uptr kMaxSize = 1 << kMaxSizeLog;
89 static const uptr kNumClasses =
90 kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
91 COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
92 static const uptr kNumClassesRounded =
93 kNumClasses == 32 ? 32 :
94 kNumClasses <= 64 ? 64 :
95 kNumClasses <= 128 ? 128 : 256;
97 static uptr Size(uptr class_id) {
98 if (class_id <= kMidClass)
99 return kMinSize * class_id;
100 class_id -= kMidClass;
101 uptr t = kMidSize << (class_id >> S);
102 return t + (t >> S) * (class_id & M);
105 static uptr ClassID(uptr size) {
106 if (size <= kMidSize)
107 return (size + kMinSize - 1) >> kMinSizeLog;
108 if (size > kMaxSize) return 0;
109 uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(size);
110 uptr hbits = (size >> (l - S)) & M;
111 uptr lbits = size & ((1 << (l - S)) - 1);
112 uptr l1 = l - kMidSizeLog;
113 return kMidClass + (l1 << S) + hbits + (lbits > 0);
116 static uptr MaxCached(uptr class_id) {
117 if (class_id == 0) return 0;
118 uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
119 return Max(1UL, Min(kMaxNumCached, n));
122 static void Print() {
123 uptr prev_s = 0;
124 uptr total_cached = 0;
125 for (uptr i = 0; i < kNumClasses; i++) {
126 uptr s = Size(i);
127 if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
128 Printf("\n");
129 uptr d = s - prev_s;
130 uptr p = prev_s ? (d * 100 / prev_s) : 0;
131 uptr l = SANITIZER_WORDSIZE - 1 - __builtin_clzl(s);
132 uptr cached = MaxCached(i) * s;
133 Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
134 "cached: %zd %zd; id %zd\n",
135 i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
136 total_cached += cached;
137 prev_s = s;
139 Printf("Total cached: %zd\n", total_cached);
142 static void Validate() {
143 for (uptr c = 1; c < kNumClasses; c++) {
144 // Printf("Validate: c%zd\n", c);
145 uptr s = Size(c);
146 CHECK_EQ(ClassID(s), c);
147 if (c != kNumClasses - 1)
148 CHECK_EQ(ClassID(s + 1), c + 1);
149 CHECK_EQ(ClassID(s - 1), c);
150 if (c)
151 CHECK_GT(Size(c), Size(c-1));
153 CHECK_EQ(ClassID(kMaxSize + 1), 0);
155 for (uptr s = 1; s <= kMaxSize; s++) {
156 uptr c = ClassID(s);
157 // Printf("s%zd => c%zd\n", s, c);
158 CHECK_LT(c, kNumClasses);
159 CHECK_GE(Size(c), s);
160 if (c > 0)
161 CHECK_LT(Size(c-1), s);
164 // TransferBatch for kMinBatchClass must fit into the block itself.
165 const uptr batch_size = sizeof(TransferBatch)
166 - sizeof(void*) // NOLINT
167 * (kMaxNumCached - MaxCached(kMinBatchClass));
168 CHECK_LE(batch_size, Size(kMinBatchClass));
169 // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
170 const uptr batch_size1 = sizeof(TransferBatch)
171 - sizeof(void*) // NOLINT
172 * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
173 CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
177 typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(33, 36)>
178 DefaultSizeClassMap;
179 typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(25, 28)>
180 CompactSizeClassMap;
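// A minimal, illustrative round trip through the map above, using
// DefaultSizeClassMap; the helper name is hypothetical and the concrete numbers
// follow the Print() sample in the SizeClassMap comment.
inline void ExampleSizeClassMapRoundTrip() {
  typedef DefaultSizeClassMap SCMap;
  CHECK_EQ(SCMap::ClassID(17), 3);   // 17 bytes rounds up to class 3 (3 * 8 == 24).
  CHECK_EQ(SCMap::Size(3), 24);
  CHECK_EQ(SCMap::ClassID(129), 17); // First 16-byte-spaced class above 128 ...
  CHECK_EQ(SCMap::Size(17), 144);    // ... has size 144 (c17 in the sample).
  CHECK_EQ(SCMap::ClassID(SCMap::kMaxSize + 1), 0);  // Too large for this map.
}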
181 template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
183 // Memory allocator statistics
184 enum AllocatorStat {
185 AllocatorStatMalloced,
186 AllocatorStatFreed,
187 AllocatorStatMmapped,
188 AllocatorStatUnmapped,
189 AllocatorStatCount
192 typedef u64 AllocatorStatCounters[AllocatorStatCount];
194 // Per-thread stats, live in per-thread cache.
195 class AllocatorStats {
196 public:
197 void Init() {
198 internal_memset(this, 0, sizeof(*this));
201 void Add(AllocatorStat i, u64 v) {
202 v += atomic_load(&stats_[i], memory_order_relaxed);
203 atomic_store(&stats_[i], v, memory_order_relaxed);
206 void Set(AllocatorStat i, u64 v) {
207 atomic_store(&stats_[i], v, memory_order_relaxed);
210 u64 Get(AllocatorStat i) const {
211 return atomic_load(&stats_[i], memory_order_relaxed);
214 private:
215 friend class AllocatorGlobalStats;
216 AllocatorStats *next_;
217 AllocatorStats *prev_;
218 atomic_uint64_t stats_[AllocatorStatCount];
221 // Global stats, used for aggregation and querying.
222 class AllocatorGlobalStats : public AllocatorStats {
223 public:
224 void Init() {
225 internal_memset(this, 0, sizeof(*this));
226 next_ = this;
227 prev_ = this;
230 void Register(AllocatorStats *s) {
231 SpinMutexLock l(&mu_);
232 s->next_ = next_;
233 s->prev_ = this;
234 next_->prev_ = s;
235 next_ = s;
238 void Unregister(AllocatorStats *s) {
239 SpinMutexLock l(&mu_);
240 s->prev_->next_ = s->next_;
241 s->next_->prev_ = s->prev_;
242 for (int i = 0; i < AllocatorStatCount; i++)
243 Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
246 void Get(AllocatorStatCounters s) const {
247 internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
248 SpinMutexLock l(&mu_);
249 const AllocatorStats *stats = this;
250 for (;;) {
251 for (int i = 0; i < AllocatorStatCount; i++)
252 s[i] += stats->Get(AllocatorStat(i));
253 stats = stats->next_;
254 if (stats == this)
255 break;
259 private:
260 mutable SpinMutex mu_;
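// Note on the structure above: AllocatorGlobalStats keeps all registered
// per-thread AllocatorStats objects on an intrusive circular doubly-linked list
// rooted at itself. Get() walks the ring and sums the counters; Unregister()
// folds a dying thread's counters into the root before unlinking it.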
263 // Allocators call these callbacks on mmap/munmap.
264 struct NoOpMapUnmapCallback {
265 void OnMap(uptr p, uptr size) const { }
266 void OnUnmap(uptr p, uptr size) const { }
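// An illustrative, hypothetical callback (the name is not part of this
// interface) that a tool could pass instead of NoOpMapUnmapCallback to observe
// the allocator's address-space usage:
struct ExampleLoggingMapUnmapCallback {
  void OnMap(uptr p, uptr size) const {
    Printf("allocator maps   0x%zx .. 0x%zx\n", p, p + size);
  }
  void OnUnmap(uptr p, uptr size) const {
    Printf("allocator unmaps 0x%zx .. 0x%zx\n", p, p + size);
  }
};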
269 // SizeClassAllocator64 -- allocator for 64-bit address space.
271 // Space: a portion of address space of kSpaceSize bytes starting at
272 // a fixed address (kSpaceBeg). Both constants are powers of two and
273 // kSpaceBeg is kSpaceSize-aligned.
274 // At the beginning the entire space is mprotect-ed, then small parts of it
275 // are mapped on demand.
277 // Region: a part of Space dedicated to a single size class.
278 // There are kNumClasses Regions of equal size.
280 // UserChunk: a piece of memory returned to user.
281 // MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
283 // A Region looks like this:
284 // UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
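// A worked sketch of the layout, assuming hypothetical parameters
// kSpaceSize = 2^40 and kNumClassesRounded = 64 (so kRegionSize = 2^34).
// For a chunk p of size s in the region [region_beg, region_beg + kRegionSize):
//   region_beg    = p & ~(kRegionSize - 1)
//   chunk_idx     = (p % kRegionSize) / s
//   UserChunk beg = region_beg + chunk_idx * s                       (grows up)
//   MetaChunk     = region_beg + kRegionSize - (chunk_idx + 1) * kMetadataSize
//                                                                    (grows down)
// GetBlockBegin() and GetMetaData() below implement exactly this arithmetic.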
285 template <const uptr kSpaceBeg, const uptr kSpaceSize,
286 const uptr kMetadataSize, class SizeClassMap,
287 class MapUnmapCallback = NoOpMapUnmapCallback>
288 class SizeClassAllocator64 {
289 public:
290 typedef typename SizeClassMap::TransferBatch Batch;
291 typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
292 SizeClassMap, MapUnmapCallback> ThisT;
293 typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
295 void Init() {
296 CHECK_EQ(kSpaceBeg,
297 reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
298 MapWithCallback(kSpaceEnd, AdditionalSize());
301 void MapWithCallback(uptr beg, uptr size) {
302 CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
303 MapUnmapCallback().OnMap(beg, size);
306 void UnmapWithCallback(uptr beg, uptr size) {
307 MapUnmapCallback().OnUnmap(beg, size);
308 UnmapOrDie(reinterpret_cast<void *>(beg), size);
311 static bool CanAllocate(uptr size, uptr alignment) {
312 return size <= SizeClassMap::kMaxSize &&
313 alignment <= SizeClassMap::kMaxSize;
316 NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
317 uptr class_id) {
318 CHECK_LT(class_id, kNumClasses);
319 RegionInfo *region = GetRegionInfo(class_id);
320 Batch *b = region->free_list.Pop();
321 if (b == 0)
322 b = PopulateFreeList(stat, c, class_id, region);
323 region->n_allocated += b->count;
324 return b;
327 NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
328 RegionInfo *region = GetRegionInfo(class_id);
329 region->free_list.Push(b);
330 region->n_freed += b->count;
333 static bool PointerIsMine(void *p) {
334 return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
337 static uptr GetSizeClass(void *p) {
338 return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
341 void *GetBlockBegin(void *p) {
342 uptr class_id = GetSizeClass(p);
343 uptr size = SizeClassMap::Size(class_id);
344 uptr chunk_idx = GetChunkIdx((uptr)p, size);
345 uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
346 uptr beg = chunk_idx * size;
347 uptr next_beg = beg + size;
348 RegionInfo *region = GetRegionInfo(class_id);
349 if (region->mapped_user >= next_beg)
350 return reinterpret_cast<void*>(reg_beg + beg);
351 return 0;
354 static uptr GetActuallyAllocatedSize(void *p) {
355 CHECK(PointerIsMine(p));
356 return SizeClassMap::Size(GetSizeClass(p));
359 uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
361 void *GetMetaData(void *p) {
362 uptr class_id = GetSizeClass(p);
363 uptr size = SizeClassMap::Size(class_id);
364 uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
365 return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
366 (1 + chunk_idx) * kMetadataSize);
369 uptr TotalMemoryUsed() {
370 uptr res = 0;
371 for (uptr i = 0; i < kNumClasses; i++)
372 res += GetRegionInfo(i)->allocated_user;
373 return res;
376 // Test-only.
377 void TestOnlyUnmap() {
378 UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
381 void PrintStats() {
382 uptr total_mapped = 0;
383 uptr n_allocated = 0;
384 uptr n_freed = 0;
385 for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
386 RegionInfo *region = GetRegionInfo(class_id);
387 total_mapped += region->mapped_user;
388 n_allocated += region->n_allocated;
389 n_freed += region->n_freed;
391 Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
392 "remains %zd\n",
393 total_mapped >> 20, n_allocated, n_allocated - n_freed);
394 for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
395 RegionInfo *region = GetRegionInfo(class_id);
396 if (region->mapped_user == 0) continue;
397 Printf(" %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
398 class_id,
399 SizeClassMap::Size(class_id),
400 region->mapped_user >> 10,
401 region->n_allocated,
402 region->n_allocated - region->n_freed);
406 typedef SizeClassMap SizeClassMapT;
407 static const uptr kNumClasses = SizeClassMap::kNumClasses;
408 static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
410 private:
411 static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
412 static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
413 COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
414 // kRegionSize must be >= 2^32.
415 COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
416 // Populate the free list with at most this number of bytes at once
417 // or with one element if its size is greater.
418 static const uptr kPopulateSize = 1 << 14;
419 // Call mmap for user memory with at least this size.
420 static const uptr kUserMapSize = 1 << 16;
421 // Call mmap for metadata memory with at least this size.
422 static const uptr kMetaMapSize = 1 << 16;
424 struct RegionInfo {
425 BlockingMutex mutex;
426 LFStack<Batch> free_list;
427 uptr allocated_user; // Bytes allocated for user memory.
428 uptr allocated_meta; // Bytes allocated for metadata.
429 uptr mapped_user; // Bytes mapped for user memory.
430 uptr mapped_meta; // Bytes mapped for metadata.
431 uptr n_allocated, n_freed; // Just stats.
433 COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
435 static uptr AdditionalSize() {
436 return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
437 GetPageSizeCached());
440 RegionInfo *GetRegionInfo(uptr class_id) {
441 CHECK_LT(class_id, kNumClasses);
442 RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
443 return &regions[class_id];
446 static uptr GetChunkIdx(uptr chunk, uptr size) {
447 u32 offset = chunk % kRegionSize;
448 // Here we divide by a non-constant. This is costly.
449 // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
450 // We save 2x by using 32-bit div, but may need to use a 256-way switch.
451 return offset / (u32)size;
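// PopulateFreeList() below refills a region's free list: it lazily mmaps more
// user memory (in kUserMapSize steps) and metadata (in kMetaMapSize steps),
// then carves the newly mapped space into Batches of up to MaxCached(class_id)
// chunks. For classes below kMinBatchClass the Batch header is allocated from
// the cache; for larger classes it is stored in-place in the batch's first
// chunk.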
454 NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
455 uptr class_id, RegionInfo *region) {
456 BlockingMutexLock l(&region->mutex);
457 Batch *b = region->free_list.Pop();
458 if (b)
459 return b;
460 uptr size = SizeClassMap::Size(class_id);
461 uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
462 uptr beg_idx = region->allocated_user;
463 uptr end_idx = beg_idx + count * size;
464 uptr region_beg = kSpaceBeg + kRegionSize * class_id;
465 if (end_idx + size > region->mapped_user) {
466 // Do the mmap for the user memory.
467 uptr map_size = kUserMapSize;
468 while (end_idx + size > region->mapped_user + map_size)
469 map_size += kUserMapSize;
470 CHECK_GE(region->mapped_user + map_size, end_idx);
471 MapWithCallback(region_beg + region->mapped_user, map_size);
472 stat->Add(AllocatorStatMmapped, map_size);
473 region->mapped_user += map_size;
475 uptr total_count = (region->mapped_user - beg_idx - size)
476 / size / count * count;
477 region->allocated_meta += total_count * kMetadataSize;
478 if (region->allocated_meta > region->mapped_meta) {
479 uptr map_size = kMetaMapSize;
480 while (region->allocated_meta > region->mapped_meta + map_size)
481 map_size += kMetaMapSize;
482 // Do the mmap for the metadata.
483 CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
484 MapWithCallback(region_beg + kRegionSize -
485 region->mapped_meta - map_size, map_size);
486 region->mapped_meta += map_size;
488 CHECK_LE(region->allocated_meta, region->mapped_meta);
489 if (region->allocated_user + region->allocated_meta > kRegionSize) {
490 Printf("Out of memory. Dying.\n");
491 Printf("The process has exhausted %zuMB for size class %zu.\n",
492 kRegionSize / 1024 / 1024, size);
493 Die();
495 for (;;) {
496 if (class_id < SizeClassMap::kMinBatchClass)
497 b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
498 else
499 b = (Batch*)(region_beg + beg_idx);
500 b->count = count;
501 for (uptr i = 0; i < count; i++)
502 b->batch[i] = (void*)(region_beg + beg_idx + i * size);
503 region->allocated_user += count * size;
504 CHECK_LE(region->allocated_user, region->mapped_user);
505 beg_idx += count * size;
506 if (beg_idx + count * size + size > region->mapped_user)
507 break;
508 region->free_list.Push(b);
510 return b;
514 // SizeClassAllocator32 -- allocator for 32-bit address space.
515 // This allocator can theoretically be used on 64-bit arch, but there it is less
516 // efficient than SizeClassAllocator64.
518 // [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
519 // be returned by MmapOrDie().
521 // Region:
522 // a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
523 // Since the regions are aligned by kRegionSize, there are exactly
524 // kNumPossibleRegions possible regions in the address space and so we keep
525 // an u8 array possible_regions[kNumPossibleRegions] to store the size classes.
526 // 0 size class means the region is not used by the allocator.
528 // One Region is used to allocate chunks of a single size class.
529 // A Region looks like this:
530 // UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
532 // In order to avoid false sharing, the objects of this class should be
533 // cache-line aligned.
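// A worked sketch of the lookup, assuming a 32-bit target with kSpaceSize = 2^32
// and the kRegionSizeLog = 20 (1 MB regions) used below:
//   region_id   = p >> kRegionSizeLog            // index into possible_regions[4096]
//   class_id    = possible_regions[region_id]    // 0 means "not this allocator's"
//   block_begin = region_beg + ((p - region_beg) / size) * size
// so PointerIsMine() is a single array load and GetBlockBegin() adds one 32-bit
// division.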
534 template <const uptr kSpaceBeg, const u64 kSpaceSize,
535 const uptr kMetadataSize, class SizeClassMap,
536 class MapUnmapCallback = NoOpMapUnmapCallback>
537 class SizeClassAllocator32 {
538 public:
539 typedef typename SizeClassMap::TransferBatch Batch;
540 typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
541 SizeClassMap, MapUnmapCallback> ThisT;
542 typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
544 void Init() {
545 state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
548 void *MapWithCallback(uptr size) {
549 size = RoundUpTo(size, GetPageSizeCached());
550 void *res = MmapOrDie(size, "SizeClassAllocator32");
551 MapUnmapCallback().OnMap((uptr)res, size);
552 return res;
555 void UnmapWithCallback(uptr beg, uptr size) {
556 MapUnmapCallback().OnUnmap(beg, size);
557 UnmapOrDie(reinterpret_cast<void *>(beg), size);
560 static bool CanAllocate(uptr size, uptr alignment) {
561 return size <= SizeClassMap::kMaxSize &&
562 alignment <= SizeClassMap::kMaxSize;
565 void *GetMetaData(void *p) {
566 CHECK(PointerIsMine(p));
567 uptr mem = reinterpret_cast<uptr>(p);
568 uptr beg = ComputeRegionBeg(mem);
569 uptr size = SizeClassMap::Size(GetSizeClass(p));
570 u32 offset = mem - beg;
571 uptr n = offset / (u32)size; // 32-bit division
572 uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
573 return reinterpret_cast<void*>(meta);
576 NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
577 uptr class_id) {
578 CHECK_LT(class_id, kNumClasses);
579 SizeClassInfo *sci = GetSizeClassInfo(class_id);
580 SpinMutexLock l(&sci->mutex);
581 if (sci->free_list.empty())
582 PopulateFreeList(stat, c, sci, class_id);
583 CHECK(!sci->free_list.empty());
584 Batch *b = sci->free_list.front();
585 sci->free_list.pop_front();
586 return b;
589 NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
590 CHECK_LT(class_id, kNumClasses);
591 SizeClassInfo *sci = GetSizeClassInfo(class_id);
592 SpinMutexLock l(&sci->mutex);
593 sci->free_list.push_front(b);
596 bool PointerIsMine(void *p) {
597 return GetSizeClass(p) != 0;
600 uptr GetSizeClass(void *p) {
601 return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
604 void *GetBlockBegin(void *p) {
605 CHECK(PointerIsMine(p));
606 uptr mem = reinterpret_cast<uptr>(p);
607 uptr beg = ComputeRegionBeg(mem);
608 uptr size = SizeClassMap::Size(GetSizeClass(p));
609 u32 offset = mem - beg;
610 u32 n = offset / (u32)size; // 32-bit division
611 uptr res = beg + (n * (u32)size);
612 return reinterpret_cast<void*>(res);
615 uptr GetActuallyAllocatedSize(void *p) {
616 CHECK(PointerIsMine(p));
617 return SizeClassMap::Size(GetSizeClass(p));
620 uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
622 uptr TotalMemoryUsed() {
623 // No need to lock here.
624 uptr res = 0;
625 for (uptr i = 0; i < kNumPossibleRegions; i++)
626 if (state_->possible_regions[i])
627 res += kRegionSize;
628 return res;
631 void TestOnlyUnmap() {
632 for (uptr i = 0; i < kNumPossibleRegions; i++)
633 if (state_->possible_regions[i])
634 UnmapWithCallback((i * kRegionSize), kRegionSize);
635 UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
638 void PrintStats() {
641 typedef SizeClassMap SizeClassMapT;
642 static const uptr kNumClasses = SizeClassMap::kNumClasses;
644 private:
645 static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
646 static const uptr kRegionSize = 1 << kRegionSizeLog;
647 static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
649 struct SizeClassInfo {
650 SpinMutex mutex;
651 IntrusiveList<Batch> free_list;
652 char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
654 COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
656 uptr ComputeRegionId(uptr mem) {
657 uptr res = mem >> kRegionSizeLog;
658 CHECK_LT(res, kNumPossibleRegions);
659 return res;
662 uptr ComputeRegionBeg(uptr mem) {
663 return mem & ~(kRegionSize - 1);
666 uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
667 CHECK_LT(class_id, kNumClasses);
668 uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
669 "SizeClassAllocator32"));
670 MapUnmapCallback().OnMap(res, kRegionSize);
671 stat->Add(AllocatorStatMmapped, kRegionSize);
672 CHECK_EQ(0U, (res & (kRegionSize - 1)));
673 CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
674 state_->possible_regions[ComputeRegionId(res)] = class_id;
675 return res;
678 SizeClassInfo *GetSizeClassInfo(uptr class_id) {
679 CHECK_LT(class_id, kNumClasses);
680 return &state_->size_class_info_array[class_id];
683 void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
684 SizeClassInfo *sci, uptr class_id) {
685 uptr size = SizeClassMap::Size(class_id);
686 uptr reg = AllocateRegion(stat, class_id);
687 uptr n_chunks = kRegionSize / (size + kMetadataSize);
688 uptr max_count = SizeClassMap::MaxCached(class_id);
689 Batch *b = 0;
690 for (uptr i = reg; i < reg + n_chunks * size; i += size) {
691 if (b == 0) {
692 if (class_id < SizeClassMap::kMinBatchClass)
693 b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
694 else
695 b = (Batch*)i;
696 b->count = 0;
698 b->batch[b->count++] = (void*)i;
699 if (b->count == max_count) {
700 sci->free_list.push_back(b);
701 b = 0;
704 if (b)
705 sci->free_list.push_back(b);
708 struct State {
709 u8 possible_regions[kNumPossibleRegions];
710 SizeClassInfo size_class_info_array[kNumClasses];
712 State *state_;
715 // Objects of this type should be used as local caches for SizeClassAllocator64
716 // or SizeClassAllocator32. Since the typical use of this class is to have one
717 // object per thread in TLS, it has to be POD.
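// A sketch of the intended per-thread usage (the variable names are
// hypothetical; `primary` is a SizeClassAllocator, `cache` lives in TLS):
//   cache.Init(&global_stats);                   // register per-thread stats
//   void *p = cache.Allocate(&primary, primary.ClassID(size));
//   ...
//   cache.Deallocate(&primary, primary.GetSizeClass(p), p);
//   cache.Destroy(&primary, &global_stats);      // drain and unregister at exit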
718 template<class SizeClassAllocator>
719 struct SizeClassAllocatorLocalCache {
720 typedef SizeClassAllocator Allocator;
721 static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
723 void Init(AllocatorGlobalStats *s) {
724 stats_.Init();
725 if (s)
726 s->Register(&stats_);
729 void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
730 Drain(allocator);
731 if (s)
732 s->Unregister(&stats_);
735 void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
736 CHECK_NE(class_id, 0UL);
737 CHECK_LT(class_id, kNumClasses);
738 stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
739 PerClass *c = &per_class_[class_id];
740 if (UNLIKELY(c->count == 0))
741 Refill(allocator, class_id);
742 void *res = c->batch[--c->count];
743 PREFETCH(c->batch[c->count - 1]);
744 return res;
747 void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
748 CHECK_NE(class_id, 0UL);
749 CHECK_LT(class_id, kNumClasses);
750 stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
751 PerClass *c = &per_class_[class_id];
752 if (UNLIKELY(c->count == c->max_count))
753 Drain(allocator, class_id);
754 c->batch[c->count++] = p;
757 void Drain(SizeClassAllocator *allocator) {
758 for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
759 PerClass *c = &per_class_[class_id];
760 while (c->count > 0)
761 Drain(allocator, class_id);
765 // private:
766 typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
767 typedef typename SizeClassMap::TransferBatch Batch;
768 struct PerClass {
769 uptr count;
770 uptr max_count;
771 void *batch[2 * SizeClassMap::kMaxNumCached];
773 PerClass per_class_[kNumClasses];
774 AllocatorStats stats_;
776 void InitCache() {
777 if (per_class_[0].max_count)
778 return;
779 for (uptr i = 0; i < kNumClasses; i++) {
780 PerClass *c = &per_class_[i];
781 c->max_count = 2 * SizeClassMap::MaxCached(i);
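// Refill() moves one Batch from the allocator into this cache; Drain() moves
// half of the cached pointers of one class back out as a Batch. For class ids
// below kMinBatchClass the Batch header is a separately cached object;
// otherwise the first transferred pointer doubles as the Batch storage.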
785 NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
786 InitCache();
787 PerClass *c = &per_class_[class_id];
788 Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
789 CHECK_GT(b->count, 0);
790 for (uptr i = 0; i < b->count; i++)
791 c->batch[i] = b->batch[i];
792 c->count = b->count;
793 if (class_id < SizeClassMap::kMinBatchClass)
794 Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
797 NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
798 InitCache();
799 PerClass *c = &per_class_[class_id];
800 Batch *b;
801 if (class_id < SizeClassMap::kMinBatchClass)
802 b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
803 else
804 b = (Batch*)c->batch[0];
805 uptr cnt = Min(c->max_count / 2, c->count);
806 for (uptr i = 0; i < cnt; i++) {
807 b->batch[i] = c->batch[i];
808 c->batch[i] = c->batch[i + c->max_count / 2];
810 b->count = cnt;
811 c->count -= cnt;
812 allocator->DeallocateBatch(&stats_, class_id, b);
816 // This class can (de)allocate only large chunks of memory using mmap/unmap.
817 // The main purpose of this allocator is to cover large and rare allocation
818 // sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
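// A worked sketch of one chunk's layout, assuming no extra alignment slack:
// Allocate() maps RoundUpTo(size, page_size_) + page_size_ bytes and returns
// res = map_beg + page_size_, so the page preceding the user block holds the
// bookkeeping:
//   GetHeader(res)   == (Header *)(res - page_size_)    // == map_beg here
//   GetMetaData(res) == GetHeader(res) + 1              // rest of that page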
819 template <class MapUnmapCallback = NoOpMapUnmapCallback>
820 class LargeMmapAllocator {
821 public:
822 void Init() {
823 internal_memset(this, 0, sizeof(*this));
824 page_size_ = GetPageSizeCached();
827 void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
828 CHECK(IsPowerOfTwo(alignment));
829 uptr map_size = RoundUpMapSize(size);
830 if (alignment > page_size_)
831 map_size += alignment;
832 if (map_size < size) return 0; // Overflow.
833 uptr map_beg = reinterpret_cast<uptr>(
834 MmapOrDie(map_size, "LargeMmapAllocator"));
835 MapUnmapCallback().OnMap(map_beg, map_size);
836 uptr map_end = map_beg + map_size;
837 uptr res = map_beg + page_size_;
838 if (res & (alignment - 1)) // Align.
839 res += alignment - (res & (alignment - 1));
840 CHECK_EQ(0, res & (alignment - 1));
841 CHECK_LE(res + size, map_end);
842 Header *h = GetHeader(res);
843 h->size = size;
844 h->map_beg = map_beg;
845 h->map_size = map_size;
846 uptr size_log = SANITIZER_WORDSIZE - __builtin_clzl(map_size) - 1;
847 CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
849 SpinMutexLock l(&mutex_);
850 uptr idx = n_chunks_++;
851 CHECK_LT(idx, kMaxNumChunks);
852 h->chunk_idx = idx;
853 chunks_[idx] = h;
854 stats.n_allocs++;
855 stats.currently_allocated += map_size;
856 stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
857 stats.by_size_log[size_log]++;
858 stat->Add(AllocatorStatMalloced, map_size);
859 stat->Add(AllocatorStatMmapped, map_size);
861 return reinterpret_cast<void*>(res);
864 void Deallocate(AllocatorStats *stat, void *p) {
865 Header *h = GetHeader(p);
867 SpinMutexLock l(&mutex_);
868 uptr idx = h->chunk_idx;
869 CHECK_EQ(chunks_[idx], h);
870 CHECK_LT(idx, n_chunks_);
871 chunks_[idx] = chunks_[n_chunks_ - 1];
872 chunks_[idx]->chunk_idx = idx;
873 n_chunks_--;
874 stats.n_frees++;
875 stats.currently_allocated -= h->map_size;
876 stat->Add(AllocatorStatFreed, h->map_size);
877 stat->Add(AllocatorStatUnmapped, h->map_size);
879 MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
880 UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
883 uptr TotalMemoryUsed() {
884 SpinMutexLock l(&mutex_);
885 uptr res = 0;
886 for (uptr i = 0; i < n_chunks_; i++) {
887 Header *h = chunks_[i];
888 CHECK_EQ(h->chunk_idx, i);
889 res += RoundUpMapSize(h->size);
891 return res;
894 bool PointerIsMine(void *p) {
895 return GetBlockBegin(p) != 0;
898 uptr GetActuallyAllocatedSize(void *p) {
899 return RoundUpTo(GetHeader(p)->size, page_size_);
902 // At least page_size_/2 metadata bytes are available.
903 void *GetMetaData(void *p) {
904 // Too slow: CHECK_EQ(p, GetBlockBegin(p));
905 CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
906 return GetHeader(p) + 1;
909 void *GetBlockBegin(void *ptr) {
910 uptr p = reinterpret_cast<uptr>(ptr);
911 SpinMutexLock l(&mutex_);
912 uptr nearest_chunk = 0;
913 // Cache-friendly linear search.
914 for (uptr i = 0; i < n_chunks_; i++) {
915 uptr ch = reinterpret_cast<uptr>(chunks_[i]);
916 if (p < ch) continue; // p is to the left of this chunk; skip it.
917 if (p - ch < p - nearest_chunk)
918 nearest_chunk = ch;
920 if (!nearest_chunk)
921 return 0;
922 Header *h = reinterpret_cast<Header *>(nearest_chunk);
923 CHECK_GE(nearest_chunk, h->map_beg);
924 CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
925 CHECK_LE(nearest_chunk, p);
926 if (h->map_beg + h->map_size < p)
927 return 0;
928 return GetUser(h);
931 void PrintStats() {
932 Printf("Stats: LargeMmapAllocator: allocated %zd times, "
933 "remains %zd (%zd K) max %zd M; by size logs: ",
934 stats.n_allocs, stats.n_allocs - stats.n_frees,
935 stats.currently_allocated >> 10, stats.max_allocated >> 20);
936 for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
937 uptr c = stats.by_size_log[i];
938 if (!c) continue;
939 Printf("%zd:%zd; ", i, c);
941 Printf("\n");
944 private:
945 static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
946 struct Header {
947 uptr map_beg;
948 uptr map_size;
949 uptr size;
950 uptr chunk_idx;
953 Header *GetHeader(uptr p) {
954 CHECK_EQ(p % page_size_, 0);
955 return reinterpret_cast<Header*>(p - page_size_);
957 Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }
959 void *GetUser(Header *h) {
960 CHECK_EQ((uptr)h % page_size_, 0);
961 return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
964 uptr RoundUpMapSize(uptr size) {
965 return RoundUpTo(size, page_size_) + page_size_;
968 uptr page_size_;
969 Header *chunks_[kMaxNumChunks];
970 uptr n_chunks_;
971 struct Stats {
972 uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
973 } stats;
974 SpinMutex mutex_;
977 // This class implements a complete memory allocator by using two
978 // internal allocators:
979 // PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
980 // When allocating 2^x bytes it should return 2^x aligned chunk.
981 // PrimaryAllocator is used via a local AllocatorCache.
982 // SecondaryAllocator can allocate anything, but is not efficient.
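// A sketch of how a tool typically composes these pieces; the 64-bit space
// layout constants below are hypothetical, not prescribed by this header:
//   typedef SizeClassAllocator64<0x700000000000ULL, 0x010000000000ULL, 16,
//                                DefaultSizeClassMap> PrimaryAllocator;
//   typedef SizeClassAllocatorLocalCache<PrimaryAllocator> AllocatorCache;
//   typedef LargeMmapAllocator<> SecondaryAllocator;
//   typedef CombinedAllocator<PrimaryAllocator, AllocatorCache,
//                             SecondaryAllocator> Allocator;
// Each thread owns an AllocatorCache; Allocate() serves sizes the primary can
// handle through the cache and falls back to the secondary for the rest.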
983 template <class PrimaryAllocator, class AllocatorCache,
984 class SecondaryAllocator> // NOLINT
985 class CombinedAllocator {
986 public:
987 void Init() {
988 primary_.Init();
989 secondary_.Init();
990 stats_.Init();
993 void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
994 bool cleared = false) {
995 // Returning 0 on malloc(0) may break a lot of code.
996 if (size == 0)
997 size = 1;
998 if (size + alignment < size)
999 return 0;
1000 if (alignment > 8)
1001 size = RoundUpTo(size, alignment);
1002 void *res;
1003 if (primary_.CanAllocate(size, alignment))
1004 res = cache->Allocate(&primary_, primary_.ClassID(size));
1005 else
1006 res = secondary_.Allocate(&stats_, size, alignment);
1007 if (alignment > 8)
1008 CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
1009 if (cleared && res)
1010 internal_memset(res, 0, size);
1011 return res;
1014 void Deallocate(AllocatorCache *cache, void *p) {
1015 if (!p) return;
1016 if (primary_.PointerIsMine(p))
1017 cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
1018 else
1019 secondary_.Deallocate(&stats_, p);
1022 void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
1023 uptr alignment) {
1024 if (!p)
1025 return Allocate(cache, new_size, alignment);
1026 if (!new_size) {
1027 Deallocate(cache, p);
1028 return 0;
1030 CHECK(PointerIsMine(p));
1031 uptr old_size = GetActuallyAllocatedSize(p);
1032 uptr memcpy_size = Min(new_size, old_size);
1033 void *new_p = Allocate(cache, new_size, alignment);
1034 if (new_p)
1035 internal_memcpy(new_p, p, memcpy_size);
1036 Deallocate(cache, p);
1037 return new_p;
1040 bool PointerIsMine(void *p) {
1041 if (primary_.PointerIsMine(p))
1042 return true;
1043 return secondary_.PointerIsMine(p);
1046 bool FromPrimary(void *p) {
1047 return primary_.PointerIsMine(p);
1050 void *GetMetaData(void *p) {
1051 if (primary_.PointerIsMine(p))
1052 return primary_.GetMetaData(p);
1053 return secondary_.GetMetaData(p);
1056 void *GetBlockBegin(void *p) {
1057 if (primary_.PointerIsMine(p))
1058 return primary_.GetBlockBegin(p);
1059 return secondary_.GetBlockBegin(p);
1062 uptr GetActuallyAllocatedSize(void *p) {
1063 if (primary_.PointerIsMine(p))
1064 return primary_.GetActuallyAllocatedSize(p);
1065 return secondary_.GetActuallyAllocatedSize(p);
1068 uptr TotalMemoryUsed() {
1069 return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
1072 void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
1074 void InitCache(AllocatorCache *cache) {
1075 cache->Init(&stats_);
1078 void DestroyCache(AllocatorCache *cache) {
1079 cache->Destroy(&primary_, &stats_);
1082 void SwallowCache(AllocatorCache *cache) {
1083 cache->Drain(&primary_);
1086 void GetStats(AllocatorStatCounters s) const {
1087 stats_.Get(s);
1090 void PrintStats() {
1091 primary_.PrintStats();
1092 secondary_.PrintStats();
1095 private:
1096 PrimaryAllocator primary_;
1097 SecondaryAllocator secondary_;
1098 AllocatorGlobalStats stats_;
1101 // Returns true if calloc(size, n) should return 0 due to overflow in size*n.
1102 bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
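// The definition lives out of line; conceptually it only has to answer whether
// size * n overflows uptr, e.g. something along the lines of
//   n != 0 && size > ((uptr)-1) / n.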
1104 } // namespace __sanitizer
1106 #endif // SANITIZER_ALLOCATOR_H