1 //===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
2 //
3 // This file is distributed under the University of Illinois Open Source
4 // License. See LICENSE.TXT for details.
5 //
6 //===----------------------------------------------------------------------===//
7 //
8 // Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
9 //
10 //===----------------------------------------------------------------------===//
12 #ifndef SANITIZER_ALLOCATOR_H
13 #define SANITIZER_ALLOCATOR_H
15 #include "sanitizer_internal_defs.h"
16 #include "sanitizer_common.h"
17 #include "sanitizer_libc.h"
18 #include "sanitizer_list.h"
19 #include "sanitizer_mutex.h"
20 #include "sanitizer_lfstack.h"
22 namespace __sanitizer {
24 // SizeClassMap maps allocation sizes into size classes and back.
25 // Class 0 corresponds to size 0.
26 // Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
27 // Next 8 classes: 256 + i * 32 (i = 1 to 8).
28 // Next 8 classes: 512 + i * 64 (i = 1 to 8).
29 // ...
30 // Next 8 classes: 2^k + i * 2^(k-3) (i = 1 to 8).
31 // Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
33 // This structure of the size class map gives us:
34 // - Efficient table-free class-to-size and size-to-class functions.
35 // - The difference between two consecutive size classes is between 6% and 12%.
37 // This class also gives a hint to a thread-caching allocator about the number
38 // of chunks that need to be cached per thread:
39 // - kMaxNumCached is the maximal number of chunks per size class.
40 // - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
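//
// Worked example (illustrative): with kMinSize = 16, kMidSize = 256 and S = 3,
// the two mappings round-trip as follows:
//   ClassID(1)   == 1,   Size(1)  == 16    (sizes <= 256 round up to 16 * class_id)
//   ClassID(17)  == 2,   Size(2)  == 32
//   ClassID(257) == 17,  Size(17) == 288   (== 256 + 1 * 32)
//   ClassID(600) == 26,  Size(26) == 640   (== 512 + 2 * 64)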
42 // Part of output of SizeClassMap::Print():
43 // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
44 // c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
45 // c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
46 // c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
47 // c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
48 // c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
49 // c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
50 // c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
52 // c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
53 // c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
54 // c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
55 // c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
56 // c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
57 // c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
58 // c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
59 // c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
61 // c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
62 // c17 => s: 288 diff: +32 12% l 8 cached: 227 65376; id 17
63 // c18 => s: 320 diff: +32 11% l 8 cached: 204 65280; id 18
64 // c19 => s: 352 diff: +32 10% l 8 cached: 186 65472; id 19
65 // c20 => s: 384 diff: +32 09% l 8 cached: 170 65280; id 20
66 // c21 => s: 416 diff: +32 08% l 8 cached: 157 65312; id 21
67 // c22 => s: 448 diff: +32 07% l 8 cached: 146 65408; id 22
68 // c23 => s: 480 diff: +32 07% l 8 cached: 136 65280; id 23
70 // c24 => s: 512 diff: +32 06% l 9 cached: 128 65536; id 24
71 // c25 => s: 576 diff: +64 12% l 9 cached: 113 65088; id 25
72 // c26 => s: 640 diff: +64 11% l 9 cached: 102 65280; id 26
73 // c27 => s: 704 diff: +64 10% l 9 cached: 93 65472; id 27
74 // c28 => s: 768 diff: +64 09% l 9 cached: 85 65280; id 28
75 // c29 => s: 832 diff: +64 08% l 9 cached: 78 64896; id 29
76 // c30 => s: 896 diff: +64 07% l 9 cached: 73 65408; id 30
77 // c31 => s: 960 diff: +64 07% l 9 cached: 68 65280; id 31
79 // c32 => s: 1024 diff: +64 06% l 10 cached: 64 65536; id 32
81 template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog,
82 uptr kMinBatchClassT>
83 class SizeClassMap {
84 static const uptr kMinSizeLog = 4;
85 static const uptr kMidSizeLog = kMinSizeLog + 4;
86 static const uptr kMinSize = 1 << kMinSizeLog;
87 static const uptr kMidSize = 1 << kMidSizeLog;
88 static const uptr kMidClass = kMidSize / kMinSize;
89 static const uptr S = 3;
90 static const uptr M = (1 << S) - 1;
92 public:
93 static const uptr kMaxNumCached = kMaxNumCachedT;
94 struct TransferBatch {
95 TransferBatch *next;
96 uptr count;
97 void *batch[kMaxNumCached];
100 static const uptr kMinBatchClass = kMinBatchClassT;
101 static const uptr kMaxSize = 1 << kMaxSizeLog;
102 static const uptr kNumClasses =
103 kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
104 COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
105 static const uptr kNumClassesRounded =
106 kNumClasses == 32 ? 32 :
107 kNumClasses <= 64 ? 64 :
108 kNumClasses <= 128 ? 128 : 256;
110 static uptr Size(uptr class_id) {
111 if (class_id <= kMidClass)
112 return kMinSize * class_id;
113 class_id -= kMidClass;
114 uptr t = kMidSize << (class_id >> S);
115 return t + (t >> S) * (class_id & M);
118 static uptr ClassID(uptr size) {
119 if (size <= kMidSize)
120 return (size + kMinSize - 1) >> kMinSizeLog;
121 if (size > kMaxSize) return 0;
122 uptr l = MostSignificantSetBitIndex(size);
123 uptr hbits = (size >> (l - S)) & M;
124 uptr lbits = size & ((1 << (l - S)) - 1);
125 uptr l1 = l - kMidSizeLog;
126 return kMidClass + (l1 << S) + hbits + (lbits > 0);
129 static uptr MaxCached(uptr class_id) {
130 if (class_id == 0) return 0;
131 uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
132 return Max<uptr>(1, Min(kMaxNumCached, n));
135 static void Print() {
136 uptr prev_s = 0;
137 uptr total_cached = 0;
138 for (uptr i = 0; i < kNumClasses; i++) {
139 uptr s = Size(i);
140 if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
141 Printf("\n");
142 uptr d = s - prev_s;
143 uptr p = prev_s ? (d * 100 / prev_s) : 0;
144 uptr l = MostSignificantSetBitIndex(s);
145 uptr cached = MaxCached(i) * s;
146 Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
147 "cached: %zd %zd; id %zd\n",
148 i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
149 total_cached += cached;
150 prev_s = s;
152 Printf("Total cached: %zd\n", total_cached);
155 static void Validate() {
156 for (uptr c = 1; c < kNumClasses; c++) {
157 // Printf("Validate: c%zd\n", c);
158 uptr s = Size(c);
159 CHECK_EQ(ClassID(s), c);
160 if (c != kNumClasses - 1)
161 CHECK_EQ(ClassID(s + 1), c + 1);
162 CHECK_EQ(ClassID(s - 1), c);
163 if (c)
164 CHECK_GT(Size(c), Size(c-1));
166 CHECK_EQ(ClassID(kMaxSize + 1), 0);
168 for (uptr s = 1; s <= kMaxSize; s++) {
169 uptr c = ClassID(s);
170 // Printf("s%zd => c%zd\n", s, c);
171 CHECK_LT(c, kNumClasses);
172 CHECK_GE(Size(c), s);
173 if (c > 0)
174 CHECK_LT(Size(c-1), s);
177 // TransferBatch for kMinBatchClass must fit into the block itself.
178 const uptr batch_size = sizeof(TransferBatch)
179 - sizeof(void*) // NOLINT
180 * (kMaxNumCached - MaxCached(kMinBatchClass));
181 CHECK_LE(batch_size, Size(kMinBatchClass));
182 // TransferBatch for kMinBatchClass-1 must not fit into the block itself.
183 const uptr batch_size1 = sizeof(TransferBatch)
184 - sizeof(void*) // NOLINT
185 * (kMaxNumCached - MaxCached(kMinBatchClass - 1));
186 CHECK_GT(batch_size1, Size(kMinBatchClass - 1));
190 typedef SizeClassMap<17, 256, 16, FIRST_32_SECOND_64(25, 28)>
191 DefaultSizeClassMap;
192 typedef SizeClassMap<17, 64, 14, FIRST_32_SECOND_64(17, 20)>
193 CompactSizeClassMap;
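// With the parameters above these work out to kMaxSize = 128K and
// kNumClasses = 89 (rounded up to 128) for both maps; DefaultSizeClassMap
// caches up to 256 chunks (<= 64K bytes) per class, CompactSizeClassMap up to
// 64 chunks (<= 16K bytes) per class.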
194 template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
196 // Memory allocator statistics
197 enum AllocatorStat {
198 AllocatorStatMalloced,
199 AllocatorStatFreed,
200 AllocatorStatMmapped,
201 AllocatorStatUnmapped,
202 AllocatorStatCount
205 typedef u64 AllocatorStatCounters[AllocatorStatCount];
207 // Per-thread stats, live in per-thread cache.
208 class AllocatorStats {
209 public:
210 void Init() {
211 internal_memset(this, 0, sizeof(*this));
214 void Add(AllocatorStat i, u64 v) {
215 v += atomic_load(&stats_[i], memory_order_relaxed);
216 atomic_store(&stats_[i], v, memory_order_relaxed);
219 void Set(AllocatorStat i, u64 v) {
220 atomic_store(&stats_[i], v, memory_order_relaxed);
223 u64 Get(AllocatorStat i) const {
224 return atomic_load(&stats_[i], memory_order_relaxed);
227 private:
228 friend class AllocatorGlobalStats;
229 AllocatorStats *next_;
230 AllocatorStats *prev_;
231 atomic_uint64_t stats_[AllocatorStatCount];
234 // Global stats, used for aggregation and querying.
235 class AllocatorGlobalStats : public AllocatorStats {
236 public:
237 void Init() {
238 internal_memset(this, 0, sizeof(*this));
239 next_ = this;
240 prev_ = this;
243 void Register(AllocatorStats *s) {
244 SpinMutexLock l(&mu_);
245 s->next_ = next_;
246 s->prev_ = this;
247 next_->prev_ = s;
248 next_ = s;
251 void Unregister(AllocatorStats *s) {
252 SpinMutexLock l(&mu_);
253 s->prev_->next_ = s->next_;
254 s->next_->prev_ = s->prev_;
255 for (int i = 0; i < AllocatorStatCount; i++)
256 Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
259 void Get(AllocatorStatCounters s) const {
260 internal_memset(s, 0, AllocatorStatCount * sizeof(u64));
261 SpinMutexLock l(&mu_);
262 const AllocatorStats *stats = this;
263 for (;;) {
264 for (int i = 0; i < AllocatorStatCount; i++)
265 s[i] += stats->Get(AllocatorStat(i));
266 stats = stats->next_;
267 if (stats == this)
268 break;
272 private:
273 mutable SpinMutex mu_;
274 };
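// Typical flow (illustrative sketch): each per-thread cache owns an
// AllocatorStats that is Register()-ed with the global object; Get() then
// walks the resulting ring and sums the counters.
//   AllocatorGlobalStats global;   global.Init();
//   AllocatorStats thread_stats;   thread_stats.Init();
//   global.Register(&thread_stats);
//   thread_stats.Add(AllocatorStatMalloced, 4096);
//   AllocatorStatCounters totals;
//   global.Get(totals);            // totals[AllocatorStatMalloced] >= 4096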
276 // Allocators call these callbacks on mmap/munmap.
277 struct NoOpMapUnmapCallback {
278 void OnMap(uptr p, uptr size) const { }
279 void OnUnmap(uptr p, uptr size) const { }
280 };
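// A tool-specific callback only needs the same two methods. Hypothetical
// sketch (the counter g_total_mapped is not part of this file):
//   struct CountingMapUnmapCallback {
//     void OnMap(uptr p, uptr size) const { g_total_mapped += size; }
//     void OnUnmap(uptr p, uptr size) const { g_total_mapped -= size; }
//   };
// It is plugged in via the MapUnmapCallback template parameter of the
// allocators below.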
282 // SizeClassAllocator64 -- allocator for 64-bit address space.
284 // Space: a portion of address space of kSpaceSize bytes starting at
285 // a fixed address (kSpaceBeg). Both constants are powers of two and
286 // kSpaceBeg is kSpaceSize-aligned.
287 // At the beginning the entire space is mprotect-ed, then small parts of it
288 // are mapped on demand.
290 // Region: a part of Space dedicated to a single size class.
291 // There are kNumClasses Regions of equal size.
293 // UserChunk: a piece of memory returned to user.
294 // MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
296 // A Region looks like this:
297 // UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
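//
// Illustrative layout (assuming, say, kSpaceBeg = 0x600000000000,
// kSpaceSize = 2^40 and kNumClassesRounded = 128):
//   kRegionSize = kSpaceSize / kNumClassesRounded = 2^33.
//   The Region for class_id starts at kSpaceBeg + class_id * kRegionSize.
//   UserChunk k of a class with size s starts at region_beg + k * s;
//   its MetaChunk starts at region_end - (k + 1) * kMetadataSize.
//   The RegionInfo array lives just past the Space, at kSpaceBeg + kSpaceSize.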
298 template <const uptr kSpaceBeg, const uptr kSpaceSize,
299 const uptr kMetadataSize, class SizeClassMap,
300 class MapUnmapCallback = NoOpMapUnmapCallback>
301 class SizeClassAllocator64 {
302 public:
303 typedef typename SizeClassMap::TransferBatch Batch;
304 typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
305 SizeClassMap, MapUnmapCallback> ThisT;
306 typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
308 void Init() {
309 CHECK_EQ(kSpaceBeg,
310 reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
311 MapWithCallback(kSpaceEnd, AdditionalSize());
314 void MapWithCallback(uptr beg, uptr size) {
315 CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
316 MapUnmapCallback().OnMap(beg, size);
319 void UnmapWithCallback(uptr beg, uptr size) {
320 MapUnmapCallback().OnUnmap(beg, size);
321 UnmapOrDie(reinterpret_cast<void *>(beg), size);
324 static bool CanAllocate(uptr size, uptr alignment) {
325 return size <= SizeClassMap::kMaxSize &&
326 alignment <= SizeClassMap::kMaxSize;
329 NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
330 uptr class_id) {
331 CHECK_LT(class_id, kNumClasses);
332 RegionInfo *region = GetRegionInfo(class_id);
333 Batch *b = region->free_list.Pop();
334 if (b == 0)
335 b = PopulateFreeList(stat, c, class_id, region);
336 region->n_allocated += b->count;
337 return b;
340 NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
341 RegionInfo *region = GetRegionInfo(class_id);
342 region->free_list.Push(b);
343 region->n_freed += b->count;
346 static bool PointerIsMine(void *p) {
347 return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
350 static uptr GetSizeClass(void *p) {
351 return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
354 void *GetBlockBegin(void *p) {
355 uptr class_id = GetSizeClass(p);
356 uptr size = SizeClassMap::Size(class_id);
357 uptr chunk_idx = GetChunkIdx((uptr)p, size);
358 uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
359 uptr beg = chunk_idx * size;
360 uptr next_beg = beg + size;
361 RegionInfo *region = GetRegionInfo(class_id);
362 if (region->mapped_user >= next_beg)
363 return reinterpret_cast<void*>(reg_beg + beg);
364 return 0;
367 static uptr GetActuallyAllocatedSize(void *p) {
368 CHECK(PointerIsMine(p));
369 return SizeClassMap::Size(GetSizeClass(p));
372 uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
374 void *GetMetaData(void *p) {
375 uptr class_id = GetSizeClass(p);
376 uptr size = SizeClassMap::Size(class_id);
377 uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
378 return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
379 (1 + chunk_idx) * kMetadataSize);
382 uptr TotalMemoryUsed() {
383 uptr res = 0;
384 for (uptr i = 0; i < kNumClasses; i++)
385 res += GetRegionInfo(i)->allocated_user;
386 return res;
389 // Test-only.
390 void TestOnlyUnmap() {
391 UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
394 void PrintStats() {
395 uptr total_mapped = 0;
396 uptr n_allocated = 0;
397 uptr n_freed = 0;
398 for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
399 RegionInfo *region = GetRegionInfo(class_id);
400 total_mapped += region->mapped_user;
401 n_allocated += region->n_allocated;
402 n_freed += region->n_freed;
404 Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
405 "remains %zd\n",
406 total_mapped >> 20, n_allocated, n_allocated - n_freed);
407 for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
408 RegionInfo *region = GetRegionInfo(class_id);
409 if (region->mapped_user == 0) continue;
410 Printf(" %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
411 class_id,
412 SizeClassMap::Size(class_id),
413 region->mapped_user >> 10,
414 region->n_allocated,
415 region->n_allocated - region->n_freed);
419 // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
420 // introspection API.
421 void ForceLock() {
422 for (uptr i = 0; i < kNumClasses; i++) {
423 GetRegionInfo(i)->mutex.Lock();
427 void ForceUnlock() {
428 for (int i = (int)kNumClasses - 1; i >= 0; i--) {
429 GetRegionInfo(i)->mutex.Unlock();
433 typedef SizeClassMap SizeClassMapT;
434 static const uptr kNumClasses = SizeClassMap::kNumClasses;
435 static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
437 private:
438 static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
439 static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
440 COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
441 // kRegionSize must be >= 2^32.
442 COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
443 // Populate the free list with at most this number of bytes at once
444 // or with one element if its size is greater.
445 static const uptr kPopulateSize = 1 << 14;
446 // Call mmap for user memory with at least this size.
447 static const uptr kUserMapSize = 1 << 16;
448 // Call mmap for metadata memory with at least this size.
449 static const uptr kMetaMapSize = 1 << 16;
451 struct RegionInfo {
452 BlockingMutex mutex;
453 LFStack<Batch> free_list;
454 uptr allocated_user; // Bytes allocated for user memory.
455 uptr allocated_meta; // Bytes allocated for metadata.
456 uptr mapped_user; // Bytes mapped for user memory.
457 uptr mapped_meta; // Bytes mapped for metadata.
458 uptr n_allocated, n_freed; // Just stats.
460 COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
462 static uptr AdditionalSize() {
463 return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
464 GetPageSizeCached());
467 RegionInfo *GetRegionInfo(uptr class_id) {
468 CHECK_LT(class_id, kNumClasses);
469 RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
470 return &regions[class_id];
473 static uptr GetChunkIdx(uptr chunk, uptr size) {
474 u32 offset = chunk % kRegionSize;
475 // Here we divide by a non-constant. This is costly.
476 // We require that kRegionSize is at least 2^32 so that offset is 32-bit.
477 // We save 2x by using 32-bit div, but may need to use a 256-way switch.
478 return offset / (u32)size;
481 NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
482 uptr class_id, RegionInfo *region) {
483 BlockingMutexLock l(&region->mutex);
484 Batch *b = region->free_list.Pop();
485 if (b)
486 return b;
487 uptr size = SizeClassMap::Size(class_id);
488 uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
489 uptr beg_idx = region->allocated_user;
490 uptr end_idx = beg_idx + count * size;
491 uptr region_beg = kSpaceBeg + kRegionSize * class_id;
492 if (end_idx + size > region->mapped_user) {
493 // Do the mmap for the user memory.
494 uptr map_size = kUserMapSize;
495 while (end_idx + size > region->mapped_user + map_size)
496 map_size += kUserMapSize;
497 CHECK_GE(region->mapped_user + map_size, end_idx);
498 MapWithCallback(region_beg + region->mapped_user, map_size);
499 stat->Add(AllocatorStatMmapped, map_size);
500 region->mapped_user += map_size;
502 uptr total_count = (region->mapped_user - beg_idx - size)
503 / size / count * count;
504 region->allocated_meta += total_count * kMetadataSize;
505 if (region->allocated_meta > region->mapped_meta) {
506 uptr map_size = kMetaMapSize;
507 while (region->allocated_meta > region->mapped_meta + map_size)
508 map_size += kMetaMapSize;
509 // Do the mmap for the metadata.
510 CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
511 MapWithCallback(region_beg + kRegionSize -
512 region->mapped_meta - map_size, map_size);
513 region->mapped_meta += map_size;
515 CHECK_LE(region->allocated_meta, region->mapped_meta);
516 if (region->allocated_user + region->allocated_meta > kRegionSize) {
517 Printf("Out of memory. Dying.\n");
518 Printf("The process has exhausted %zuMB for size class %zu.\n",
519 kRegionSize / 1024 / 1024, size);
520 Die();
522 for (;;) {
523 if (class_id < SizeClassMap::kMinBatchClass)
524 b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
525 else
526 b = (Batch*)(region_beg + beg_idx);
527 b->count = count;
528 for (uptr i = 0; i < count; i++)
529 b->batch[i] = (void*)(region_beg + beg_idx + i * size);
530 region->allocated_user += count * size;
531 CHECK_LE(region->allocated_user, region->mapped_user);
532 beg_idx += count * size;
533 if (beg_idx + count * size + size > region->mapped_user)
534 break;
535 region->free_list.Push(b);
537 return b;
541 // SizeClassAllocator32 -- allocator for 32-bit address space.
542 // This allocator can theoretically be used on 64-bit arch, but there it is less
543 // efficient than SizeClassAllocator64.
545 // [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
546 // be returned by MmapOrDie().
548 // Region:
549 // a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
550 // Since the regions are aligned by kRegionSize, there are exactly
551 // kNumPossibleRegions possible regions in the address space and so we keep
552 // a u8 array possible_regions[kNumPossibleRegions] to store the size classes.
553 // A size class of 0 means the region is not used by the allocator.
555 // One Region is used to allocate chunks of a single size class.
556 // A Region looks like this:
557 // UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
559 // In order to avoid false sharing, the objects of this class should be
560 // cache-line aligned.
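//
// Illustrative numbers for a 32-bit target (kRegionSizeLog = 20 below, so
// kRegionSize = 1M; with kSpaceSize = 2^32 there are 4096 possible regions):
//   ComputeRegionId(p)  == p >> 20        (index into possible_regions[])
//   ComputeRegionBeg(p) == p & ~(kRegionSize - 1)
//   possible_regions[id] holds the size class of that region, or 0 if the
//   region was not allocated by this allocator.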
561 template <const uptr kSpaceBeg, const u64 kSpaceSize,
562 const uptr kMetadataSize, class SizeClassMap,
563 class MapUnmapCallback = NoOpMapUnmapCallback>
564 class SizeClassAllocator32 {
565 public:
566 typedef typename SizeClassMap::TransferBatch Batch;
567 typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
568 SizeClassMap, MapUnmapCallback> ThisT;
569 typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
571 void Init() {
572 state_ = reinterpret_cast<State *>(MapWithCallback(sizeof(State)));
575 void *MapWithCallback(uptr size) {
576 size = RoundUpTo(size, GetPageSizeCached());
577 void *res = MmapOrDie(size, "SizeClassAllocator32");
578 MapUnmapCallback().OnMap((uptr)res, size);
579 return res;
582 void UnmapWithCallback(uptr beg, uptr size) {
583 MapUnmapCallback().OnUnmap(beg, size);
584 UnmapOrDie(reinterpret_cast<void *>(beg), size);
587 static bool CanAllocate(uptr size, uptr alignment) {
588 return size <= SizeClassMap::kMaxSize &&
589 alignment <= SizeClassMap::kMaxSize;
592 void *GetMetaData(void *p) {
593 CHECK(PointerIsMine(p));
594 uptr mem = reinterpret_cast<uptr>(p);
595 uptr beg = ComputeRegionBeg(mem);
596 uptr size = SizeClassMap::Size(GetSizeClass(p));
597 u32 offset = mem - beg;
598 uptr n = offset / (u32)size; // 32-bit division
599 uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
600 return reinterpret_cast<void*>(meta);
603 NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
604 uptr class_id) {
605 CHECK_LT(class_id, kNumClasses);
606 SizeClassInfo *sci = GetSizeClassInfo(class_id);
607 SpinMutexLock l(&sci->mutex);
608 if (sci->free_list.empty())
609 PopulateFreeList(stat, c, sci, class_id);
610 CHECK(!sci->free_list.empty());
611 Batch *b = sci->free_list.front();
612 sci->free_list.pop_front();
613 return b;
616 NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
617 CHECK_LT(class_id, kNumClasses);
618 SizeClassInfo *sci = GetSizeClassInfo(class_id);
619 SpinMutexLock l(&sci->mutex);
620 sci->free_list.push_front(b);
623 bool PointerIsMine(void *p) {
624 return GetSizeClass(p) != 0;
627 uptr GetSizeClass(void *p) {
628 return state_->possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
631 void *GetBlockBegin(void *p) {
632 CHECK(PointerIsMine(p));
633 uptr mem = reinterpret_cast<uptr>(p);
634 uptr beg = ComputeRegionBeg(mem);
635 uptr size = SizeClassMap::Size(GetSizeClass(p));
636 u32 offset = mem - beg;
637 u32 n = offset / (u32)size; // 32-bit division
638 uptr res = beg + (n * (u32)size);
639 return reinterpret_cast<void*>(res);
642 uptr GetActuallyAllocatedSize(void *p) {
643 CHECK(PointerIsMine(p));
644 return SizeClassMap::Size(GetSizeClass(p));
647 uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
649 uptr TotalMemoryUsed() {
650 // No need to lock here.
651 uptr res = 0;
652 for (uptr i = 0; i < kNumPossibleRegions; i++)
653 if (state_->possible_regions[i])
654 res += kRegionSize;
655 return res;
658 void TestOnlyUnmap() {
659 for (uptr i = 0; i < kNumPossibleRegions; i++)
660 if (state_->possible_regions[i])
661 UnmapWithCallback((i * kRegionSize), kRegionSize);
662 UnmapWithCallback(reinterpret_cast<uptr>(state_), sizeof(State));
665 // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
666 // introspection API.
667 void ForceLock() {
668 for (uptr i = 0; i < kNumClasses; i++) {
669 GetSizeClassInfo(i)->mutex.Lock();
673 void ForceUnlock() {
674 for (int i = kNumClasses - 1; i >= 0; i--) {
675 GetSizeClassInfo(i)->mutex.Unlock();
679 void PrintStats() {
682 typedef SizeClassMap SizeClassMapT;
683 static const uptr kNumClasses = SizeClassMap::kNumClasses;
685 private:
686 static const uptr kRegionSizeLog = SANITIZER_WORDSIZE == 64 ? 24 : 20;
687 static const uptr kRegionSize = 1 << kRegionSizeLog;
688 static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
690 struct SizeClassInfo {
691 SpinMutex mutex;
692 IntrusiveList<Batch> free_list;
693 char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
695 COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
697 uptr ComputeRegionId(uptr mem) {
698 uptr res = mem >> kRegionSizeLog;
699 CHECK_LT(res, kNumPossibleRegions);
700 return res;
703 uptr ComputeRegionBeg(uptr mem) {
704 return mem & ~(kRegionSize - 1);
707 uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
708 CHECK_LT(class_id, kNumClasses);
709 uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
710 "SizeClassAllocator32"));
711 MapUnmapCallback().OnMap(res, kRegionSize);
712 stat->Add(AllocatorStatMmapped, kRegionSize);
713 CHECK_EQ(0U, (res & (kRegionSize - 1)));
714 CHECK_EQ(0U, state_->possible_regions[ComputeRegionId(res)]);
715 state_->possible_regions[ComputeRegionId(res)] = class_id;
716 return res;
719 SizeClassInfo *GetSizeClassInfo(uptr class_id) {
720 CHECK_LT(class_id, kNumClasses);
721 return &state_->size_class_info_array[class_id];
724 void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
725 SizeClassInfo *sci, uptr class_id) {
726 uptr size = SizeClassMap::Size(class_id);
727 uptr reg = AllocateRegion(stat, class_id);
728 uptr n_chunks = kRegionSize / (size + kMetadataSize);
729 uptr max_count = SizeClassMap::MaxCached(class_id);
730 Batch *b = 0;
731 for (uptr i = reg; i < reg + n_chunks * size; i += size) {
732 if (b == 0) {
733 if (class_id < SizeClassMap::kMinBatchClass)
734 b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
735 else
736 b = (Batch*)i;
737 b->count = 0;
739 b->batch[b->count++] = (void*)i;
740 if (b->count == max_count) {
741 sci->free_list.push_back(b);
742 b = 0;
745 if (b)
746 sci->free_list.push_back(b);
749 struct State {
750 u8 possible_regions[kNumPossibleRegions];
751 SizeClassInfo size_class_info_array[kNumClasses];
753 State *state_;
756 // Objects of this type should be used as local caches for SizeClassAllocator64
757 // or SizeClassAllocator32. Since the typical use of this class is to have one
758 // object per thread in TLS, it has to be POD.
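//
// Typical per-thread flow (illustrative sketch; Allocator stands for one of
// the size-class allocators above and is assumed to outlive the cache):
//   SizeClassAllocatorLocalCache<Allocator> cache;  // zero-initialized, e.g. in TLS
//   cache.Init(&global_stats);                      // or Init(0) to skip stats
//   void *p = cache.Allocate(&allocator, allocator.ClassID(96));
//   cache.Deallocate(&allocator, allocator.GetSizeClass(p), p);
//   cache.Destroy(&allocator, &global_stats);       // drains cached chunks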
759 template<class SizeClassAllocator>
760 struct SizeClassAllocatorLocalCache {
761 typedef SizeClassAllocator Allocator;
762 static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
764 void Init(AllocatorGlobalStats *s) {
765 stats_.Init();
766 if (s)
767 s->Register(&stats_);
770 void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
771 Drain(allocator);
772 if (s)
773 s->Unregister(&stats_);
776 void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
777 CHECK_NE(class_id, 0UL);
778 CHECK_LT(class_id, kNumClasses);
779 stats_.Add(AllocatorStatMalloced, SizeClassMap::Size(class_id));
780 PerClass *c = &per_class_[class_id];
781 if (UNLIKELY(c->count == 0))
782 Refill(allocator, class_id);
783 void *res = c->batch[--c->count];
784 PREFETCH(c->batch[c->count - 1]);
785 return res;
788 void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
789 CHECK_NE(class_id, 0UL);
790 CHECK_LT(class_id, kNumClasses);
791 stats_.Add(AllocatorStatFreed, SizeClassMap::Size(class_id));
792 PerClass *c = &per_class_[class_id];
793 if (UNLIKELY(c->count == c->max_count))
794 Drain(allocator, class_id);
795 c->batch[c->count++] = p;
798 void Drain(SizeClassAllocator *allocator) {
799 for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
800 PerClass *c = &per_class_[class_id];
801 while (c->count > 0)
802 Drain(allocator, class_id);
806 // private:
807 typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
808 typedef typename SizeClassMap::TransferBatch Batch;
809 struct PerClass {
810 uptr count;
811 uptr max_count;
812 void *batch[2 * SizeClassMap::kMaxNumCached];
814 PerClass per_class_[kNumClasses];
815 AllocatorStats stats_;
817 void InitCache() {
818 if (per_class_[0].max_count)
819 return;
820 for (uptr i = 0; i < kNumClasses; i++) {
821 PerClass *c = &per_class_[i];
822 c->max_count = 2 * SizeClassMap::MaxCached(i);
826 NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
827 InitCache();
828 PerClass *c = &per_class_[class_id];
829 Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
830 CHECK_GT(b->count, 0);
831 for (uptr i = 0; i < b->count; i++)
832 c->batch[i] = b->batch[i];
833 c->count = b->count;
834 if (class_id < SizeClassMap::kMinBatchClass)
835 Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
838 NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
839 InitCache();
840 PerClass *c = &per_class_[class_id];
841 Batch *b;
842 if (class_id < SizeClassMap::kMinBatchClass)
843 b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
844 else
845 b = (Batch*)c->batch[0];
846 uptr cnt = Min(c->max_count / 2, c->count);
847 for (uptr i = 0; i < cnt; i++) {
848 b->batch[i] = c->batch[i];
849 c->batch[i] = c->batch[i + c->max_count / 2];
851 b->count = cnt;
852 c->count -= cnt;
853 allocator->DeallocateBatch(&stats_, class_id, b);
857 // This class can (de)allocate only large chunks of memory using mmap/unmap.
858 // The main purpose of this allocator is to cover large and rare allocation
859 // sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
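//
// Layout of a single chunk (illustrative, no extra alignment requested):
//   [map_beg, map_beg + page_size_)  header page: Header, then user metadata
//                                    (see GetMetaData).
//   [map_beg + page_size_, map_end)  user memory; Allocate() returns
//                                    map_beg + page_size_.
// RoundUpMapSize(size) == RoundUpTo(size, page_size_) + page_size_, so even a
// one-byte allocation consumes two pages of address space.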
860 template <class MapUnmapCallback = NoOpMapUnmapCallback>
861 class LargeMmapAllocator {
862 public:
863 void Init() {
864 internal_memset(this, 0, sizeof(*this));
865 page_size_ = GetPageSizeCached();
868 void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
869 CHECK(IsPowerOfTwo(alignment));
870 uptr map_size = RoundUpMapSize(size);
871 if (alignment > page_size_)
872 map_size += alignment;
873 if (map_size < size) return 0; // Overflow.
874 uptr map_beg = reinterpret_cast<uptr>(
875 MmapOrDie(map_size, "LargeMmapAllocator"));
876 MapUnmapCallback().OnMap(map_beg, map_size);
877 uptr map_end = map_beg + map_size;
878 uptr res = map_beg + page_size_;
879 if (res & (alignment - 1)) // Align.
880 res += alignment - (res & (alignment - 1));
881 CHECK_EQ(0, res & (alignment - 1));
882 CHECK_LE(res + size, map_end);
883 Header *h = GetHeader(res);
884 h->size = size;
885 h->map_beg = map_beg;
886 h->map_size = map_size;
887 uptr size_log = MostSignificantSetBitIndex(map_size);
888 CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
890 SpinMutexLock l(&mutex_);
891 uptr idx = n_chunks_++;
892 CHECK_LT(idx, kMaxNumChunks);
893 h->chunk_idx = idx;
894 chunks_[idx] = h;
895 stats.n_allocs++;
896 stats.currently_allocated += map_size;
897 stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
898 stats.by_size_log[size_log]++;
899 stat->Add(AllocatorStatMalloced, map_size);
900 stat->Add(AllocatorStatMmapped, map_size);
902 return reinterpret_cast<void*>(res);
905 void Deallocate(AllocatorStats *stat, void *p) {
906 Header *h = GetHeader(p);
908 SpinMutexLock l(&mutex_);
909 uptr idx = h->chunk_idx;
910 CHECK_EQ(chunks_[idx], h);
911 CHECK_LT(idx, n_chunks_);
912 chunks_[idx] = chunks_[n_chunks_ - 1];
913 chunks_[idx]->chunk_idx = idx;
914 n_chunks_--;
915 stats.n_frees++;
916 stats.currently_allocated -= h->map_size;
917 stat->Add(AllocatorStatFreed, h->map_size);
918 stat->Add(AllocatorStatUnmapped, h->map_size);
920 MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
921 UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
924 uptr TotalMemoryUsed() {
925 SpinMutexLock l(&mutex_);
926 uptr res = 0;
927 for (uptr i = 0; i < n_chunks_; i++) {
928 Header *h = chunks_[i];
929 CHECK_EQ(h->chunk_idx, i);
930 res += RoundUpMapSize(h->size);
932 return res;
935 bool PointerIsMine(void *p) {
936 return GetBlockBegin(p) != 0;
939 uptr GetActuallyAllocatedSize(void *p) {
940 return RoundUpTo(GetHeader(p)->size, page_size_);
943 // At least page_size_/2 metadata bytes are available.
944 void *GetMetaData(void *p) {
945 // Too slow: CHECK_EQ(p, GetBlockBegin(p));
946 CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
947 return GetHeader(p) + 1;
950 void *GetBlockBegin(void *ptr) {
951 uptr p = reinterpret_cast<uptr>(ptr);
952 SpinMutexLock l(&mutex_);
953 uptr nearest_chunk = 0;
954 // Cache-friendly linear search.
955 for (uptr i = 0; i < n_chunks_; i++) {
956 uptr ch = reinterpret_cast<uptr>(chunks_[i]);
957 if (p < ch) continue; // p is to the left of this chunk; skip it.
958 if (p - ch < p - nearest_chunk)
959 nearest_chunk = ch;
961 if (!nearest_chunk)
962 return 0;
963 Header *h = reinterpret_cast<Header *>(nearest_chunk);
964 CHECK_GE(nearest_chunk, h->map_beg);
965 CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
966 CHECK_LE(nearest_chunk, p);
967 if (h->map_beg + h->map_size < p)
968 return 0;
969 return GetUser(h);
972 void PrintStats() {
973 Printf("Stats: LargeMmapAllocator: allocated %zd times, "
974 "remains %zd (%zd K) max %zd M; by size logs: ",
975 stats.n_allocs, stats.n_allocs - stats.n_frees,
976 stats.currently_allocated >> 10, stats.max_allocated >> 20);
977 for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
978 uptr c = stats.by_size_log[i];
979 if (!c) continue;
980 Printf("%zd:%zd; ", i, c);
982 Printf("\n");
985 // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
986 // introspection API.
987 void ForceLock() {
988 mutex_.Lock();
991 void ForceUnlock() {
992 mutex_.Unlock();
995 private:
996 static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
997 struct Header {
998 uptr map_beg;
999 uptr map_size;
1000 uptr size;
1001 uptr chunk_idx;
1004 Header *GetHeader(uptr p) {
1005 CHECK_EQ(p % page_size_, 0);
1006 return reinterpret_cast<Header*>(p - page_size_);
1008 Header *GetHeader(void *p) { return GetHeader(reinterpret_cast<uptr>(p)); }
1010 void *GetUser(Header *h) {
1011 CHECK_EQ((uptr)h % page_size_, 0);
1012 return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
1015 uptr RoundUpMapSize(uptr size) {
1016 return RoundUpTo(size, page_size_) + page_size_;
1019 uptr page_size_;
1020 Header *chunks_[kMaxNumChunks];
1021 uptr n_chunks_;
1022 struct Stats {
1023 uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
1024 } stats;
1025 SpinMutex mutex_;
1028 // This class implements a complete memory allocator by using two
1029 // internal allocators:
1030 // PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
1031 // When allocating 2^x bytes it should return a 2^x-aligned chunk.
1032 // PrimaryAllocator is used via a local AllocatorCache.
1033 // SecondaryAllocator can allocate anything, but is not efficient.
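//
// Typical composition (illustrative sketch; the address-space constants are
// placeholders, not values mandated by this file):
//   typedef SizeClassAllocator64<0x600000000000ULL, 0x10000000000ULL /* 2^40 */,
//                                16, DefaultSizeClassMap> Primary;
//   typedef SizeClassAllocatorLocalCache<Primary> AllocatorCache;
//   typedef LargeMmapAllocator<> Secondary;
//   typedef CombinedAllocator<Primary, AllocatorCache, Secondary> Allocator;
//
//   Allocator allocator;  allocator.Init();
//   AllocatorCache cache; allocator.InitCache(&cache);  // one cache per thread
//   void *p = allocator.Allocate(&cache, 100, 8);
//   allocator.Deallocate(&cache, p);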
1034 template <class PrimaryAllocator, class AllocatorCache,
1035 class SecondaryAllocator> // NOLINT
1036 class CombinedAllocator {
1037 public:
1038 void Init() {
1039 primary_.Init();
1040 secondary_.Init();
1041 stats_.Init();
1044 void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
1045 bool cleared = false) {
1046 // Returning 0 on malloc(0) may break a lot of code.
1047 if (size == 0)
1048 size = 1;
1049 if (size + alignment < size)
1050 return 0;
1051 if (alignment > 8)
1052 size = RoundUpTo(size, alignment);
1053 void *res;
1054 if (primary_.CanAllocate(size, alignment))
1055 res = cache->Allocate(&primary_, primary_.ClassID(size));
1056 else
1057 res = secondary_.Allocate(&stats_, size, alignment);
1058 if (alignment > 8)
1059 CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
1060 if (cleared && res)
1061 internal_memset(res, 0, size);
1062 return res;
1065 void Deallocate(AllocatorCache *cache, void *p) {
1066 if (!p) return;
1067 if (primary_.PointerIsMine(p))
1068 cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
1069 else
1070 secondary_.Deallocate(&stats_, p);
1073 void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
1074 uptr alignment) {
1075 if (!p)
1076 return Allocate(cache, new_size, alignment);
1077 if (!new_size) {
1078 Deallocate(cache, p);
1079 return 0;
1081 CHECK(PointerIsMine(p));
1082 uptr old_size = GetActuallyAllocatedSize(p);
1083 uptr memcpy_size = Min(new_size, old_size);
1084 void *new_p = Allocate(cache, new_size, alignment);
1085 if (new_p)
1086 internal_memcpy(new_p, p, memcpy_size);
1087 Deallocate(cache, p);
1088 return new_p;
1091 bool PointerIsMine(void *p) {
1092 if (primary_.PointerIsMine(p))
1093 return true;
1094 return secondary_.PointerIsMine(p);
1097 bool FromPrimary(void *p) {
1098 return primary_.PointerIsMine(p);
1101 void *GetMetaData(void *p) {
1102 if (primary_.PointerIsMine(p))
1103 return primary_.GetMetaData(p);
1104 return secondary_.GetMetaData(p);
1107 void *GetBlockBegin(void *p) {
1108 if (primary_.PointerIsMine(p))
1109 return primary_.GetBlockBegin(p);
1110 return secondary_.GetBlockBegin(p);
1113 uptr GetActuallyAllocatedSize(void *p) {
1114 if (primary_.PointerIsMine(p))
1115 return primary_.GetActuallyAllocatedSize(p);
1116 return secondary_.GetActuallyAllocatedSize(p);
1119 uptr TotalMemoryUsed() {
1120 return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
1123 void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
1125 void InitCache(AllocatorCache *cache) {
1126 cache->Init(&stats_);
1129 void DestroyCache(AllocatorCache *cache) {
1130 cache->Destroy(&primary_, &stats_);
1133 void SwallowCache(AllocatorCache *cache) {
1134 cache->Drain(&primary_);
1137 void GetStats(AllocatorStatCounters s) const {
1138 stats_.Get(s);
1141 void PrintStats() {
1142 primary_.PrintStats();
1143 secondary_.PrintStats();
1146 // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
1147 // introspection API.
1148 void ForceLock() {
1149 primary_.ForceLock();
1150 secondary_.ForceLock();
1153 void ForceUnlock() {
1154 secondary_.ForceUnlock();
1155 primary_.ForceUnlock();
1158 private:
1159 PrimaryAllocator primary_;
1160 SecondaryAllocator secondary_;
1161 AllocatorGlobalStats stats_;
1164 // Returns true if calloc(size, n) should return 0 due to overflow in size*n.
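// (For example, on a 32-bit target calloc(1 << 16, 1 << 17) would need 2^33
// bytes, which does not fit into uptr, so the call should return 0.)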
1165 bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
1167 } // namespace __sanitizer
1169 #endif // SANITIZER_ALLOCATOR_H