libsanitizer/sanitizer_common/sanitizer_allocator.h

   1 //===-- sanitizer_allocator.h -----------------------------------*- C++ -*-===//
   2 //
   3 // This file is distributed under the University of Illinois Open Source
   4 // License. See LICENSE.TXT for details.
   5 //
   6 //===----------------------------------------------------------------------===//
   7 //
   8 // Specialized memory allocator for ThreadSanitizer, MemorySanitizer, etc.
   9 //
  10 //===----------------------------------------------------------------------===//
  11
  12 #ifndef SANITIZER_ALLOCATOR_H
  13 #define SANITIZER_ALLOCATOR_H
  14
  15 #include "sanitizer_internal_defs.h"
  16 #include "sanitizer_common.h"
  17 #include "sanitizer_libc.h"
  18 #include "sanitizer_list.h"
  19 #include "sanitizer_mutex.h"
  20 #include "sanitizer_lfstack.h"
  21
  22 namespace __sanitizer {
  23
  24 // Depending on allocator_may_return_null either return 0 or crash.
  25 void *AllocatorReturnNull();
  26
  27 // SizeClassMap maps allocation sizes into size classes and back.
  28 // Class 0 corresponds to size 0.
  29 // Classes 1 - 16 correspond to sizes 16 to 256 (size = class_id * 16).
  30 // Next 4 classes: 256 + i * 64  (i = 1 to 4).
  31 // Next 4 classes: 512 + i * 128 (i = 1 to 4).
  32 // ...
  33 // Next 4 classes: 2^k + i * 2^(k-2) (i = 1 to 4).
  34 // Last class corresponds to kMaxSize = 1 << kMaxSizeLog.
  35 //
  36 // This structure of the size class map gives us:
  37 //   - Efficient table-free class-to-size and size-to-class functions.
  38 //   - Difference between two consequent size classes is betweed 14% and 25%
  39 //
  40 // This class also gives a hint to a thread-caching allocator about the amount
  41 // of chunks that need to be cached per-thread:
  42 //  - kMaxNumCached is the maximal number of chunks per size class.
  43 //  - (1 << kMaxBytesCachedLog) is the maximal number of bytes per size class.
  44 //
  45 // Part of output of SizeClassMap::Print():
  46 // c00 => s: 0 diff: +0 00% l 0 cached: 0 0; id 0
  47 // c01 => s: 16 diff: +16 00% l 4 cached: 256 4096; id 1
  48 // c02 => s: 32 diff: +16 100% l 5 cached: 256 8192; id 2
  49 // c03 => s: 48 diff: +16 50% l 5 cached: 256 12288; id 3
  50 // c04 => s: 64 diff: +16 33% l 6 cached: 256 16384; id 4
  51 // c05 => s: 80 diff: +16 25% l 6 cached: 256 20480; id 5
  52 // c06 => s: 96 diff: +16 20% l 6 cached: 256 24576; id 6
  53 // c07 => s: 112 diff: +16 16% l 6 cached: 256 28672; id 7
  54 //
  55 // c08 => s: 128 diff: +16 14% l 7 cached: 256 32768; id 8
  56 // c09 => s: 144 diff: +16 12% l 7 cached: 256 36864; id 9
  57 // c10 => s: 160 diff: +16 11% l 7 cached: 256 40960; id 10
  58 // c11 => s: 176 diff: +16 10% l 7 cached: 256 45056; id 11
  59 // c12 => s: 192 diff: +16 09% l 7 cached: 256 49152; id 12
  60 // c13 => s: 208 diff: +16 08% l 7 cached: 256 53248; id 13
  61 // c14 => s: 224 diff: +16 07% l 7 cached: 256 57344; id 14
  62 // c15 => s: 240 diff: +16 07% l 7 cached: 256 61440; id 15
  63 //
  64 // c16 => s: 256 diff: +16 06% l 8 cached: 256 65536; id 16
  65 // c17 => s: 320 diff: +64 25% l 8 cached: 204 65280; id 17
  66 // c18 => s: 384 diff: +64 20% l 8 cached: 170 65280; id 18
  67 // c19 => s: 448 diff: +64 16% l 8 cached: 146 65408; id 19
  68 //
  69 // c20 => s: 512 diff: +64 14% l 9 cached: 128 65536; id 20
  70 // c21 => s: 640 diff: +128 25% l 9 cached: 102 65280; id 21
  71 // c22 => s: 768 diff: +128 20% l 9 cached: 85 65280; id 22
  72 // c23 => s: 896 diff: +128 16% l 9 cached: 73 65408; id 23
  73 //
  74 // c24 => s: 1024 diff: +128 14% l 10 cached: 64 65536; id 24
  75 // c25 => s: 1280 diff: +256 25% l 10 cached: 51 65280; id 25
  76 // c26 => s: 1536 diff: +256 20% l 10 cached: 42 64512; id 26
  77 // c27 => s: 1792 diff: +256 16% l 10 cached: 36 64512; id 27
  78 //
  79 // ...
  80 //
  81 // c48 => s: 65536 diff: +8192 14% l 16 cached: 1 65536; id 48
  82 // c49 => s: 81920 diff: +16384 25% l 16 cached: 1 81920; id 49
  83 // c50 => s: 98304 diff: +16384 20% l 16 cached: 1 98304; id 50
  84 // c51 => s: 114688 diff: +16384 16% l 16 cached: 1 114688; id 51
  85 //
  86 // c52 => s: 131072 diff: +16384 14% l 17 cached: 1 131072; id 52
  87
  88 template <uptr kMaxSizeLog, uptr kMaxNumCachedT, uptr kMaxBytesCachedLog>
  89 class SizeClassMap {
  90   static const uptr kMinSizeLog = 4;
  91   static const uptr kMidSizeLog = kMinSizeLog + 4;
  92   static const uptr kMinSize = 1 << kMinSizeLog;
  93   static const uptr kMidSize = 1 << kMidSizeLog;
  94   static const uptr kMidClass = kMidSize / kMinSize;
  95   static const uptr S = 2;
  96   static const uptr M = (1 << S) - 1;
  97
  98  public:
  99   static const uptr kMaxNumCached = kMaxNumCachedT;
 100   // We transfer chunks between central and thread-local free lists in batches.
 101   // For small size classes we allocate batches separately.
 102   // For large size classes we use one of the chunks to store the batch.
 103   struct TransferBatch {
 104     TransferBatch *next;
 105     uptr count;
 106     void *batch[kMaxNumCached];
 107   };
 108
 109   static const uptr kMaxSize = 1UL << kMaxSizeLog;
 110   static const uptr kNumClasses =
 111       kMidClass + ((kMaxSizeLog - kMidSizeLog) << S) + 1;
 112   COMPILER_CHECK(kNumClasses >= 32 && kNumClasses <= 256);
 113   static const uptr kNumClassesRounded =
 114       kNumClasses == 32  ? 32 :
 115       kNumClasses <= 64  ? 64 :
 116       kNumClasses <= 128 ? 128 : 256;
 117
 118   static uptr Size(uptr class_id) {
 119     if (class_id <= kMidClass)
 120       return kMinSize * class_id;
 121     class_id -= kMidClass;
 122     uptr t = kMidSize << (class_id >> S);
 123     return t + (t >> S) * (class_id & M);
 124   }
 125
 126   static uptr ClassID(uptr size) {
 127     if (size <= kMidSize)
 128       return (size + kMinSize - 1) >> kMinSizeLog;
 129     if (size > kMaxSize) return 0;
 130     uptr l = MostSignificantSetBitIndex(size);
 131     uptr hbits = (size >> (l - S)) & M;
 132     uptr lbits = size & ((1 << (l - S)) - 1);
 133     uptr l1 = l - kMidSizeLog;
 134     return kMidClass + (l1 << S) + hbits + (lbits > 0);
 135   }
 136
 137   static uptr MaxCached(uptr class_id) {
 138     if (class_id == 0) return 0;
 139     uptr n = (1UL << kMaxBytesCachedLog) / Size(class_id);
 140     return Max<uptr>(1, Min(kMaxNumCached, n));
 141   }
 142
 143   static void Print() {
 144     uptr prev_s = 0;
 145     uptr total_cached = 0;
 146     for (uptr i = 0; i < kNumClasses; i++) {
 147       uptr s = Size(i);
 148       if (s >= kMidSize / 2 && (s & (s - 1)) == 0)
 149         Printf("\n");
 150       uptr d = s - prev_s;
 151       uptr p = prev_s ? (d * 100 / prev_s) : 0;
 152       uptr l = s ? MostSignificantSetBitIndex(s) : 0;
 153       uptr cached = MaxCached(i) * s;
 154       Printf("c%02zd => s: %zd diff: +%zd %02zd%% l %zd "
 155              "cached: %zd %zd; id %zd\n",
 156              i, Size(i), d, p, l, MaxCached(i), cached, ClassID(s));
 157       total_cached += cached;
 158       prev_s = s;
 159     }
 160     Printf("Total cached: %zd\n", total_cached);
 161   }
 162
 163   static bool SizeClassRequiresSeparateTransferBatch(uptr class_id) {
 164     return Size(class_id) < sizeof(TransferBatch) -
 165         sizeof(uptr) * (kMaxNumCached - MaxCached(class_id));
 166   }
 167
 168   static void Validate() {
 169     for (uptr c = 1; c < kNumClasses; c++) {
 170       // Printf("Validate: c%zd\n", c);
 171       uptr s = Size(c);
 172       CHECK_NE(s, 0U);
 173       CHECK_EQ(ClassID(s), c);
 174       if (c != kNumClasses - 1)
 175         CHECK_EQ(ClassID(s + 1), c + 1);
 176       CHECK_EQ(ClassID(s - 1), c);
 177       if (c)
 178         CHECK_GT(Size(c), Size(c-1));
 179     }
 180     CHECK_EQ(ClassID(kMaxSize + 1), 0);
 181
 182     for (uptr s = 1; s <= kMaxSize; s++) {
 183       uptr c = ClassID(s);
 184       // Printf("s%zd => c%zd\n", s, c);
 185       CHECK_LT(c, kNumClasses);
 186       CHECK_GE(Size(c), s);
 187       if (c > 0)
 188         CHECK_LT(Size(c-1), s);
 189     }
 190   }
 191 };
 192
 193 typedef SizeClassMap<17, 128, 16> DefaultSizeClassMap;
 194 typedef SizeClassMap<17, 64,  14> CompactSizeClassMap;
 195 template<class SizeClassAllocator> struct SizeClassAllocatorLocalCache;
 196
 197 // Memory allocator statistics
 198 enum AllocatorStat {
 199   AllocatorStatAllocated,
 200   AllocatorStatMapped,
 201   AllocatorStatCount
 202 };
 203
 204 typedef uptr AllocatorStatCounters[AllocatorStatCount];
 205
 206 // Per-thread stats, live in per-thread cache.
 207 class AllocatorStats {
 208  public:
 209   void Init() {
 210     internal_memset(this, 0, sizeof(*this));
 211   }
 212
 213   void Add(AllocatorStat i, uptr v) {
 214     v += atomic_load(&stats_[i], memory_order_relaxed);
 215     atomic_store(&stats_[i], v, memory_order_relaxed);
 216   }
 217
 218   void Sub(AllocatorStat i, uptr v) {
 219     v = atomic_load(&stats_[i], memory_order_relaxed) - v;
 220     atomic_store(&stats_[i], v, memory_order_relaxed);
 221   }
 222
 223   void Set(AllocatorStat i, uptr v) {
 224     atomic_store(&stats_[i], v, memory_order_relaxed);
 225   }
 226
 227   uptr Get(AllocatorStat i) const {
 228     return atomic_load(&stats_[i], memory_order_relaxed);
 229   }
 230
 231  private:
 232   friend class AllocatorGlobalStats;
 233   AllocatorStats *next_;
 234   AllocatorStats *prev_;
 235   atomic_uintptr_t stats_[AllocatorStatCount];
 236 };
 237
 238 // Global stats, used for aggregation and querying.
 239 class AllocatorGlobalStats : public AllocatorStats {
 240  public:
 241   void Init() {
 242     internal_memset(this, 0, sizeof(*this));
 243     next_ = this;
 244     prev_ = this;
 245   }
 246
 247   void Register(AllocatorStats *s) {
 248     SpinMutexLock l(&mu_);
 249     s->next_ = next_;
 250     s->prev_ = this;
 251     next_->prev_ = s;
 252     next_ = s;
 253   }
 254
 255   void Unregister(AllocatorStats *s) {
 256     SpinMutexLock l(&mu_);
 257     s->prev_->next_ = s->next_;
 258     s->next_->prev_ = s->prev_;
 259     for (int i = 0; i < AllocatorStatCount; i++)
 260       Add(AllocatorStat(i), s->Get(AllocatorStat(i)));
 261   }
 262
 263   void Get(AllocatorStatCounters s) const {
 264     internal_memset(s, 0, AllocatorStatCount * sizeof(uptr));
 265     SpinMutexLock l(&mu_);
 266     const AllocatorStats *stats = this;
 267     for (;;) {
 268       for (int i = 0; i < AllocatorStatCount; i++)
 269         s[i] += stats->Get(AllocatorStat(i));
 270       stats = stats->next_;
 271       if (stats == this)
 272         break;
 273     }
 274     // All stats must be non-negative.
 275     for (int i = 0; i < AllocatorStatCount; i++)
 276       s[i] = ((sptr)s[i]) >= 0 ? s[i] : 0;
 277   }
 278
 279  private:
 280   mutable SpinMutex mu_;
 281 };
 282
 283 // Allocators call these callbacks on mmap/munmap.
 284 struct NoOpMapUnmapCallback {
 285   void OnMap(uptr p, uptr size) const { }
 286   void OnUnmap(uptr p, uptr size) const { }
 287 };
 288
 289 // Callback type for iterating over chunks.
 290 typedef void (*ForEachChunkCallback)(uptr chunk, void *arg);
 291
 292 // SizeClassAllocator64 -- allocator for 64-bit address space.
 293 //
 294 // Space: a portion of address space of kSpaceSize bytes starting at
 295 // a fixed address (kSpaceBeg). Both constants are powers of two and
 296 // kSpaceBeg is kSpaceSize-aligned.
 297 // At the beginning the entire space is mprotect-ed, then small parts of it
 298 // are mapped on demand.
 299 //
 300 // Region: a part of Space dedicated to a single size class.
 301 // There are kNumClasses Regions of equal size.
 302 //
 303 // UserChunk: a piece of memory returned to user.
 304 // MetaChunk: kMetadataSize bytes of metadata associated with a UserChunk.
 305 //
 306 // A Region looks like this:
 307 // UserChunk1 ... UserChunkN <gap> MetaChunkN ... MetaChunk1
 308 template <const uptr kSpaceBeg, const uptr kSpaceSize,
 309           const uptr kMetadataSize, class SizeClassMap,
 310           class MapUnmapCallback = NoOpMapUnmapCallback>
 311 class SizeClassAllocator64 {
 312  public:
 313   typedef typename SizeClassMap::TransferBatch Batch;
 314   typedef SizeClassAllocator64<kSpaceBeg, kSpaceSize, kMetadataSize,
 315       SizeClassMap, MapUnmapCallback> ThisT;
 316   typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
 317
 318   void Init() {
 319     CHECK_EQ(kSpaceBeg,
 320              reinterpret_cast<uptr>(Mprotect(kSpaceBeg, kSpaceSize)));
 321     MapWithCallback(kSpaceEnd, AdditionalSize());
 322   }
 323
 324   void MapWithCallback(uptr beg, uptr size) {
 325     CHECK_EQ(beg, reinterpret_cast<uptr>(MmapFixedOrDie(beg, size)));
 326     MapUnmapCallback().OnMap(beg, size);
 327   }
 328
 329   void UnmapWithCallback(uptr beg, uptr size) {
 330     MapUnmapCallback().OnUnmap(beg, size);
 331     UnmapOrDie(reinterpret_cast<void *>(beg), size);
 332   }
 333
 334   static bool CanAllocate(uptr size, uptr alignment) {
 335     return size <= SizeClassMap::kMaxSize &&
 336       alignment <= SizeClassMap::kMaxSize;
 337   }
 338
 339   NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
 340                                 uptr class_id) {
 341     CHECK_LT(class_id, kNumClasses);
 342     RegionInfo *region = GetRegionInfo(class_id);
 343     Batch *b = region->free_list.Pop();
 344     if (b == 0)
 345       b = PopulateFreeList(stat, c, class_id, region);
 346     region->n_allocated += b->count;
 347     return b;
 348   }
 349
 350   NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
 351     RegionInfo *region = GetRegionInfo(class_id);
 352     CHECK_GT(b->count, 0);
 353     region->free_list.Push(b);
 354     region->n_freed += b->count;
 355   }
 356
 357   static bool PointerIsMine(const void *p) {
 358     return reinterpret_cast<uptr>(p) / kSpaceSize == kSpaceBeg / kSpaceSize;
 359   }
 360
 361   static uptr GetSizeClass(const void *p) {
 362     return (reinterpret_cast<uptr>(p) / kRegionSize) % kNumClassesRounded;
 363   }
 364
 365   void *GetBlockBegin(const void *p) {
 366     uptr class_id = GetSizeClass(p);
 367     uptr size = SizeClassMap::Size(class_id);
 368     if (!size) return 0;
 369     uptr chunk_idx = GetChunkIdx((uptr)p, size);
 370     uptr reg_beg = (uptr)p & ~(kRegionSize - 1);
 371     uptr beg = chunk_idx * size;
 372     uptr next_beg = beg + size;
 373     if (class_id >= kNumClasses) return 0;
 374     RegionInfo *region = GetRegionInfo(class_id);
 375     if (region->mapped_user >= next_beg)
 376       return reinterpret_cast<void*>(reg_beg + beg);
 377     return 0;
 378   }
 379
 380   static uptr GetActuallyAllocatedSize(void *p) {
 381     CHECK(PointerIsMine(p));
 382     return SizeClassMap::Size(GetSizeClass(p));
 383   }
 384
 385   uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
 386
 387   void *GetMetaData(const void *p) {
 388     uptr class_id = GetSizeClass(p);
 389     uptr size = SizeClassMap::Size(class_id);
 390     uptr chunk_idx = GetChunkIdx(reinterpret_cast<uptr>(p), size);
 391     return reinterpret_cast<void*>(kSpaceBeg + (kRegionSize * (class_id + 1)) -
 392                                    (1 + chunk_idx) * kMetadataSize);
 393   }
 394
 395   uptr TotalMemoryUsed() {
 396     uptr res = 0;
 397     for (uptr i = 0; i < kNumClasses; i++)
 398       res += GetRegionInfo(i)->allocated_user;
 399     return res;
 400   }
 401
 402   // Test-only.
 403   void TestOnlyUnmap() {
 404     UnmapWithCallback(kSpaceBeg, kSpaceSize + AdditionalSize());
 405   }
 406
 407   void PrintStats() {
 408     uptr total_mapped = 0;
 409     uptr n_allocated = 0;
 410     uptr n_freed = 0;
 411     for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
 412       RegionInfo *region = GetRegionInfo(class_id);
 413       total_mapped += region->mapped_user;
 414       n_allocated += region->n_allocated;
 415       n_freed += region->n_freed;
 416     }
 417     Printf("Stats: SizeClassAllocator64: %zdM mapped in %zd allocations; "
 418            "remains %zd\n",
 419            total_mapped >> 20, n_allocated, n_allocated - n_freed);
 420     for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
 421       RegionInfo *region = GetRegionInfo(class_id);
 422       if (region->mapped_user == 0) continue;
 423       Printf("  %02zd (%zd): total: %zd K allocs: %zd remains: %zd\n",
 424              class_id,
 425              SizeClassMap::Size(class_id),
 426              region->mapped_user >> 10,
 427              region->n_allocated,
 428              region->n_allocated - region->n_freed);
 429     }
 430   }
 431
 432   // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
 433   // introspection API.
 434   void ForceLock() {
 435     for (uptr i = 0; i < kNumClasses; i++) {
 436       GetRegionInfo(i)->mutex.Lock();
 437     }
 438   }
 439
 440   void ForceUnlock() {
 441     for (int i = (int)kNumClasses - 1; i >= 0; i--) {
 442       GetRegionInfo(i)->mutex.Unlock();
 443     }
 444   }
 445
 446   // Iterate over all existing chunks.
 447   // The allocator must be locked when calling this function.
 448   void ForEachChunk(ForEachChunkCallback callback, void *arg) {
 449     for (uptr class_id = 1; class_id < kNumClasses; class_id++) {
 450       RegionInfo *region = GetRegionInfo(class_id);
 451       uptr chunk_size = SizeClassMap::Size(class_id);
 452       uptr region_beg = kSpaceBeg + class_id * kRegionSize;
 453       for (uptr chunk = region_beg;
 454            chunk < region_beg + region->allocated_user;
 455            chunk += chunk_size) {
 456         // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
 457         callback(chunk, arg);
 458       }
 459     }
 460   }
 461
 462   static uptr AdditionalSize() {
 463     return RoundUpTo(sizeof(RegionInfo) * kNumClassesRounded,
 464                      GetPageSizeCached());
 465   }
 466
 467   typedef SizeClassMap SizeClassMapT;
 468   static const uptr kNumClasses = SizeClassMap::kNumClasses;
 469   static const uptr kNumClassesRounded = SizeClassMap::kNumClassesRounded;
 470
 471  private:
 472   static const uptr kRegionSize = kSpaceSize / kNumClassesRounded;
 473   static const uptr kSpaceEnd = kSpaceBeg + kSpaceSize;
 474   COMPILER_CHECK(kSpaceBeg % kSpaceSize == 0);
 475   // kRegionSize must be >= 2^32.
 476   COMPILER_CHECK((kRegionSize) >= (1ULL << (SANITIZER_WORDSIZE / 2)));
 477   // Populate the free list with at most this number of bytes at once
 478   // or with one element if its size is greater.
 479   static const uptr kPopulateSize = 1 << 14;
 480   // Call mmap for user memory with at least this size.
 481   static const uptr kUserMapSize = 1 << 16;
 482   // Call mmap for metadata memory with at least this size.
 483   static const uptr kMetaMapSize = 1 << 16;
 484
 485   struct RegionInfo {
 486     BlockingMutex mutex;
 487     LFStack<Batch> free_list;
 488     uptr allocated_user;  // Bytes allocated for user memory.
 489     uptr allocated_meta;  // Bytes allocated for metadata.
 490     uptr mapped_user;  // Bytes mapped for user memory.
 491     uptr mapped_meta;  // Bytes mapped for metadata.
 492     uptr n_allocated, n_freed;  // Just stats.
 493   };
 494   COMPILER_CHECK(sizeof(RegionInfo) >= kCacheLineSize);
 495
 496   RegionInfo *GetRegionInfo(uptr class_id) {
 497     CHECK_LT(class_id, kNumClasses);
 498     RegionInfo *regions = reinterpret_cast<RegionInfo*>(kSpaceBeg + kSpaceSize);
 499     return &regions[class_id];
 500   }
 501
 502   static uptr GetChunkIdx(uptr chunk, uptr size) {
 503     uptr offset = chunk % kRegionSize;
 504     // Here we divide by a non-constant. This is costly.
 505     // size always fits into 32-bits. If the offset fits too, use 32-bit div.
 506     if (offset >> (SANITIZER_WORDSIZE / 2))
 507       return offset / size;
 508     return (u32)offset / (u32)size;
 509   }
 510
 511   NOINLINE Batch* PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
 512                                    uptr class_id, RegionInfo *region) {
 513     BlockingMutexLock l(&region->mutex);
 514     Batch *b = region->free_list.Pop();
 515     if (b)
 516       return b;
 517     uptr size = SizeClassMap::Size(class_id);
 518     uptr count = size < kPopulateSize ? SizeClassMap::MaxCached(class_id) : 1;
 519     uptr beg_idx = region->allocated_user;
 520     uptr end_idx = beg_idx + count * size;
 521     uptr region_beg = kSpaceBeg + kRegionSize * class_id;
 522     if (end_idx + size > region->mapped_user) {
 523       // Do the mmap for the user memory.
 524       uptr map_size = kUserMapSize;
 525       while (end_idx + size > region->mapped_user + map_size)
 526         map_size += kUserMapSize;
 527       CHECK_GE(region->mapped_user + map_size, end_idx);
 528       MapWithCallback(region_beg + region->mapped_user, map_size);
 529       stat->Add(AllocatorStatMapped, map_size);
 530       region->mapped_user += map_size;
 531     }
 532     uptr total_count = (region->mapped_user - beg_idx - size)
 533         / size / count * count;
 534     region->allocated_meta += total_count * kMetadataSize;
 535     if (region->allocated_meta > region->mapped_meta) {
 536       uptr map_size = kMetaMapSize;
 537       while (region->allocated_meta > region->mapped_meta + map_size)
 538         map_size += kMetaMapSize;
 539       // Do the mmap for the metadata.
 540       CHECK_GE(region->mapped_meta + map_size, region->allocated_meta);
 541       MapWithCallback(region_beg + kRegionSize -
 542                       region->mapped_meta - map_size, map_size);
 543       region->mapped_meta += map_size;
 544     }
 545     CHECK_LE(region->allocated_meta, region->mapped_meta);
 546     if (region->mapped_user + region->mapped_meta > kRegionSize) {
 547       Printf("%s: Out of memory. Dying. ", SanitizerToolName);
 548       Printf("The process has exhausted %zuMB for size class %zu.\n",
 549           kRegionSize / 1024 / 1024, size);
 550       Die();
 551     }
 552     for (;;) {
 553       if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
 554         b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
 555       else
 556         b = (Batch*)(region_beg + beg_idx);
 557       b->count = count;
 558       for (uptr i = 0; i < count; i++)
 559         b->batch[i] = (void*)(region_beg + beg_idx + i * size);
 560       region->allocated_user += count * size;
 561       CHECK_LE(region->allocated_user, region->mapped_user);
 562       beg_idx += count * size;
 563       if (beg_idx + count * size + size > region->mapped_user)
 564         break;
 565       CHECK_GT(b->count, 0);
 566       region->free_list.Push(b);
 567     }
 568     return b;
 569   }
 570 };
 571
 572 // Maps integers in rage [0, kSize) to u8 values.
 573 template<u64 kSize>
 574 class FlatByteMap {
 575  public:
 576   void TestOnlyInit() {
 577     internal_memset(map_, 0, sizeof(map_));
 578   }
 579
 580   void set(uptr idx, u8 val) {
 581     CHECK_LT(idx, kSize);
 582     CHECK_EQ(0U, map_[idx]);
 583     map_[idx] = val;
 584   }
 585   u8 operator[] (uptr idx) {
 586     CHECK_LT(idx, kSize);
 587     // FIXME: CHECK may be too expensive here.
 588     return map_[idx];
 589   }
 590  private:
 591   u8 map_[kSize];
 592 };
 593
 594 // TwoLevelByteMap maps integers in range [0, kSize1*kSize2) to u8 values.
 595 // It is implemented as a two-dimensional array: array of kSize1 pointers
 596 // to kSize2-byte arrays. The secondary arrays are mmaped on demand.
 597 // Each value is initially zero and can be set to something else only once.
 598 // Setting and getting values from multiple threads is safe w/o extra locking.
 599 template <u64 kSize1, u64 kSize2, class MapUnmapCallback = NoOpMapUnmapCallback>
 600 class TwoLevelByteMap {
 601  public:
 602   void TestOnlyInit() {
 603     internal_memset(map1_, 0, sizeof(map1_));
 604     mu_.Init();
 605   }
 606   void TestOnlyUnmap() {
 607     for (uptr i = 0; i < kSize1; i++) {
 608       u8 *p = Get(i);
 609       if (!p) continue;
 610       MapUnmapCallback().OnUnmap(reinterpret_cast<uptr>(p), kSize2);
 611       UnmapOrDie(p, kSize2);
 612     }
 613   }
 614
 615   uptr size() const { return kSize1 * kSize2; }
 616   uptr size1() const { return kSize1; }
 617   uptr size2() const { return kSize2; }
 618
 619   void set(uptr idx, u8 val) {
 620     CHECK_LT(idx, kSize1 * kSize2);
 621     u8 *map2 = GetOrCreate(idx / kSize2);
 622     CHECK_EQ(0U, map2[idx % kSize2]);
 623     map2[idx % kSize2] = val;
 624   }
 625
 626   u8 operator[] (uptr idx) const {
 627     CHECK_LT(idx, kSize1 * kSize2);
 628     u8 *map2 = Get(idx / kSize2);
 629     if (!map2) return 0;
 630     return map2[idx % kSize2];
 631   }
 632
 633  private:
 634   u8 *Get(uptr idx) const {
 635     CHECK_LT(idx, kSize1);
 636     return reinterpret_cast<u8 *>(
 637         atomic_load(&map1_[idx], memory_order_acquire));
 638   }
 639
 640   u8 *GetOrCreate(uptr idx) {
 641     u8 *res = Get(idx);
 642     if (!res) {
 643       SpinMutexLock l(&mu_);
 644       if (!(res = Get(idx))) {
 645         res = (u8*)MmapOrDie(kSize2, "TwoLevelByteMap");
 646         MapUnmapCallback().OnMap(reinterpret_cast<uptr>(res), kSize2);
 647         atomic_store(&map1_[idx], reinterpret_cast<uptr>(res),
 648                      memory_order_release);
 649       }
 650     }
 651     return res;
 652   }
 653
 654   atomic_uintptr_t map1_[kSize1];
 655   StaticSpinMutex mu_;
 656 };
 657
 658 // SizeClassAllocator32 -- allocator for 32-bit address space.
 659 // This allocator can theoretically be used on 64-bit arch, but there it is less
 660 // efficient than SizeClassAllocator64.
 661 //
 662 // [kSpaceBeg, kSpaceBeg + kSpaceSize) is the range of addresses which can
 663 // be returned by MmapOrDie().
 664 //
 665 // Region:
 666 //   a result of a single call to MmapAlignedOrDie(kRegionSize, kRegionSize).
 667 // Since the regions are aligned by kRegionSize, there are exactly
 668 // kNumPossibleRegions possible regions in the address space and so we keep
 669 // a ByteMap possible_regions to store the size classes of each Region.
 670 // 0 size class means the region is not used by the allocator.
 671 //
 672 // One Region is used to allocate chunks of a single size class.
 673 // A Region looks like this:
 674 // UserChunk1 .. UserChunkN <gap> MetaChunkN .. MetaChunk1
 675 //
 676 // In order to avoid false sharing the objects of this class should be
 677 // chache-line aligned.
 678 template <const uptr kSpaceBeg, const u64 kSpaceSize,
 679           const uptr kMetadataSize, class SizeClassMap,
 680           const uptr kRegionSizeLog,
 681           class ByteMap,
 682           class MapUnmapCallback = NoOpMapUnmapCallback>
 683 class SizeClassAllocator32 {
 684  public:
 685   typedef typename SizeClassMap::TransferBatch Batch;
 686   typedef SizeClassAllocator32<kSpaceBeg, kSpaceSize, kMetadataSize,
 687       SizeClassMap, kRegionSizeLog, ByteMap, MapUnmapCallback> ThisT;
 688   typedef SizeClassAllocatorLocalCache<ThisT> AllocatorCache;
 689
 690   void Init() {
 691     possible_regions.TestOnlyInit();
 692     internal_memset(size_class_info_array, 0, sizeof(size_class_info_array));
 693   }
 694
 695   void *MapWithCallback(uptr size) {
 696     size = RoundUpTo(size, GetPageSizeCached());
 697     void *res = MmapOrDie(size, "SizeClassAllocator32");
 698     MapUnmapCallback().OnMap((uptr)res, size);
 699     return res;
 700   }
 701
 702   void UnmapWithCallback(uptr beg, uptr size) {
 703     MapUnmapCallback().OnUnmap(beg, size);
 704     UnmapOrDie(reinterpret_cast<void *>(beg), size);
 705   }
 706
 707   static bool CanAllocate(uptr size, uptr alignment) {
 708     return size <= SizeClassMap::kMaxSize &&
 709       alignment <= SizeClassMap::kMaxSize;
 710   }
 711
 712   void *GetMetaData(const void *p) {
 713     CHECK(PointerIsMine(p));
 714     uptr mem = reinterpret_cast<uptr>(p);
 715     uptr beg = ComputeRegionBeg(mem);
 716     uptr size = SizeClassMap::Size(GetSizeClass(p));
 717     u32 offset = mem - beg;
 718     uptr n = offset / (u32)size;  // 32-bit division
 719     uptr meta = (beg + kRegionSize) - (n + 1) * kMetadataSize;
 720     return reinterpret_cast<void*>(meta);
 721   }
 722
 723   NOINLINE Batch* AllocateBatch(AllocatorStats *stat, AllocatorCache *c,
 724                                 uptr class_id) {
 725     CHECK_LT(class_id, kNumClasses);
 726     SizeClassInfo *sci = GetSizeClassInfo(class_id);
 727     SpinMutexLock l(&sci->mutex);
 728     if (sci->free_list.empty())
 729       PopulateFreeList(stat, c, sci, class_id);
 730     CHECK(!sci->free_list.empty());
 731     Batch *b = sci->free_list.front();
 732     sci->free_list.pop_front();
 733     return b;
 734   }
 735
 736   NOINLINE void DeallocateBatch(AllocatorStats *stat, uptr class_id, Batch *b) {
 737     CHECK_LT(class_id, kNumClasses);
 738     SizeClassInfo *sci = GetSizeClassInfo(class_id);
 739     SpinMutexLock l(&sci->mutex);
 740     CHECK_GT(b->count, 0);
 741     sci->free_list.push_front(b);
 742   }
 743
 744   bool PointerIsMine(const void *p) {
 745     return GetSizeClass(p) != 0;
 746   }
 747
 748   uptr GetSizeClass(const void *p) {
 749     return possible_regions[ComputeRegionId(reinterpret_cast<uptr>(p))];
 750   }
 751
 752   void *GetBlockBegin(const void *p) {
 753     CHECK(PointerIsMine(p));
 754     uptr mem = reinterpret_cast<uptr>(p);
 755     uptr beg = ComputeRegionBeg(mem);
 756     uptr size = SizeClassMap::Size(GetSizeClass(p));
 757     u32 offset = mem - beg;
 758     u32 n = offset / (u32)size;  // 32-bit division
 759     uptr res = beg + (n * (u32)size);
 760     return reinterpret_cast<void*>(res);
 761   }
 762
 763   uptr GetActuallyAllocatedSize(void *p) {
 764     CHECK(PointerIsMine(p));
 765     return SizeClassMap::Size(GetSizeClass(p));
 766   }
 767
 768   uptr ClassID(uptr size) { return SizeClassMap::ClassID(size); }
 769
 770   uptr TotalMemoryUsed() {
 771     // No need to lock here.
 772     uptr res = 0;
 773     for (uptr i = 0; i < kNumPossibleRegions; i++)
 774       if (possible_regions[i])
 775         res += kRegionSize;
 776     return res;
 777   }
 778
 779   void TestOnlyUnmap() {
 780     for (uptr i = 0; i < kNumPossibleRegions; i++)
 781       if (possible_regions[i])
 782         UnmapWithCallback((i * kRegionSize), kRegionSize);
 783   }
 784
 785   // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
 786   // introspection API.
 787   void ForceLock() {
 788     for (uptr i = 0; i < kNumClasses; i++) {
 789       GetSizeClassInfo(i)->mutex.Lock();
 790     }
 791   }
 792
 793   void ForceUnlock() {
 794     for (int i = kNumClasses - 1; i >= 0; i--) {
 795       GetSizeClassInfo(i)->mutex.Unlock();
 796     }
 797   }
 798
 799   // Iterate over all existing chunks.
 800   // The allocator must be locked when calling this function.
 801   void ForEachChunk(ForEachChunkCallback callback, void *arg) {
 802     for (uptr region = 0; region < kNumPossibleRegions; region++)
 803       if (possible_regions[region]) {
 804         uptr chunk_size = SizeClassMap::Size(possible_regions[region]);
 805         uptr max_chunks_in_region = kRegionSize / (chunk_size + kMetadataSize);
 806         uptr region_beg = region * kRegionSize;
 807         for (uptr chunk = region_beg;
 808              chunk < region_beg + max_chunks_in_region * chunk_size;
 809              chunk += chunk_size) {
 810           // Too slow: CHECK_EQ((void *)chunk, GetBlockBegin((void *)chunk));
 811           callback(chunk, arg);
 812         }
 813       }
 814   }
 815
 816   void PrintStats() {
 817   }
 818
 819   typedef SizeClassMap SizeClassMapT;
 820   static const uptr kNumClasses = SizeClassMap::kNumClasses;
 821
 822  private:
 823   static const uptr kRegionSize = 1 << kRegionSizeLog;
 824   static const uptr kNumPossibleRegions = kSpaceSize / kRegionSize;
 825
 826   struct SizeClassInfo {
 827     SpinMutex mutex;
 828     IntrusiveList<Batch> free_list;
 829     char padding[kCacheLineSize - sizeof(uptr) - sizeof(IntrusiveList<Batch>)];
 830   };
 831   COMPILER_CHECK(sizeof(SizeClassInfo) == kCacheLineSize);
 832
 833   uptr ComputeRegionId(uptr mem) {
 834     uptr res = mem >> kRegionSizeLog;
 835     CHECK_LT(res, kNumPossibleRegions);
 836     return res;
 837   }
 838
 839   uptr ComputeRegionBeg(uptr mem) {
 840     return mem & ~(kRegionSize - 1);
 841   }
 842
 843   uptr AllocateRegion(AllocatorStats *stat, uptr class_id) {
 844     CHECK_LT(class_id, kNumClasses);
 845     uptr res = reinterpret_cast<uptr>(MmapAlignedOrDie(kRegionSize, kRegionSize,
 846                                       "SizeClassAllocator32"));
 847     MapUnmapCallback().OnMap(res, kRegionSize);
 848     stat->Add(AllocatorStatMapped, kRegionSize);
 849     CHECK_EQ(0U, (res & (kRegionSize - 1)));
 850     possible_regions.set(ComputeRegionId(res), static_cast<u8>(class_id));
 851     return res;
 852   }
 853
 854   SizeClassInfo *GetSizeClassInfo(uptr class_id) {
 855     CHECK_LT(class_id, kNumClasses);
 856     return &size_class_info_array[class_id];
 857   }
 858
 859   void PopulateFreeList(AllocatorStats *stat, AllocatorCache *c,
 860                         SizeClassInfo *sci, uptr class_id) {
 861     uptr size = SizeClassMap::Size(class_id);
 862     uptr reg = AllocateRegion(stat, class_id);
 863     uptr n_chunks = kRegionSize / (size + kMetadataSize);
 864     uptr max_count = SizeClassMap::MaxCached(class_id);
 865     Batch *b = 0;
 866     for (uptr i = reg; i < reg + n_chunks * size; i += size) {
 867       if (b == 0) {
 868         if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
 869           b = (Batch*)c->Allocate(this, SizeClassMap::ClassID(sizeof(Batch)));
 870         else
 871           b = (Batch*)i;
 872         b->count = 0;
 873       }
 874       b->batch[b->count++] = (void*)i;
 875       if (b->count == max_count) {
 876         CHECK_GT(b->count, 0);
 877         sci->free_list.push_back(b);
 878         b = 0;
 879       }
 880     }
 881     if (b) {
 882       CHECK_GT(b->count, 0);
 883       sci->free_list.push_back(b);
 884     }
 885   }
 886
 887   ByteMap possible_regions;
 888   SizeClassInfo size_class_info_array[kNumClasses];
 889 };
 890
 891 // Objects of this type should be used as local caches for SizeClassAllocator64
 892 // or SizeClassAllocator32. Since the typical use of this class is to have one
 893 // object per thread in TLS, is has to be POD.
 894 template<class SizeClassAllocator>
 895 struct SizeClassAllocatorLocalCache {
 896   typedef SizeClassAllocator Allocator;
 897   static const uptr kNumClasses = SizeClassAllocator::kNumClasses;
 898
 899   void Init(AllocatorGlobalStats *s) {
 900     stats_.Init();
 901     if (s)
 902       s->Register(&stats_);
 903   }
 904
 905   void Destroy(SizeClassAllocator *allocator, AllocatorGlobalStats *s) {
 906     Drain(allocator);
 907     if (s)
 908       s->Unregister(&stats_);
 909   }
 910
 911   void *Allocate(SizeClassAllocator *allocator, uptr class_id) {
 912     CHECK_NE(class_id, 0UL);
 913     CHECK_LT(class_id, kNumClasses);
 914     stats_.Add(AllocatorStatAllocated, SizeClassMap::Size(class_id));
 915     PerClass *c = &per_class_[class_id];
 916     if (UNLIKELY(c->count == 0))
 917       Refill(allocator, class_id);
 918     void *res = c->batch[--c->count];
 919     PREFETCH(c->batch[c->count - 1]);
 920     return res;
 921   }
 922
 923   void Deallocate(SizeClassAllocator *allocator, uptr class_id, void *p) {
 924     CHECK_NE(class_id, 0UL);
 925     CHECK_LT(class_id, kNumClasses);
 926     // If the first allocator call on a new thread is a deallocation, then
 927     // max_count will be zero, leading to check failure.
 928     InitCache();
 929     stats_.Sub(AllocatorStatAllocated, SizeClassMap::Size(class_id));
 930     PerClass *c = &per_class_[class_id];
 931     CHECK_NE(c->max_count, 0UL);
 932     if (UNLIKELY(c->count == c->max_count))
 933       Drain(allocator, class_id);
 934     c->batch[c->count++] = p;
 935   }
 936
 937   void Drain(SizeClassAllocator *allocator) {
 938     for (uptr class_id = 0; class_id < kNumClasses; class_id++) {
 939       PerClass *c = &per_class_[class_id];
 940       while (c->count > 0)
 941         Drain(allocator, class_id);
 942     }
 943   }
 944
 945   // private:
 946   typedef typename SizeClassAllocator::SizeClassMapT SizeClassMap;
 947   typedef typename SizeClassMap::TransferBatch Batch;
 948   struct PerClass {
 949     uptr count;
 950     uptr max_count;
 951     void *batch[2 * SizeClassMap::kMaxNumCached];
 952   };
 953   PerClass per_class_[kNumClasses];
 954   AllocatorStats stats_;
 955
 956   void InitCache() {
 957     if (per_class_[1].max_count)
 958       return;
 959     for (uptr i = 0; i < kNumClasses; i++) {
 960       PerClass *c = &per_class_[i];
 961       c->max_count = 2 * SizeClassMap::MaxCached(i);
 962     }
 963   }
 964
 965   NOINLINE void Refill(SizeClassAllocator *allocator, uptr class_id) {
 966     InitCache();
 967     PerClass *c = &per_class_[class_id];
 968     Batch *b = allocator->AllocateBatch(&stats_, this, class_id);
 969     CHECK_GT(b->count, 0);
 970     for (uptr i = 0; i < b->count; i++)
 971       c->batch[i] = b->batch[i];
 972     c->count = b->count;
 973     if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
 974       Deallocate(allocator, SizeClassMap::ClassID(sizeof(Batch)), b);
 975   }
 976
 977   NOINLINE void Drain(SizeClassAllocator *allocator, uptr class_id) {
 978     InitCache();
 979     PerClass *c = &per_class_[class_id];
 980     Batch *b;
 981     if (SizeClassMap::SizeClassRequiresSeparateTransferBatch(class_id))
 982       b = (Batch*)Allocate(allocator, SizeClassMap::ClassID(sizeof(Batch)));
 983     else
 984       b = (Batch*)c->batch[0];
 985     uptr cnt = Min(c->max_count / 2, c->count);
 986     for (uptr i = 0; i < cnt; i++) {
 987       b->batch[i] = c->batch[i];
 988       c->batch[i] = c->batch[i + c->max_count / 2];
 989     }
 990     b->count = cnt;
 991     c->count -= cnt;
 992     CHECK_GT(b->count, 0);
 993     allocator->DeallocateBatch(&stats_, class_id, b);
 994   }
 995 };
 996
 997 // This class can (de)allocate only large chunks of memory using mmap/unmap.
 998 // The main purpose of this allocator is to cover large and rare allocation
 999 // sizes not covered by more efficient allocators (e.g. SizeClassAllocator64).
1000 template <class MapUnmapCallback = NoOpMapUnmapCallback>
1001 class LargeMmapAllocator {
1002  public:
1003   void Init() {
1004     internal_memset(this, 0, sizeof(*this));
1005     page_size_ = GetPageSizeCached();
1006   }
1007
1008   void *Allocate(AllocatorStats *stat, uptr size, uptr alignment) {
1009     CHECK(IsPowerOfTwo(alignment));
1010     uptr map_size = RoundUpMapSize(size);
1011     if (alignment > page_size_)
1012       map_size += alignment;
1013     if (map_size < size) return AllocatorReturnNull();  // Overflow.
1014     uptr map_beg = reinterpret_cast<uptr>(
1015         MmapOrDie(map_size, "LargeMmapAllocator"));
1016     CHECK(IsAligned(map_beg, page_size_));
1017     MapUnmapCallback().OnMap(map_beg, map_size);
1018     uptr map_end = map_beg + map_size;
1019     uptr res = map_beg + page_size_;
1020     if (res & (alignment - 1))  // Align.
1021       res += alignment - (res & (alignment - 1));
1022     CHECK(IsAligned(res, alignment));
1023     CHECK(IsAligned(res, page_size_));
1024     CHECK_GE(res + size, map_beg);
1025     CHECK_LE(res + size, map_end);
1026     Header *h = GetHeader(res);
1027     h->size = size;
1028     h->map_beg = map_beg;
1029     h->map_size = map_size;
1030     uptr size_log = MostSignificantSetBitIndex(map_size);
1031     CHECK_LT(size_log, ARRAY_SIZE(stats.by_size_log));
1032     {
1033       SpinMutexLock l(&mutex_);
1034       uptr idx = n_chunks_++;
1035       chunks_sorted_ = false;
1036       CHECK_LT(idx, kMaxNumChunks);
1037       h->chunk_idx = idx;
1038       chunks_[idx] = h;
1039       stats.n_allocs++;
1040       stats.currently_allocated += map_size;
1041       stats.max_allocated = Max(stats.max_allocated, stats.currently_allocated);
1042       stats.by_size_log[size_log]++;
1043       stat->Add(AllocatorStatAllocated, map_size);
1044       stat->Add(AllocatorStatMapped, map_size);
1045     }
1046     return reinterpret_cast<void*>(res);
1047   }
1048
1049   void Deallocate(AllocatorStats *stat, void *p) {
1050     Header *h = GetHeader(p);
1051     {
1052       SpinMutexLock l(&mutex_);
1053       uptr idx = h->chunk_idx;
1054       CHECK_EQ(chunks_[idx], h);
1055       CHECK_LT(idx, n_chunks_);
1056       chunks_[idx] = chunks_[n_chunks_ - 1];
1057       chunks_[idx]->chunk_idx = idx;
1058       n_chunks_--;
1059       chunks_sorted_ = false;
1060       stats.n_frees++;
1061       stats.currently_allocated -= h->map_size;
1062       stat->Sub(AllocatorStatAllocated, h->map_size);
1063       stat->Sub(AllocatorStatMapped, h->map_size);
1064     }
1065     MapUnmapCallback().OnUnmap(h->map_beg, h->map_size);
1066     UnmapOrDie(reinterpret_cast<void*>(h->map_beg), h->map_size);
1067   }
1068
1069   uptr TotalMemoryUsed() {
1070     SpinMutexLock l(&mutex_);
1071     uptr res = 0;
1072     for (uptr i = 0; i < n_chunks_; i++) {
1073       Header *h = chunks_[i];
1074       CHECK_EQ(h->chunk_idx, i);
1075       res += RoundUpMapSize(h->size);
1076     }
1077     return res;
1078   }
1079
1080   bool PointerIsMine(const void *p) {
1081     return GetBlockBegin(p) != 0;
1082   }
1083
1084   uptr GetActuallyAllocatedSize(void *p) {
1085     return RoundUpTo(GetHeader(p)->size, page_size_);
1086   }
1087
1088   // At least page_size_/2 metadata bytes is available.
1089   void *GetMetaData(const void *p) {
1090     // Too slow: CHECK_EQ(p, GetBlockBegin(p));
1091     if (!IsAligned(reinterpret_cast<uptr>(p), page_size_)) {
1092       Printf("%s: bad pointer %p\n", SanitizerToolName, p);
1093       CHECK(IsAligned(reinterpret_cast<uptr>(p), page_size_));
1094     }
1095     return GetHeader(p) + 1;
1096   }
1097
1098   void *GetBlockBegin(const void *ptr) {
1099     uptr p = reinterpret_cast<uptr>(ptr);
1100     SpinMutexLock l(&mutex_);
1101     uptr nearest_chunk = 0;
1102     // Cache-friendly linear search.
1103     for (uptr i = 0; i < n_chunks_; i++) {
1104       uptr ch = reinterpret_cast<uptr>(chunks_[i]);
1105       if (p < ch) continue;  // p is at left to this chunk, skip it.
1106       if (p - ch < p - nearest_chunk)
1107         nearest_chunk = ch;
1108     }
1109     if (!nearest_chunk)
1110       return 0;
1111     Header *h = reinterpret_cast<Header *>(nearest_chunk);
1112     CHECK_GE(nearest_chunk, h->map_beg);
1113     CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
1114     CHECK_LE(nearest_chunk, p);
1115     if (h->map_beg + h->map_size <= p)
1116       return 0;
1117     return GetUser(h);
1118   }
1119
1120   // This function does the same as GetBlockBegin, but is much faster.
1121   // Must be called with the allocator locked.
1122   void *GetBlockBeginFastLocked(void *ptr) {
1123     mutex_.CheckLocked();
1124     uptr p = reinterpret_cast<uptr>(ptr);
1125     uptr n = n_chunks_;
1126     if (!n) return 0;
1127     if (!chunks_sorted_) {
1128       // Do one-time sort. chunks_sorted_ is reset in Allocate/Deallocate.
1129       SortArray(reinterpret_cast<uptr*>(chunks_), n);
1130       for (uptr i = 0; i < n; i++)
1131         chunks_[i]->chunk_idx = i;
1132       chunks_sorted_ = true;
1133       min_mmap_ = reinterpret_cast<uptr>(chunks_[0]);
1134       max_mmap_ = reinterpret_cast<uptr>(chunks_[n - 1]) +
1135           chunks_[n - 1]->map_size;
1136     }
1137     if (p < min_mmap_ || p >= max_mmap_)
1138       return 0;
1139     uptr beg = 0, end = n - 1;
1140     // This loop is a log(n) lower_bound. It does not check for the exact match
1141     // to avoid expensive cache-thrashing loads.
1142     while (end - beg >= 2) {
1143       uptr mid = (beg + end) / 2;  // Invariant: mid >= beg + 1
1144       if (p < reinterpret_cast<uptr>(chunks_[mid]))
1145         end = mid - 1;  // We are not interested in chunks_[mid].
1146       else
1147         beg = mid;  // chunks_[mid] may still be what we want.
1148     }
1149
1150     if (beg < end) {
1151       CHECK_EQ(beg + 1, end);
1152       // There are 2 chunks left, choose one.
1153       if (p >= reinterpret_cast<uptr>(chunks_[end]))
1154         beg = end;
1155     }
1156
1157     Header *h = chunks_[beg];
1158     if (h->map_beg + h->map_size <= p || p < h->map_beg)
1159       return 0;
1160     return GetUser(h);
1161   }
1162
1163   void PrintStats() {
1164     Printf("Stats: LargeMmapAllocator: allocated %zd times, "
1165            "remains %zd (%zd K) max %zd M; by size logs: ",
1166            stats.n_allocs, stats.n_allocs - stats.n_frees,
1167            stats.currently_allocated >> 10, stats.max_allocated >> 20);
1168     for (uptr i = 0; i < ARRAY_SIZE(stats.by_size_log); i++) {
1169       uptr c = stats.by_size_log[i];
1170       if (!c) continue;
1171       Printf("%zd:%zd; ", i, c);
1172     }
1173     Printf("\n");
1174   }
1175
1176   // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
1177   // introspection API.
1178   void ForceLock() {
1179     mutex_.Lock();
1180   }
1181
1182   void ForceUnlock() {
1183     mutex_.Unlock();
1184   }
1185
1186   // Iterate over all existing chunks.
1187   // The allocator must be locked when calling this function.
1188   void ForEachChunk(ForEachChunkCallback callback, void *arg) {
1189     for (uptr i = 0; i < n_chunks_; i++)
1190       callback(reinterpret_cast<uptr>(GetUser(chunks_[i])), arg);
1191   }
1192
1193  private:
1194   static const int kMaxNumChunks = 1 << FIRST_32_SECOND_64(15, 18);
1195   struct Header {
1196     uptr map_beg;
1197     uptr map_size;
1198     uptr size;
1199     uptr chunk_idx;
1200   };
1201
1202   Header *GetHeader(uptr p) {
1203     CHECK(IsAligned(p, page_size_));
1204     return reinterpret_cast<Header*>(p - page_size_);
1205   }
1206   Header *GetHeader(const void *p) {
1207     return GetHeader(reinterpret_cast<uptr>(p));
1208   }
1209
1210   void *GetUser(Header *h) {
1211     CHECK(IsAligned((uptr)h, page_size_));
1212     return reinterpret_cast<void*>(reinterpret_cast<uptr>(h) + page_size_);
1213   }
1214
1215   uptr RoundUpMapSize(uptr size) {
1216     return RoundUpTo(size, page_size_) + page_size_;
1217   }
1218
1219   uptr page_size_;
1220   Header *chunks_[kMaxNumChunks];
1221   uptr n_chunks_;
1222   uptr min_mmap_, max_mmap_;
1223   bool chunks_sorted_;
1224   struct Stats {
1225     uptr n_allocs, n_frees, currently_allocated, max_allocated, by_size_log[64];
1226   } stats;
1227   SpinMutex mutex_;
1228 };
1229
1230 // This class implements a complete memory allocator by using two
1231 // internal allocators:
1232 // PrimaryAllocator is efficient, but may not allocate some sizes (alignments).
1233 //  When allocating 2^x bytes it should return 2^x aligned chunk.
1234 // PrimaryAllocator is used via a local AllocatorCache.
1235 // SecondaryAllocator can allocate anything, but is not efficient.
1236 template <class PrimaryAllocator, class AllocatorCache,
1237           class SecondaryAllocator>  // NOLINT
1238 class CombinedAllocator {
1239  public:
1240   void Init() {
1241     primary_.Init();
1242     secondary_.Init();
1243     stats_.Init();
1244   }
1245
1246   void *Allocate(AllocatorCache *cache, uptr size, uptr alignment,
1247                  bool cleared = false) {
1248     // Returning 0 on malloc(0) may break a lot of code.
1249     if (size == 0)
1250       size = 1;
1251     if (size + alignment < size)
1252       return AllocatorReturnNull();
1253     if (alignment > 8)
1254       size = RoundUpTo(size, alignment);
1255     void *res;
1256     bool from_primary = primary_.CanAllocate(size, alignment);
1257     if (from_primary)
1258       res = cache->Allocate(&primary_, primary_.ClassID(size));
1259     else
1260       res = secondary_.Allocate(&stats_, size, alignment);
1261     if (alignment > 8)
1262       CHECK_EQ(reinterpret_cast<uptr>(res) & (alignment - 1), 0);
1263     if (cleared && res && from_primary)
1264       internal_bzero_aligned16(res, RoundUpTo(size, 16));
1265     return res;
1266   }
1267
1268   void Deallocate(AllocatorCache *cache, void *p) {
1269     if (!p) return;
1270     if (primary_.PointerIsMine(p))
1271       cache->Deallocate(&primary_, primary_.GetSizeClass(p), p);
1272     else
1273       secondary_.Deallocate(&stats_, p);
1274   }
1275
1276   void *Reallocate(AllocatorCache *cache, void *p, uptr new_size,
1277                    uptr alignment) {
1278     if (!p)
1279       return Allocate(cache, new_size, alignment);
1280     if (!new_size) {
1281       Deallocate(cache, p);
1282       return 0;
1283     }
1284     CHECK(PointerIsMine(p));
1285     uptr old_size = GetActuallyAllocatedSize(p);
1286     uptr memcpy_size = Min(new_size, old_size);
1287     void *new_p = Allocate(cache, new_size, alignment);
1288     if (new_p)
1289       internal_memcpy(new_p, p, memcpy_size);
1290     Deallocate(cache, p);
1291     return new_p;
1292   }
1293
1294   bool PointerIsMine(void *p) {
1295     if (primary_.PointerIsMine(p))
1296       return true;
1297     return secondary_.PointerIsMine(p);
1298   }
1299
1300   bool FromPrimary(void *p) {
1301     return primary_.PointerIsMine(p);
1302   }
1303
1304   void *GetMetaData(const void *p) {
1305     if (primary_.PointerIsMine(p))
1306       return primary_.GetMetaData(p);
1307     return secondary_.GetMetaData(p);
1308   }
1309
1310   void *GetBlockBegin(const void *p) {
1311     if (primary_.PointerIsMine(p))
1312       return primary_.GetBlockBegin(p);
1313     return secondary_.GetBlockBegin(p);
1314   }
1315
1316   // This function does the same as GetBlockBegin, but is much faster.
1317   // Must be called with the allocator locked.
1318   void *GetBlockBeginFastLocked(void *p) {
1319     if (primary_.PointerIsMine(p))
1320       return primary_.GetBlockBegin(p);
1321     return secondary_.GetBlockBeginFastLocked(p);
1322   }
1323
1324   uptr GetActuallyAllocatedSize(void *p) {
1325     if (primary_.PointerIsMine(p))
1326       return primary_.GetActuallyAllocatedSize(p);
1327     return secondary_.GetActuallyAllocatedSize(p);
1328   }
1329
1330   uptr TotalMemoryUsed() {
1331     return primary_.TotalMemoryUsed() + secondary_.TotalMemoryUsed();
1332   }
1333
1334   void TestOnlyUnmap() { primary_.TestOnlyUnmap(); }
1335
1336   void InitCache(AllocatorCache *cache) {
1337     cache->Init(&stats_);
1338   }
1339
1340   void DestroyCache(AllocatorCache *cache) {
1341     cache->Destroy(&primary_, &stats_);
1342   }
1343
1344   void SwallowCache(AllocatorCache *cache) {
1345     cache->Drain(&primary_);
1346   }
1347
1348   void GetStats(AllocatorStatCounters s) const {
1349     stats_.Get(s);
1350   }
1351
1352   void PrintStats() {
1353     primary_.PrintStats();
1354     secondary_.PrintStats();
1355   }
1356
1357   // ForceLock() and ForceUnlock() are needed to implement Darwin malloc zone
1358   // introspection API.
1359   void ForceLock() {
1360     primary_.ForceLock();
1361     secondary_.ForceLock();
1362   }
1363
1364   void ForceUnlock() {
1365     secondary_.ForceUnlock();
1366     primary_.ForceUnlock();
1367   }
1368
1369   // Iterate over all existing chunks.
1370   // The allocator must be locked when calling this function.
1371   void ForEachChunk(ForEachChunkCallback callback, void *arg) {
1372     primary_.ForEachChunk(callback, arg);
1373     secondary_.ForEachChunk(callback, arg);
1374   }
1375
1376  private:
1377   PrimaryAllocator primary_;
1378   SecondaryAllocator secondary_;
1379   AllocatorGlobalStats stats_;
1380 };
1381
1382 // Returns true if calloc(size, n) should return 0 due to overflow in size*n.
1383 bool CallocShouldReturnNullDueToOverflow(uptr size, uptr n);
1384
1385 }  // namespace __sanitizer
1386
1387 #endif  // SANITIZER_ALLOCATOR_H