/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/base/memory-manager.h"

#include <algorithm>
#include <cstring>
#include <limits>

#include <pthread.h>
#include <unistd.h>
#ifdef __APPLE__
#include <sys/sysctl.h>
#endif

#include "hphp/runtime/base/sweepable.h"
#include "hphp/runtime/base/memory-profile.h"
#include "hphp/runtime/base/builtin-functions.h"
#include "hphp/runtime/base/runtime-option.h"
#include "hphp/runtime/server/http-server.h"
#include "hphp/runtime/vm/native-data.h"
#include "hphp/util/alloc.h"
#include "hphp/util/process.h"
#include "hphp/util/trace.h"
#include "folly/ScopeGuard.h"
namespace HPHP {

TRACE_SET_MOD(smartalloc);
//////////////////////////////////////////////////////////////////////

const uint32_t SLAB_SIZE = 2 << 20;

//////////////////////////////////////////////////////////////////////
bool MemoryManager::s_statsEnabled = false;
size_t MemoryManager::s_cactiveLimitCeiling = 0;

static size_t threadAllocatedpMib[2];
static size_t threadDeallocatedpMib[2];
static size_t statsCactiveMib[2];
static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
void MemoryManager::threadStatsInit() {
  if (!mallctlnametomib) return;
  size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(statsCactiveMib) / sizeof(size_t);
  if (mallctlnametomib("stats.cactive", statsCactiveMib, &miblen)) {
    return;
  }
  MemoryManager::s_statsEnabled = true;

  // In threadStats() we wish to solve for cactiveLimit in:
  //
  //   footprint + cactiveLimit + headRoom == MemTotal
  //
  // However, headRoom comes from RuntimeOption::ServerMemoryHeadRoom, which
  // isn't initialized until after the code here runs.  Therefore, compute
  // s_cactiveLimitCeiling here in order to amortize the cost of introspecting
  // footprint and MemTotal.
  //
  //   cactiveLimit == (MemTotal - footprint) - headRoom
  //
  //   cactiveLimit == s_cactiveLimitCeiling - headRoom
  //
  //   s_cactiveLimitCeiling == MemTotal - footprint
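  //
  // As a worked example (illustrative numbers only, not from any real
  // deployment): with MemTotal == 64 GB and a 2 GB code footprint,
  // s_cactiveLimitCeiling == 62 GB; if ServerMemoryHeadRoom is later set
  // to 8 GB, threadStats() computes cactiveLimit == 62 GB - 8 GB == 54 GB.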
  size_t footprint = Process::GetCodeFootprint(Process::GetProcessId());
  size_t MemTotal = 0;
#ifndef __APPLE__
  size_t pageSize = size_t(sysconf(_SC_PAGESIZE));
  MemTotal = size_t(sysconf(_SC_PHYS_PAGES)) * pageSize;
#else
  int mib[2] = { CTL_HW, HW_MEMSIZE };
  u_int namelen = sizeof(mib) / sizeof(mib[0]);
  size_t len = sizeof(MemTotal);
  sysctl(mib, namelen, &MemTotal, &len, nullptr, 0);
#endif
  if (MemTotal > footprint) {
    MemoryManager::s_cactiveLimitCeiling = MemTotal - footprint;
  }
}

void MemoryManager::threadStats(uint64_t*& allocated, uint64_t*& deallocated,
                                size_t*& cactive, size_t& cactiveLimit) {
  pthread_once(&threadStatsOnce, threadStatsInit);
  if (!MemoryManager::s_statsEnabled) return;

  size_t len = sizeof(allocated);
  if (mallctlbymib(threadAllocatedpMib,
                   sizeof(threadAllocatedpMib) / sizeof(size_t),
                   &allocated, &len, nullptr, 0)) {
    not_reached();
  }

  len = sizeof(deallocated);
  if (mallctlbymib(threadDeallocatedpMib,
                   sizeof(threadDeallocatedpMib) / sizeof(size_t),
                   &deallocated, &len, nullptr, 0)) {
    not_reached();
  }

  len = sizeof(cactive);
  if (mallctlbymib(statsCactiveMib,
                   sizeof(statsCactiveMib) / sizeof(size_t),
                   &cactive, &len, nullptr, 0)) {
    not_reached();
  }

  size_t headRoom = RuntimeOption::ServerMemoryHeadRoom;
  // Compute cactiveLimit based on s_cactiveLimitCeiling, as computed in
  // threadStatsInit().
  if (headRoom != 0 && headRoom < MemoryManager::s_cactiveLimitCeiling) {
    cactiveLimit = MemoryManager::s_cactiveLimitCeiling - headRoom;
  } else {
    cactiveLimit = std::numeric_limits<size_t>::max();
  }
}

static void* MemoryManagerInit() {
  // We store the free list pointers right at the start of each
  // object, overlapping SmartHeader.data, and we also clobber _count
  // as a free-object flag when the object is deallocated.  This
  // assert just makes sure they don't overflow.
  assert(FAST_REFCOUNT_OFFSET + sizeof(int) <=
         MemoryManager::smartSizeClass(1));
  MemoryManager::TlsWrapper tls;
  return (void*)tls.getNoCheck;
}

void* MemoryManager::TlsInitSetup = MemoryManagerInit();

void MemoryManager::Create(void* storage) {
  new (storage) MemoryManager();
}

void MemoryManager::Delete(MemoryManager* mm) {
  mm->~MemoryManager();
}

void MemoryManager::OnThreadExit(MemoryManager* mm) {
  mm->~MemoryManager();
}

MemoryManager::MemoryManager()
    : m_front(nullptr)
    , m_limit(nullptr)
    , m_sweeping(false) {
  threadStats(m_allocated, m_deallocated, m_cactive, m_cactiveLimit);
  resetStats();
  m_stats.maxBytes = std::numeric_limits<int64_t>::max();
  // make the circular-lists empty.
  m_sweep.next = m_sweep.prev = &m_sweep;
  m_strings.next = m_strings.prev = &m_strings;
}

void MemoryManager::resetStats() {
  m_stats.usage = 0;
  m_stats.alloc = 0;
  m_stats.peakUsage = 0;
  m_stats.peakAlloc = 0;
  m_stats.totalAlloc = 0;
  if (s_statsEnabled) {
    m_stats.jemallocDebt = 0;
    m_prevAllocated = int64_t(*m_allocated);
    m_delta = m_prevAllocated - int64_t(*m_deallocated);
  }
}

void MemoryManager::refreshStatsHelper() {
  refreshStats();
}

void MemoryManager::refreshStatsHelperExceeded() const {
  ThreadInfo* info = ThreadInfo::s_threadInfo.getNoCheck();
  info->m_reqInjectionData.setMemExceededFlag();
}

void MemoryManager::refreshStatsHelperStop() {
  HttpServer::Server->stop();
  // Increase the limit to the maximum possible value, so that this method
  // won't be called again.
  m_cactiveLimit = std::numeric_limits<size_t>::max();
}

void MemoryManager::sweep() {
  assert(!m_sweeping);
  m_sweeping = true;
  SCOPE_EXIT { m_sweeping = false; };
  Sweepable::SweepAll();
  Native::sweepNativeData();
}

void MemoryManager::resetAllocator() {
  StringData::sweepAll();

  // free smart-malloc slabs
  for (auto slab : m_slabs) {
    free(slab);
  }
  m_slabs.clear();

  // free large allocation blocks
  for (SweepNode *n = m_sweep.next, *next; n != &m_sweep; n = next) {
    next = n->next;
    free(n);
  }
  m_sweep.next = m_sweep.prev = &m_sweep;

  // zero out freelists
  for (auto& i : m_freelists) i.head = nullptr;
  m_front = m_limit = 0;
}

/*
 * smart_malloc & friends implementation notes
 *
 * There are three kinds of smart mallocation:
 *
 *  a) Large allocations.  (size >= kMaxSmartSize)
 *
 *     In this case we behave as a wrapper around the normal libc
 *     malloc/free.  We insert a SweepNode header at the front of the
 *     allocation in order to find these at sweep time (end of
 *     request) so we can give them back to libc.
 *
 *  b) Size-tracked small allocations.
 *
 *     This is used for the generic case, for callers who can't tell
 *     us the size of the allocation at free time.
 *
 *     In this situation, we put a SmallNode header at the front of
 *     the block that tells us the size for when we need to free it
 *     later.  We differentiate this from a SweepNode (for a big
 *     allocation) by assuming that no SweepNode::prev will point to
 *     an address in the first kMaxSmartSize bytes of virtual address
 *     space.
 *
 *  c) Size-untracked small allocations.
 *
 *     Many callers have an easy time telling you how big the object
 *     was when they need to free it.  In this case we can avoid the
 *     SmallNode, which saves us some memory and also lets us give
 *     out 16-byte aligned pointers easily.
 *
 *     We know when we have one of these because it has to be freed
 *     through a different entry point.  (E.g. MM().smartFreeSize or
 *     MM().smartFreeSizeBig.)
 *
 * When small blocks are freed (cases b and c), they're placed in the
 * appropriate size-segregated freelist.  Large blocks are immediately
 * passed back to libc via free.
 *
 * There are currently two kinds of freelist entries: entries where
 * there is already a valid SmallNode on the list (case b), and
 * entries where there isn't (case c).  The reason for the split is
 * that, when allocating for case b, you don't need to store the
 * SmallNode size again.  Much of the heap is going through case b at
 * the time of this writing, so it is a measurable regression to
 * combine the free lists, but presumably we can move more to case c
 * and combine the lists eventually.
 */
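
/*
 * A minimal usage sketch of how the three cases map onto entry points.
 * This is illustrative only (the sizes are made up); it assumes
 * request-local code running with the thread's MemoryManager, MM():
 *
 *   // Case b: the size is unknown at free time, so a SmallNode header
 *   // is kept for us behind the returned pointer.
 *   void* p = smart_malloc(100);
 *   smart_free(p);
 *
 *   // Case c: the caller remembers the size, so no SmallNode is needed,
 *   // but the sized free entry point must be passed the same size.
 *   void* q = MM().smartMallocSize(64);
 *   MM().smartFreeSize(q, 64);
 *
 *   // Case a: large blocks take the SweepNode path and go straight back
 *   // to libc on free (or at sweep time, at end of request).
 *   void* r = smart_malloc(kMaxSmartSize + 100);
 *   smart_free(r);
 */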

inline void* MemoryManager::smartMalloc(size_t nbytes) {
  auto const nbytes_padded = nbytes + sizeof(SmallNode);
  if (UNLIKELY(nbytes_padded > kMaxSmartSize)) {
    return smartMallocBig(nbytes);
  }

  auto const ptr = static_cast<SmallNode*>(smartMallocSize(nbytes_padded));
  ptr->padbytes = nbytes_padded;
  return ptr + 1;
}

inline void MemoryManager::smartFree(void* ptr) {
  assert(ptr != 0);
  auto const n = static_cast<SweepNode*>(ptr) - 1;
  auto const padbytes = n->padbytes;
  if (LIKELY(padbytes <= kMaxSmartSize)) {
    return smartFreeSize(static_cast<SmallNode*>(ptr) - 1, n->padbytes);
  }
  smartFreeBig(n);
}

inline void* MemoryManager::smartRealloc(void* inputPtr, size_t nbytes) {
  FTRACE(1, "smartRealloc: {} to {}\n", inputPtr, nbytes);
  assert(nbytes > 0);

  void* ptr = debug ? static_cast<DebugHeader*>(inputPtr) - 1 : inputPtr;

  auto const n = static_cast<SweepNode*>(ptr) - 1;
  if (LIKELY(n->padbytes <= kMaxSmartSize)) {
    void* newmem = smart_malloc(nbytes);
    auto const copySize = std::min(
      n->padbytes - sizeof(SmallNode) - (debug ? sizeof(DebugHeader) : 0),
      nbytes
    );
    newmem = memcpy(newmem, inputPtr, copySize);
    smart_free(inputPtr);
    return newmem;
  }

  // Ok, it's a big allocation.  Since we don't know how big it is
  // (i.e. how much data we should memcpy), we have no choice but to
  // ask malloc to realloc for us.
  auto const oldNext = n->next;
  auto const oldPrev = n->prev;

  auto const newNode = static_cast<SweepNode*>(
    safe_realloc(n, debugAddExtra(nbytes + sizeof(SweepNode)))
  );

  refreshStatsHelper();
  if (newNode != n) {
    oldNext->prev = oldPrev->next = newNode;
  }
  return debugPostAllocate(newNode + 1, 0, 0);
}

/*
 * Get a new slab, then allocate nbytes from it and install it in our
 * slab list.  Return the newly allocated nbytes-sized block.
 */
NEVER_INLINE char* MemoryManager::newSlab(size_t nbytes) {
  if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
    refreshStatsHelper();
  }
  char* slab = (char*) safe_malloc(SLAB_SIZE);
  assert(uintptr_t(slab) % 16 == 0);
  JEMALLOC_STATS_ADJUST(&m_stats, SLAB_SIZE);
  m_stats.alloc += SLAB_SIZE;
  if (m_stats.alloc > m_stats.peakAlloc) {
    m_stats.peakAlloc = m_stats.alloc;
  }
  m_slabs.push_back(slab);
  m_front = slab + nbytes;
  m_limit = slab + SLAB_SIZE;
  FTRACE(1, "newSlab: adding slab at {} to limit {}\n",
         static_cast<void*>(slab),
         static_cast<void*>(m_limit));
  return slab;
}

// allocate nbytes from the current slab, aligned to 16-bytes
void* MemoryManager::slabAlloc(size_t nbytes) {
  const size_t kAlignMask = 15;
  assert((nbytes & 7) == 0);
  char* ptr = (char*)(uintptr_t(m_front + kAlignMask) & ~kAlignMask);
  if (ptr + nbytes <= m_limit) {
    m_front = ptr + nbytes;
    return ptr;
  }
  return newSlab(nbytes);
}

void* MemoryManager::smartMallocSlab(size_t padbytes) {
  SmallNode* n = (SmallNode*) slabAlloc(padbytes);
  n->padbytes = padbytes;
  FTRACE(1, "smartMallocSlab: {} -> {}\n", padbytes,
         static_cast<void*>(n + 1));
  return n + 1;
}

inline void* MemoryManager::smartEnlist(SweepNode* n) {
  if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
    refreshStatsHelper();
  }
  // link after m_sweep
  SweepNode* next = m_sweep.next;
  n->next = next;
  n->prev = &m_sweep;
  next->prev = m_sweep.next = n;
  assert(n->padbytes > kMaxSmartSize);
  return n + 1;
}
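
// Illustrative list state: enlisting n into an empty list (where
// m_sweep.next == m_sweep.prev == &m_sweep) leaves m_sweep.next == n,
// n->prev == &m_sweep, n->next == &m_sweep, and m_sweep.prev == n, so the
// large-block sweep loop in resetAllocator() reaches n via m_sweep.next.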

NEVER_INLINE
void* MemoryManager::smartMallocBig(size_t nbytes) {
  assert(nbytes > 0);
  auto const n = static_cast<SweepNode*>(
    safe_malloc(nbytes + sizeof(SweepNode))
  );
  return smartEnlist(n);
}

NEVER_INLINE
void* MemoryManager::smartMallocSizeBigHelper(void*& ptr,
                                              size_t& szOut,
                                              size_t bytes) {
  m_stats.usage += bytes;
  allocm(&ptr, &szOut, debugAddExtra(bytes + sizeof(SweepNode)), 0);
  szOut = debugRemoveExtra(szOut - sizeof(SweepNode));
  return debugPostAllocate(
    smartEnlist(static_cast<SweepNode*>(ptr)),
    bytes, szOut
  );
}

NEVER_INLINE
void* MemoryManager::smartCallocBig(size_t totalbytes) {
  assert(totalbytes > 0);
  auto const n = static_cast<SweepNode*>(
    safe_calloc(totalbytes + sizeof(SweepNode), 1)
  );
  return smartEnlist(n);
}

NEVER_INLINE
void MemoryManager::smartFreeBig(SweepNode* n) {
  SweepNode* next = n->next;
  SweepNode* prev = n->prev;
  next->prev = prev;
  prev->next = next;
  free(n);
}

// smart_malloc api entry points, with support for malloc/free corner cases.

void* smart_malloc(size_t nbytes) {
  auto& mm = MM();
  auto const size = mm.debugAddExtra(std::max(nbytes, size_t(1)));
  return mm.debugPostAllocate(mm.smartMalloc(size), 0, 0);
}

void* smart_calloc(size_t count, size_t nbytes) {
  auto& mm = MM();
  auto const totalBytes = std::max<size_t>(count * nbytes, 1);
  if (totalBytes <= kMaxSmartSize) {
    return memset(smart_malloc(totalBytes), 0, totalBytes);
  }
  auto const withExtra = mm.debugAddExtra(totalBytes);
  return mm.debugPostAllocate(
    mm.smartCallocBig(withExtra), 0, 0
  );
}

void* smart_realloc(void* ptr, size_t nbytes) {
  auto& mm = MM();
  if (!ptr) return smart_malloc(nbytes);
  if (!nbytes) {
    smart_free(ptr);
    return nullptr;
  }
  return mm.smartRealloc(ptr, nbytes);
}

void smart_free(void* ptr) {
  if (!ptr) return;
  auto& mm = MM();
  mm.smartFree(mm.debugPreFree(ptr, 0, 0));
}
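
// A minimal usage sketch of the corner cases handled above (hypothetical
// values): smart_realloc(nullptr, 10) behaves like smart_malloc(10);
// smart_realloc(p, 0) frees p and returns nullptr; and smart_free(nullptr)
// is a no-op, mirroring free(nullptr).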

//////////////////////////////////////////////////////////////////////

void* MemoryManager::debugPostAllocate(void* p,
                                       size_t bytes,
                                       size_t returnedCap) {
  auto const header = static_cast<DebugHeader*>(p);
  header->allocatedMagic = DebugHeader::kAllocatedMagic;
  header->requestedSize = bytes;
  header->returnedCap = returnedCap;
  return header + 1;
}

void* MemoryManager::debugPreFree(void* p,
                                  size_t bytes,
                                  size_t userSpecifiedBytes) {
  auto const header = reinterpret_cast<DebugHeader*>(p) - 1;
  assert(checkPreFree(header, bytes, userSpecifiedBytes));
  header->allocatedMagic = 0; // will get a freelist pointer shortly
  header->requestedSize = DebugHeader::kFreedMagic;
  memset(header + 1, kSmartFreeFill, bytes);
  return header;
}

bool MemoryManager::checkPreFree(DebugHeader* p,
                                 size_t bytes,
                                 size_t userSpecifiedBytes) const {
  assert(p->allocatedMagic == DebugHeader::kAllocatedMagic);

  if (userSpecifiedBytes != 0) {
    // For size-specified frees, the size they report when freeing
    // must be either what they asked for, or what we returned as the
    // actual capacity.
    assert(userSpecifiedBytes == p->requestedSize ||
           userSpecifiedBytes == p->returnedCap);
  }
  if (bytes != 0 && bytes <= kMaxSmartSize) {
    auto const ptrInt = reinterpret_cast<uintptr_t>(p);
    DEBUG_ONLY auto it = std::find_if(
      begin(m_slabs), end(m_slabs),
      [&] (char* base) {
        auto const baseInt = reinterpret_cast<uintptr_t>(base);
        return ptrInt >= baseInt && ptrInt < baseInt + SLAB_SIZE;
      }
    );
    assert(it != end(m_slabs));
  }

  return true;
}

void MemoryManager::logAllocation(void* p, size_t bytes) {
  MemoryProfile::logAllocation(p, bytes);
}

void MemoryManager::logDeallocation(void* p) {
  MemoryProfile::logDeallocation(p);
}

///////////////////////////////////////////////////////////////////////////////

}