Bug 1671114 - pt 7. Make an ambiguous comment clearer r=glandium
[gecko.git] / memory / replace / logalloc / replay / Replay.cpp

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#define MOZ_MEMORY_IMPL
#include "mozmemory_wrap.h"

#ifdef _WIN32
#  include <windows.h>
#  include <io.h>
typedef intptr_t ssize_t;
#else
#  include <sys/mman.h>
#  include <unistd.h>
#endif
#ifdef XP_LINUX
#  include <sys/types.h>
#  include <sys/stat.h>
#  include <fcntl.h>
#  include <stdlib.h>
#  include <ctype.h>
#endif
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>

#include "mozilla/Assertions.h"
#include "mozilla/MathAlgorithms.h"
#include "mozilla/Maybe.h"

#include "FdPrintf.h"

using namespace mozilla;

static void die(const char* message) {
  /* Here, it doesn't matter that fprintf may allocate memory. */
  fprintf(stderr, "%s\n", message);
  exit(1);
}

#ifdef XP_LINUX
static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }();
#endif

/* We don't want to be using malloc() to allocate our internal tracking
 * data, because that would change the parameters of what is being measured,
 * so we want to use data types that directly use mmap/VirtualAlloc. */
template <typename T, size_t Len>
class MappedArray {
 public:
  MappedArray() : mPtr(nullptr) {
#ifdef XP_LINUX
    MOZ_RELEASE_ASSERT(!((sizeof(T) * Len) & (sPageSize - 1)),
                       "MappedArray size must be a multiple of the page size");
#endif
  }

  ~MappedArray() {
    if (mPtr) {
#ifdef _WIN32
      VirtualFree(mPtr, sizeof(T) * Len, MEM_RELEASE);
#elif defined(XP_LINUX)
      munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr) -
                                     sPageSize),
             sizeof(T) * Len + sPageSize * 2);
#else
      munmap(mPtr, sizeof(T) * Len);
#endif
    }
  }

  T& operator[](size_t aIndex) const {
    if (mPtr) {
      return mPtr[aIndex];
    }

#ifdef _WIN32
    mPtr = reinterpret_cast<T*>(VirtualAlloc(
        nullptr, sizeof(T) * Len, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
    if (mPtr == nullptr) {
      die("VirtualAlloc error");
    }
#else
    size_t data_size = sizeof(T) * Len;
    size_t size = data_size;
#  ifdef XP_LINUX
    // Account for the two guard pages described below.
    size += sPageSize * 2;
#  endif
    mPtr = reinterpret_cast<T*>(mmap(nullptr, size, PROT_READ | PROT_WRITE,
                                     MAP_ANON | MAP_PRIVATE, -1, 0));
    if (mPtr == MAP_FAILED) {
      die("Mmap error");
    }
#  ifdef XP_LINUX
    // On Linux we request a page on either side of the allocation and
    // mprotect them. This prevents mappings in /proc/self/smaps from being
    // merged and allows us to parse this file to calculate the allocator's
    // RSS.
    MOZ_ASSERT(0 == mprotect(mPtr, sPageSize, 0));
    MOZ_ASSERT(0 == mprotect(reinterpret_cast<void*>(
                                 reinterpret_cast<uintptr_t>(mPtr) +
                                 data_size + sPageSize),
                             sPageSize, 0));
    mPtr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(mPtr) + sPageSize);
#  endif
#endif
    return mPtr[aIndex];
  }

  bool ownsMapping(uintptr_t addr) const { return addr == (uintptr_t)mPtr; }

  bool allocated() const { return !!mPtr; }

 private:
  mutable T* mPtr;
};

/* Type for records of allocations. */
struct MemSlot {
  void* mPtr;

  // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared
  // when memory is freed or realloc()ed.
  size_t mRequest;
};

/* An almost infinite list of slots.
 * In essence, this is a linked list of arrays of groups of slots.
 * Each group is 1MB. On 64-bit, one group can store 64k allocations.
 * Each MemSlotList instance can store 1023 such groups, which means more
 * than 67M allocations. If more are needed, we chain to another
 * MemSlotList, and so on.
 * Using 1023 groups makes the MemSlotList itself one page in size on 32-bit
 * and two pages on 64-bit.
 */
class MemSlotList {
  static constexpr size_t kGroups = 1024 - 1;
  static constexpr size_t kGroupSize = (1024 * 1024) / sizeof(MemSlot);

  MappedArray<MemSlot, kGroupSize> mSlots[kGroups];
  MappedArray<MemSlotList, 1> mNext;

 public:
  MemSlot& operator[](size_t aIndex) const {
    if (aIndex < kGroupSize * kGroups) {
      return mSlots[aIndex / kGroupSize][aIndex % kGroupSize];
    }
    aIndex -= kGroupSize * kGroups;
    return mNext[0][aIndex];
  }

  // Ask if any of the memory-mapped buffers use this range.
  bool ownsMapping(uintptr_t aStart) const {
    for (const auto& slot : mSlots) {
      if (slot.allocated() && slot.ownsMapping(aStart)) {
        return true;
      }
    }
    return mNext.ownsMapping(aStart) ||
           (mNext.allocated() && mNext[0].ownsMapping(aStart));
  }
};
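
// Illustrative arithmetic (assuming a 16-byte MemSlot on 64-bit): kGroupSize
// is 1MB / 16 = 65536, so the first kGroupSize * kGroups = 67,043,328 slots
// live in mSlots, and index 67,043,328 is the first one that chains into
// mNext.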

/* Helper class for memory buffers */
class Buffer {
 public:
  Buffer() : mBuf(nullptr), mLength(0) {}

  Buffer(const void* aBuf, size_t aLength)
      : mBuf(reinterpret_cast<const char*>(aBuf)), mLength(aLength) {}

  /* Constructor for string literals. */
  template <size_t Size>
  explicit Buffer(const char (&aStr)[Size]) : mBuf(aStr), mLength(Size - 1) {}

  /* Returns a sub-buffer up-to but not including the given aNeedle character.
   * The "parent" buffer itself is altered to begin after the aNeedle
   * character.
   * If the aNeedle character is not found, return the entire buffer, and empty
   * the "parent" buffer. */
  Buffer SplitChar(char aNeedle) {
    char* buf = const_cast<char*>(mBuf);
    char* c = reinterpret_cast<char*>(memchr(buf, aNeedle, mLength));
    if (!c) {
      return Split(mLength);
    }

    Buffer result = Split(c - buf);
    // Remove the aNeedle character itself.
    Split(1);
    return result;
  }
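
  // For example (hypothetical input): if this Buffer holds "a,b", then
  // SplitChar(',') returns a Buffer holding "a" and leaves this Buffer
  // holding "b".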

  // Advance to the position after aNeedle. This is like SplitChar but does not
  // return the skipped portion.
  void Skip(char aNeedle, unsigned nTimes = 1) {
    for (unsigned i = 0; i < nTimes; i++) {
      SplitChar(aNeedle);
    }
  }

  void SkipWhitespace() {
    while (mLength > 0) {
      if (!isspace(mBuf[0])) {
        break;
      }
      mBuf++;
      mLength--;
    }
  }

  /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is
   * amputated of those aLength characters. If the "parent" buffer is smaller
   * than aLength, then its length is used instead. */
  Buffer Split(size_t aLength) {
    Buffer result(mBuf, std::min(aLength, mLength));
    mLength -= result.mLength;
    mBuf += result.mLength;
    return result;
  }

  /* Move the buffer (including its content) to the memory address of the
   * aOther buffer. */
  void Slide(Buffer aOther) {
    memmove(const_cast<char*>(aOther.mBuf), mBuf, mLength);
    mBuf = aOther.mBuf;
  }

  /* Returns whether the two involved buffers have the same content. */
  bool operator==(Buffer aOther) {
    return mLength == aOther.mLength &&
           (mBuf == aOther.mBuf || !strncmp(mBuf, aOther.mBuf, mLength));
  }

  bool operator!=(Buffer aOther) { return !(*this == aOther); }

  /* Returns true if the buffer is not empty. */
  explicit operator bool() { return mLength; }

  char operator[](size_t n) const { return mBuf[n]; }

  /* Returns the memory location of the buffer. */
  const char* get() { return mBuf; }

  /* Returns the memory location of the end of the buffer (technically, the
   * first byte after the buffer). */
  const char* GetEnd() { return mBuf + mLength; }

  /* Extend the buffer over the content of the other buffer, assuming it is
   * adjacent. */
  void Extend(Buffer aOther) {
    MOZ_ASSERT(aOther.mBuf == GetEnd());
    mLength += aOther.mLength;
  }

  size_t Length() const { return mLength; }

 private:
  const char* mBuf;
  size_t mLength;
};

/* Helper class to read from a file descriptor line by line. */
class FdReader {
 public:
  explicit FdReader(int aFd, bool aNeedClose = false)
      : mFd(aFd),
        mNeedClose(aNeedClose),
        mData(&mRawBuf, 0),
        mBuf(&mRawBuf, sizeof(mRawBuf)) {}

  FdReader(FdReader&& aOther) noexcept
      : mFd(aOther.mFd),
        mNeedClose(aOther.mNeedClose),
        mData(&mRawBuf, 0),
        mBuf(&mRawBuf, sizeof(mRawBuf)) {
    memcpy(mRawBuf, aOther.mRawBuf, sizeof(mRawBuf));
    aOther.mFd = -1;
    aOther.mNeedClose = false;
    aOther.mData = Buffer();
    aOther.mBuf = Buffer();
  }

  FdReader& operator=(const FdReader&) = delete;
  FdReader(const FdReader&) = delete;

  ~FdReader() {
    if (mNeedClose) {
      close(mFd);
    }
  }

  /* Reads a line from the file descriptor and returns it as a Buffer
   * instance. */
  Buffer ReadLine() {
    while (true) {
      Buffer result = mData.SplitChar('\n');

      /* There are essentially three different cases here:
       * - '\n' was found "early". In this case, the end of the result buffer
       *   is before the beginning of the mData buffer (since SplitChar
       *   amputated it).
       * - '\n' was found as the last character of mData. In this case, mData
       *   is empty, but still points at the end of mBuf. result points to what
       *   used to be in mData, without the last character.
       * - '\n' was not found. In this case too, mData is empty and points at
       *   the end of mBuf. But result points to the entire buffer that used to
       *   be pointed by mData.
       * Only in the latter case do both result and mData's end match, and it's
       * the only case where we need to refill the buffer.
       */
      if (result.GetEnd() != mData.GetEnd()) {
        return result;
      }

      /* Since SplitChar emptied mData, make it point to what it had before. */
      mData = result;

      /* And move it to the beginning of the read buffer. */
      mData.Slide(mBuf);

      FillBuffer();

      if (!mData) {
        return Buffer();
      }
    }
  }

 private:
  /* Fill the read buffer. */
  void FillBuffer() {
    size_t size = mBuf.GetEnd() - mData.GetEnd();
    Buffer remainder(mData.GetEnd(), size);

    ssize_t len = 1;
    while (remainder && len > 0) {
      len = ::read(mFd, const_cast<char*>(remainder.get()), size);
      if (len < 0) {
        die("Read error");
      }
      size -= len;
      mData.Extend(remainder.Split(len));
    }
  }

  /* File descriptor to read from. */
  int mFd;
  bool mNeedClose;

  /* Part of data that was read from the file descriptor but not returned with
   * ReadLine yet. */
  Buffer mData;
  /* Buffer representation of mRawBuf */
  Buffer mBuf;
  /* read() buffer */
  char mRawBuf[4096];
};

MOZ_BEGIN_EXTERN_C

/* Function declarations for all the replace_malloc _impl functions.
 * See memory/build/replace_malloc.c */
#define MALLOC_DECL(name, return_type, ...) \
  return_type name##_impl(__VA_ARGS__);
#define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
#include "malloc_decls.h"

#define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__);
#define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
#include "malloc_decls.h"

#ifdef ANDROID

/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have.
 * While gecko has one in libmozglue, the replay program can't use that.
 * Since we're not going to fork anyway, make it a dummy function. */
int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void),
                   void (*aChild)(void)) {
  return 0;
}
#endif

MOZ_END_EXTERN_C

template <unsigned Base = 10>
size_t parseNumber(Buffer aBuf) {
  if (!aBuf) {
    die("Malformed input");
  }

  size_t result = 0;
  for (const char *c = aBuf.get(), *end = aBuf.GetEnd(); c < end; c++) {
    result *= Base;
    if ((*c >= '0' && *c <= '9')) {
      result += *c - '0';
    } else if (Base == 16 && *c >= 'a' && *c <= 'f') {
      result += *c - 'a' + 10;
    } else if (Base == 16 && *c >= 'A' && *c <= 'F') {
      result += *c - 'A' + 10;
    } else {
      die("Malformed input");
    }
  }
  return result;
}
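
// For example (hypothetical inputs): parseNumber(Buffer("42")) yields 42 and
// parseNumber<16>(Buffer("1f")) yields 31; any other character aborts via
// die().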

static size_t percent(size_t a, size_t b) {
  if (!b) {
    return 0;
  }
  return size_t(round(double(a) / double(b) * 100.0));
}
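
// e.g. percent(1, 3) rounds 33.33... to 33, and percent(1, 0) is defined
// as 0 so callers don't have to guard against empty totals.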

class Distribution {
 public:
  // Default constructor used for array initialisation.
  Distribution()
      : mMaxSize(0),
        mNextSmallest(0),
        mShift(0),
        mArrayOffset(0),
        mArraySlots(0),
        mTotalRequests(0),
        mRequests{0} {}

  Distribution(size_t max_size, size_t next_smallest, size_t bucket_size)
      : mMaxSize(max_size),
        mNextSmallest(next_smallest),
        mShift(CeilingLog2(bucket_size)),
        mArrayOffset(1 + next_smallest),
        mArraySlots((max_size - next_smallest) >> mShift),
        mTotalRequests(0),
        mRequests{0} {
    MOZ_ASSERT(mMaxSize);
    MOZ_RELEASE_ASSERT(mArraySlots <= MAX_NUM_BUCKETS);
  }

  Distribution& operator=(const Distribution& aOther) = default;

  void addRequest(size_t request) {
    MOZ_ASSERT(mMaxSize);

    mRequests[(request - mArrayOffset) >> mShift]++;
    mTotalRequests++;
  }
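
  // Illustrative arithmetic (hypothetical parameters): with max_size = 32,
  // next_smallest = 16 and bucket_size = 4, mShift is 2, mArrayOffset is 17
  // and mArraySlots is 4, so a 20-byte request lands in slot
  // (20 - 17) >> 2 == 0 and a 24-byte request in slot 1.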

  void printDist(intptr_t std_err) {
    MOZ_ASSERT(mMaxSize);

    // The translation to turn a slot index into a memory request size.
    const size_t array_offset_add = (1 << mShift) + mNextSmallest;

    FdPrintf(std_err, "\n%zu-bin Distribution:\n", mMaxSize);
    FdPrintf(std_err, "   request   :  count percent\n");
    size_t range_start = mNextSmallest + 1;
    for (size_t j = 0; j < mArraySlots; j++) {
      size_t range_end = (j << mShift) + array_offset_add;
      FdPrintf(std_err, "%5zu - %5zu: %6zu %6zu%%\n", range_start, range_end,
               mRequests[j], percent(mRequests[j], mTotalRequests));
      range_start = range_end + 1;
    }
  }

  size_t maxSize() const { return mMaxSize; }

 private:
  static constexpr size_t MAX_NUM_BUCKETS = 16;

  // If size is zero this distribution is uninitialised.
  size_t mMaxSize;
  size_t mNextSmallest;

  // Parameters to convert a size into a slot number.
  unsigned mShift;
  unsigned mArrayOffset;

  // The number of slots.
  unsigned mArraySlots;

  size_t mTotalRequests;
  size_t mRequests[MAX_NUM_BUCKETS];
};

#ifdef XP_LINUX
struct MemoryMap {
  uintptr_t mStart;
  uintptr_t mEnd;
  bool mReadable;
  bool mPrivate;
  bool mAnon;
  bool mIsStack;
  bool mIsSpecial;
  size_t mRSS;

  bool IsCandidate() const {
    // Candidate mappings are:
    //  * readable,
    //  * private (not shared),
    //  * anonymous or "[heap]" (not another area such as the stack),
    //
    // The only mappings we're falsely including are the .bss segments for
    // shared libraries.
    return mReadable && mPrivate && mAnon && !mIsStack && !mIsSpecial;
  }
};

class SMapsReader : private FdReader {
 private:
  explicit SMapsReader(FdReader&& reader) : FdReader(std::move(reader)) {}

 public:
  static Maybe<SMapsReader> open() {
    int fd = ::open(FILENAME, O_RDONLY);
    if (fd < 0) {
      perror(FILENAME);
      return mozilla::Nothing();
    }

    return Some(SMapsReader(FdReader(fd, true)));
  }

  Maybe<MemoryMap> readMap(intptr_t aStdErr) {
    // This is not very tolerant of format changes because things like
    // parseNumber will crash if they get a bad value. TODO: make this
    // soft-fail.

    Buffer line = ReadLine();
    if (!line) {
      return Nothing();
    }

    // We're going to be at the start of an entry, start tokenising the first
    // line.

    // Range
    Buffer range = line.SplitChar(' ');
    uintptr_t range_start = parseNumber<16>(range.SplitChar('-'));
    uintptr_t range_end = parseNumber<16>(range);

    // Mode.
    Buffer mode = line.SplitChar(' ');
    if (mode.Length() != 4) {
      FdPrintf(aStdErr, "Couldn't parse SMAPS file\n");
      return Nothing();
    }
    bool readable = mode[0] == 'r';
    bool private_ = mode[3] == 'p';

    // Offset, device and inode.
    line.SkipWhitespace();
    bool zero_offset = !parseNumber<16>(line.SplitChar(' '));
    line.SkipWhitespace();
    bool no_device = line.SplitChar(' ') == Buffer("00:00");
    line.SkipWhitespace();
    bool zero_inode = !parseNumber(line.SplitChar(' '));
    bool is_anon = zero_offset && no_device && zero_inode;

    // Filename, or empty for anon mappings.
    line.SkipWhitespace();
    Buffer filename = line.SplitChar(' ');

    bool is_stack;
    bool is_special;
    if (filename && filename[0] == '[') {
      is_stack = filename == Buffer("[stack]");
      is_special = filename == Buffer("[vdso]") ||
                   filename == Buffer("[vvar]") ||
                   filename == Buffer("[vsyscall]");
    } else {
      is_stack = false;
      is_special = false;
    }

    size_t rss = 0;
    while ((line = ReadLine())) {
      Buffer field = line.SplitChar(':');
      if (field == Buffer("VmFlags")) {
        // This is the last field, at least in the current format. Break this
        // loop to read the next mapping.
        break;
      }

      if (field == Buffer("Rss")) {
        line.SkipWhitespace();
        Buffer value = line.SplitChar(' ');
        rss = parseNumber(value) * 1024;
      }
    }

    return Some(MemoryMap({range_start, range_end, readable, private_, is_anon,
                           is_stack, is_special, rss}));
  }
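
  // An abbreviated example of the entry format parsed by readMap above
  // (values are illustrative; proc(5) has the authoritative description):
  //   55d0e0b49000-55d0e0b6a000 rw-p 00000000 00:00 0          [heap]
  //   Rss:         132 kB
  //   VmFlags: rd wr mr mw me ac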

  static constexpr char FILENAME[] = "/proc/self/smaps";
};
#endif  // XP_LINUX

/* Class to handle dispatching the replay function calls to replace-malloc. */
class Replay {
 public:
  Replay() {
#ifdef _WIN32
    // See comment in FdPrintf.h as to why native win32 handles are used.
    mStdErr = reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE));
#else
    mStdErr = fileno(stderr);
#endif
  }

  void enableSlopCalculation() { mCalculateSlop = true; }
  void enableMemset() { mDoMemset = true; }

  MemSlot& operator[](size_t index) const { return mSlots[index]; }

  void malloc(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::malloc_impl(size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void posix_memalign(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t alignment = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    void* ptr;
    if (::posix_memalign_impl(&ptr, alignment, size) == 0) {
      aSlot.mPtr = ptr;
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    } else {
      aSlot.mPtr = nullptr;
    }
  }

  void aligned_alloc(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t alignment = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::aligned_alloc_impl(alignment, size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void calloc(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t num = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::calloc_impl(num, size);
    if (aSlot.mPtr) {
      aSlot.mRequest = num * size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += num * size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void realloc(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    Buffer dummy = aArgs.SplitChar('#');
    if (dummy) {
      die("Malformed input");
    }
    size_t slot_id = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    MemSlot& old_slot = (*this)[slot_id];
    void* old_ptr = old_slot.mPtr;
    old_slot.mPtr = nullptr;
    aSlot.mPtr = ::realloc_impl(old_ptr, size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void free(Buffer& aArgs, Buffer& aResult) {
    if (aResult) {
      die("Malformed input");
    }
    mOps++;
    Buffer dummy = aArgs.SplitChar('#');
    if (dummy) {
      die("Malformed input");
    }
    size_t slot_id = parseNumber(aArgs);
    MemSlot& slot = (*this)[slot_id];
    ::free_impl(slot.mPtr);
    slot.mPtr = nullptr;
  }

  void memalign(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t alignment = parseNumber(aArgs.SplitChar(','));
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::memalign_impl(alignment, size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void valloc(Buffer& aArgs, Buffer& aResult) {
    MemSlot& aSlot = SlotForResult(aResult);
    mOps++;
    size_t size = parseNumber(aArgs);
    aSlot.mPtr = ::valloc_impl(size);
    if (aSlot.mPtr) {
      aSlot.mRequest = size;
      MaybeCommit(aSlot);
      if (mCalculateSlop) {
        mTotalRequestedSize += size;
        mTotalAllocatedSize += ::malloc_usable_size_impl(aSlot.mPtr);
      }
    }
  }

  void jemalloc_stats(Buffer& aArgs, Buffer& aResult) {
    if (aArgs || aResult) {
      die("Malformed input");
    }
    mOps++;
    jemalloc_stats_t stats;
    jemalloc_bin_stats_t bin_stats[JEMALLOC_MAX_STATS_BINS];
    ::jemalloc_stats_internal(&stats, bin_stats);

#ifdef XP_LINUX
    size_t rss = get_rss();
#endif

    size_t num_objects = 0;
    size_t num_sloppy_objects = 0;
    size_t total_allocated = 0;
    size_t total_slop = 0;
    size_t large_slop = 0;
    size_t large_used = 0;
    size_t huge_slop = 0;
    size_t huge_used = 0;
    size_t bin_slop[JEMALLOC_MAX_STATS_BINS] = {0};

    for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
      MemSlot& slot = mSlots[slot_id];
      if (slot.mPtr) {
        size_t used = ::malloc_usable_size_impl(slot.mPtr);
        size_t slop = used - slot.mRequest;
        total_allocated += used;
        total_slop += slop;
        num_objects++;
        if (slop) {
          num_sloppy_objects++;
        }

        if (used <= stats.page_size / 2) {
          // We know that this is an inefficient linear search, but there's a
          // small number of bins and this is simple.
          for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) {
            auto& bin = bin_stats[i];
            if (used == bin.size) {
              bin_slop[i] += slop;
              break;
            }
          }
        } else if (used <= stats.large_max) {
          large_slop += slop;
          large_used += used;
        } else {
          huge_slop += slop;
          huge_used += used;
        }
      }
    }

    // This formula corresponds to the calculation of wasted (from committed
    // and the other parameters) within jemalloc_stats().
    size_t committed = stats.allocated + stats.waste + stats.page_cache +
                       stats.bookkeeping + stats.bin_unused;

    FdPrintf(mStdErr, "\n");
    FdPrintf(mStdErr, "Objects:      %9zu\n", num_objects);
    FdPrintf(mStdErr, "Slots:        %9zu\n", mNumUsedSlots);
    FdPrintf(mStdErr, "Ops:          %9zu\n", mOps);
    FdPrintf(mStdErr, "mapped:       %9zu\n", stats.mapped);
    FdPrintf(mStdErr, "committed:    %9zu\n", committed);
#ifdef XP_LINUX
    if (rss) {
      FdPrintf(mStdErr, "rss:          %9zu\n", rss);
    }
#endif
    FdPrintf(mStdErr, "allocated:    %9zu\n", stats.allocated);
    FdPrintf(mStdErr, "waste:        %9zu\n", stats.waste);
    FdPrintf(mStdErr, "dirty:        %9zu\n", stats.page_cache);
    FdPrintf(mStdErr, "bookkeep:     %9zu\n", stats.bookkeeping);
    FdPrintf(mStdErr, "bin-unused:   %9zu\n", stats.bin_unused);
    FdPrintf(mStdErr, "quantum-max:  %9zu\n", stats.quantum_max);
    FdPrintf(mStdErr, "subpage-max:  %9zu\n", stats.page_size / 2);
    FdPrintf(mStdErr, "large-max:    %9zu\n", stats.large_max);
    if (mCalculateSlop) {
      size_t slop = mTotalAllocatedSize - mTotalRequestedSize;
      FdPrintf(mStdErr,
               "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n",
               slop / 1024, mTotalAllocatedSize / 1024,
               percent(slop, mTotalAllocatedSize));
    }
    FdPrintf(mStdErr, "Live sloppy objects: %zu/%zu (%zu%%)\n",
             num_sloppy_objects, num_objects,
             percent(num_sloppy_objects, num_objects));
    FdPrintf(mStdErr, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n",
             total_slop / 1024, total_allocated / 1024,
             percent(total_slop, total_allocated));

    FdPrintf(mStdErr, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size",
             "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)",
             "used (r)");
    for (auto& bin : bin_stats) {
      if (bin.size) {
        FdPrintf(mStdErr, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n",
                 bin.size, bin.bytes_unused / 1024, bin.bytes_total / 1024,
                 percent(bin.bytes_total - bin.bytes_unused, bin.bytes_total),
                 bin.num_non_full_runs, bin.num_runs,
                 percent(bin.num_runs - bin.num_non_full_runs, bin.num_runs));
      }
    }

    FdPrintf(mStdErr, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent");
    for (unsigned i = 0; i < JEMALLOC_MAX_STATS_BINS; i++) {
      auto& bin = bin_stats[i];
      if (bin.size) {
        size_t used = bin.bytes_total - bin.bytes_unused;
        FdPrintf(mStdErr, "%5zu %8zu %9zu %6zu%%\n", bin.size, bin_slop[i],
                 used, percent(bin_slop[i], used));
      }
    }
    FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "large", large_slop, large_used,
             percent(large_slop, large_used));
    FdPrintf(mStdErr, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop, huge_used,
             percent(huge_slop, huge_used));

    print_distributions(stats, bin_stats);
  }

 private:
  /*
   * Create and print frequency distributions of memory requests.
   */
  void print_distributions(
      jemalloc_stats_t& stats,
      jemalloc_bin_stats_t (&bin_stats)[JEMALLOC_MAX_STATS_BINS]) {
    // We compute distributions for all of the bins for small allocations
    // (JEMALLOC_MAX_STATS_BINS) plus two more distributions for larger
    // allocations.
    Distribution dists[JEMALLOC_MAX_STATS_BINS + 2];

    unsigned last_size = 0;
    unsigned num_dists = 0;
    for (auto& bin : bin_stats) {
      if (bin.size == 0) {
        break;
      }
      auto& dist = dists[num_dists++];

      if (bin.size <= 16) {
        // 1 byte buckets.
        dist = Distribution(bin.size, last_size, 1);
      } else if (bin.size <= stats.quantum_max) {
        // 4 buckets, (4 bytes per bucket with a 16 byte quantum).
        dist = Distribution(bin.size, last_size, stats.quantum / 4);
      } else {
        // 16 buckets.
        dist = Distribution(bin.size, last_size, (bin.size - last_size) / 16);
      }
      last_size = bin.size;
    }

    // 16 buckets.
    dists[num_dists] = Distribution(stats.page_size, last_size,
                                    (stats.page_size - last_size) / 16);
    num_dists++;

    // Buckets are 1/4 of the page size (12 buckets).
    dists[num_dists] = Distribution(stats.page_size * 4, stats.page_size,
                                    stats.page_size / 4);
    num_dists++;

    MOZ_RELEASE_ASSERT(num_dists <= JEMALLOC_MAX_STATS_BINS + 2);

    for (size_t slot_id = 0; slot_id < mNumUsedSlots; slot_id++) {
      MemSlot& slot = mSlots[slot_id];
      if (slot.mPtr) {
        for (size_t i = 0; i < num_dists; i++) {
          if (slot.mRequest <= dists[i].maxSize()) {
            dists[i].addRequest(slot.mRequest);
            break;
          }
        }
      }
    }

    for (unsigned i = 0; i < num_dists; i++) {
      dists[i].printDist(mStdErr);
    }
  }

#ifdef XP_LINUX
  size_t get_rss() {
    if (mGetRSSFailed) {
      return 0;
    }

    // On Linux we can determine the RSS of the heap area by examining the
    // smaps file.
    mozilla::Maybe<SMapsReader> reader = SMapsReader::open();
    if (!reader) {
      mGetRSSFailed = true;
      return 0;
    }

    size_t rss = 0;
    while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) {
      if (map->IsCandidate() && !mSlots.ownsMapping(map->mStart) &&
          !InitialMapsContains(map->mStart)) {
        rss += map->mRSS;
      }
    }

    return rss;
  }

  bool InitialMapsContains(uintptr_t aRangeStart) {
    for (unsigned i = 0; i < mNumInitialMaps; i++) {
      MOZ_ASSERT(i < MAX_INITIAL_MAPS);

      if (mInitialMaps[i] == aRangeStart) {
        return true;
      }
    }
    return false;
  }

 public:
  void BuildInitialMapInfo() {
    if (mGetRSSFailed) {
      return;
    }

    Maybe<SMapsReader> reader = SMapsReader::open();
    if (!reader) {
      mGetRSSFailed = true;
      return;
    }

    while (Maybe<MemoryMap> map = reader->readMap(mStdErr)) {
      if (map->IsCandidate()) {
        if (mNumInitialMaps >= MAX_INITIAL_MAPS) {
          FdPrintf(mStdErr, "Too many initial mappings, can't compute RSS\n");
          // Disable RSS calculation; it would be wrong if we only excluded
          // part of the initial mappings.
          mGetRSSFailed = true;
          return;
        }

        mInitialMaps[mNumInitialMaps++] = map->mStart;
      }
    }
  }
#endif

 private:
  MemSlot& SlotForResult(Buffer& aResult) {
    /* Parse result value and get the corresponding slot. */
    Buffer dummy = aResult.SplitChar('=');
    Buffer dummy2 = aResult.SplitChar('#');
    if (dummy || dummy2) {
      die("Malformed input");
    }

    size_t slot_id = parseNumber(aResult);
    mNumUsedSlots = std::max(mNumUsedSlots, slot_id + 1);

    return mSlots[slot_id];
  }

  void MaybeCommit(MemSlot& aSlot) {
    if (mDoMemset) {
      // Write any byte, 0x55 isn't significant.
      memset(aSlot.mPtr, 0x55, aSlot.mRequest);
    }
  }

  intptr_t mStdErr;
  size_t mOps = 0;

  // The number of slots that have been used. It is used to iterate over slots
  // without accessing those we haven't initialised.
  size_t mNumUsedSlots = 0;

  MemSlotList mSlots;
  size_t mTotalRequestedSize = 0;
  size_t mTotalAllocatedSize = 0;
  // Whether to calculate slop for all allocations over the runtime of a
  // process.
  bool mCalculateSlop = false;
  bool mDoMemset = false;

#ifdef XP_LINUX
  // If we have a failure reading smaps info then this is used to disable that
  // feature.
  bool mGetRSSFailed = false;

  // The initial memory mappings are recorded here at start up. We exclude
  // memory in these mappings when computing RSS. We assume they do not grow
  // and that no regions are allocated near them; this holds because they only
  // record the .bss and .data segments from our binary and shared objects,
  // or regions that logalloc-replay has created for MappedArrays.
  //
  // 64 should be enough for anybody.
  static constexpr unsigned MAX_INITIAL_MAPS = 64;
  uintptr_t mInitialMaps[MAX_INITIAL_MAPS];
  unsigned mNumInitialMaps = 0;
#endif  // XP_LINUX
};

int main(int argc, const char* argv[]) {
  size_t first_pid = 0;
  FdReader reader(0);
  Replay replay;

  for (int i = 1; i < argc; i++) {
    const char* option = argv[i];
    if (strcmp(option, "-s") == 0) {
      // Do accounting to calculate allocation slop.
      replay.enableSlopCalculation();
    } else if (strcmp(option, "-c") == 0) {
      // Touch memory as we allocate it.
      replay.enableMemset();
    } else {
      fprintf(stderr, "Unknown command line option: %s\n", option);
      return EXIT_FAILURE;
    }
  }

#ifdef XP_LINUX
  replay.BuildInitialMapInfo();
#endif

  /* Read log from stdin and dispatch function calls to the Replay instance.
   * The log format is essentially:
   *   <pid> <tid> <function>([<args>])[=<result>]
   * <args> is a comma separated list of arguments.
   *
   * The logs are expected to be preprocessed so that allocations are
   * assigned a tracking slot. The input is trusted not to have crazy
   * values for these slot numbers.
   *
   * <result>, as well as some of the args to some of the function calls, are
   * such slot numbers.
   */
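
  /* For example, a preprocessed log line might look like this (an
   * illustrative line, not taken from a real log):
   *   1234 1234 malloc(42)=#1
   * meaning process 1234, thread 1234, called malloc(42), and the result is
   * tracked in slot #1. */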

  while (true) {
    Buffer line = reader.ReadLine();

    if (!line) {
      break;
    }

    size_t pid = parseNumber(line.SplitChar(' '));
    if (!first_pid) {
      first_pid = pid;
    }

    /* The log may contain data for several processes; only entries for the
     * very first process that appears are handled. */
    if (first_pid != pid) {
      continue;
    }

    /* The log contains thread ids for manual analysis, but we just ignore them
     * for now. */
    parseNumber(line.SplitChar(' '));

    Buffer func = line.SplitChar('(');
    Buffer args = line.SplitChar(')');

    if (func == Buffer("jemalloc_stats")) {
      replay.jemalloc_stats(args, line);
    } else if (func == Buffer("free")) {
      replay.free(args, line);
    } else if (func == Buffer("malloc")) {
      replay.malloc(args, line);
    } else if (func == Buffer("posix_memalign")) {
      replay.posix_memalign(args, line);
    } else if (func == Buffer("aligned_alloc")) {
      replay.aligned_alloc(args, line);
    } else if (func == Buffer("calloc")) {
      replay.calloc(args, line);
    } else if (func == Buffer("realloc")) {
      replay.realloc(args, line);
    } else if (func == Buffer("memalign")) {
      replay.memalign(args, line);
    } else if (func == Buffer("valloc")) {
      replay.valloc(args, line);
    } else {
      die("Malformed input");
    }
  }

  return 0;
}