1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #define MOZ_MEMORY_IMPL
8 #include "mozmemory_wrap.h"
13 typedef intptr_t ssize_t
;
15 # include <sys/mman.h>
19 # include <sys/types.h>
20 # include <sys/stat.h>
30 #include "mozilla/Assertions.h"
31 #include "mozilla/MathAlgorithms.h"
32 #include "mozilla/Maybe.h"
35 using namespace mozilla
;
/* Print the given message to stderr and terminate the replay with a
 * failure status.
 * Here, it doesn't matter that fprintf may allocate memory: we are
 * aborting anyway, so the measurement is already over. */
static void die(const char* message) {
  fprintf(stderr, "%s\n", message);
  exit(1);
}
/* The system page size, captured once at startup. MappedArray relies on it
 * both for its size assertion and (on Linux) for the guard pages placed
 * around each mapping. */
static size_t sPageSize = []() { return sysconf(_SC_PAGESIZE); }();
47 /* We don't want to be using malloc() to allocate our internal tracking
48 * data, because that would change the parameters of what is being measured,
49 * so we want to use data types that directly use mmap/VirtualAlloc. */
50 template <typename T
, size_t Len
>
53 MappedArray() : mPtr(nullptr) {
55 MOZ_RELEASE_ASSERT(!((sizeof(T
) * Len
) & (sPageSize
- 1)),
56 "MappedArray size must be a multiple of the page size");
63 VirtualFree(mPtr
, sizeof(T
) * Len
, MEM_RELEASE
);
64 #elif defined(XP_LINUX)
65 munmap(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(mPtr
) -
67 sizeof(T
) * Len
+ sPageSize
* 2);
69 munmap(mPtr
, sizeof(T
) * Len
);
74 T
& operator[](size_t aIndex
) const {
80 mPtr
= reinterpret_cast<T
*>(VirtualAlloc(
81 nullptr, sizeof(T
) * Len
, MEM_COMMIT
| MEM_RESERVE
, PAGE_READWRITE
));
82 if (mPtr
== nullptr) {
83 die("VirtualAlloc error");
86 size_t data_size
= sizeof(T
) * Len
;
87 size_t size
= data_size
;
90 size
+= sPageSize
* 2;
92 mPtr
= reinterpret_cast<T
*>(mmap(nullptr, size
, PROT_READ
| PROT_WRITE
,
93 MAP_ANON
| MAP_PRIVATE
, -1, 0));
94 if (mPtr
== MAP_FAILED
) {
98 // On Linux we request a page on either side of the allocation and
99 // mprotect them. This prevents mappings in /proc/self/smaps from being
100 // merged and allows us to parse this file to calculate the allocator's RSS.
101 MOZ_ASSERT(0 == mprotect(mPtr
, sPageSize
, 0));
102 MOZ_ASSERT(0 == mprotect(reinterpret_cast<void*>(
103 reinterpret_cast<uintptr_t>(mPtr
) + data_size
+
106 mPtr
= reinterpret_cast<T
*>(reinterpret_cast<uintptr_t>(mPtr
) + sPageSize
);
112 bool ownsMapping(uintptr_t addr
) const { return addr
== (uintptr_t)mPtr
; }
114 bool allocated() const { return !!mPtr
; }
120 /* Type for records of allocations. */
124 // mRequest is only valid if mPtr is non-null. It doesn't need to be cleared
125 // when memory is freed or realloc()ed.
129 /* An almost infinite list of slots.
130 * In essence, this is a linked list of arrays of groups of slots.
131 * Each group is 1MB. On 64-bits, one group allows to store 64k allocations.
132 * Each MemSlotList instance can store 1023 such groups, which means more
133 * than 67M allocations. In case more would be needed, we chain to another
134 * MemSlotList, and so on.
135 * Using 1023 groups makes the MemSlotList itself page sized on 32-bits
136 * and 2 pages-sized on 64-bits.
139 static constexpr size_t kGroups
= 1024 - 1;
140 static constexpr size_t kGroupSize
= (1024 * 1024) / sizeof(MemSlot
);
142 MappedArray
<MemSlot
, kGroupSize
> mSlots
[kGroups
];
143 MappedArray
<MemSlotList
, 1> mNext
;
146 MemSlot
& operator[](size_t aIndex
) const {
147 if (aIndex
< kGroupSize
* kGroups
) {
148 return mSlots
[aIndex
/ kGroupSize
][aIndex
% kGroupSize
];
150 aIndex
-= kGroupSize
* kGroups
;
151 return mNext
[0][aIndex
];
154 // Ask if any of the memory-mapped buffers use this range.
155 bool ownsMapping(uintptr_t aStart
) const {
156 for (const auto& slot
: mSlots
) {
157 if (slot
.allocated() && slot
.ownsMapping(aStart
)) {
161 return mNext
.ownsMapping(aStart
) ||
162 (mNext
.allocated() && mNext
[0].ownsMapping(aStart
));
166 /* Helper class for memory buffers */
169 Buffer() : mBuf(nullptr), mLength(0) {}
171 Buffer(const void* aBuf
, size_t aLength
)
172 : mBuf(reinterpret_cast<const char*>(aBuf
)), mLength(aLength
) {}
174 /* Constructor for string literals. */
175 template <size_t Size
>
176 explicit Buffer(const char (&aStr
)[Size
]) : mBuf(aStr
), mLength(Size
- 1) {}
178 /* Returns a sub-buffer up-to but not including the given aNeedle character.
179 * The "parent" buffer itself is altered to begin after the aNeedle
181 * If the aNeedle character is not found, return the entire buffer, and empty
182 * the "parent" buffer. */
183 Buffer
SplitChar(char aNeedle
) {
184 char* buf
= const_cast<char*>(mBuf
);
185 char* c
= reinterpret_cast<char*>(memchr(buf
, aNeedle
, mLength
));
187 return Split(mLength
);
190 Buffer result
= Split(c
- buf
);
191 // Remove the aNeedle character itself.
196 // Advance to the position after aNeedle. This is like SplitChar but does not
197 // return the skipped portion.
198 void Skip(char aNeedle
, unsigned nTimes
= 1) {
199 for (unsigned i
= 0; i
< nTimes
; i
++) {
204 void SkipWhitespace() {
205 while (mLength
> 0) {
206 if (!isspace(mBuf
[0])) {
214 /* Returns a sub-buffer of at most aLength characters. The "parent" buffer is
215 * amputated of those aLength characters. If the "parent" buffer is smaller
216 * than aLength, then its length is used instead. */
217 Buffer
Split(size_t aLength
) {
218 Buffer
result(mBuf
, std::min(aLength
, mLength
));
219 mLength
-= result
.mLength
;
220 mBuf
+= result
.mLength
;
224 /* Move the buffer (including its content) to the memory address of the aOther
226 void Slide(Buffer aOther
) {
227 memmove(const_cast<char*>(aOther
.mBuf
), mBuf
, mLength
);
231 /* Returns whether the two involved buffers have the same content. */
232 bool operator==(Buffer aOther
) {
233 return mLength
== aOther
.mLength
&&
234 (mBuf
== aOther
.mBuf
|| !strncmp(mBuf
, aOther
.mBuf
, mLength
));
237 bool operator!=(Buffer aOther
) { return !(*this == aOther
); }
239 /* Returns true if the buffer is not empty. */
240 explicit operator bool() { return mLength
; }
242 char operator[](size_t n
) const { return mBuf
[n
]; }
244 /* Returns the memory location of the buffer. */
245 const char* get() { return mBuf
; }
247 /* Returns the memory location of the end of the buffer (technically, the
248 * first byte after the buffer). */
249 const char* GetEnd() { return mBuf
+ mLength
; }
251 /* Extend the buffer over the content of the other buffer, assuming it is
253 void Extend(Buffer aOther
) {
254 MOZ_ASSERT(aOther
.mBuf
== GetEnd());
255 mLength
+= aOther
.mLength
;
258 size_t Length() const { return mLength
; }
265 /* Helper class to read from a file descriptor line by line. */
268 explicit FdReader(int aFd
, bool aNeedClose
= false)
270 mNeedClose(aNeedClose
),
272 mBuf(&mRawBuf
, sizeof(mRawBuf
)) {}
274 FdReader(FdReader
&& aOther
) noexcept
276 mNeedClose(aOther
.mNeedClose
),
278 mBuf(&mRawBuf
, sizeof(mRawBuf
)) {
279 memcpy(mRawBuf
, aOther
.mRawBuf
, sizeof(mRawBuf
));
281 aOther
.mNeedClose
= false;
282 aOther
.mData
= Buffer();
283 aOther
.mBuf
= Buffer();
286 FdReader
& operator=(const FdReader
&) = delete;
287 FdReader(const FdReader
&) = delete;
295 /* Read a line from the file descriptor and returns it as a Buffer instance */
298 Buffer result
= mData
.SplitChar('\n');
300 /* There are essentially three different cases here:
301 * - '\n' was found "early". In this case, the end of the result buffer
302 * is before the beginning of the mData buffer (since SplitChar
304 * - '\n' was found as the last character of mData. In this case, mData
305 * is empty, but still points at the end of mBuf. result points to what
306 * used to be in mData, without the last character.
307 * - '\n' was not found. In this case too, mData is empty and points at
308 * the end of mBuf. But result points to the entire buffer that used to
309 * be pointed by mData.
310 * Only in the latter case do both result and mData's end match, and it's
311 * the only case where we need to refill the buffer.
313 if (result
.GetEnd() != mData
.GetEnd()) {
317 /* Since SplitChar emptied mData, make it point to what it had before. */
320 /* And move it to the beginning of the read buffer. */
332 /* Fill the read buffer. */
334 size_t size
= mBuf
.GetEnd() - mData
.GetEnd();
335 Buffer
remainder(mData
.GetEnd(), size
);
338 while (remainder
&& len
> 0) {
339 len
= ::read(mFd
, const_cast<char*>(remainder
.get()), size
);
344 mData
.Extend(remainder
.Split(len
));
348 /* File descriptor to read from. */
352 /* Part of data that was read from the file descriptor but not returned with
355 /* Buffer representation of mRawBuf */
363 /* Function declarations for all the replace_malloc _impl functions.
364 * See memory/build/replace_malloc.c */
365 #define MALLOC_DECL(name, return_type, ...) \
366 return_type name##_impl(__VA_ARGS__);
367 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
368 #include "malloc_decls.h"
370 #define MALLOC_DECL(name, return_type, ...) return_type name(__VA_ARGS__);
371 #define MALLOC_FUNCS MALLOC_FUNCS_JEMALLOC
372 #include "malloc_decls.h"
/* mozjemalloc and jemalloc use pthread_atfork, which Android doesn't have.
 * While gecko has one in libmozglue, the replay program can't use that.
 * Since we're not going to fork anyways, make it a dummy function.
 * Returns 0 to signal success, matching the POSIX contract. */
int pthread_atfork(void (*aPrepare)(void), void (*aParent)(void),
                   void (*aChild)(void)) {
  return 0;
}
387 template <unsigned Base
= 10>
388 size_t parseNumber(Buffer aBuf
) {
390 die("Malformed input");
394 for (const char *c
= aBuf
.get(), *end
= aBuf
.GetEnd(); c
< end
; c
++) {
396 if ((*c
>= '0' && *c
<= '9')) {
398 } else if (Base
== 16 && *c
>= 'a' && *c
<= 'f') {
399 result
+= *c
- 'a' + 10;
400 } else if (Base
== 16 && *c
>= 'A' && *c
<= 'F') {
401 result
+= *c
- 'A' + 10;
403 die("Malformed input");
/* Returns the percentage of a out of b, rounded to the nearest integer.
 * Returns 0 when b is 0: callers pass totals (e.g. total requests) that can
 * legitimately be zero, and dividing would be undefined behaviour. */
static size_t percent(size_t a, size_t b) {
  if (!b) {
    return 0;
  }
  return size_t(round(double(a) / double(b) * 100.0));
}
418 // Default constructor used for array initialisation.
428 Distribution(size_t max_size
, size_t next_smallest
, size_t bucket_size
)
429 : mMaxSize(max_size
),
430 mNextSmallest(next_smallest
),
431 mShift(CeilingLog2(bucket_size
)),
432 mArrayOffset(1 + next_smallest
),
433 mArraySlots((max_size
- next_smallest
) >> mShift
),
438 MOZ_ASSERT(mMaxSize
);
439 MOZ_RELEASE_ASSERT(mArraySlots
<= MAX_NUM_BUCKETS
);
442 Distribution
& operator=(const Distribution
& aOther
) = default;
444 void addRequest(size_t request
) {
445 MOZ_ASSERT(mMaxSize
);
447 mRequests
[(request
- mArrayOffset
) >> mShift
]++;
451 void printDist(intptr_t std_err
) {
452 MOZ_ASSERT(mMaxSize
);
454 // The translation to turn a slot index into a memory request size.
455 const size_t array_offset_add
= (1 << mShift
) + mNextSmallest
;
457 FdPrintf(std_err
, "\n%zu-bin Distribution:\n", mMaxSize
);
458 FdPrintf(std_err
, " request : count percent\n");
459 size_t range_start
= mNextSmallest
+ 1;
460 for (size_t j
= 0; j
< mArraySlots
; j
++) {
461 size_t range_end
= (j
<< mShift
) + array_offset_add
;
462 FdPrintf(std_err
, "%5zu - %5zu: %6zu %6zu%%\n", range_start
, range_end
,
463 mRequests
[j
], percent(mRequests
[j
], mTotalRequests
));
464 range_start
= range_end
+ 1;
468 size_t maxSize() const { return mMaxSize
; }
471 static constexpr size_t MAX_NUM_BUCKETS
= 16;
473 // If size is zero this distribution is uninitialised.
475 size_t mNextSmallest
;
477 // Parameters to convert a size into a slot number.
479 unsigned mArrayOffset
;
481 // The number of slots.
482 unsigned mArraySlots
;
484 size_t mTotalRequests
;
485 size_t mRequests
[MAX_NUM_BUCKETS
];
499 bool IsCandidate() const {
500 // Candidates mappings are:
502 // * they are private (not shared),
503 // * anonymous or "[heap]" (not another area such as stack),
505 // The only mappings we're falsely including are the .bss segments for
507 return mReadable
&& mPrivate
&& mAnon
&& !mIsStack
&& !mIsSpecial
;
511 class SMapsReader
: private FdReader
{
513 explicit SMapsReader(FdReader
&& reader
) : FdReader(std::move(reader
)) {}
516 static Maybe
<SMapsReader
> open() {
517 int fd
= ::open(FILENAME
, O_RDONLY
);
520 return mozilla::Nothing();
523 return Some(SMapsReader(FdReader(fd
, true)));
526 Maybe
<MemoryMap
> readMap(intptr_t aStdErr
) {
527 // This is not very tolerant of format changes because things like
528 // parseNumber will crash if they get a bad value. TODO: make this
531 Buffer line
= ReadLine();
536 // We're going to be at the start of an entry, start tokenising the first
540 Buffer range
= line
.SplitChar(' ');
541 uintptr_t range_start
= parseNumber
<16>(range
.SplitChar('-'));
542 uintptr_t range_end
= parseNumber
<16>(range
);
545 Buffer mode
= line
.SplitChar(' ');
546 if (mode
.Length() != 4) {
547 FdPrintf(aStdErr
, "Couldn't parse SMAPS file\n");
550 bool readable
= mode
[0] == 'r';
551 bool private_
= mode
[3] == 'p';
553 // Offset, device and inode.
554 line
.SkipWhitespace();
555 bool zero_offset
= !parseNumber
<16>(line
.SplitChar(' '));
556 line
.SkipWhitespace();
557 bool no_device
= line
.SplitChar(' ') == Buffer("00:00");
558 line
.SkipWhitespace();
559 bool zero_inode
= !parseNumber(line
.SplitChar(' '));
560 bool is_anon
= zero_offset
&& no_device
&& zero_inode
;
562 // Filename, or empty for anon mappings.
563 line
.SkipWhitespace();
564 Buffer filename
= line
.SplitChar(' ');
568 if (filename
&& filename
[0] == '[') {
569 is_stack
= filename
== Buffer("[stack]");
570 is_special
= filename
== Buffer("[vdso]") ||
571 filename
== Buffer("[vvar]") ||
572 filename
== Buffer("[vsyscall]");
579 while ((line
= ReadLine())) {
580 Buffer field
= line
.SplitChar(':');
581 if (field
== Buffer("VmFlags")) {
582 // This is the last field, at least in the current format. Break this
583 // loop to read the next mapping.
587 if (field
== Buffer("Rss")) {
588 line
.SkipWhitespace();
589 Buffer value
= line
.SplitChar(' ');
590 rss
= parseNumber(value
) * 1024;
594 return Some(MemoryMap({range_start
, range_end
, readable
, private_
, is_anon
,
595 is_stack
, is_special
, rss
}));
598 static constexpr char FILENAME
[] = "/proc/self/smaps";
602 /* Class to handle dispatching the replay function calls to replace-malloc. */
607 // See comment in FdPrintf.h as to why native win32 handles are used.
608 mStdErr
= reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE
));
610 mStdErr
= fileno(stderr
);
614 void enableSlopCalculation() { mCalculateSlop
= true; }
615 void enableMemset() { mDoMemset
= true; }
617 MemSlot
& operator[](size_t index
) const { return mSlots
[index
]; }
619 void malloc(Buffer
& aArgs
, Buffer
& aResult
) {
620 MemSlot
& aSlot
= SlotForResult(aResult
);
622 size_t size
= parseNumber(aArgs
);
623 aSlot
.mPtr
= ::malloc_impl(size
);
625 aSlot
.mRequest
= size
;
627 if (mCalculateSlop
) {
628 mTotalRequestedSize
+= size
;
629 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
634 void posix_memalign(Buffer
& aArgs
, Buffer
& aResult
) {
635 MemSlot
& aSlot
= SlotForResult(aResult
);
637 size_t alignment
= parseNumber(aArgs
.SplitChar(','));
638 size_t size
= parseNumber(aArgs
);
640 if (::posix_memalign_impl(&ptr
, alignment
, size
) == 0) {
642 aSlot
.mRequest
= size
;
644 if (mCalculateSlop
) {
645 mTotalRequestedSize
+= size
;
646 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
649 aSlot
.mPtr
= nullptr;
653 void aligned_alloc(Buffer
& aArgs
, Buffer
& aResult
) {
654 MemSlot
& aSlot
= SlotForResult(aResult
);
656 size_t alignment
= parseNumber(aArgs
.SplitChar(','));
657 size_t size
= parseNumber(aArgs
);
658 aSlot
.mPtr
= ::aligned_alloc_impl(alignment
, size
);
660 aSlot
.mRequest
= size
;
662 if (mCalculateSlop
) {
663 mTotalRequestedSize
+= size
;
664 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
669 void calloc(Buffer
& aArgs
, Buffer
& aResult
) {
670 MemSlot
& aSlot
= SlotForResult(aResult
);
672 size_t num
= parseNumber(aArgs
.SplitChar(','));
673 size_t size
= parseNumber(aArgs
);
674 aSlot
.mPtr
= ::calloc_impl(num
, size
);
676 aSlot
.mRequest
= num
* size
;
678 if (mCalculateSlop
) {
679 mTotalRequestedSize
+= num
* size
;
680 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
685 void realloc(Buffer
& aArgs
, Buffer
& aResult
) {
686 MemSlot
& aSlot
= SlotForResult(aResult
);
688 Buffer dummy
= aArgs
.SplitChar('#');
690 die("Malformed input");
692 size_t slot_id
= parseNumber(aArgs
.SplitChar(','));
693 size_t size
= parseNumber(aArgs
);
694 MemSlot
& old_slot
= (*this)[slot_id
];
695 void* old_ptr
= old_slot
.mPtr
;
696 old_slot
.mPtr
= nullptr;
697 aSlot
.mPtr
= ::realloc_impl(old_ptr
, size
);
699 aSlot
.mRequest
= size
;
701 if (mCalculateSlop
) {
702 mTotalRequestedSize
+= size
;
703 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
708 void free(Buffer
& aArgs
, Buffer
& aResult
) {
710 die("Malformed input");
713 Buffer dummy
= aArgs
.SplitChar('#');
715 die("Malformed input");
717 size_t slot_id
= parseNumber(aArgs
);
718 MemSlot
& slot
= (*this)[slot_id
];
719 ::free_impl(slot
.mPtr
);
723 void memalign(Buffer
& aArgs
, Buffer
& aResult
) {
724 MemSlot
& aSlot
= SlotForResult(aResult
);
726 size_t alignment
= parseNumber(aArgs
.SplitChar(','));
727 size_t size
= parseNumber(aArgs
);
728 aSlot
.mPtr
= ::memalign_impl(alignment
, size
);
730 aSlot
.mRequest
= size
;
732 if (mCalculateSlop
) {
733 mTotalRequestedSize
+= size
;
734 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
739 void valloc(Buffer
& aArgs
, Buffer
& aResult
) {
740 MemSlot
& aSlot
= SlotForResult(aResult
);
742 size_t size
= parseNumber(aArgs
);
743 aSlot
.mPtr
= ::valloc_impl(size
);
745 aSlot
.mRequest
= size
;
747 if (mCalculateSlop
) {
748 mTotalRequestedSize
+= size
;
749 mTotalAllocatedSize
+= ::malloc_usable_size_impl(aSlot
.mPtr
);
754 void jemalloc_stats(Buffer
& aArgs
, Buffer
& aResult
) {
755 if (aArgs
|| aResult
) {
756 die("Malformed input");
759 jemalloc_stats_t stats
;
760 jemalloc_bin_stats_t bin_stats
[JEMALLOC_MAX_STATS_BINS
];
761 ::jemalloc_stats_internal(&stats
, bin_stats
);
764 size_t rss
= get_rss();
767 size_t num_objects
= 0;
768 size_t num_sloppy_objects
= 0;
769 size_t total_allocated
= 0;
770 size_t total_slop
= 0;
771 size_t large_slop
= 0;
772 size_t large_used
= 0;
773 size_t huge_slop
= 0;
774 size_t huge_used
= 0;
775 size_t bin_slop
[JEMALLOC_MAX_STATS_BINS
] = {0};
777 for (size_t slot_id
= 0; slot_id
< mNumUsedSlots
; slot_id
++) {
778 MemSlot
& slot
= mSlots
[slot_id
];
780 size_t used
= ::malloc_usable_size_impl(slot
.mPtr
);
781 size_t slop
= used
- slot
.mRequest
;
782 total_allocated
+= used
;
786 num_sloppy_objects
++;
789 if (used
<= stats
.page_size
/ 2) {
790 // We know that this is an inefficient linear search, but there's a
791 // small number of bins and this is simple.
792 for (unsigned i
= 0; i
< JEMALLOC_MAX_STATS_BINS
; i
++) {
793 auto& bin
= bin_stats
[i
];
794 if (used
== bin
.size
) {
799 } else if (used
<= stats
.large_max
) {
809 // This formula corresponds to the calculation of wasted (from committed and
810 // the other parameters) within jemalloc_stats()
811 size_t committed
= stats
.allocated
+ stats
.waste
+ stats
.page_cache
+
812 stats
.bookkeeping
+ stats
.bin_unused
;
814 FdPrintf(mStdErr
, "\n");
815 FdPrintf(mStdErr
, "Objects: %9zu\n", num_objects
);
816 FdPrintf(mStdErr
, "Slots: %9zu\n", mNumUsedSlots
);
817 FdPrintf(mStdErr
, "Ops: %9zu\n", mOps
);
818 FdPrintf(mStdErr
, "mapped: %9zu\n", stats
.mapped
);
819 FdPrintf(mStdErr
, "committed: %9zu\n", committed
);
822 FdPrintf(mStdErr
, "rss: %9zu\n", rss
);
825 FdPrintf(mStdErr
, "allocated: %9zu\n", stats
.allocated
);
826 FdPrintf(mStdErr
, "waste: %9zu\n", stats
.waste
);
827 FdPrintf(mStdErr
, "dirty: %9zu\n", stats
.page_cache
);
828 FdPrintf(mStdErr
, "bookkeep: %9zu\n", stats
.bookkeeping
);
829 FdPrintf(mStdErr
, "bin-unused: %9zu\n", stats
.bin_unused
);
830 FdPrintf(mStdErr
, "quantum-max: %9zu\n", stats
.quantum_max
);
831 FdPrintf(mStdErr
, "subpage-max: %9zu\n", stats
.page_size
/ 2);
832 FdPrintf(mStdErr
, "large-max: %9zu\n", stats
.large_max
);
833 if (mCalculateSlop
) {
834 size_t slop
= mTotalAllocatedSize
- mTotalRequestedSize
;
836 "Total slop for all allocations: %zuKiB/%zuKiB (%zu%%)\n",
837 slop
/ 1024, mTotalAllocatedSize
/ 1024,
838 percent(slop
, mTotalAllocatedSize
));
840 FdPrintf(mStdErr
, "Live sloppy objects: %zu/%zu (%zu%%)\n",
841 num_sloppy_objects
, num_objects
,
842 percent(num_sloppy_objects
, num_objects
));
843 FdPrintf(mStdErr
, "Live sloppy bytes: %zuKiB/%zuKiB (%zu%%)\n",
844 total_slop
/ 1024, total_allocated
/ 1024,
845 percent(total_slop
, total_allocated
));
847 FdPrintf(mStdErr
, "\n%8s %11s %10s %8s %9s %9s %8s\n", "bin-size",
848 "unused (c)", "total (c)", "used (c)", "non-full (r)", "total (r)",
850 for (auto& bin
: bin_stats
) {
852 FdPrintf(mStdErr
, "%8zu %8zuKiB %7zuKiB %7zu%% %12zu %9zu %7zu%%\n",
853 bin
.size
, bin
.bytes_unused
/ 1024, bin
.bytes_total
/ 1024,
854 percent(bin
.bytes_total
- bin
.bytes_unused
, bin
.bytes_total
),
855 bin
.num_non_full_runs
, bin
.num_runs
,
856 percent(bin
.num_runs
- bin
.num_non_full_runs
, bin
.num_runs
));
860 FdPrintf(mStdErr
, "\n%5s %8s %9s %7s\n", "bin", "slop", "used", "percent");
861 for (unsigned i
= 0; i
< JEMALLOC_MAX_STATS_BINS
; i
++) {
862 auto& bin
= bin_stats
[i
];
864 size_t used
= bin
.bytes_total
- bin
.bytes_unused
;
865 FdPrintf(mStdErr
, "%5zu %8zu %9zu %6zu%%\n", bin
.size
, bin_slop
[i
],
866 used
, percent(bin_slop
[i
], used
));
869 FdPrintf(mStdErr
, "%5s %8zu %9zu %6zu%%\n", "large", large_slop
, large_used
,
870 percent(large_slop
, large_used
));
871 FdPrintf(mStdErr
, "%5s %8zu %9zu %6zu%%\n", "huge", huge_slop
, huge_used
,
872 percent(huge_slop
, huge_used
));
874 print_distributions(stats
, bin_stats
);
879 * Create and print frequency distributions of memory requests.
881 void print_distributions(
882 jemalloc_stats_t
& stats
,
883 jemalloc_bin_stats_t (&bin_stats
)[JEMALLOC_MAX_STATS_BINS
]) {
884 // We compute distributions for all of the bins for small allocations
885 // (JEMALLOC_MAX_STATS_BINS) plus two more distributions for larger
887 Distribution dists
[JEMALLOC_MAX_STATS_BINS
+ 2];
889 unsigned last_size
= 0;
890 unsigned num_dists
= 0;
891 for (auto& bin
: bin_stats
) {
895 auto& dist
= dists
[num_dists
++];
897 if (bin
.size
<= 16) {
899 dist
= Distribution(bin
.size
, last_size
, 1);
900 } else if (bin
.size
<= stats
.quantum_max
) {
901 // 4 buckets, (4 bytes per bucket with a 16 byte quantum).
902 dist
= Distribution(bin
.size
, last_size
, stats
.quantum
/ 4);
905 dist
= Distribution(bin
.size
, last_size
, (bin
.size
- last_size
) / 16);
907 last_size
= bin
.size
;
911 dists
[num_dists
] = Distribution(stats
.page_size
, last_size
,
912 (stats
.page_size
- last_size
) / 16);
915 // Buckets are 1/4 of the page size (12 buckets).
917 Distribution(stats
.page_size
* 4, stats
.page_size
, stats
.page_size
/ 4);
920 MOZ_RELEASE_ASSERT(num_dists
<= JEMALLOC_MAX_STATS_BINS
+ 2);
922 for (size_t slot_id
= 0; slot_id
< mNumUsedSlots
; slot_id
++) {
923 MemSlot
& slot
= mSlots
[slot_id
];
925 for (size_t i
= 0; i
< num_dists
; i
++) {
926 if (slot
.mRequest
<= dists
[i
].maxSize()) {
927 dists
[i
].addRequest(slot
.mRequest
);
934 for (unsigned i
= 0; i
< num_dists
; i
++) {
935 dists
[i
].printDist(mStdErr
);
945 // On Linux we can determine the RSS of the heap area by examining the
947 mozilla::Maybe
<SMapsReader
> reader
= SMapsReader::open();
949 mGetRSSFailed
= true;
954 while (Maybe
<MemoryMap
> map
= reader
->readMap(mStdErr
)) {
955 if (map
->IsCandidate() && !mSlots
.ownsMapping(map
->mStart
) &&
956 !InitialMapsContains(map
->mStart
)) {
964 bool InitialMapsContains(uintptr_t aRangeStart
) {
965 for (unsigned i
= 0; i
< mNumInitialMaps
; i
++) {
966 MOZ_ASSERT(i
< MAX_INITIAL_MAPS
);
968 if (mInitialMaps
[i
] == aRangeStart
) {
976 void BuildInitialMapInfo() {
981 Maybe
<SMapsReader
> reader
= SMapsReader::open();
983 mGetRSSFailed
= true;
987 while (Maybe
<MemoryMap
> map
= reader
->readMap(mStdErr
)) {
988 if (map
->IsCandidate()) {
989 if (mNumInitialMaps
>= MAX_INITIAL_MAPS
) {
990 FdPrintf(mStdErr
, "Too many initial mappings, can't compute RSS\n");
991 mGetRSSFailed
= false;
995 mInitialMaps
[mNumInitialMaps
++] = map
->mStart
;
1002 MemSlot
& SlotForResult(Buffer
& aResult
) {
1003 /* Parse result value and get the corresponding slot. */
1004 Buffer dummy
= aResult
.SplitChar('=');
1005 Buffer dummy2
= aResult
.SplitChar('#');
1006 if (dummy
|| dummy2
) {
1007 die("Malformed input");
1010 size_t slot_id
= parseNumber(aResult
);
1011 mNumUsedSlots
= std::max(mNumUsedSlots
, slot_id
+ 1);
1013 return mSlots
[slot_id
];
1016 void MaybeCommit(MemSlot
& aSlot
) {
1018 // Write any byte, 0x55 isn't significant.
1019 memset(aSlot
.mPtr
, 0x55, aSlot
.mRequest
);
1026 // The number of slots that have been used. It is used to iterate over slots
1027 // without accessing those we haven't initialised.
1028 size_t mNumUsedSlots
= 0;
1031 size_t mTotalRequestedSize
= 0;
1032 size_t mTotalAllocatedSize
= 0;
1033 // Whether to calculate slop for all allocations over the runtime of a
1035 bool mCalculateSlop
= false;
1036 bool mDoMemset
= false;
1039 // If we have a failure reading smaps info then this is used to disable that
1041 bool mGetRSSFailed
= false;
1043 // The initial memory mappings are recorded here at start up. We exclude
1044 // memory in these mappings when computing RSS. We assume they do not grow
1045 // and that no regions are allocated near them, this is true because they'll
1046 // only record the .bss and .data segments from our binary and shared objects
1047 // or regions that logalloc-replay has created for MappedArrays.
1049 // 64 should be enough for anybody.
1050 static constexpr unsigned MAX_INITIAL_MAPS
= 64;
1051 uintptr_t mInitialMaps
[MAX_INITIAL_MAPS
];
1052 unsigned mNumInitialMaps
= 0;
1056 int main(int argc
, const char* argv
[]) {
1057 size_t first_pid
= 0;
1061 for (int i
= 1; i
< argc
; i
++) {
1062 const char* option
= argv
[i
];
1063 if (strcmp(option
, "-s") == 0) {
1064 // Do accounting to calculate allocation slop.
1065 replay
.enableSlopCalculation();
1066 } else if (strcmp(option
, "-c") == 0) {
1067 // Touch memory as we allocate it.
1068 replay
.enableMemset();
1070 fprintf(stderr
, "Unknown command line option: %s\n", option
);
1071 return EXIT_FAILURE
;
1076 replay
.BuildInitialMapInfo();
1079 /* Read log from stdin and dispatch function calls to the Replay instance.
1080 * The log format is essentially:
1081 * <pid> <tid> <function>([<args>])[=<result>]
1082 * <args> is a comma separated list of arguments.
1084 * The logs are expected to be preprocessed so that allocations are
1085 * attributed a tracking slot. The input is trusted not to have crazy
1086 * values for these slot numbers.
1088 * <result>, as well as some of the args to some of the function calls are
1089 * such slot numbers.
1092 Buffer line
= reader
.ReadLine();
1098 size_t pid
= parseNumber(line
.SplitChar(' '));
1103 /* The log may contain data for several processes, only entries for the
1104 * very first that appears are treated. */
1105 if (first_pid
!= pid
) {
1109 /* The log contains thread ids for manual analysis, but we just ignore them
1111 parseNumber(line
.SplitChar(' '));
1113 Buffer func
= line
.SplitChar('(');
1114 Buffer args
= line
.SplitChar(')');
1116 if (func
== Buffer("jemalloc_stats")) {
1117 replay
.jemalloc_stats(args
, line
);
1118 } else if (func
== Buffer("free")) {
1119 replay
.free(args
, line
);
1120 } else if (func
== Buffer("malloc")) {
1121 replay
.malloc(args
, line
);
1122 } else if (func
== Buffer("posix_memalign")) {
1123 replay
.posix_memalign(args
, line
);
1124 } else if (func
== Buffer("aligned_alloc")) {
1125 replay
.aligned_alloc(args
, line
);
1126 } else if (func
== Buffer("calloc")) {
1127 replay
.calloc(args
, line
);
1128 } else if (func
== Buffer("realloc")) {
1129 replay
.realloc(args
, line
);
1130 } else if (func
== Buffer("memalign")) {
1131 replay
.memalign(args
, line
);
1132 } else if (func
== Buffer("valloc")) {
1133 replay
.valloc(args
, line
);
1135 die("Malformed input");