2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/hardware-counter.h"
19 #ifndef NO_HARDWARE_COUNTERS
21 #include <folly/ScopeGuard.h>
23 #include "hphp/util/alloc.h"
24 #include "hphp/util/logger.h"
25 #include "hphp/util/service-data.h"
26 #include "hphp/util/struct-log.h"
27 #include "hphp/util/timer.h"
36 #include <sys/ioctl.h>
37 #include <asm/unistd.h>
38 #include <sys/prctl.h>
39 #include <linux/perf_event.h>
41 #include <folly/String.h>
42 #include <folly/Memory.h>
43 #include <folly/portability/SysMman.h>
44 #include <folly/portability/Unistd.h>
47 ///////////////////////////////////////////////////////////////////////////////
// Thread-local HardwareCounter instance backing the static Get*/Set*/Inc*
// entry points below.
49 THREAD_LOCAL_NO_CHECK(HardwareCounter
, HardwareCounter::s_counter
);
// Process-wide configuration, written from HardwareCounter::Init() (and
// the RecordSubprocessTimes/ExcludeKernel helpers) and read when
// per-thread counters are created.
static bool s_recordSubprocessTimes = false;  // pe.inherit: include child tasks' counts
static bool s_excludeKernel = false;          // pe.exclude_kernel: skip kernel-mode events
// Master enable switch checked by useCounters(); explicitly initialized
// for consistency with the sibling flags (zero-initialization already
// made it false, so behavior is unchanged).
static bool s_profileHWEnable = false;
static bool s_fastReads = false;              // mmap the perf page; read counters in user space
static int s_exportInterval = -1;             // export period in seconds; -1 disables time series
static std::string s_profileHWEvents;         // comma-separated custom event spec from config
// True when hardware counting is globally enabled (flag set via Init()).
58 static inline bool useCounters() {
62 return s_profileHWEnable
;
67 * Turning this on helps with the resolution of multiplexed counters
68 * (provided cap_user_time is true in the
69 * perf_event_mmap_page). However, experiments show that periodically,
70 * time_offset and the result of rdtsc "jump" (this is probably when
71 * the thread migrates from one cpu to another); when they do, they
72 * jump by appropriate amounts so that enabled and runtime progress
73 * monotonically (and by sensible values) - but they don't seem to
74 * jump atomically, so there can be one sample where only one has
75 * jumped. This can cause a temporary blip in enabled and or runtime.
77 * I'm adding this so we can choose to *not* use rdtsc, and avoid the
80 * It turns out that doing so does degrade the accuracy when there's a
81 * lot of multiplexing going on, and a bit more experimentation shows
82 * that the blip is only really a problem if we record it in the
83 * baseline during a reset (since that then affects every read until
84 * the next reset), so for now, turn it on but don't use it for
// Whether reads may use the mmap page's time fields (time_offset /
// time_mult / time_shift) together with rdtsc to improve the resolution
// of multiplexed counters; see the long discussion above. readRaw()
// still skips this path when taking a reset baseline, to avoid
// recording a transient "blip" there.
static constexpr bool use_cap_time = true;
89 #if defined(__x86_64__)
// x86: compiler-only reordering fence (empty asm with a memory clobber).
90 #define barrier() __asm__ volatile("" ::: "memory")
91 #elif defined(__aarch64__)
// aarch64: "dmb ish" is a data memory barrier (inner shareable domain);
// "isb" is an instruction synchronization barrier.
92 #define barrier() asm volatile("dmb ish" : : : "memory")
93 #define isb() asm volatile("isb" : : : "memory")
// Read the x86 time-stamp counter into a 64-bit value; unsupported
// architectures fail the always_assert below.
98 static uint64_t rdtsc() {
99 #if defined(__x86_64__)
101 asm volatile ( "rdtsc\n\t" // Returns the time in EDX:EAX.
102 "shl $32, %%rdx\n\t" // Shift the upper bits left.
103 "or %%rdx, %0" // 'Or' in the lower bits.
109 always_assert(false);
// Read performance-monitoring counter `counter` directly from user
// space: rdpmc on x86; PMU system registers on aarch64 (the cycle
// counter is special-cased, other counters are selected via pmselr_el0
// with counter - 1). Unsupported architectures fail the always_assert.
112 static uint64_t rdpmc(uint32_t counter
) {
113 #if defined(__x86_64__)
116 __asm__
volatile("rdpmc" : "=a" (low
), "=d" (high
) : "c" (counter
));
// Combine the 32-bit halves returned in EAX/EDX.
117 return low
| ((uint64_t)high
<< 32);
118 #elif defined(__aarch64__)
120 if (counter
== PERF_COUNT_HW_CPU_CYCLES
)
121 asm volatile("mrs %0, pmccntr_el0" : "=r" (ret
));
123 asm volatile("msr pmselr_el0, %0" : : "r" ((uint64_t)(counter
-1)));
124 asm volatile("mrs %0, pmxevcntr_el0" : "=r" (ret
));
130 always_assert(false);
// Create the exported time series used to publish values for the
// counter named `name`, at the configured export interval. Bails out
// (no series) when s_exportInterval is -1, per the comment below.
133 static ServiceData::ExportedTimeSeries
*
134 createTimeSeries(const std::string
& name
) {
135 assertx(!name
.empty());
137 if (s_exportInterval
== -1) {
138 // We're initializing counters for the main thread in a server process,
139 // which won't be running requests and shouldn't have any time series. Or
140 // someone manually disabled time series exporting in the config. Either
141 // way, bail out early.
// Export both the average and the sum of the recorded values.
145 static const std::vector
<ServiceData::StatsType
> exportTypes
{
146 ServiceData::StatsType::AVG
,
147 ServiceData::StatsType::SUM
,
150 return ServiceData::createTimeSeries(
153 {std::chrono::seconds(s_exportInterval
)}
// Wraps a single perf_event file descriptor plus the exported time
// series (overall and non-psp variants) publishing its values.
157 struct HardwareCounterImpl
{
158 HardwareCounterImpl(int type
, unsigned long config
, const char* desc
)
159 : m_desc(desc
? desc
: "")
160 , m_timeSeries(createTimeSeries(m_desc
))
161 , m_timeSeriesNonPsp(createTimeSeries(m_desc
+ "-nonpsp")) {
// Fill in the perf_event_attr later handed to perf_event_open().
163 pe
.size
= sizeof (struct perf_event_attr
);
165 pe
.inherit
= s_recordSubprocessTimes
;
168 pe
.exclude_kernel
= s_excludeKernel
;
// Have the kernel report enabled/running times alongside the count so
// multiplexed counters can be scaled when read (see readRaw()).
171 PERF_FORMAT_TOTAL_TIME_ENABLED
|PERF_FORMAT_TOTAL_TIME_RUNNING
;
174 ~HardwareCounterImpl() {
// Publish this counter's current value to the structured log entry (if
// provided) and to the psp or non-psp time series (if created).
178 void updateServiceData(StructuredLogEntry
* entry
, bool includingPsp
) {
179 auto const value
= read();
180 auto timeSeries
= includingPsp
? m_timeSeries
: m_timeSeriesNonPsp
;
183 if (entry
) entry
->setInt(m_desc
, value
);
184 if (timeSeries
) timeSeries
->addValue(value
);
// Open and enable the perf event (fragment of the enclosing setup
// routine, whose signature is not visible here).
190 * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
191 * int cpu, int group_fd, unsigned long flags)
195 m_fd
= syscall(__NR_perf_event_open
, &pe
, 0, -1, -1, 0);
197 Logger::FWarning("HardwareCounter: perf_event_open failed with: {}",
198 folly::errnoStr(errno
));
// NOTE(review): F_SETFD takes FD_CLOEXEC, not O_CLOEXEC; passing
// O_CLOEXEC here sets a different bit value -- confirm intent.
203 fcntl(m_fd
, F_SETFD
, O_CLOEXEC
);
205 if (ioctl(m_fd
, PERF_EVENT_IOC_ENABLE
, 0) < 0) {
206 Logger::FWarning("perf_event failed to enable: {}",
207 folly::errnoStr(errno
));
// Fast user-space reads: map the perf metadata page so readRaw() can
// use rdpmc instead of a read(2) syscall.
213 if (!s_fastReads
) return;
215 auto const base
= mmap(nullptr, s_pageSize
, PROT_READ
| PROT_WRITE
,
216 MAP_SHARED
, m_fd
, 0);
217 if (base
== MAP_FAILED
) {
218 Logger::FWarning("HardwareCounter: failed to mmap perf_event: {}",
219 folly::errnoStr(errno
));
221 m_meta
= static_cast<perf_event_mmap_page
*>(base
);
// Fall back to syscall reads if the kernel doesn't permit user-space
// rdpmc (or, when use_cap_time, user-space time conversion).
222 if (!m_meta
->cap_user_rdpmc
||
223 (use_cap_time
&& !m_meta
->cap_user_time
)) {
224 munmap(m_meta
, s_pageSize
);
227 ioctl(m_fd
, PERF_EVENT_IOC_RESET
, 0);
// Fragment of the value-read logic: subtract the baseline captured at
// reset(), then handle wraparound and multiplexing scaling.
235 if (auto const width
= readRaw(values
)) {
236 values
[0] -= reset_values
[0];
237 values
[1] -= reset_values
[1];
238 values
[2] -= reset_values
[2];
// The counter is `width` bits wide; a delta above half the range is
// treated as wraparound.
240 auto const mask
= (1uLL << width
) - 1;
242 if (values
[0] > (mask
>> 1)) return extra
;
243 } else if (values
[0] > std::numeric_limits
<int64_t>::max()) {
// Equal time fields => the counter ran the whole time: no scaling.
246 if (values
[1] == values
[2]) {
247 return values
[0] + extra
;
// Otherwise scale the raw count by the values[1]/values[2] ratio
// (presumably enabled/running time as reported by readRaw -- confirm).
252 int64_t value
= (double)values
[0] * values
[1] / values
[2];
253 return value
+ extra
;
// Manually add `amount` to this counter's value.
258 void incCount(int64_t amount
) {
263 * read current value, enabled time, and running time for the
266 * returns the width of the counter in bits, or zero on failure.
// Fast path: read everything from the mmap'd perf page in user space,
// retrying (seqlock-style) until m_meta->lock is stable across the
// whole read.
268 uint32_t readRaw(uint64_t* values
, bool forReset
= false) {
269 if (m_err
|| !useCounters()) return 0;
272 // try to read the values in user space
274 uint32_t seq
, time_mult
, time_shift
, idx
, width
;
275 uint64_t cyc
, time_offset
;
276 uint64_t count
, enabled
, running
;
281 enabled
= m_meta
->time_enabled
;
282 running
= m_meta
->time_running
;
284 if (use_cap_time
&& !forReset
) {
285 assertx(m_meta
->cap_user_time
);
// Snapshot the kernel's tsc-to-time conversion parameters.
288 time_offset
= m_meta
->time_offset
;
289 time_mult
= m_meta
->time_mult
;
290 time_shift
= m_meta
->time_shift
;
294 count
= m_meta
->offset
;
295 width
= m_meta
->pmc_width
;
297 assertx(m_meta
->cap_user_rdpmc
);
// The rdpmc index published in the mmap page is 1-based.
299 count
+= rdpmc(idx
- 1);
303 } while (m_meta
->lock
!= seq
);
307 if (!idx
&& !count
) {
308 // enabled and running don't get meaningful values until
309 // the first time the counter is enabled. This only really
310 // matters if this call is being used to initialize the
311 // reset_values, because we'll get garbage values for the
313 enabled
= running
= 0;
// Convert the tsc value to time and extend enabled/running up to
// "now"; see the use_cap_time discussion near the top of the file.
318 if (use_cap_time
&& !forReset
) {
319 auto const quot
= (cyc
>> time_shift
);
320 auto const rem
= cyc
& (((uint64_t)1 << time_shift
) - 1);
321 auto const delta
= time_offset
+ quot
* time_mult
+
322 ((rem
* time_mult
) >> time_shift
);
325 if (idx
) running
+= delta
;
// Slow path: no usable mmap page -- fall back to a read(2) syscall,
// which yields {count, time_enabled, time_running} per the read_format
// flags set in the constructor.
335 if (m_fd
<= 0) return 0;
337 * read the count + scaling values
339 * It is not necessary to stop an event to read its value
341 auto ret
= ::read(m_fd
, values
, sizeof(*values
) * 3);
// The syscall path has no pmc width info; report a full 64 bits.
342 return ret
== sizeof(*values
) * 3 ? 64 : 0;
// Reset fragment: zero the kernel counter (only needed when not using
// the mmap'd fast-read page) and snapshot a fresh baseline.
346 if (m_err
|| !useCounters()) return;
350 if (!m_meta
&& ioctl(m_fd
, PERF_EVENT_IOC_RESET
, 0) < 0) {
351 Logger::FWarning("perf_event failed to reset with: {}",
352 folly::errnoStr(errno
));
// Record the post-reset values so later reads can subtract them.
356 if (!readRaw(reset_values
, true)) {
357 Logger::FWarning("perf_event failed to reset with: {}",
358 folly::errnoStr(errno
));
// Data members.
371 bool ever_active
{false};
372 ServiceData::ExportedTimeSeries
* m_timeSeries
;
373 ServiceData::ExportedTimeSeries
* m_timeSeriesNonPsp
;
374 struct perf_event_attr pe
{};
// Baseline captured at reset time; subtracted from raw reads.
375 uint64_t reset_values
[3];
// mmap'd perf metadata page when fast reads are enabled, else null.
377 perf_event_mmap_page
* m_meta
{};
// Cleanup fragment (enclosing routine's signature not visible here):
// unmap the perf metadata page.
384 munmap(m_meta
, s_pageSize
);
// Per-thread setup: always count instructions; when no custom event
// list is configured, also count L1D loads and stores. Otherwise the
// configured events are installed via setPerfEvents below.
391 HardwareCounter::HardwareCounter()
392 : m_countersSet(false) {
393 m_instructionCounter
= std::make_unique
<HardwareCounterImpl
>(
394 PERF_TYPE_HARDWARE
, PERF_COUNT_HW_INSTRUCTIONS
, "instructions"
396 if (s_profileHWEvents
.empty()) {
// L1D cache events, hw_cache_id | (op << 8) encoding.
397 m_loadCounter
= std::make_unique
<HardwareCounterImpl
>(
399 PERF_COUNT_HW_CACHE_L1D
| ((PERF_COUNT_HW_CACHE_OP_READ
) << 8),
402 m_storeCounter
= std::make_unique
<HardwareCounterImpl
>(
404 PERF_COUNT_HW_CACHE_L1D
| ((PERF_COUNT_HW_CACHE_OP_WRITE
) << 8),
408 m_countersSet
= true;
409 setPerfEvents(s_profileHWEvents
);
413 HardwareCounter::~HardwareCounter() {
// Enable pe.inherit so child tasks' counts are included (the flag is
// consumed in HardwareCounterImpl's constructor).
416 void HardwareCounter::RecordSubprocessTimes() {
417 s_recordSubprocessTimes
= true;
// Enable pe.exclude_kernel so kernel-mode events are not counted.
420 void HardwareCounter::ExcludeKernel() {
421 s_excludeKernel
= true;
// One-time global configuration. These flags feed per-thread counter
// construction, so this is presumably called before counters are
// created -- confirm against callers.
424 void HardwareCounter::Init(bool enable
, const std::string
& events
,
428 int exportInterval
) {
429 s_profileHWEnable
= enable
;
430 s_profileHWEvents
= events
;
431 s_recordSubprocessTimes
= subProc
;
432 s_excludeKernel
= excludeKernel
;
// NOTE(review): trailing comma (not ';') -- the comma operator chains
// into the next assignment, so behavior is unchanged, but this looks
// like a typo worth fixing.
433 s_fastReads
= fastReads
,
434 s_exportInterval
= exportInterval
;
437 void HardwareCounter::Reset() {
// Re-baseline every active counter. The load/store counters only exist
// while no custom event set has replaced them (m_countersSet tracks
// that), hence the inverted check.
441 void HardwareCounter::reset() {
442 m_instructionCounter
->reset();
443 if (!m_countersSet
) {
444 m_storeCounter
->reset();
445 m_loadCounter
->reset();
447 for (unsigned i
= 0; i
< m_counters
.size(); i
++) {
448 m_counters
[i
]->reset();
// Static wrappers read the calling thread's counters via s_counter.
452 int64_t HardwareCounter::GetInstructionCount() {
453 return s_counter
->getInstructionCount();
456 int64_t HardwareCounter::getInstructionCount() {
457 return m_instructionCounter
->read();
460 int64_t HardwareCounter::GetLoadCount() {
461 return s_counter
->getLoadCount();
// Load/store counters may be absent (custom events); report 0 then.
464 int64_t HardwareCounter::getLoadCount() {
465 return m_loadCounter
? m_loadCounter
->read() : 0;
468 int64_t HardwareCounter::GetStoreCount() {
469 return s_counter
->getStoreCount();
472 int64_t HardwareCounter::getStoreCount() {
473 return m_storeCounter
? m_storeCounter
->read() : 0;
// Manual adjustments to the calling thread's counters. Load/store
// increments only apply while the default counters are still in place.
476 void HardwareCounter::IncInstructionCount(int64_t amount
) {
477 s_counter
->m_instructionCounter
->incCount(amount
);
480 void HardwareCounter::IncLoadCount(int64_t amount
) {
481 if (!s_counter
->m_countersSet
) {
482 s_counter
->m_loadCounter
->incCount(amount
);
486 void HardwareCounter::IncStoreCount(int64_t amount
) {
487 if (!s_counter
->m_countersSet
) {
488 s_counter
->m_storeCounter
->incCount(amount
);
// Table mapping event-spec name fragments to perf (type, config)
// pairs; matched by prefix in findEvent() below.
492 struct PerfTable perfTable
[] = {
493 /* PERF_TYPE_HARDWARE events */
494 #define PC(n) PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
495 { "cpu-cycles", PC(CPU_CYCLES
) },
496 { "cycles", PC(CPU_CYCLES
) },
497 { "instructions", PC(INSTRUCTIONS
) },
498 { "cache-references", PC(CACHE_REFERENCES
) },
499 { "cache-misses", PC(CACHE_MISSES
) },
500 { "branch-instructions", PC(BRANCH_INSTRUCTIONS
) },
501 { "branches", PC(BRANCH_INSTRUCTIONS
) },
502 { "branch-misses", PC(BRANCH_MISSES
) },
503 { "bus-cycles", PC(BUS_CYCLES
) },
504 { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND
) },
505 { "stalled-cycles-backend", PC(STALLED_CYCLES_BACKEND
) },
507 /* PERF_TYPE_HW_CACHE hw_cache_id */
508 #define PCC(n) PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
509 { "L1-dcache-", PCC(L1D
) },
510 { "L1-icache-", PCC(L1I
) },
512 { "dTLB-", PCC(DTLB
) },
513 { "iTLB-", PCC(ITLB
) },
514 { "branch-", PCC(BPU
) },
516 /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
517 #define PCCO(n, m) PERF_TYPE_HW_CACHE, \
518 ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
519 (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
520 { "loads", PCCO(READ
, ACCESS
) },
521 { "load-misses", PCCO(READ
, MISS
) },
522 { "stores", PCCO(WRITE
, ACCESS
) },
523 { "store-misses", PCCO(WRITE
, MISS
) },
524 { "prefetches", PCCO(PREFETCH
, ACCESS
) },
525 { "prefetch-misses", PCCO(PREFETCH
, MISS
) }
// Prefix-match `event` against the first `len` entries of table `t`;
// on a hit, the matched prefix length is stored in *match_len.
528 static int findEvent(const char *event
, struct PerfTable
*t
,
529 int len
, int *match_len
) {
532 for (i
= 0; i
< len
; i
++) {
533 if (!strncmp(event
, t
[i
].name
, strlen(t
[i
].name
))) {
534 *match_len
= strlen(t
[i
].name
);
// Field extractors for the CPUID(1) signature returned in EAX:
// stepping[3:0], model[7:4], family[11:8], type[13:12].
541 #define CPUID_STEPPING(x) ((x) & 0xf)
542 #define CPUID_MODEL(x) (((x) & 0xf0) >> 4)
543 #define CPUID_FAMILY(x) (((x) & 0xf00) >> 8)
544 #define CPUID_TYPE(x) (((x) & 0x3000) >> 12)
546 // hack to get LLC counters on perflab frc machines
// Identify the target CPU by its CPUID(1) signature fields.
// NOTE(review): only the low model nibble is compared (extended-model
// bits ignored) -- confirm this matches only the intended parts.
547 static bool isIntelE5_2670() {
550 asm volatile ("cpuid" : "=a"(x
): "a"(1) : "ebx", "ecx", "edx");
551 return CPUID_STEPPING(x
) == 6 && CPUID_MODEL(x
) == 0xd
552 && CPUID_FAMILY(x
) == 6 && CPUID_TYPE(x
) == 0;
// On E5-2670 machines, rewrite "LLC-load*" events into raw PMU
// encodings (the raw config values are assigned in the branches below).
558 static void checkLLCHack(const char* event
, uint32_t& type
, uint64_t& config
) {
559 if (!strncmp(event
, "LLC-load", 8) && isIntelE5_2670()) {
560 type
= PERF_TYPE_RAW
;
561 if (!strncmp(&event
[4], "loads", 5)) {
563 } else if (!strncmp(&event
[4], "load-misses", 11)) {
// Parse one event spec: match table prefixes (OR-ing the configs of
// successive fragments), apply the LLC hack, then fall back to a raw
// "r<hex>" spec. On success the counter is created and added to
// m_counters; the default load/store counters are dropped the first
// time a custom event is installed.
569 bool HardwareCounter::addPerfEvent(const char* event
) {
574 const char* ev
= event
;
576 while ((i
= findEvent(ev
, perfTable
,
577 sizeof(perfTable
)/sizeof(struct PerfTable
),
582 type
= perfTable
[i
].type
;
// All matched fragments must come from the same perf event type.
583 } else if (type
!= perfTable
[i
].type
) {
584 Logger::FWarning("failed to find perf event: {}", event
);
587 config
|= perfTable
[i
].config
;
591 checkLLCHack(event
, type
, config
);
593 // Check if we have a raw spec.
594 if (!found
&& event
[0] == 'r' && event
[1] != 0) {
595 config
= strtoull(event
+ 1, const_cast<char**>(&ev
), 16);
598 type
= PERF_TYPE_RAW
;
603 Logger::FWarning("failed to find perf event: {}", event
);
606 auto hwc
= std::make_unique
<HardwareCounterImpl
>(type
, config
, event
);
608 Logger::FWarning("failed to set perf event: {}", event
);
611 m_counters
.emplace_back(std::move(hwc
));
612 if (!m_countersSet
) {
613 // reset load and store counters. This is because
614 // perf does not seem to handle more than three counters
616 m_loadCounter
.reset();
617 m_storeCounter
.reset();
618 m_countersSet
= true;
// True if a counter with this exact description is already installed.
623 bool HardwareCounter::eventExists(const char *event
) {
624 // hopefully m_counters set is small, so a linear scan does not hurt
625 for(unsigned i
= 0; i
< m_counters
.size(); i
++) {
626 if (!strcmp(event
, m_counters
[i
]->m_desc
.c_str())) {
// Install a comma-separated list of event specs, skipping any that are
// already present.
633 bool HardwareCounter::setPerfEvents(folly::StringPiece sevents
) {
634 // Make a copy of the string for use with strtok.
635 auto const sevents_buf
= static_cast<char*>(malloc(sevents
.size() + 1));
636 SCOPE_EXIT
{ free(sevents_buf
); };
637 memcpy(sevents_buf
, sevents
.data(), sevents
.size());
638 sevents_buf
[sevents
.size()] = '\0';
640 char* strtok_buf
= nullptr;
641 char* s
= strtok_r(sevents_buf
, ",", &strtok_buf
);
643 if (!eventExists(s
) && !addPerfEvent(s
)) {
646 s
= strtok_r(nullptr, ",", &strtok_buf
);
// Static wrapper targeting the calling thread's counters.
651 bool HardwareCounter::SetPerfEvents(folly::StringPiece events
) {
652 return s_counter
->setPerfEvents(events
);
655 void HardwareCounter::clearPerfEvents() {
// Static wrapper targeting the calling thread's counters.
659 void HardwareCounter::ClearPerfEvents() {
660 s_counter
->clearPerfEvents();
// Publish every active counter's value to the log entry / time series.
663 void HardwareCounter::updateServiceData(StructuredLogEntry
* entry
,
665 forEachCounter([entry
,includingPsp
](HardwareCounterImpl
& counter
) {
666 counter
.updateServiceData(entry
, includingPsp
);
670 void HardwareCounter::UpdateServiceData(const timespec
& cpu_begin
,
671 const timespec
& wall_begin
,
672 StructuredLogEntry
* entry
,
674 // The begin timespec should be what was recorded at the beginning of the
675 // request, so we subtract that out from the current measurement. The
676 // perf-based counters owned by this file are reset to 0 at the same time as
677 // the begin timespec is recorded, so there's no subtraction needed for
679 struct timespec cpu_now
;
680 gettime(CLOCK_THREAD_CPUTIME_ID
, &cpu_now
);
682 s_counter
->updateServiceData(entry
, includingPsp
);
// CPU time spent since cpu_begin, in microseconds.
684 static auto cpuTimeSeries
= createTimeSeries("cpu-time-us");
685 static auto cpuTimeNonPspSeries
= createTimeSeries("cpu-time-us-nonpsp");
686 auto cpu_series
= includingPsp
? cpuTimeSeries
: cpuTimeNonPspSeries
;
687 auto const cpuTimeUs
= gettime_diff_us(cpu_begin
, cpu_now
);
689 if (entry
) entry
->setInt("cpu-time-us", cpuTimeUs
);
690 if (cpu_series
) cpu_series
->addValue(cpuTimeUs
);
// Wall-clock time since wall_begin; only recorded when positive.
693 struct timespec wall_now
;
694 Timer::GetMonotonicTime(wall_now
);
695 static auto wallTimeSeries
= createTimeSeries("wall-time-us");
696 static auto wallTimeNonPspSeries
= createTimeSeries("wall-time-us-nonpsp");
697 auto wall_series
= includingPsp
? wallTimeSeries
: wallTimeNonPspSeries
;
698 auto const wallTimeUs
= gettime_diff_us(wall_begin
, wall_now
);
699 if (wallTimeUs
> 0) {
700 if (entry
) entry
->setInt("wall-time-us", wallTimeUs
);
701 if (wall_series
) wall_series
->addValue(wallTimeUs
);
704 if (entry
) entry
->setInt("includingPsp", includingPsp
);
// Invoke `f(desc, value, data)` for each active counter.
707 void HardwareCounter::getPerfEvents(PerfEventCallback f
, void* data
) {
708 forEachCounter([f
, data
](HardwareCounterImpl
& counter
) {
709 f(counter
.m_desc
, counter
.read(), data
);
// Apply `func` to every live counter: instructions, the default
// load/store pair (when still present), then any custom counters.
714 void HardwareCounter::forEachCounter(F func
) {
715 func(*m_instructionCounter
);
716 if (!m_countersSet
) {
717 func(*m_loadCounter
);
718 func(*m_storeCounter
);
720 for (auto& counter
: m_counters
) func(*counter
);
// Static wrapper targeting the calling thread's counters.
723 void HardwareCounter::GetPerfEvents(PerfEventCallback f
, void* data
) {
724 s_counter
->getPerfEvents(f
, data
);
727 ///////////////////////////////////////////////////////////////////////////////
731 #else // NO_HARDWARE_COUNTERS
734 ///////////////////////////////////////////////////////////////////////////////
// Stub build (NO_HARDWARE_COUNTERS): a single plain global instance.
736 HardwareCounter
HardwareCounter::s_counter
;
738 ///////////////////////////////////////////////////////////////////////////////
741 #endif // NO_HARDWARE_COUNTERS