2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/hardware-counter.h"
19 #ifndef NO_HARDWARE_COUNTERS
21 #include <folly/ScopeGuard.h>
23 #include "hphp/util/logger.h"
32 #include <sys/ioctl.h>
33 #include <asm/unistd.h>
34 #include <sys/prctl.h>
35 #include <linux/perf_event.h>
37 #include <folly/String.h>
38 #include <folly/Memory.h>
39 #include <folly/portability/SysMman.h>
40 #include <folly/portability/Unistd.h>
43 ///////////////////////////////////////////////////////////////////////////////
// Defines HardwareCounter::s_counter through the project's
// IMPLEMENT_THREAD_LOCAL_NO_CHECK macro (presumably per-thread storage, going
// by the macro name; the macro itself is defined elsewhere - confirm).
45 IMPLEMENT_THREAD_LOCAL_NO_CHECK(HardwareCounter
,
46 HardwareCounter::s_counter
);
// File-local configuration, written by HardwareCounter::Init() and
// HardwareCounter::RecordSubprocessTimes() later in this file.
// Copied into perf_event_attr::inherit when a HardwareCounterImpl is built.
48 static bool s_recordSubprocessTimes
= false;
// Value returned by useCounters(); gates readRaw() and the reset path.
49 static bool s_profileHWEnable
;
// Event list; the HardwareCounter constructor tests it with empty() and
// passes it to setPerfEvents().
50 static std::string s_profileHWEvents
;
// Tells callers whether hardware counters should be used. The only return
// visible here yields s_profileHWEnable.
// NOTE(review): the embedded line numbers jump from 52 to 56 and the end of the
// function is not shown, so other return paths may exist - confirm.
52 static inline bool useCounters() {
56 return s_profileHWEnable
;
// One Linux perf_event counter. Visible state: the perf_event_attr used to
// open the event (pe), the descriptor returned by perf_event_open (m_fd), an
// error indicator (m_err; non-zero makes readRaw() and the reset path bail
// out), a text label (m_desc) and a baseline snapshot (reset_values) that
// readRaw() subtracts from every later reading.
// NOTE(review): several lines of this struct are absent from this excerpt (the
// embedded line numbers are not contiguous); comments cover only what is shown.
60 struct HardwareCounterImpl
{
// desc is an optional label; nullptr is stored as an empty m_desc.
// type/config are presumably copied into pe - those assignments are not
// visible here (TODO confirm).
61 HardwareCounterImpl(int type
, unsigned long config
,
62 const char* desc
= nullptr)
63 : m_desc(desc
? desc
: ""), m_err(0), m_fd(-1), inited(false) {
// Start from an all-zero attribute block, then fill in individual fields.
64 memset (&pe
, 0, sizeof (struct perf_event_attr
));
66 pe
.size
= sizeof (struct perf_event_attr
);
// Counts from child tasks are included only when the file-level flag is set.
68 pe
.inherit
= s_recordSubprocessTimes
;
// 0 = do not exclude kernel-mode events.
71 pe
.exclude_kernel
= 0;
// Read-format flags (the left-hand side of this assignment is not visible).
// With both flags set, a read() returns three uint64 values: count,
// time_enabled, time_running - matching the 3-element reads below.
74 PERF_FORMAT_TOTAL_TIME_ENABLED
|PERF_FORMAT_TOTAL_TIME_RUNNING
;
// Destructor; its body is not visible in this excerpt.
77 ~HardwareCounterImpl() {
83 * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
84 * int cpu, int group_fd, unsigned long flags)
// pid = 0, cpu = -1: measure the calling thread on any CPU; group_fd = -1: no
// event group; flags = 0.
88 m_fd
= syscall(__NR_perf_event_open
, &pe
, 0, -1, -1, 0);
90 Logger::Warning("perf_event_open failed with: %s",
91 folly::errnoStr(errno
).c_str());
// NOTE(review): F_SETFD takes descriptor flags (FD_CLOEXEC), not open(2) flags.
// O_CLOEXEC has a different numeric value on Linux, so this call most likely
// does not set close-on-exec - verify and switch to FD_CLOEXEC.
96 fcntl(m_fd
, F_SETFD
, O_CLOEXEC
);
// Start counting; a failure is logged with the errno text.
98 if (ioctl(m_fd
, PERF_EVENT_IOC_ENABLE
, 0) < 0) {
99 Logger::Warning("perf_event failed to enable: %s",
100 folly::errnoStr(errno
).c_str());
// Scaled read (enclosing function header not visible): values[0] is the
// count, values[1] time_enabled, values[2] time_running. The count is scaled
// by enabled/running; a zero running time yields 0. `extra` is added on top
// (presumably accumulated by incCount - its body is not shown).
110 if (readRaw(values
)) {
111 if (!values
[2]) return 0;
112 int64_t value
= (double)values
[0] * values
[1] / values
[2];
113 return value
+ extra
;
// Body not visible in this excerpt.
118 void incCount(int64_t amount
) {
// Reads the three raw values into `values` and rebases them against
// reset_values. Returns false straight away when the counter is in error or
// counters are disabled; other return paths are not visible here.
122 bool readRaw(uint64_t* values
) {
123 if (m_err
|| !useCounters()) return false;
128 * read the count + scaling values
130 * It is not necessary to stop an event to read its value
132 auto ret
= ::read(m_fd
, values
, sizeof(*values
) * 3);
133 if (ret
== sizeof(*values
) * 3) {
134 values
[0] -= reset_values
[0];
135 values
[1] -= reset_values
[1];
// The running time is rebased only when it exceeds the baseline (the
// alternative branch, if any, is not visible).
136 if (values
[2] > reset_values
[2]) {
137 values
[2] -= reset_values
[2];
// Reset path (function header not visible): no-op when the counter is in
// error or counters are disabled.
148 if (m_err
|| !useCounters()) return;
// Zero the kernel-side count ...
152 if (ioctl (m_fd
, PERF_EVENT_IOC_RESET
, 0) < 0) {
153 Logger::Warning("perf_event failed to reset with: %s",
154 folly::errnoStr(errno
).c_str());
// ... then snapshot the current values as the new baseline. Per
// perf_event_open(2), IOC_RESET does not reset time_enabled/time_running,
// which is why a baseline is kept and subtracted in readRaw().
158 auto ret
= ::read(m_fd
, reset_values
, sizeof(reset_values
));
159 if (ret
!= sizeof(reset_values
)) {
160 Logger::Warning("perf_event failed to reset with: %s",
161 folly::errnoStr(errno
).c_str());
// Attribute block passed to perf_event_open.
173 struct perf_event_attr pe
;
// Baseline {count, time_enabled, time_running} captured by the reset path.
175 uint64_t reset_values
[3];
// Counter preconfigured for the generic hardware event
// PERF_COUNT_HW_INSTRUCTIONS; no description label is passed.
186 struct InstructionCounter
: HardwareCounterImpl
{
187 InstructionCounter() :
188 HardwareCounterImpl(PERF_TYPE_HARDWARE
, PERF_COUNT_HW_INSTRUCTIONS
) {}
// Counter preconfigured for L1 data-cache reads. The HW_CACHE config packs
// the cache id in the low byte and the operation in bits 8-15; the result
// field (bits 16+) is left at 0.
// (The constructor's declarator line is not visible in this excerpt.)
191 struct LoadCounter
: HardwareCounterImpl
{
193 HardwareCounterImpl(PERF_TYPE_HW_CACHE
,
194 (PERF_COUNT_HW_CACHE_L1D
| ((PERF_COUNT_HW_CACHE_OP_READ
) << 8))) {}
// Counter preconfigured for L1 data-cache writes; same config encoding as
// LoadCounter but with PERF_COUNT_HW_CACHE_OP_WRITE in bits 8-15.
// (The constructor's declarator line is not visible in this excerpt.)
197 struct StoreCounter
: HardwareCounterImpl
{
199 HardwareCounterImpl(PERF_TYPE_HW_CACHE
,
200 PERF_COUNT_HW_CACHE_L1D
| ((PERF_COUNT_HW_CACHE_OP_WRITE
) << 8)) {}
// Always creates the instruction counter. When no custom event list is
// configured (s_profileHWEvents is empty) the load and store counters are
// created as well. The remaining statements set m_countersSet and install the
// configured events via setPerfEvents().
// NOTE(review): the line separating the two paths (presumably an `else`) is
// not visible in this excerpt - confirm.
203 HardwareCounter::HardwareCounter()
204 : m_countersSet(false) {
205 m_instructionCounter
.reset(new InstructionCounter());
206 if (s_profileHWEvents
.empty()) {
207 m_loadCounter
.reset(new LoadCounter());
208 m_storeCounter
.reset(new StoreCounter());
210 m_countersSet
= true;
211 setPerfEvents(s_profileHWEvents
);
// Destructor; its body is not visible in this excerpt.
215 HardwareCounter::~HardwareCounter() {
// Sets the file-level s_recordSubprocessTimes flag. HardwareCounterImpl copies
// that flag into perf_event_attr::inherit at construction, so only counters
// created afterwards see the change.
218 void HardwareCounter::RecordSubprocessTimes() {
219 s_recordSubprocessTimes
= true;
// Stores the global configuration: the enable flag, the event list and the
// subprocess-inheritance flag.
// NOTE(review): the rest of the parameter list (which declares `subProc`) is
// on a line absent from this excerpt.
222 void HardwareCounter::Init(bool enable
, const std::string
& events
,
224 s_profileHWEnable
= enable
;
225 s_profileHWEvents
= events
;
226 s_recordSubprocessTimes
= subProc
;
// Body not visible in this excerpt; presumably forwards to reset() on
// s_counter like the other capitalised wrappers in this file - confirm.
229 void HardwareCounter::Reset() {
// Resets the instruction counter, then the store and load counters while
// m_countersSet is false, and every entry of m_counters.
// NOTE(review): the line closing the `if` is not visible here, so whether the
// loop sits inside or after the conditional cannot be read off this excerpt.
233 void HardwareCounter::reset() {
234 m_instructionCounter
->reset();
235 if (!m_countersSet
) {
236 m_storeCounter
->reset();
237 m_loadCounter
->reset();
239 for (unsigned i
= 0; i
< m_counters
.size(); i
++) {
240 m_counters
[i
]->reset();
// Convenience wrapper: returns getInstructionCount() of s_counter.
244 int64_t HardwareCounter::GetInstructionCount() {
245 return s_counter
->getInstructionCount();
// Returns the current reading of the instruction counter.
248 int64_t HardwareCounter::getInstructionCount() {
249 return m_instructionCounter
->read();
// Convenience wrapper: returns getLoadCount() of s_counter.
252 int64_t HardwareCounter::GetLoadCount() {
253 return s_counter
->getLoadCount();
// Returns the current reading of the load counter.
// NOTE(review): addPerfEvent() calls m_loadCounter.reset() (no argument) when
// the first custom event is added, and there is no m_countersSet guard here,
// unlike in getPerfEvents() and IncLoadCount(). If m_loadCounter is an owning
// smart pointer, this dereference would then be on an empty pointer - confirm
// that callers never reach this once custom events are installed.
256 int64_t HardwareCounter::getLoadCount() {
257 return m_loadCounter
->read();
// Convenience wrapper: returns getStoreCount() of s_counter.
260 int64_t HardwareCounter::GetStoreCount() {
261 return s_counter
->getStoreCount();
// Returns the current reading of the store counter.
// NOTE(review): addPerfEvent() calls m_storeCounter.reset() (no argument) when
// the first custom event is added, and there is no m_countersSet guard here -
// confirm callers never reach this once custom events are installed.
264 int64_t HardwareCounter::getStoreCount() {
265 return m_storeCounter
->read();
// Passes `amount` to incCount() on the instruction counter of s_counter.
268 void HardwareCounter::IncInstructionCount(int64_t amount
) {
269 s_counter
->m_instructionCounter
->incCount(amount
);
// Passes `amount` to incCount() on the load counter of s_counter, but only
// while m_countersSet is false; once custom events are installed
// (addPerfEvent() releases the load counter) this does nothing.
272 void HardwareCounter::IncLoadCount(int64_t amount
) {
273 if (!s_counter
->m_countersSet
) {
274 s_counter
->m_loadCounter
->incCount(amount
);
// Passes `amount` to incCount() on the store counter of s_counter, but only
// while m_countersSet is false; once custom events are installed
// (addPerfEvent() releases the store counter) this does nothing.
278 void HardwareCounter::IncStoreCount(int64_t amount
) {
279 if (!s_counter
->m_countersSet
) {
280 s_counter
->m_storeCounter
->incCount(amount
);
// Lookup table mapping event-name fragments to a perf (type, config) pair.
// findEvent() matches entries as prefixes of the event string and
// addPerfEvent() ORs the config of each matched entry together, so a cache
// event is spelled as a cache-id prefix (entries ending in '-') followed by an
// operation/result name such as "loads" or "load-misses".
// The PC/PCC/PCCO helper macros each expand to the `type, config` initializer
// pair for one entry.
// NOTE(review): the embedded line numbers skip 303, so at least one entry is
// missing from this excerpt.
284 struct PerfTable perfTable
[] = {
285 /* PERF_TYPE_HARDWARE events */
286 #define PC(n) PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
287 { "cpu-cycles", PC(CPU_CYCLES
) },
288 { "cycles", PC(CPU_CYCLES
) },
289 { "instructions", PC(INSTRUCTIONS
) },
290 { "cache-references", PC(CACHE_REFERENCES
) },
291 { "cache-misses", PC(CACHE_MISSES
) },
292 { "branch-instructions", PC(BRANCH_INSTRUCTIONS
) },
293 { "branches", PC(BRANCH_INSTRUCTIONS
) },
294 { "branch-misses", PC(BRANCH_MISSES
) },
295 { "bus-cycles", PC(BUS_CYCLES
) },
296 { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND
) },
297 { "stalled-cycles-backend", PC(STALLED_CYCLES_BACKEND
) },
299 /* PERF_TYPE_HW_CACHE hw_cache_id */
300 #define PCC(n) PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
301 { "L1-dcache-", PCC(L1D
) },
302 { "L1-icache-", PCC(L1I
) },
304 { "dTLB-", PCC(DTLB
) },
305 { "iTLB-", PCC(ITLB
) },
306 { "branch-", PCC(BPU
) },
308 /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
309 #define PCCO(n, m) PERF_TYPE_HW_CACHE, \
310 ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
311 (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
312 { "loads", PCCO(READ
, ACCESS
) },
313 { "load-misses", PCCO(READ
, MISS
) },
314 { "stores", PCCO(WRITE
, ACCESS
) },
315 { "store-misses", PCCO(WRITE
, MISS
) },
316 { "prefetches", PCCO(PREFETCH
, ACCESS
) },
317 { "prefetch-misses", PCCO(PREFETCH
, MISS
) }
// Scans the first `len` entries of table `t` in order, looking for one whose
// name is a prefix of `event`; on a hit, stores the matched name length in
// *match_len.
// Because matching is by prefix and in table order, "cache-references" hits
// its own entry before any shorter name could match.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the entry index on a hit and a negative value otherwise - confirm.
320 static int findEvent(const char *event
, struct PerfTable
*t
,
321 int len
, int *match_len
) {
324 for (i
= 0; i
< len
; i
++) {
325 if (!strncmp(event
, t
[i
].name
, strlen(t
[i
].name
))) {
326 *match_len
= strlen(t
[i
].name
);
// Bit-field extractors for a CPUID signature word, as used by isIntelE5_2670()
// on the EAX result of CPUID leaf 1: bits 0-3 stepping, 4-7 model,
// 8-11 family, 12-13 type. The extended model/family fields in the higher bits
// are not decoded.
333 #define CPUID_STEPPING(x) ((x) & 0xf)
334 #define CPUID_MODEL(x) (((x) & 0xf0) >> 4)
335 #define CPUID_FAMILY(x) (((x) & 0xf00) >> 8)
336 #define CPUID_TYPE(x) (((x) & 0x3000) >> 12)
338 // hack to get LLC counters on perflab frc machines
// Executes CPUID with EAX = 1 and compares the returned signature against
// stepping 6, model 0xd, family 6, type 0. Only the low model nibble is
// checked (see CPUID_MODEL); the extended-model bits are ignored.
// NOTE(review): the declaration of `x` is not visible in this excerpt.
339 static bool isIntelE5_2670() {
342 asm volatile ("cpuid" : "=a"(x
): "a"(1) : "ebx", "ecx", "edx");
343 return CPUID_STEPPING(x
) == 6 && CPUID_MODEL(x
) == 0xd
344 && CPUID_FAMILY(x
) == 6 && CPUID_TYPE(x
) == 0;
// For event names starting with "LLC-load" on the CPU recognised by
// isIntelE5_2670(), switches `type` to PERF_TYPE_RAW and then distinguishes
// "LLC-loads" from "LLC-load-misses" by comparing the text after the 4-char
// "LLC-" prefix.
// NOTE(review): the statements that assign `config` in each branch are not
// visible in this excerpt.
350 static void checkLLCHack(const char* event
, uint32_t& type
, uint64_t& config
) {
351 if (!strncmp(event
, "LLC-load", 8) && isIntelE5_2670()) {
352 type
= PERF_TYPE_RAW
;
353 if (!strncmp(&event
[4], "loads", 5)) {
355 } else if (!strncmp(&event
[4], "load-misses", 11)) {
// Parses one event name and appends a matching counter to m_counters.
// Visible flow:
//   1. Repeatedly match perfTable entries against the remaining text `ev`,
//      taking the type from a match and OR-ing each match's config together;
//      a later match with a different type logs a warning.
//   2. Give checkLLCHack() a chance to override type/config.
//   3. If nothing matched and the name looks like "r<hex>", parse it as a raw
//      event code (PERF_TYPE_RAW).
//   4. Create a HardwareCounterImpl labelled with the event name, and move it
//      into m_counters.
//   5. On the first custom event, release the built-in load/store counters and
//      set m_countersSet.
// NOTE(review): many lines (local declarations, the loop tail that advances
// `ev`, the return statements and the conditions guarding the warnings) are
// absent from this excerpt, so return values are not documented here.
361 bool HardwareCounter::addPerfEvent(const char* event
) {
366 const char* ev
= event
;
368 while ((i
= findEvent(ev
, perfTable
,
369 sizeof(perfTable
)/sizeof(struct PerfTable
),
374 type
= perfTable
[i
].type
;
375 } else if (type
!= perfTable
[i
].type
) {
376 Logger::Warning("failed to find perf event: %s", event
);
379 config
|= perfTable
[i
].config
;
383 checkLLCHack(event
, type
, config
);
385 // Check if we have a raw spec.
386 if (!found
&& event
[0] == 'r' && event
[1] != 0) {
// Hex digits after the leading 'r'; strtoull leaves `ev` at the first
// unparsed character.
387 config
= strtoull(event
+ 1, const_cast<char**>(&ev
), 16);
390 type
= PERF_TYPE_RAW
;
395 Logger::Warning("failed to find perf event: %s", event
);
398 auto hwc
= folly::make_unique
<HardwareCounterImpl
>(type
, config
, event
);
400 Logger::Warning("failed to set perf event: %s", event
);
403 m_counters
.emplace_back(std::move(hwc
));
404 if (!m_countersSet
) {
405 // reset load and store counters. This is because
406 // perf does not seem to handle more than three counters
408 m_loadCounter
.reset();
409 m_storeCounter
.reset();
410 m_countersSet
= true;
// Checks whether a counter whose m_desc equals `event` (exact, case-sensitive
// strcmp) is already present in m_counters.
// NOTE(review): the return statements are not visible in this excerpt.
415 bool HardwareCounter::eventExists(const char *event
) {
416 // hopefully m_counters set is small, so a linear scan does not hurt
417 for(unsigned i
= 0; i
< m_counters
.size(); i
++) {
418 if (!strcmp(event
, m_counters
[i
]->m_desc
.c_str())) {
// Splits `sevents` on ',' and, for each token that is not already registered
// (eventExists), tries to add it via addPerfEvent().
// strtok_r modifies its input, so the tokenising is done on a NUL-terminated
// heap copy that SCOPE_EXIT frees on every exit path.
// NOTE(review): the malloc result is used without a null check.
// NOTE(review): the loop header, the action taken when addPerfEvent() fails,
// and the return statements are not visible in this excerpt.
425 bool HardwareCounter::setPerfEvents(folly::StringPiece sevents
) {
426 // Make a copy of the string for use with strtok.
427 auto const sevents_buf
= static_cast<char*>(malloc(sevents
.size() + 1));
428 SCOPE_EXIT
{ free(sevents_buf
); };
429 memcpy(sevents_buf
, sevents
.data(), sevents
.size());
430 sevents_buf
[sevents
.size()] = '\0';
432 char* strtok_buf
= nullptr;
433 char* s
= strtok_r(sevents_buf
, ",", &strtok_buf
);
435 if (!eventExists(s
) && !addPerfEvent(s
)) {
438 s
= strtok_r(nullptr, ",", &strtok_buf
);
// Convenience wrapper: returns setPerfEvents(events) of s_counter.
443 bool HardwareCounter::SetPerfEvents(folly::StringPiece events
) {
444 return s_counter
->setPerfEvents(events
);
// Body not visible in this excerpt.
447 void HardwareCounter::clearPerfEvents() {
// Convenience wrapper: calls clearPerfEvents() on s_counter.
451 void HardwareCounter::ClearPerfEvents() {
452 s_counter
->clearPerfEvents();
456 s_instructions("instructions"),
// Reports counter readings through callback `f`, invoked as
// f(name, value, data), with `data` passed through untouched.
// The instruction count is always reported; the built-in load/store counts are
// reported only while m_countersSet is false; each entry of m_counters is
// reported under its m_desc label.
// NOTE(review): the line closing the `if` is not visible here, so whether the
// loop sits inside or after the conditional cannot be read off this excerpt.
460 void HardwareCounter::getPerfEvents(PerfEventCallback f
, void* data
) {
461 f(s_instructions
, getInstructionCount(), data
);
462 if (!m_countersSet
) {
463 f(s_loads
, getLoadCount(), data
);
464 f(s_stores
, getStoreCount(), data
);
466 for (unsigned i
= 0; i
< m_counters
.size(); i
++) {
467 f(m_counters
[i
]->m_desc
, m_counters
[i
]->read(), data
);
// Convenience wrapper: calls getPerfEvents(f, data) on s_counter.
471 void HardwareCounter::GetPerfEvents(PerfEventCallback f
, void* data
) {
472 s_counter
->getPerfEvents(f
, data
);
475 ///////////////////////////////////////////////////////////////////////////////
479 #else // NO_HARDWARE_COUNTERS
482 ///////////////////////////////////////////////////////////////////////////////
// Definition of HardwareCounter::s_counter for builds where
// NO_HARDWARE_COUNTERS is defined (the #else arm of the #ifndef at the top of
// the file); here it is a plain HardwareCounter object.
484 HardwareCounter
HardwareCounter::s_counter
;
486 ///////////////////////////////////////////////////////////////////////////////
489 #endif // NO_HARDWARE_COUNTERS