codemod 2010-2016 to 2010-present
[hiphop-php.git] / hphp / util / hardware-counter.cpp
blob1434cb9a519a18b89b554372e6a3f92de458dd8d
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/hardware-counter.h"
19 #ifndef NO_HARDWARE_COUNTERS
21 #include <folly/ScopeGuard.h>
23 #include "hphp/util/logger.h"
25 #define _GNU_SOURCE 1
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <fcntl.h>
30 #include <errno.h>
31 #include <assert.h>
32 #include <sys/ioctl.h>
33 #include <asm/unistd.h>
34 #include <sys/prctl.h>
35 #include <linux/perf_event.h>
37 #include <folly/String.h>
38 #include <folly/Memory.h>
39 #include <folly/portability/SysMman.h>
40 #include <folly/portability/Unistd.h>
42 namespace HPHP {
43 ///////////////////////////////////////////////////////////////////////////////
45 IMPLEMENT_THREAD_LOCAL_NO_CHECK(HardwareCounter,
46 HardwareCounter::s_counter);
48 static bool s_recordSubprocessTimes = false;
49 static bool s_profileHWEnable;
50 static std::string s_profileHWEvents;
52 static inline bool useCounters() {
53 #ifdef VALGRIND
54 return false;
55 #else
56 return s_profileHWEnable;
57 #endif
60 struct HardwareCounterImpl {
61 HardwareCounterImpl(int type, unsigned long config,
62 const char* desc = nullptr)
63 : m_desc(desc ? desc : ""), m_err(0), m_fd(-1), inited(false) {
64 memset (&pe, 0, sizeof (struct perf_event_attr));
65 pe.type = type;
66 pe.size = sizeof (struct perf_event_attr);
67 pe.config = config;
68 pe.inherit = s_recordSubprocessTimes;
69 pe.disabled = 1;
70 pe.pinned = 0;
71 pe.exclude_kernel = 0;
72 pe.exclude_hv = 1;
73 pe.read_format =
74 PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING;
77 ~HardwareCounterImpl() {
78 close();
81 void init_if_not() {
83 * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
84 * int cpu, int group_fd, unsigned long flags)
86 if (inited) return;
87 inited = true;
88 m_fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
89 if (m_fd < 0) {
90 Logger::Warning("perf_event_open failed with: %s",
91 folly::errnoStr(errno).c_str());
92 m_err = -1;
93 return;
96 fcntl(m_fd, F_SETFD, O_CLOEXEC);
98 if (ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
99 Logger::Warning("perf_event failed to enable: %s",
100 folly::errnoStr(errno).c_str());
101 close();
102 m_err = -1;
103 return;
105 reset();
108 int64_t read() {
109 uint64_t values[3];
110 if (readRaw(values)) {
111 if (!values[2]) return 0;
112 int64_t value = (double)values[0] * values[1] / values[2];
113 return value + extra;
115 return 0;
118 void incCount(int64_t amount) {
119 extra += amount;
122 bool readRaw(uint64_t* values) {
123 if (m_err || !useCounters()) return false;
124 init_if_not();
126 if (m_fd > 0) {
128 * read the count + scaling values
130 * It is not necessary to stop an event to read its value
132 auto ret = ::read(m_fd, values, sizeof(*values) * 3);
133 if (ret == sizeof(*values) * 3) {
134 values[0] -= reset_values[0];
135 values[1] -= reset_values[1];
136 if (values[2] > reset_values[2]) {
137 values[2] -= reset_values[2];
138 } else {
139 values[2] = 0;
141 return true;
144 return false;
147 void reset() {
148 if (m_err || !useCounters()) return;
149 init_if_not();
150 extra = 0;
151 if (m_fd > 0) {
152 if (ioctl (m_fd, PERF_EVENT_IOC_RESET, 0) < 0) {
153 Logger::Warning("perf_event failed to reset with: %s",
154 folly::errnoStr(errno).c_str());
155 m_err = -1;
156 return;
158 auto ret = ::read(m_fd, reset_values, sizeof(reset_values));
159 if (ret != sizeof(reset_values)) {
160 Logger::Warning("perf_event failed to reset with: %s",
161 folly::errnoStr(errno).c_str());
162 m_err = -1;
163 return;
168 public:
169 std::string m_desc;
170 int m_err;
171 private:
172 int m_fd;
173 struct perf_event_attr pe;
174 bool inited;
175 uint64_t reset_values[3];
176 uint64_t extra{0};
178 void close() {
179 if (m_fd > 0) {
180 ::close(m_fd);
181 m_fd = -1;
186 struct InstructionCounter : HardwareCounterImpl {
187 InstructionCounter() :
188 HardwareCounterImpl(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) {}
191 struct LoadCounter : HardwareCounterImpl {
192 LoadCounter() :
193 HardwareCounterImpl(PERF_TYPE_HW_CACHE,
194 (PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8))) {}
197 struct StoreCounter : HardwareCounterImpl {
198 StoreCounter() :
199 HardwareCounterImpl(PERF_TYPE_HW_CACHE,
200 PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_WRITE) << 8)) {}
203 HardwareCounter::HardwareCounter()
204 : m_countersSet(false) {
205 m_instructionCounter.reset(new InstructionCounter());
206 if (s_profileHWEvents.empty()) {
207 m_loadCounter.reset(new LoadCounter());
208 m_storeCounter.reset(new StoreCounter());
209 } else {
210 m_countersSet = true;
211 setPerfEvents(s_profileHWEvents);
215 HardwareCounter::~HardwareCounter() {
218 void HardwareCounter::RecordSubprocessTimes() {
219 s_recordSubprocessTimes = true;
222 void HardwareCounter::Init(bool enable, const std::string& events,
223 bool subProc) {
224 s_profileHWEnable = enable;
225 s_profileHWEvents = events;
226 s_recordSubprocessTimes = subProc;
229 void HardwareCounter::Reset() {
230 s_counter->reset();
233 void HardwareCounter::reset() {
234 m_instructionCounter->reset();
235 if (!m_countersSet) {
236 m_storeCounter->reset();
237 m_loadCounter->reset();
239 for (unsigned i = 0; i < m_counters.size(); i++) {
240 m_counters[i]->reset();
244 int64_t HardwareCounter::GetInstructionCount() {
245 return s_counter->getInstructionCount();
248 int64_t HardwareCounter::getInstructionCount() {
249 return m_instructionCounter->read();
252 int64_t HardwareCounter::GetLoadCount() {
253 return s_counter->getLoadCount();
256 int64_t HardwareCounter::getLoadCount() {
257 return m_loadCounter->read();
260 int64_t HardwareCounter::GetStoreCount() {
261 return s_counter->getStoreCount();
264 int64_t HardwareCounter::getStoreCount() {
265 return m_storeCounter->read();
268 void HardwareCounter::IncInstructionCount(int64_t amount) {
269 s_counter->m_instructionCounter->incCount(amount);
272 void HardwareCounter::IncLoadCount(int64_t amount) {
273 if (!s_counter->m_countersSet) {
274 s_counter->m_loadCounter->incCount(amount);
278 void HardwareCounter::IncStoreCount(int64_t amount) {
279 if (!s_counter->m_countersSet) {
280 s_counter->m_storeCounter->incCount(amount);
284 struct PerfTable perfTable[] = {
285 /* PERF_TYPE_HARDWARE events */
286 #define PC(n) PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
287 { "cpu-cycles", PC(CPU_CYCLES) },
288 { "cycles", PC(CPU_CYCLES) },
289 { "instructions", PC(INSTRUCTIONS) },
290 { "cache-references", PC(CACHE_REFERENCES) },
291 { "cache-misses", PC(CACHE_MISSES) },
292 { "branch-instructions", PC(BRANCH_INSTRUCTIONS) },
293 { "branches", PC(BRANCH_INSTRUCTIONS) },
294 { "branch-misses", PC(BRANCH_MISSES) },
295 { "bus-cycles", PC(BUS_CYCLES) },
296 { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND) },
297 { "stalled-cycles-backend", PC(STALLED_CYCLES_BACKEND) },
299 /* PERF_TYPE_HW_CACHE hw_cache_id */
300 #define PCC(n) PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
301 { "L1-dcache-", PCC(L1D) },
302 { "L1-icache-", PCC(L1I) },
303 { "LLC-", PCC(LL) },
304 { "dTLB-", PCC(DTLB) },
305 { "iTLB-", PCC(ITLB) },
306 { "branch-", PCC(BPU) },
308 /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
309 #define PCCO(n, m) PERF_TYPE_HW_CACHE, \
310 ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
311 (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
312 { "loads", PCCO(READ, ACCESS) },
313 { "load-misses", PCCO(READ, MISS) },
314 { "stores", PCCO(WRITE, ACCESS) },
315 { "store-misses", PCCO(WRITE, MISS) },
316 { "prefetches", PCCO(PREFETCH, ACCESS) },
317 { "prefetch-misses", PCCO(PREFETCH, MISS) }
320 static int findEvent(const char *event, struct PerfTable *t,
321 int len, int *match_len) {
322 int i;
324 for (i = 0; i < len; i++) {
325 if (!strncmp(event, t[i].name, strlen(t[i].name))) {
326 *match_len = strlen(t[i].name);
327 return i;
330 return -1;
// Bit fields extracted from the value cpuid leaves in EAX (see the asm
// statement in isIntelE5_2670()).
#define CPUID_STEPPING(x)  ((x) & 0xf)
#define CPUID_MODEL(x)     (((x) & 0xf0) >> 4)
#define CPUID_FAMILY(x)    (((x) & 0xf00) >> 8)
#define CPUID_TYPE(x)      (((x) & 0x3000) >> 12)

// hack to get LLC counters on perflab frc machines
//
// True only on x86-64 when cpuid (EAX=1) reports stepping 6, model 0xd,
// family 6 and type 0. Always false on other architectures.
static bool isIntelE5_2670() {
#ifdef __x86_64__
  unsigned long eax;
  asm volatile ("cpuid" : "=a"(eax): "a"(1) : "ebx", "ecx", "edx");
  if (CPUID_STEPPING(eax) != 6) return false;
  if (CPUID_MODEL(eax) != 0xd) return false;
  if (CPUID_FAMILY(eax) != 6) return false;
  return CPUID_TYPE(eax) == 0;
#else
  return false;
#endif
}
350 static void checkLLCHack(const char* event, uint32_t& type, uint64_t& config) {
351 if (!strncmp(event, "LLC-load", 8) && isIntelE5_2670()) {
352 type = PERF_TYPE_RAW;
353 if (!strncmp(&event[4], "loads", 5)) {
354 config = 0x534f2e;
355 } else if (!strncmp(&event[4], "load-misses", 11)) {
356 config = 0x53412e;
361 bool HardwareCounter::addPerfEvent(const char* event) {
362 uint32_t type = 0;
363 uint64_t config = 0;
364 int i, match_len;
365 bool found = false;
366 const char* ev = event;
368 while ((i = findEvent(ev, perfTable,
369 sizeof(perfTable)/sizeof(struct PerfTable),
370 &match_len))
371 != -1) {
372 if (!found) {
373 found = true;
374 type = perfTable[i].type;
375 } else if (type != perfTable[i].type) {
376 Logger::Warning("failed to find perf event: %s", event);
377 return false;
379 config |= perfTable[i].config;
380 ev = &ev[match_len];
383 checkLLCHack(event, type, config);
385 // Check if we have a raw spec.
386 if (!found && event[0] == 'r' && event[1] != 0) {
387 config = strtoull(event + 1, const_cast<char**>(&ev), 16);
388 if (*ev == 0) {
389 found = true;
390 type = PERF_TYPE_RAW;
394 if (!found || *ev) {
395 Logger::Warning("failed to find perf event: %s", event);
396 return false;
398 auto hwc = folly::make_unique<HardwareCounterImpl>(type, config, event);
399 if (hwc->m_err) {
400 Logger::Warning("failed to set perf event: %s", event);
401 return false;
403 m_counters.emplace_back(std::move(hwc));
404 if (!m_countersSet) {
405 // reset load and store counters. This is because
406 // perf does not seem to handle more than three counters
407 // very well.
408 m_loadCounter.reset();
409 m_storeCounter.reset();
410 m_countersSet = true;
412 return true;
415 bool HardwareCounter::eventExists(const char *event) {
416 // hopefully m_counters set is small, so a linear scan does not hurt
417 for(unsigned i = 0; i < m_counters.size(); i++) {
418 if (!strcmp(event, m_counters[i]->m_desc.c_str())) {
419 return true;
422 return false;
425 bool HardwareCounter::setPerfEvents(folly::StringPiece sevents) {
426 // Make a copy of the string for use with strtok.
427 auto const sevents_buf = static_cast<char*>(malloc(sevents.size() + 1));
428 SCOPE_EXIT { free(sevents_buf); };
429 memcpy(sevents_buf, sevents.data(), sevents.size());
430 sevents_buf[sevents.size()] = '\0';
432 char* strtok_buf = nullptr;
433 char* s = strtok_r(sevents_buf, ",", &strtok_buf);
434 while (s) {
435 if (!eventExists(s) && !addPerfEvent(s)) {
436 return false;
438 s = strtok_r(nullptr, ",", &strtok_buf);
440 return true;
443 bool HardwareCounter::SetPerfEvents(folly::StringPiece events) {
444 return s_counter->setPerfEvents(events);
447 void HardwareCounter::clearPerfEvents() {
448 m_counters.clear();
451 void HardwareCounter::ClearPerfEvents() {
452 s_counter->clearPerfEvents();
// Names reported by getPerfEvents() for the built-in counters.
const std::string s_instructions("instructions");
const std::string s_loads("loads");
const std::string s_stores("stores");
460 void HardwareCounter::getPerfEvents(PerfEventCallback f, void* data) {
461 f(s_instructions, getInstructionCount(), data);
462 if (!m_countersSet) {
463 f(s_loads, getLoadCount(), data);
464 f(s_stores, getStoreCount(), data);
466 for (unsigned i = 0; i < m_counters.size(); i++) {
467 f(m_counters[i]->m_desc, m_counters[i]->read(), data);
471 void HardwareCounter::GetPerfEvents(PerfEventCallback f, void* data) {
472 s_counter->getPerfEvents(f, data);
475 ///////////////////////////////////////////////////////////////////////////////
479 #else // NO_HARDWARE_COUNTERS
481 namespace HPHP {
482 ///////////////////////////////////////////////////////////////////////////////
484 HardwareCounter HardwareCounter::s_counter;
486 ///////////////////////////////////////////////////////////////////////////////
489 #endif // NO_HARDWARE_COUNTERS