This fixes a bug in PHP/HH's crypt_blowfish implementation that can cause a short...
[hiphop-php.git] / hphp / util / perf-event.cpp
blobb1a898facac94ea53fec4b1018a0d58e019105d6
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/perf-event.h"
19 #if defined(__linux__) && defined(__x86_64__) && defined(FACEBOOK)
21 #include "hphp/util/assertions.h"
22 #include "hphp/util/logger.h"
23 #include "hphp/util/safe-cast.h"
25 #include <folly/FileUtil.h>
26 #include <folly/String.h>
28 #include <mutex>
29 #include <string>
31 #include <asm/unistd.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <poll.h>
35 #include <signal.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/syscall.h>
39 #include <sys/types.h>
40 #include <unistd.h>
42 // These two files must be included in this relative order, because the latter
43 // transitively includes a local copy of the former unless it detects that the
44 // system version has already been included.
45 #include <linux/perf_event.h>
46 #include <perfmon/pfmlib_perf_event.h>
48 namespace HPHP {
50 namespace {
52 ///////////////////////////////////////////////////////////////////////////////
55 * Process initialization bit and lock.
57 bool s_did_init = false;
58 std::mutex s_init_lock;
61 * Page size.
63 size_t s_pagesz = 0;
66 * Microarch-dependent event names for perf's cpu/mem-{loads,stores}/ events,
67 * in a form understood by libpfm4.
69 * We could just encode the `config' for perf_event_attr ourselves, but libpfm4
70 * does other things for us, like set the exclusion bits, and the encoding is
71 * not well-specified in the first place. Instead, it just means we had to
72 * match some bits to names ahead of time.
74 * These may be altered when the module is initialized.
76 // On Haswell and later, this is called "LOAD_LATENCY".
77 const char* s_mem_loads = "MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD";
78 // On Haswell and later, "MEM_UOPS_RETIRED:ALL_STORES" is used instead.
79 const char* s_mem_stores = "MEM_TRANS_RETIRED:PRECISE_STORE";
81 ///////////////////////////////////////////////////////////////////////////////
/*
 * Metadata for a fully set up perf_event.
 */
struct perf_event_handle {
  perf_event_handle() {}
  perf_event_handle(int fd, struct perf_event_mmap_page* meta)
    : fd(fd)
    , meta(meta)
  {}

  // File descriptor of the opened perf_event.
  int fd{-1};

  // Metadata header page, followed by the ring buffer for samples.
  struct perf_event_mmap_page* meta{nullptr};

  // Scratch buffer for samples that wrap around the end of the ring buffer;
  // lazily allocated and grown by ensure_buffer_capacity().
  char* buf{nullptr};
  size_t buf_sz{0};
};
/*
 * Per-thread perf_event metadata: the two open events plus the user's signal
 * callback.
 *
 * `signal' doubles as the "fully initialized" flag for the thread: it is set
 * last (after a compiler barrier) in perf_event_enable(), and checked first
 * by every other entry point, including the SIGIO handler.
 */
thread_local struct {
  perf_event_handle loads;
  perf_event_handle stores;
  perf_event_signal_fn_t signal;
} tl_perf_event = {};
114 * Ensure that this module is properly initialized.
116 * Returns true if the module has been initialized successfully (by anyone),
117 * else false.
119 bool perf_event_init() {
120 if (s_did_init) return true;
122 std::lock_guard<std::mutex> l(s_init_lock);
123 if (s_did_init) return true;
125 s_pagesz = sysconf(_SC_PAGESIZE);
127 std::string event_str;
128 if (folly::readFile("/sys/devices/cpu/events/mem-stores", event_str)) {
129 // If the read fails, we'll stick with the {Sandy,Ivy}Bridge event name.
130 // Otherwise, check for the Haswell encoding string.
132 // @see: linux/arch/x86/events/intel/core.c.
133 if (event_str == "event=0xd0,umask=0x82") {
134 s_mem_stores = "MEM_UOPS_RETIRED:ALL_STORES";
136 // `event_str' should be "event=0xcd,umask=0x2" on *Bridge, but we don't
137 // care since we're using that event name as our default.
140 // libpfm4 needs to be initialized exactly once per process lifetime.
141 auto const pfmr = pfm_initialize();
142 if (pfmr != PFM_SUCCESS) {
143 Logger::Warning("perf_event: pfm_initialize failed: %s",
144 pfm_strerror(pfmr));
145 return false;
147 s_did_init = true;
148 return true;
152 * Size of the mmap'd perf_event output ring buffer.
154 * Must be exactly 2^n pages for some `n' (or 1 + 2^n, if we include the
155 * perf_event header page).
157 size_t buffer_sz() { return s_pagesz * (1 << 5); } // ring buffer only
158 size_t mmap_sz() { return s_pagesz + buffer_sz(); } // with header
160 ///////////////////////////////////////////////////////////////////////////////
163 * Register that a perf event was generated.
165 void signal_event(int sig, siginfo_t* info, void* /*context*/) {
166 if (sig != SIGIO || info == nullptr) return;
168 // Older versions of Linux have SIGIO here; newer versions have POLLIN.
169 if (info->si_code != SIGIO && info->si_code != POLLIN) return;
170 // We only care about read signals.
171 if ((info->si_band & POLLERR) || (info->si_band & POLLNVAL)) return;
172 if (!(info->si_band & POLLIN)) return;
174 if (tl_perf_event.signal == nullptr) return;
176 auto const type = [&]() -> Optional<PerfEvent> {
177 if (info->si_fd == tl_perf_event.loads.fd) return PerfEvent::Load;
178 if (info->si_fd == tl_perf_event.stores.fd) return PerfEvent::Store;
179 return std::nullopt;
180 }();
181 if (!type) return;
183 tl_perf_event.signal(*type);
187 * Install `signal_event' to notify the user of new perf_event samples.
189 * Returns true if the handler was successfully installed, else false. If a
190 * handler for SIGIO was already installed, this will fail. Otherwise, if we
191 * install `signal_event' successfully, SIGIO will be unconditionally unblocked
192 * for the calling thread.
194 bool install_sigio_handler() {
195 struct sigaction old_action;
197 if (sigaction(SIGIO, nullptr, &old_action) < 0) {
198 Logger::Warning("perf_event: could not install SIGIO handler: %s",
199 folly::errnoStr(errno).c_str());
200 return false;
203 // Fail if a competing SIGIO handler is found.
204 if (old_action.sa_handler != SIG_DFL &&
205 old_action.sa_handler != SIG_IGN &&
206 old_action.sa_sigaction != signal_event) {
207 Logger::Warning("perf_event: could not install SIGIO handler: "
208 "found existing handler");
209 return false;
212 // Install our signal handler for SIGIO.
213 struct sigaction action = {};
214 action.sa_sigaction = signal_event;
215 action.sa_flags = SA_SIGINFO;
217 if (sigaction(SIGIO, &action, nullptr) < 0) {
218 Logger::Warning("perf_event: could not install SIGIO handler: %s",
219 folly::errnoStr(errno).c_str());
220 return false;
223 // Ensure that SIGIO is unblocked.
224 sigset_t sigs;
225 sigemptyset(&sigs);
226 sigaddset(&sigs, SIGIO);
227 if (pthread_sigmask(SIG_UNBLOCK, &sigs, nullptr) < 0) {
228 Logger::Warning("perf_event: could not unblock SIGIO: %s",
229 folly::errnoStr(errno).c_str());
230 return false;
233 return true;
236 ///////////////////////////////////////////////////////////////////////////////
239 * Pause or resume an event.
241 void pause_event(const perf_event_handle& pe) {
242 ioctl(pe.fd, PERF_EVENT_IOC_DISABLE, 0);
244 void resume_event(const perf_event_handle& pe) {
245 ioctl(pe.fd, PERF_EVENT_IOC_ENABLE, 0);
249 * Logically delete all events that are currently buffered for `pe'.
251 void clear_events(const perf_event_handle& pe) {
252 auto const data_head = pe.meta->data_head;
253 __sync_synchronize(); // smp_mb()
254 pe.meta->data_tail = data_head;
258 * Disable and close a perf event.
260 void close_event(const perf_event_handle& pe) {
261 clear_events(pe);
262 free(pe.buf);
263 ioctl(pe.fd, PERF_EVENT_IOC_DISABLE, 0);
264 munmap(pe.meta, mmap_sz());
265 close(pe.fd);
269 * Open a file descriptor for perf events with `event_name', mmap it, and set
270 * things up so that the calling thread receives SIGIO signals from it.
272 * Returns the perf_event_handle on success, else std::nullopt.
274 Optional<perf_event_handle> enable_event(const char* event_name,
275 uint64_t sample_freq) {
276 struct perf_event_attr attr = {};
277 pfm_perf_encode_arg_t arg = {};
278 arg.attr = &attr;
279 arg.size = sizeof(arg);
281 // Populate the `type', `config', and `exclude_*' members on `attr'.
282 auto const pfmr = pfm_get_os_event_encoding(event_name, PFM_PLM3,
283 PFM_OS_PERF_EVENT, &arg);
284 if (pfmr != PFM_SUCCESS) {
285 Logger::Warning("perf_event: failed to get encoding for %s: %s",
286 event_name, pfm_strerror(pfmr));
287 return std::nullopt;
290 // Finish setting up `attr' and open the event.
291 attr.size = sizeof(attr);
292 attr.disabled = 1;
293 attr.sample_freq = sample_freq;
294 attr.freq = 1;
295 attr.watermark = 0;
296 attr.wakeup_events = 1;
297 attr.precise_ip = 2; // request zero skid
299 attr.sample_type = PERF_SAMPLE_IP
300 | PERF_SAMPLE_TID
301 | PERF_SAMPLE_ADDR
302 | PERF_SAMPLE_CALLCHAIN
303 | PERF_SAMPLE_DATA_SRC
306 auto const ret = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
307 if (ret < 0) {
308 // Some machines might not have PEBS support (needed for precise_ip > 0),
309 // but then PERF_SAMPLE_ADDR will always return zeros instead of the target
310 // memory address. Just fail silently in this case.
311 Logger::Verbose("perf_event: perf_event_open failed with: %s",
312 folly::errnoStr(errno).c_str());
313 return std::nullopt;
315 auto const fd = safe_cast<int>(ret);
317 // Recent versions of Linux have a CLOEXEC flag for perf_event_open(), but
318 // use fcntl() for portability. Note that since we do this after we open the
319 // event, this could in theory race with an exec() from another thread---but
320 // that shouldn't be happening anyway.
321 fcntl(fd, F_SETFD, O_CLOEXEC);
323 // Make sure that any SIGIO sent from `fd' is handled by the calling thread.
324 f_owner_ex owner;
325 owner.type = F_OWNER_TID;
326 owner.pid = syscall(__NR_gettid);
328 // Set up `fd' to send SIGIO with sigaction info.
329 if (fcntl(fd, F_SETFL, O_ASYNC) < 0 ||
330 fcntl(fd, F_SETSIG, SIGIO) < 0 ||
331 fcntl(fd, F_SETOWN_EX, &owner) < 0) {
332 Logger::Warning("perf_event: failed to set up asynchronous I/O: %s",
333 folly::errnoStr(errno).c_str());
334 close(fd);
335 return std::nullopt;
338 // Map the ring buffer for our samples.
339 auto const base = mmap(nullptr, mmap_sz(), PROT_READ | PROT_WRITE,
340 MAP_SHARED, fd, 0);
341 if (base == MAP_FAILED) {
342 Logger::Warning("perf_event: failed to mmap perf_event: %s",
343 folly::errnoStr(errno).c_str());
344 close(fd);
345 return std::nullopt;
347 auto const meta = reinterpret_cast<struct perf_event_mmap_page*>(base);
349 auto const pe = perf_event_handle { fd, meta };
351 // Reset the event. This seems to be present in most examples, but it's
352 // unclear if it's necessary or just good hygiene. (It's possible that it's
353 // necessary on successive opens.)
354 if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) < 0) {
355 Logger::Warning("perf_event: failed to reset perf_event: %s",
356 folly::errnoStr(errno).c_str());
357 close_event(pe);
358 return std::nullopt;
361 // Enable the event. The man page and other examples of usage all suggest
362 // that the right thing to do is to start with the event disabled and then
363 // enable it manually afterwards, so we do the same here even though it seems
364 // strange and circuitous.
365 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
366 Logger::Warning("perf_event: failed to enable perf_event: %s",
367 folly::errnoStr(errno).c_str());
368 close_event(pe);
369 return std::nullopt;
372 return pe;
375 ///////////////////////////////////////////////////////////////////////////////
378 * Ensure that `pe.buf' can hold at least `cap' bytes.
380 void ensure_buffer_capacity(perf_event_handle& pe, size_t cap) {
381 if (pe.buf_sz >= cap) return;
382 free(pe.buf);
383 pe.buf = reinterpret_cast<char*>(malloc(cap * 2));
387 * Iterate through all the pending sampled events in `pe' and pass each one to
388 * `consume'.
390 void consume_events(PerfEvent kind, perf_event_handle& pe,
391 perf_event_consume_fn_t consume) {
392 auto const data_tail = pe.meta->data_tail;
393 auto const data_head = pe.meta->data_head;
395 asm volatile("" : : : "memory"); // smp_rmb()
396 if (data_head == data_tail) return;
398 auto const base = reinterpret_cast<char*>(pe.meta) + s_pagesz;
400 auto const begin = base + data_tail % buffer_sz();
401 auto const end = base + data_head % buffer_sz();
403 auto cur = begin;
405 while (cur != end) {
406 auto header = reinterpret_cast<struct perf_event_header*>(cur);
408 if (cur + header->size > base + buffer_sz()) {
409 // The current entry wraps around the ring buffer. Copy it into a stack
410 // buffer, and update `cur' to wrap around appropriately.
411 auto const prefix_len = base + buffer_sz() - cur;
413 ensure_buffer_capacity(pe, header->size);
415 memcpy(pe.buf, cur, prefix_len);
416 memcpy(pe.buf + prefix_len, base, header->size - prefix_len);
417 header = reinterpret_cast<struct perf_event_header*>(pe.buf);
419 cur = base + header->size - prefix_len;
420 } else if (cur + header->size == base + buffer_sz()) {
421 // Perfect wraparound.
422 cur = base;
423 } else {
424 cur += header->size;
427 if (header->type == PERF_RECORD_SAMPLE) {
428 auto const sample = reinterpret_cast<perf_event_sample*>(header + 1);
430 assertx(header->size == sizeof(struct perf_event_header) +
431 sizeof(perf_event_sample) +
432 sample->nr * sizeof(*sample->ips) +
433 sizeof(perf_event_sample_tail));
434 assertx((char*)(sample->tail() + 1) == (char*)header + header->size);
435 consume(kind, sample);
439 __sync_synchronize(); // smp_mb()
440 pe.meta->data_tail = data_head;
443 ///////////////////////////////////////////////////////////////////////////////
447 ///////////////////////////////////////////////////////////////////////////////
449 perf_event_data_src_info
450 perf_event_data_src(PerfEvent kind, uint64_t data_src) {
451 auto info = perf_event_data_src_info{};
453 DEBUG_ONLY auto const mem_op = data_src;
454 switch (kind) {
455 case PerfEvent::Load:
456 assertx(mem_op & PERF_MEM_OP_LOAD);
457 break;
458 case PerfEvent::Store:
459 assertx(mem_op & PERF_MEM_OP_STORE);
460 break;
463 auto const mem_lvl = data_src >> PERF_MEM_LVL_SHIFT;
465 if (mem_lvl & PERF_MEM_LVL_NA) {
466 info.mem_lvl = "(unknown)";
467 info.mem_hit = 0;
468 } else {
469 info.mem_hit = (mem_lvl & PERF_MEM_LVL_HIT) ? 1 :
470 (mem_lvl & PERF_MEM_LVL_MISS) ? -1 : 0;
472 #define MEM_LVLS \
473 X(L1) \
474 X(LFB) \
475 X(L2) \
476 X(L3) \
477 X(LOC_RAM) \
478 X(REM_RAM1) \
479 X(REM_RAM2) \
480 X(REM_CCE1) \
481 X(REM_CCE2) \
482 X(IO) \
483 X(UNC)
485 auto const mem_lvl_only = mem_lvl & (0x0
486 #define X(lvl) | PERF_MEM_LVL_##lvl
487 MEM_LVLS
488 #undef X
491 info.mem_lvl = [&]() -> const char* {
492 switch (mem_lvl_only) {
493 case 0x0: return "(none)";
494 #define X(lvl) \
495 case PERF_MEM_LVL_##lvl: return #lvl;
496 MEM_LVLS
497 #undef X
498 default: return "(mixed)";
500 }();
503 #undef MEM_LVLS
505 auto const mem_snoop = data_src >> PERF_MEM_SNOOP_SHIFT;
506 if (mem_snoop & PERF_MEM_SNOOP_NA) {
507 info.snoop = 0;
508 info.snoop_hit = 0;
509 info.snoop_hitm = 0;
510 } else {
511 info.snoop_hit = (mem_snoop & PERF_MEM_SNOOP_HIT) ? 1 :
512 (mem_snoop & PERF_MEM_SNOOP_MISS) ? -1 : 0;
513 info.snoop = (mem_snoop & PERF_MEM_SNOOP_NONE) ? -1 : 1;
514 info.snoop_hitm = (mem_snoop & PERF_MEM_SNOOP_HITM) ? 1 : -1;
517 auto const mem_lock = data_src >> PERF_MEM_LOCK_SHIFT;
518 info.locked = (mem_lock & PERF_MEM_LOCK_NA) ? 0 :
519 (mem_lock & PERF_MEM_LOCK_LOCKED) ? 1 : -1;
521 auto const mem_tlb = data_src >> PERF_MEM_TLB_SHIFT;
523 if (mem_tlb & PERF_MEM_TLB_NA) {
524 info.tlb = "(unknown)";
525 info.tlb_hit = 0;
526 } else {
527 info.tlb_hit = (mem_tlb & PERF_MEM_TLB_HIT) ? 1 :
528 (mem_tlb & PERF_MEM_TLB_MISS) ? -1 : 0;
530 #define TLBS \
531 X(L1) \
532 X(L2) \
533 X(WK) \
534 X(OS)
536 auto const tlb_only = mem_tlb & (0x0
537 #define X(tlb) | PERF_MEM_TLB_##tlb
538 TLBS
539 #undef X
542 info.tlb = [&]() -> const char* {
543 switch (tlb_only) {
544 case 0x0: return "(none)";
545 #define X(tlb) \
546 case PERF_MEM_TLB_##tlb: return #tlb;
547 TLBS
548 #undef X
549 case (PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2): return "L1-L2";
550 default: return "(mixed)";
552 }();
555 return info;
558 ///////////////////////////////////////////////////////////////////////////////
560 bool perf_event_enable(uint64_t sample_freq, perf_event_signal_fn_t signal_fn) {
561 if (!perf_event_init()) return false;
563 // If `tl_perf_event' has already been initialized, we're done.
564 if (tl_perf_event.signal != nullptr) return true;
566 if (!install_sigio_handler()) return false;
568 auto const ld_pe = enable_event(s_mem_loads, sample_freq);
569 if (!ld_pe) return false;
571 auto const st_pe = enable_event(s_mem_stores, sample_freq);
572 if (!st_pe) {
573 close_event(*ld_pe);
574 return false;
577 // Set `tl_perf_event'---and in particular, `signal'---only after everything
578 // is enabled. This will cause us to ignore signals until we're ready to
579 // process the events.
580 tl_perf_event.loads = *ld_pe;
581 tl_perf_event.stores = *st_pe;
582 asm volatile("" : : : "memory");
583 tl_perf_event.signal = signal_fn;
585 return true;
588 void perf_event_pause() {
589 if (tl_perf_event.signal == nullptr) return;
590 pause_event(tl_perf_event.loads);
591 pause_event(tl_perf_event.stores);
594 void perf_event_resume() {
595 if (tl_perf_event.signal == nullptr) return;
596 resume_event(tl_perf_event.loads);
597 resume_event(tl_perf_event.stores);
600 void perf_event_disable() {
601 if (tl_perf_event.signal == nullptr) return;
603 close_event(tl_perf_event.loads);
604 close_event(tl_perf_event.stores);
605 tl_perf_event = {};
608 void perf_event_consume(perf_event_consume_fn_t consume) {
609 if (tl_perf_event.signal == nullptr) return;
611 consume_events(PerfEvent::Load, tl_perf_event.loads, consume);
612 consume_events(PerfEvent::Store, tl_perf_event.stores, consume);
615 ///////////////////////////////////////////////////////////////////////////////
619 #else // defined(__linux__) && defined(__x86_64__)
621 namespace HPHP {
623 perf_event_data_src_info
624 perf_event_data_src(PerfEvent kind, uint64_t data_src) {
625 return perf_event_data_src_info{};
628 bool perf_event_enable(uint64_t, perf_event_signal_fn_t) { return false; }
629 void perf_event_disable() {}
630 void perf_event_pause() {}
631 void perf_event_resume() {}
632 void perf_event_consume(perf_event_consume_fn_t) {}
636 #endif