2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
#include "hphp/util/perf-event.h"

#if defined(__linux__) && defined(__x86_64__) && defined(FACEBOOK)

#include "hphp/util/assertions.h"
#include "hphp/util/logger.h"
#include "hphp/util/safe-cast.h"

#include <folly/FileUtil.h>
#include <folly/String.h>

#include <mutex>
#include <string>

#include <asm/unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

// These two files must be included in this relative order, because the latter
// transitively includes a local copy of the former unless it detects that the
// system version has already been included.
#include <linux/perf_event.h>
#include <perfmon/pfmlib_perf_event.h>
52 ///////////////////////////////////////////////////////////////////////////////
55 * Process initialization bit and lock.
57 bool s_did_init
= false;
58 std::mutex s_init_lock
;
66 * Microarch-dependent event names for perf's cpu/mem-{loads,stores}/ events,
67 * in a form understood by libpfm4.
69 * We could just encode the `config' for perf_event_attr ourselves, but libpfm4
70 * does other things for us, like set the exclusion bits, and the encoding is
71 * not well-specified in the first place. Instead, it just means we had to
72 * match some bits to names ahead of time.
74 * These may be altered when the module is initialized.
76 // On Haswell and later, this is called "LOAD_LATENCY".
77 const char* s_mem_loads
= "MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD";
78 // On Haswell and later, "MEM_UOPS_RETIRED:ALL_STORES" is used instead.
79 const char* s_mem_stores
= "MEM_TRANS_RETIRED:PRECISE_STORE";
81 ///////////////////////////////////////////////////////////////////////////////
/*
 * Metadata for a fully set up perf_event.
 */
struct perf_event_handle {
  perf_event_handle() {}
  perf_event_handle(int fd, struct perf_event_mmap_page* meta)
    : fd(fd)
    , meta(meta)
  {}

  // File descriptor of the opened perf_event.
  int fd{-1};

  // Metadata header page, followed by the ring buffer for samples.
  struct perf_event_mmap_page* meta{nullptr};

  // Buffer for samples that wrap around the ring buffer; grown on demand by
  // ensure_buffer_capacity() and freed in close_event().
  char* buf{nullptr};
  size_t buf_sz{0};
};
/*
 * Per-thread perf_event metadata.
 */
thread_local struct {
  // Sampled load and store events owned by the current thread.
  perf_event_handle loads;
  perf_event_handle stores;
  // User callback invoked from the SIGIO handler; also serves as the
  // "enabled" flag — nullptr means perf events are not set up on this thread
  // (perf_event_enable() publishes it last, and the other entry points bail
  // out early when it is null).
  perf_event_signal_fn_t signal;
} tl_perf_event = {};
114 * Ensure that this module is properly initialized.
116 * Returns true if the module has been initialized successfully (by anyone),
119 bool perf_event_init() {
120 if (s_did_init
) return true;
122 std::lock_guard
<std::mutex
> l(s_init_lock
);
123 if (s_did_init
) return true;
125 s_pagesz
= sysconf(_SC_PAGESIZE
);
127 std::string event_str
;
128 if (folly::readFile("/sys/devices/cpu/events/mem-stores", event_str
)) {
129 // If the read fails, we'll stick with the {Sandy,Ivy}Bridge event name.
130 // Otherwise, check for the Haswell encoding string.
132 // @see: linux/arch/x86/events/intel/core.c.
133 if (event_str
== "event=0xd0,umask=0x82") {
134 s_mem_stores
= "MEM_UOPS_RETIRED:ALL_STORES";
136 // `event_str' should be "event=0xcd,umask=0x2" on *Bridge, but we don't
137 // care since we're using that event name as our default.
140 // libpfm4 needs to be initialized exactly once per process lifetime.
141 auto const pfmr
= pfm_initialize();
142 if (pfmr
!= PFM_SUCCESS
) {
143 Logger::Warning("perf_event: pfm_initialize failed: %s",
152 * Size of the mmap'd perf_event output ring buffer.
154 * Must be exactly 2^n pages for some `n' (or 1 + 2^n, if we include the
155 * perf_event header page).
157 size_t buffer_sz() { return s_pagesz
* (1 << 5); } // ring buffer only
158 size_t mmap_sz() { return s_pagesz
+ buffer_sz(); } // with header
160 ///////////////////////////////////////////////////////////////////////////////
163 * Register that a perf event was generated.
165 void signal_event(int sig
, siginfo_t
* info
, void* /*context*/) {
166 if (sig
!= SIGIO
|| info
== nullptr) return;
168 // Older versions of Linux have SIGIO here; newer versions have POLLIN.
169 if (info
->si_code
!= SIGIO
&& info
->si_code
!= POLLIN
) return;
170 // We only care about read signals.
171 if ((info
->si_band
& POLLERR
) || (info
->si_band
& POLLNVAL
)) return;
172 if (!(info
->si_band
& POLLIN
)) return;
174 if (tl_perf_event
.signal
== nullptr) return;
176 auto const type
= [&]() -> Optional
<PerfEvent
> {
177 if (info
->si_fd
== tl_perf_event
.loads
.fd
) return PerfEvent::Load
;
178 if (info
->si_fd
== tl_perf_event
.stores
.fd
) return PerfEvent::Store
;
183 tl_perf_event
.signal(*type
);
187 * Install `signal_event' to notify the user of new perf_event samples.
189 * Returns true if the handler was successfully installed, else false. If a
190 * handler for SIGIO was already installed, this will fail. Otherwise, if we
191 * install `signal_event' successfully, SIGIO will be unconditionally unblocked
192 * for the calling thread.
194 bool install_sigio_handler() {
195 struct sigaction old_action
;
197 if (sigaction(SIGIO
, nullptr, &old_action
) < 0) {
198 Logger::Warning("perf_event: could not install SIGIO handler: %s",
199 folly::errnoStr(errno
).c_str());
203 // Fail if a competing SIGIO handler is found.
204 if (old_action
.sa_handler
!= SIG_DFL
&&
205 old_action
.sa_handler
!= SIG_IGN
&&
206 old_action
.sa_sigaction
!= signal_event
) {
207 Logger::Warning("perf_event: could not install SIGIO handler: "
208 "found existing handler");
212 // Install our signal handler for SIGIO.
213 struct sigaction action
= {};
214 action
.sa_sigaction
= signal_event
;
215 action
.sa_flags
= SA_SIGINFO
;
217 if (sigaction(SIGIO
, &action
, nullptr) < 0) {
218 Logger::Warning("perf_event: could not install SIGIO handler: %s",
219 folly::errnoStr(errno
).c_str());
223 // Ensure that SIGIO is unblocked.
226 sigaddset(&sigs
, SIGIO
);
227 if (pthread_sigmask(SIG_UNBLOCK
, &sigs
, nullptr) < 0) {
228 Logger::Warning("perf_event: could not unblock SIGIO: %s",
229 folly::errnoStr(errno
).c_str());
236 ///////////////////////////////////////////////////////////////////////////////
239 * Pause or resume an event.
241 void pause_event(const perf_event_handle
& pe
) {
242 ioctl(pe
.fd
, PERF_EVENT_IOC_DISABLE
, 0);
244 void resume_event(const perf_event_handle
& pe
) {
245 ioctl(pe
.fd
, PERF_EVENT_IOC_ENABLE
, 0);
249 * Logically delete all events that are currently buffered for `pe'.
251 void clear_events(const perf_event_handle
& pe
) {
252 auto const data_head
= pe
.meta
->data_head
;
253 __sync_synchronize(); // smp_mb()
254 pe
.meta
->data_tail
= data_head
;
258 * Disable and close a perf event.
260 void close_event(const perf_event_handle
& pe
) {
263 ioctl(pe
.fd
, PERF_EVENT_IOC_DISABLE
, 0);
264 munmap(pe
.meta
, mmap_sz());
269 * Open a file descriptor for perf events with `event_name', mmap it, and set
270 * things up so that the calling thread receives SIGIO signals from it.
272 * Returns the perf_event_handle on success, else std::nullopt.
274 Optional
<perf_event_handle
> enable_event(const char* event_name
,
275 uint64_t sample_freq
) {
276 struct perf_event_attr attr
= {};
277 pfm_perf_encode_arg_t arg
= {};
279 arg
.size
= sizeof(arg
);
281 // Populate the `type', `config', and `exclude_*' members on `attr'.
282 auto const pfmr
= pfm_get_os_event_encoding(event_name
, PFM_PLM3
,
283 PFM_OS_PERF_EVENT
, &arg
);
284 if (pfmr
!= PFM_SUCCESS
) {
285 Logger::Warning("perf_event: failed to get encoding for %s: %s",
286 event_name
, pfm_strerror(pfmr
));
290 // Finish setting up `attr' and open the event.
291 attr
.size
= sizeof(attr
);
293 attr
.sample_freq
= sample_freq
;
296 attr
.wakeup_events
= 1;
297 attr
.precise_ip
= 2; // request zero skid
299 attr
.sample_type
= PERF_SAMPLE_IP
302 | PERF_SAMPLE_CALLCHAIN
303 | PERF_SAMPLE_DATA_SRC
306 auto const ret
= syscall(__NR_perf_event_open
, &attr
, 0, -1, -1, 0);
308 // Some machines might not have PEBS support (needed for precise_ip > 0),
309 // but then PERF_SAMPLE_ADDR will always return zeros instead of the target
310 // memory address. Just fail silently in this case.
311 Logger::Verbose("perf_event: perf_event_open failed with: %s",
312 folly::errnoStr(errno
).c_str());
315 auto const fd
= safe_cast
<int>(ret
);
317 // Recent versions of Linux have a CLOEXEC flag for perf_event_open(), but
318 // use fcntl() for portability. Note that since we do this after we open the
319 // event, this could in theory race with an exec() from another thread---but
320 // that shouldn't be happening anyway.
321 fcntl(fd
, F_SETFD
, O_CLOEXEC
);
323 // Make sure that any SIGIO sent from `fd' is handled by the calling thread.
325 owner
.type
= F_OWNER_TID
;
326 owner
.pid
= syscall(__NR_gettid
);
328 // Set up `fd' to send SIGIO with sigaction info.
329 if (fcntl(fd
, F_SETFL
, O_ASYNC
) < 0 ||
330 fcntl(fd
, F_SETSIG
, SIGIO
) < 0 ||
331 fcntl(fd
, F_SETOWN_EX
, &owner
) < 0) {
332 Logger::Warning("perf_event: failed to set up asynchronous I/O: %s",
333 folly::errnoStr(errno
).c_str());
338 // Map the ring buffer for our samples.
339 auto const base
= mmap(nullptr, mmap_sz(), PROT_READ
| PROT_WRITE
,
341 if (base
== MAP_FAILED
) {
342 Logger::Warning("perf_event: failed to mmap perf_event: %s",
343 folly::errnoStr(errno
).c_str());
347 auto const meta
= reinterpret_cast<struct perf_event_mmap_page
*>(base
);
349 auto const pe
= perf_event_handle
{ fd
, meta
};
351 // Reset the event. This seems to be present in most examples, but it's
352 // unclear if it's necessary or just good hygiene. (It's possible that it's
353 // necessary on successive opens.)
354 if (ioctl(fd
, PERF_EVENT_IOC_RESET
, 0) < 0) {
355 Logger::Warning("perf_event: failed to reset perf_event: %s",
356 folly::errnoStr(errno
).c_str());
361 // Enable the event. The man page and other examples of usage all suggest
362 // that the right thing to do is to start with the event disabled and then
363 // enable it manually afterwards, so we do the same here even though it seems
364 // strange and circuitous.
365 if (ioctl(fd
, PERF_EVENT_IOC_ENABLE
, 0) < 0) {
366 Logger::Warning("perf_event: failed to enable perf_event: %s",
367 folly::errnoStr(errno
).c_str());
375 ///////////////////////////////////////////////////////////////////////////////
378 * Ensure that `pe.buf' can hold at least `cap' bytes.
380 void ensure_buffer_capacity(perf_event_handle
& pe
, size_t cap
) {
381 if (pe
.buf_sz
>= cap
) return;
383 pe
.buf
= reinterpret_cast<char*>(malloc(cap
* 2));
387 * Iterate through all the pending sampled events in `pe' and pass each one to
390 void consume_events(PerfEvent kind
, perf_event_handle
& pe
,
391 perf_event_consume_fn_t consume
) {
392 auto const data_tail
= pe
.meta
->data_tail
;
393 auto const data_head
= pe
.meta
->data_head
;
395 asm volatile("" : : : "memory"); // smp_rmb()
396 if (data_head
== data_tail
) return;
398 auto const base
= reinterpret_cast<char*>(pe
.meta
) + s_pagesz
;
400 auto const begin
= base
+ data_tail
% buffer_sz();
401 auto const end
= base
+ data_head
% buffer_sz();
406 auto header
= reinterpret_cast<struct perf_event_header
*>(cur
);
408 if (cur
+ header
->size
> base
+ buffer_sz()) {
409 // The current entry wraps around the ring buffer. Copy it into a stack
410 // buffer, and update `cur' to wrap around appropriately.
411 auto const prefix_len
= base
+ buffer_sz() - cur
;
413 ensure_buffer_capacity(pe
, header
->size
);
415 memcpy(pe
.buf
, cur
, prefix_len
);
416 memcpy(pe
.buf
+ prefix_len
, base
, header
->size
- prefix_len
);
417 header
= reinterpret_cast<struct perf_event_header
*>(pe
.buf
);
419 cur
= base
+ header
->size
- prefix_len
;
420 } else if (cur
+ header
->size
== base
+ buffer_sz()) {
421 // Perfect wraparound.
427 if (header
->type
== PERF_RECORD_SAMPLE
) {
428 auto const sample
= reinterpret_cast<perf_event_sample
*>(header
+ 1);
430 assertx(header
->size
== sizeof(struct perf_event_header
) +
431 sizeof(perf_event_sample
) +
432 sample
->nr
* sizeof(*sample
->ips
) +
433 sizeof(perf_event_sample_tail
));
434 assertx((char*)(sample
->tail() + 1) == (char*)header
+ header
->size
);
435 consume(kind
, sample
);
439 __sync_synchronize(); // smp_mb()
440 pe
.meta
->data_tail
= data_head
;
443 ///////////////////////////////////////////////////////////////////////////////
447 ///////////////////////////////////////////////////////////////////////////////
449 perf_event_data_src_info
450 perf_event_data_src(PerfEvent kind
, uint64_t data_src
) {
451 auto info
= perf_event_data_src_info
{};
453 DEBUG_ONLY
auto const mem_op
= data_src
;
455 case PerfEvent::Load
:
456 assertx(mem_op
& PERF_MEM_OP_LOAD
);
458 case PerfEvent::Store
:
459 assertx(mem_op
& PERF_MEM_OP_STORE
);
463 auto const mem_lvl
= data_src
>> PERF_MEM_LVL_SHIFT
;
465 if (mem_lvl
& PERF_MEM_LVL_NA
) {
466 info
.mem_lvl
= "(unknown)";
469 info
.mem_hit
= (mem_lvl
& PERF_MEM_LVL_HIT
) ? 1 :
470 (mem_lvl
& PERF_MEM_LVL_MISS
) ? -1 : 0;
485 auto const mem_lvl_only
= mem_lvl
& (0x0
486 #define X(lvl) | PERF_MEM_LVL_##lvl
491 info
.mem_lvl
= [&]() -> const char* {
492 switch (mem_lvl_only
) {
493 case 0x0: return "(none)";
495 case PERF_MEM_LVL_##lvl: return #lvl;
498 default: return "(mixed)";
505 auto const mem_snoop
= data_src
>> PERF_MEM_SNOOP_SHIFT
;
506 if (mem_snoop
& PERF_MEM_SNOOP_NA
) {
511 info
.snoop_hit
= (mem_snoop
& PERF_MEM_SNOOP_HIT
) ? 1 :
512 (mem_snoop
& PERF_MEM_SNOOP_MISS
) ? -1 : 0;
513 info
.snoop
= (mem_snoop
& PERF_MEM_SNOOP_NONE
) ? -1 : 1;
514 info
.snoop_hitm
= (mem_snoop
& PERF_MEM_SNOOP_HITM
) ? 1 : -1;
517 auto const mem_lock
= data_src
>> PERF_MEM_LOCK_SHIFT
;
518 info
.locked
= (mem_lock
& PERF_MEM_LOCK_NA
) ? 0 :
519 (mem_lock
& PERF_MEM_LOCK_LOCKED
) ? 1 : -1;
521 auto const mem_tlb
= data_src
>> PERF_MEM_TLB_SHIFT
;
523 if (mem_tlb
& PERF_MEM_TLB_NA
) {
524 info
.tlb
= "(unknown)";
527 info
.tlb_hit
= (mem_tlb
& PERF_MEM_TLB_HIT
) ? 1 :
528 (mem_tlb
& PERF_MEM_TLB_MISS
) ? -1 : 0;
536 auto const tlb_only
= mem_tlb
& (0x0
537 #define X(tlb) | PERF_MEM_TLB_##tlb
542 info
.tlb
= [&]() -> const char* {
544 case 0x0: return "(none)";
546 case PERF_MEM_TLB_##tlb: return #tlb;
549 case (PERF_MEM_TLB_L1
| PERF_MEM_TLB_L2
): return "L1-L2";
550 default: return "(mixed)";
558 ///////////////////////////////////////////////////////////////////////////////
560 bool perf_event_enable(uint64_t sample_freq
, perf_event_signal_fn_t signal_fn
) {
561 if (!perf_event_init()) return false;
563 // If `tl_perf_event' has already been initialized, we're done.
564 if (tl_perf_event
.signal
!= nullptr) return true;
566 if (!install_sigio_handler()) return false;
568 auto const ld_pe
= enable_event(s_mem_loads
, sample_freq
);
569 if (!ld_pe
) return false;
571 auto const st_pe
= enable_event(s_mem_stores
, sample_freq
);
577 // Set `tl_perf_event'---and in particular, `signal'---only after everything
578 // is enabled. This will cause us to ignore signals until we're ready to
579 // process the events.
580 tl_perf_event
.loads
= *ld_pe
;
581 tl_perf_event
.stores
= *st_pe
;
582 asm volatile("" : : : "memory");
583 tl_perf_event
.signal
= signal_fn
;
588 void perf_event_pause() {
589 if (tl_perf_event
.signal
== nullptr) return;
590 pause_event(tl_perf_event
.loads
);
591 pause_event(tl_perf_event
.stores
);
594 void perf_event_resume() {
595 if (tl_perf_event
.signal
== nullptr) return;
596 resume_event(tl_perf_event
.loads
);
597 resume_event(tl_perf_event
.stores
);
600 void perf_event_disable() {
601 if (tl_perf_event
.signal
== nullptr) return;
603 close_event(tl_perf_event
.loads
);
604 close_event(tl_perf_event
.stores
);
608 void perf_event_consume(perf_event_consume_fn_t consume
) {
609 if (tl_perf_event
.signal
== nullptr) return;
611 consume_events(PerfEvent::Load
, tl_perf_event
.loads
, consume
);
612 consume_events(PerfEvent::Store
, tl_perf_event
.stores
, consume
);
615 ///////////////////////////////////////////////////////////////////////////////
619 #else // defined(__linux__) && defined(__x86_64__)
623 perf_event_data_src_info
624 perf_event_data_src(PerfEvent kind
, uint64_t data_src
) {
625 return perf_event_data_src_info
{};
628 bool perf_event_enable(uint64_t, perf_event_signal_fn_t
) { return false; }
629 void perf_event_disable() {}
630 void perf_event_pause() {}
631 void perf_event_resume() {}
632 void perf_event_consume(perf_event_consume_fn_t
) {}