2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
17 #include "hphp/util/perf-event.h"
19 #if defined(__linux__) && defined(__x86_64__) && defined(FACEBOOK)
21 #include "hphp/util/assertions.h"
22 #include "hphp/util/logger.h"
23 #include "hphp/util/safe-cast.h"
25 #include <folly/FileUtil.h>
26 #include <folly/Optional.h>
27 #include <folly/String.h>
32 #include <asm/unistd.h>
37 #include <sys/ioctl.h>
39 #include <sys/syscall.h>
40 #include <sys/types.h>
43 // These two files must be included in this relative order, because the latter
44 // transitively includes a local copy of the former unless it detects that the
45 // system version has already been included.
46 #include <linux/perf_event.h>
47 #include <perfmon/pfmlib_perf_event.h>
53 ///////////////////////////////////////////////////////////////////////////////
56 * Process initialization bit and lock.
// Set once process-wide initialization has completed; checked twice
// (double-checked-locking style) in perf_event_init(), with `s_init_lock'
// guarding the slow path.
bool s_did_init = false;
std::mutex s_init_lock;
67 * Microarch-dependent event names for perf's cpu/mem-{loads,stores}/ events,
68 * in a form understood by libpfm4.
70 * We could just encode the `config' for perf_event_attr ourselves, but libpfm4
71 * does other things for us, like set the exclusion bits, and the encoding is
72 * not well-specified in the first place. Instead, it just means we had to
73 * match some bits to names ahead of time.
75 * These may be altered when the module is initialized.
// Defaults are the {Sandy,Ivy}Bridge event names; perf_event_init() may swap
// in the Haswell-and-later store event name after probing sysfs.
// On Haswell and later, this is called "LOAD_LATENCY".
const char* s_mem_loads = "MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD";
// On Haswell and later, "MEM_UOPS_RETIRED:ALL_STORES" is used instead.
const char* s_mem_stores = "MEM_TRANS_RETIRED:PRECISE_STORE";
82 ///////////////////////////////////////////////////////////////////////////////
85 * Metadata for a fully set up perf_event.
struct perf_event_handle {
  perf_event_handle() {}
  perf_event_handle(int fd, struct perf_event_mmap_page* meta)
  // NOTE(review): the converting constructor's init-list/body and the `fd'
  // member declaration appear to be missing from this chunk -- confirm
  // against the full file.

  // File descriptor of the opened perf_event.

  // Metadata header page, followed by the ring buffer for samples.
  struct perf_event_mmap_page* meta{nullptr};

  // Buffer for samples that wrap around.
  // NOTE(review): the `buf'/`buf_sz' members referenced by
  // ensure_buffer_capacity() and consume_events() appear to be missing
  // from this chunk.
106 * Per-thread perf_event metadata.
thread_local struct {
  // Handles for the sampled load and store events on this thread.
  perf_event_handle loads;
  perf_event_handle stores;
  // User callback invoked when a sample arrives; nullptr until
  // perf_event_enable() completes, and doubles as the "enabled" flag
  // checked by the other entry points.
  perf_event_signal_fn_t signal;
} tl_perf_event = {};
115 * Ensure that this module is properly initialized.
 * Returns true if the module has been initialized successfully (by anyone),
 * and false otherwise.
/*
 * One-time process-global setup: cache the page size, pick the right event
 * names for this microarchitecture, and initialize libpfm4.
 *
 * NOTE(review): this chunk appears to be missing lines (error-path returns,
 * closing braces, and the final `s_did_init' update) -- confirm control flow
 * against the full file.
 */
bool perf_event_init() {
  // Fast path: someone already initialized the module.
  if (s_did_init) return true;

  std::lock_guard<std::mutex> l(s_init_lock);
  // Re-check under the lock (double-checked initialization).
  if (s_did_init) return true;

  // The mmap'd ring buffer is sized in pages; see buffer_sz()/mmap_sz().
  s_pagesz = sysconf(_SC_PAGESIZE);

  std::string event_str;
  if (folly::readFile("/sys/devices/cpu/events/mem-stores", event_str)) {
    // If the read fails, we'll stick with the {Sandy,Ivy}Bridge event name.
    // Otherwise, check for the Haswell encoding string.
    //
    // @see: linux/arch/x86/events/intel/core.c.
    if (event_str == "event=0xd0,umask=0x82") {
      s_mem_stores = "MEM_UOPS_RETIRED:ALL_STORES";

    // `event_str' should be "event=0xcd,umask=0x2" on *Bridge, but we don't
    // care since we're using that event name as our default.

  // libpfm4 needs to be initialized exactly once per process lifetime.
  auto const pfmr = pfm_initialize();
  if (pfmr != PFM_SUCCESS) {
    Logger::Warning("perf_event: pfm_initialize failed: %s",
153 * Size of the mmap'd perf_event output ring buffer.
155 * Must be exactly 2^n pages for some `n' (or 1 + 2^n, if we include the
156 * perf_event header page).
158 size_t buffer_sz() { return s_pagesz
* (1 << 5); } // ring buffer only
159 size_t mmap_sz() { return s_pagesz
+ buffer_sz(); } // with header
161 ///////////////////////////////////////////////////////////////////////////////
164 * Register that a perf event was generated.
/*
 * SIGIO handler: translate a perf_event wakeup into a call to this thread's
 * registered sample callback.
 *
 * NOTE(review): the tail of the fd-matching lambda (its fallthrough return)
 * and the null-check of `type' appear to be missing from this chunk --
 * confirm against the full file.
 */
void signal_event(int sig, siginfo_t* info, void* /*context*/) {
  // Only handle SIGIO that carries siginfo.
  if (sig != SIGIO || info == nullptr) return;

  // Older versions of Linux have SIGIO here; newer versions have POLLIN.
  if (info->si_code != SIGIO && info->si_code != POLLIN) return;
  // We only care about read signals.
  if ((info->si_band & POLLERR) || (info->si_band & POLLNVAL)) return;
  if (!(info->si_band & POLLIN)) return;

  // No callback registered on this thread; ignore the signal.
  if (tl_perf_event.signal == nullptr) return;

  // Map the originating fd to the kind of event it samples.
  auto const type = [&]() -> folly::Optional<PerfEvent> {
    if (info->si_fd == tl_perf_event.loads.fd) return PerfEvent::Load;
    if (info->si_fd == tl_perf_event.stores.fd) return PerfEvent::Store;

  tl_perf_event.signal(*type);
188 * Install `signal_event' to notify the user of new perf_event samples.
190 * Returns true if the handler was successfully installed, else false. If a
191 * handler for SIGIO was already installed, this will fail. Otherwise, if we
192 * install `signal_event' successfully, SIGIO will be unconditionally unblocked
193 * for the calling thread.
/*
 * Install `signal_event' as the process's SIGIO handler and unblock SIGIO
 * for the calling thread.
 *
 * NOTE(review): the `return false'/`return true' statements and the
 * `sigset_t' declaration plus sigemptyset() appear to be missing from this
 * chunk -- confirm against the full file.
 */
bool install_sigio_handler() {
  // Query the current SIGIO disposition without changing it.
  struct sigaction old_action;

  if (sigaction(SIGIO, nullptr, &old_action) < 0) {
    Logger::Warning("perf_event: could not install SIGIO handler: %s",
                    folly::errnoStr(errno).c_str());

  // Fail if a competing SIGIO handler is found.
  if (old_action.sa_handler != SIG_DFL &&
      old_action.sa_handler != SIG_IGN &&
      old_action.sa_sigaction != signal_event) {
    Logger::Warning("perf_event: could not install SIGIO handler: "
                    "found existing handler");

  // Install our signal handler for SIGIO.
  struct sigaction action = {};
  action.sa_sigaction = signal_event;
  action.sa_flags = SA_SIGINFO; // deliver via the three-argument sa_sigaction

  if (sigaction(SIGIO, &action, nullptr) < 0) {
    Logger::Warning("perf_event: could not install SIGIO handler: %s",
                    folly::errnoStr(errno).c_str());

  // Ensure that SIGIO is unblocked.
  sigaddset(&sigs, SIGIO);
  if (pthread_sigmask(SIG_UNBLOCK, &sigs, nullptr) < 0) {
    Logger::Warning("perf_event: could not unblock SIGIO: %s",
                    folly::errnoStr(errno).c_str());
237 ///////////////////////////////////////////////////////////////////////////////
240 * Pause or resume an event.
242 void pause_event(const perf_event_handle
& pe
) {
243 ioctl(pe
.fd
, PERF_EVENT_IOC_DISABLE
, 0);
245 void resume_event(const perf_event_handle
& pe
) {
246 ioctl(pe
.fd
, PERF_EVENT_IOC_ENABLE
, 0);
250 * Logically delete all events that are currently buffered for `pe'.
252 void clear_events(const perf_event_handle
& pe
) {
253 auto const data_head
= pe
.meta
->data_head
;
254 __sync_synchronize(); // smp_mb()
255 pe
.meta
->data_tail
= data_head
;
259 * Disable and close a perf event.
261 void close_event(const perf_event_handle
& pe
) {
264 ioctl(pe
.fd
, PERF_EVENT_IOC_DISABLE
, 0);
265 munmap(pe
.meta
, mmap_sz());
270 * Open a file descriptor for perf events with `event_name', mmap it, and set
271 * things up so that the calling thread receives SIGIO signals from it.
273 * Returns the perf_event_handle on success, else folly::none.
/*
 * Open a perf_event fd for `event_name', sampling at `sample_freq', mmap its
 * ring buffer, and arrange for the calling thread to receive SIGIO from it.
 *
 * Returns the perf_event_handle on success, else folly::none.
 *
 * NOTE(review): several lines appear to be missing from this chunk (error
 * returns, `arg.attr' wiring, some `attr' fields and the sample_type
 * terminator, the mmap flags/fd arguments, and the `owner' declaration) --
 * confirm against the full file.
 */
folly::Optional<perf_event_handle> enable_event(const char* event_name,
                                                uint64_t sample_freq) {
  struct perf_event_attr attr = {};
  pfm_perf_encode_arg_t arg = {};
  arg.size = sizeof(arg);

  // Populate the `type', `config', and `exclude_*' members on `attr'.
  auto const pfmr = pfm_get_os_event_encoding(event_name, PFM_PLM3,
                                              PFM_OS_PERF_EVENT, &arg);
  if (pfmr != PFM_SUCCESS) {
    Logger::Warning("perf_event: failed to get encoding for %s: %s",
                    event_name, pfm_strerror(pfmr));

  // Finish setting up `attr' and open the event.
  attr.size = sizeof(attr);
  attr.sample_freq = sample_freq;
  attr.wakeup_events = 1; // notify (SIGIO) after each sample
  attr.precise_ip = 2;    // request zero skid

  attr.sample_type = PERF_SAMPLE_IP
                   | PERF_SAMPLE_CALLCHAIN
                   | PERF_SAMPLE_DATA_SRC

  auto const ret = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
  // Some machines might not have PEBS support (needed for precise_ip > 0),
  // but then PERF_SAMPLE_ADDR will always return zeros instead of the target
  // memory address. Just fail silently in this case.
  Logger::Verbose("perf_event: perf_event_open failed with: %s",
                  folly::errnoStr(errno).c_str());

  auto const fd = safe_cast<int>(ret);

  // Recent versions of Linux have a CLOEXEC flag for perf_event_open(), but
  // use fcntl() for portability. Note that since we do this after we open the
  // event, this could in theory race with an exec() from another thread---but
  // that shouldn't be happening anyway.
  fcntl(fd, F_SETFD, O_CLOEXEC);

  // Make sure that any SIGIO sent from `fd' is handled by the calling thread.
  owner.type = F_OWNER_TID;
  owner.pid = syscall(__NR_gettid);

  // Set up `fd' to send SIGIO with sigaction info.
  if (fcntl(fd, F_SETFL, O_ASYNC) < 0 ||
      fcntl(fd, F_SETSIG, SIGIO) < 0 ||
      fcntl(fd, F_SETOWN_EX, &owner) < 0) {
    Logger::Warning("perf_event: failed to set up asynchronous I/O: %s",
                    folly::errnoStr(errno).c_str());

  // Map the ring buffer for our samples.
  auto const base = mmap(nullptr, mmap_sz(), PROT_READ | PROT_WRITE,
  if (base == MAP_FAILED) {
    Logger::Warning("perf_event: failed to mmap perf_event: %s",
                    folly::errnoStr(errno).c_str());

  auto const meta = reinterpret_cast<struct perf_event_mmap_page*>(base);
  auto const pe = perf_event_handle { fd, meta };

  // Reset the event. This seems to be present in most examples, but it's
  // unclear if it's necessary or just good hygiene. (It's possible that it's
  // necessary on successive opens.)
  if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) < 0) {
    Logger::Warning("perf_event: failed to reset perf_event: %s",
                    folly::errnoStr(errno).c_str());

  // Enable the event. The man page and other examples of usage all suggest
  // that the right thing to do is to start with the event disabled and then
  // enable it manually afterwards, so we do the same here even though it seems
  // strange and circuitous.
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    Logger::Warning("perf_event: failed to enable perf_event: %s",
                    folly::errnoStr(errno).c_str());
376 ///////////////////////////////////////////////////////////////////////////////
379 * Ensure that `pe.buf' can hold at least `cap' bytes.
381 void ensure_buffer_capacity(perf_event_handle
& pe
, size_t cap
) {
382 if (pe
.buf_sz
>= cap
) return;
384 pe
.buf
= reinterpret_cast<char*>(malloc(cap
* 2));
 * Iterate through all the pending sampled events in `pe' and pass each one to
 * the given consumer callback.
/*
 * Drain the ring buffer for `pe', handing every PERF_RECORD_SAMPLE record to
 * `consume', then publish the new consumer index.
 *
 * NOTE(review): the scan loop's header (the `cur' cursor declaration and the
 * loop over [begin, end)) and several closing braces appear to be missing
 * from this chunk -- confirm against the full file.
 */
void consume_events(PerfEvent kind, perf_event_handle& pe,
                    perf_event_consume_fn_t consume) {
  auto const data_tail = pe.meta->data_tail;
  auto const data_head = pe.meta->data_head;

  asm volatile("" : : : "memory"); // smp_rmb()
  // Nothing buffered since we last consumed.
  if (data_head == data_tail) return;

  // The sample area begins one page past the metadata header page.
  auto const base = reinterpret_cast<char*>(pe.meta) + s_pagesz;

  auto const begin = base + data_tail % buffer_sz();
  auto const end = base + data_head % buffer_sz();

    auto header = reinterpret_cast<struct perf_event_header*>(cur);

    if (cur + header->size > base + buffer_sz()) {
      // The current entry wraps around the ring buffer. Copy it into the
      // side buffer `pe.buf', and update `cur' to wrap around appropriately.
      auto const prefix_len = base + buffer_sz() - cur;

      ensure_buffer_capacity(pe, header->size);

      memcpy(pe.buf, cur, prefix_len);
      memcpy(pe.buf + prefix_len, base, header->size - prefix_len);
      header = reinterpret_cast<struct perf_event_header*>(pe.buf);

      cur = base + header->size - prefix_len;
    } else if (cur + header->size == base + buffer_sz()) {
      // Perfect wraparound.

    if (header->type == PERF_RECORD_SAMPLE) {
      auto const sample = reinterpret_cast<perf_event_sample*>(header + 1);

      // A record is exactly: header + fixed sample struct + callchain ips
      // + tail.
      assertx(header->size == sizeof(struct perf_event_header) +
                              sizeof(perf_event_sample) +
                              sample->nr * sizeof(*sample->ips) +
                              sizeof(perf_event_sample_tail));
      assertx((char*)(sample->tail() + 1) == (char*)header + header->size);
      consume(kind, sample);

  __sync_synchronize(); // smp_mb()
  // Publish that everything up to `data_head' has been consumed.
  pe.meta->data_tail = data_head;
444 ///////////////////////////////////////////////////////////////////////////////
448 ///////////////////////////////////////////////////////////////////////////////
/*
 * Decode a PERF_SAMPLE_DATA_SRC word into a perf_event_data_src_info.
 *
 * NOTE(review): this chunk appears to be missing lines (the switch over
 * `kind', the X-macro invocation lists, several early returns, and closing
 * braces) -- confirm against the full file.
 */
perf_event_data_src_info
perf_event_data_src(PerfEvent kind, uint64_t data_src) {
  auto info = perf_event_data_src_info{};

  // Only read by the assertions below.
  DEBUG_ONLY auto const mem_op = data_src;

    case PerfEvent::Load:
      assertx(mem_op & PERF_MEM_OP_LOAD);
    case PerfEvent::Store:
      assertx(mem_op & PERF_MEM_OP_STORE);

  // Memory hierarchy level bits.
  auto const mem_lvl = data_src >> PERF_MEM_LVL_SHIFT;

  if (mem_lvl & PERF_MEM_LVL_NA) {
    info.mem_lvl = "(unknown)";

  // +1 for a hit, -1 for a miss, 0 when neither bit is set.
  info.mem_hit = (mem_lvl & PERF_MEM_LVL_HIT) ? 1 :
                 (mem_lvl & PERF_MEM_LVL_MISS) ? -1 : 0;

  // Mask off everything but the level-identifying bits.
  auto const mem_lvl_only = mem_lvl & (0x0
#define X(lvl) | PERF_MEM_LVL_##lvl

  // Name the level when exactly one bit matches; "(mixed)" otherwise.
  info.mem_lvl = [&]() -> const char* {
    switch (mem_lvl_only) {
      case 0x0: return "(none)";
      case PERF_MEM_LVL_##lvl: return #lvl;
      default: return "(mixed)";

  // Snoop result bits.
  auto const mem_snoop = data_src >> PERF_MEM_SNOOP_SHIFT;

  if (mem_snoop & PERF_MEM_SNOOP_NA) {

  info.snoop_hit = (mem_snoop & PERF_MEM_SNOOP_HIT) ? 1 :
                   (mem_snoop & PERF_MEM_SNOOP_MISS) ? -1 : 0;
  info.snoop = (mem_snoop & PERF_MEM_SNOOP_NONE) ? -1 : 1;
  info.snoop_hitm = (mem_snoop & PERF_MEM_SNOOP_HITM) ? 1 : -1;

  // Lock bits: 0 when unavailable, else +1 locked / -1 not locked.
  auto const mem_lock = data_src >> PERF_MEM_LOCK_SHIFT;
  info.locked = (mem_lock & PERF_MEM_LOCK_NA) ? 0 :
                (mem_lock & PERF_MEM_LOCK_LOCKED) ? 1 : -1;

  // TLB bits, decoded analogously to the memory-level bits above.
  auto const mem_tlb = data_src >> PERF_MEM_TLB_SHIFT;

  if (mem_tlb & PERF_MEM_TLB_NA) {
    info.tlb = "(unknown)";

  info.tlb_hit = (mem_tlb & PERF_MEM_TLB_HIT) ? 1 :
                 (mem_tlb & PERF_MEM_TLB_MISS) ? -1 : 0;

  auto const tlb_only = mem_tlb & (0x0
#define X(tlb) | PERF_MEM_TLB_##tlb

  info.tlb = [&]() -> const char* {
      case 0x0: return "(none)";
      case PERF_MEM_TLB_##tlb: return #tlb;
      // L1|L2 occurs together in practice; give it a stable combined name.
      case (PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2): return "L1-L2";
      default: return "(mixed)";
559 ///////////////////////////////////////////////////////////////////////////////
/*
 * Enable load and store sampling on the calling thread at `sample_freq',
 * delivering sample notifications through `signal_fn'.
 *
 * NOTE(review): the failure handling for `st_pe' and the final return appear
 * to be missing from this chunk -- confirm against the full file.
 */
bool perf_event_enable(uint64_t sample_freq, perf_event_signal_fn_t signal_fn) {
  if (!perf_event_init()) return false;

  // If `tl_perf_event' has already been initialized, we're done.
  if (tl_perf_event.signal != nullptr) return true;

  if (!install_sigio_handler()) return false;

  auto const ld_pe = enable_event(s_mem_loads, sample_freq);
  if (!ld_pe) return false;

  auto const st_pe = enable_event(s_mem_stores, sample_freq);

  // Set `tl_perf_event'---and in particular, `signal'---only after everything
  // is enabled. This will cause us to ignore signals until we're ready to
  // process the events.
  tl_perf_event.loads = *ld_pe;
  tl_perf_event.stores = *st_pe;
  // Compiler barrier: order the handle stores before publishing `signal'.
  asm volatile("" : : : "memory");
  tl_perf_event.signal = signal_fn;
589 void perf_event_pause() {
590 if (tl_perf_event
.signal
== nullptr) return;
591 pause_event(tl_perf_event
.loads
);
592 pause_event(tl_perf_event
.stores
);
595 void perf_event_resume() {
596 if (tl_perf_event
.signal
== nullptr) return;
597 resume_event(tl_perf_event
.loads
);
598 resume_event(tl_perf_event
.stores
);
/*
 * Tear down this thread's perf events and stop sampling.
 *
 * NOTE(review): a final reset of `tl_perf_event' (clearing `signal') appears
 * to be missing from this chunk -- confirm against the full file.
 */
void perf_event_disable() {
  // Nothing to do if sampling was never enabled on this thread.
  if (tl_perf_event.signal == nullptr) return;

  close_event(tl_perf_event.loads);
  close_event(tl_perf_event.stores);
609 void perf_event_consume(perf_event_consume_fn_t consume
) {
610 if (tl_perf_event
.signal
== nullptr) return;
612 consume_events(PerfEvent::Load
, tl_perf_event
.loads
, consume
);
613 consume_events(PerfEvent::Store
, tl_perf_event
.stores
, consume
);
616 ///////////////////////////////////////////////////////////////////////////////
620 #else // defined(__linux__) && defined(__x86_64__)
624 perf_event_data_src_info
625 perf_event_data_src(PerfEvent kind
, uint64_t data_src
) {
626 return perf_event_data_src_info
{};
629 bool perf_event_enable(uint64_t, perf_event_signal_fn_t
) { return false; }
// perf-event support is compiled out on this platform: the remaining entry
// points are no-ops.
void perf_event_disable() {}
void perf_event_pause() {}
void perf_event_resume() {}
void perf_event_consume(perf_event_consume_fn_t) {}