mm, compaction: always skip all compound pages by order in migrate scanner
[linux-2.6/btrfs-unstable.git] / tools / perf / builtin-trace.c
blob4e3abba03062f3e84cfbdea5ca2c07adc641b35b
/*
 * builtin-trace.c
 *
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system wide, etc.  Default format is loosely strace like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include "builtin.h"
21 #include "util/color.h"
22 #include "util/debug.h"
23 #include "util/evlist.h"
24 #include "util/exec_cmd.h"
25 #include "util/machine.h"
26 #include "util/session.h"
27 #include "util/thread.h"
28 #include "util/parse-options.h"
29 #include "util/strlist.h"
30 #include "util/intlist.h"
31 #include "util/thread_map.h"
32 #include "util/stat.h"
33 #include "trace-event.h"
34 #include "util/parse-events.h"
36 #include <libaudit.h>
37 #include <stdlib.h>
38 #include <sys/mman.h>
39 #include <linux/futex.h>
/*
 * For older distros: fallback values for constants the system headers may
 * not provide.  Each one is defined only when it is not already defined.
 */
#ifndef MAP_STACK
# define MAP_STACK		0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON		100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE		12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE	13
#endif

#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE		1
#endif

#ifndef EFD_NONBLOCK
# define EFD_NONBLOCK		00004000
#endif

#ifndef EFD_CLOEXEC
# define EFD_CLOEXEC		02000000
#endif

#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

#ifndef SOCK_DCCP
# define SOCK_DCCP		6
#endif

#ifndef SOCK_CLOEXEC
# define SOCK_CLOEXEC		02000000
#endif

#ifndef SOCK_NONBLOCK
# define SOCK_NONBLOCK		00004000
#endif

#ifndef MSG_CMSG_CLOEXEC
# define MSG_CMSG_CLOEXEC	0x40000000
#endif

#ifndef PERF_FLAG_FD_NO_GROUP
# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
#endif

#ifndef PERF_FLAG_FD_OUTPUT
# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
#endif

#ifndef PERF_FLAG_PID_CGROUP
# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
#endif
108 struct tp_field {
109 int offset;
110 union {
111 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
112 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
116 #define TP_UINT_FIELD(bits) \
117 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
119 u##bits value; \
120 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
121 return value; \
124 TP_UINT_FIELD(8);
125 TP_UINT_FIELD(16);
126 TP_UINT_FIELD(32);
127 TP_UINT_FIELD(64);
129 #define TP_UINT_FIELD__SWAPPED(bits) \
130 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
132 u##bits value; \
133 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
134 return bswap_##bits(value);\
137 TP_UINT_FIELD__SWAPPED(16);
138 TP_UINT_FIELD__SWAPPED(32);
139 TP_UINT_FIELD__SWAPPED(64);
141 static int tp_field__init_uint(struct tp_field *field,
142 struct format_field *format_field,
143 bool needs_swap)
145 field->offset = format_field->offset;
147 switch (format_field->size) {
148 case 1:
149 field->integer = tp_field__u8;
150 break;
151 case 2:
152 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
153 break;
154 case 4:
155 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
156 break;
157 case 8:
158 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
159 break;
160 default:
161 return -1;
164 return 0;
167 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
169 return sample->raw_data + field->offset;
172 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
174 field->offset = format_field->offset;
175 field->pointer = tp_field__ptr;
176 return 0;
179 struct syscall_tp {
180 struct tp_field id;
181 union {
182 struct tp_field args, ret;
186 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
187 struct tp_field *field,
188 const char *name)
190 struct format_field *format_field = perf_evsel__field(evsel, name);
192 if (format_field == NULL)
193 return -1;
195 return tp_field__init_uint(field, format_field, evsel->needs_swap);
198 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
199 ({ struct syscall_tp *sc = evsel->priv;\
200 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/*
 * Look up the tracepoint format field called @name on @evsel and set up
 * @field to return a pointer to it.
 *
 * Returns -1 when the evsel has no such field, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *format_field = perf_evsel__field(evsel, name);

	if (format_field == NULL)
		return -1;

	return tp_field__init_ptr(field, format_field);
}

/*
 * Convenience wrapper: @name is both the member of the struct syscall_tp
 * stored in evsel->priv and, stringified, the tracepoint field name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
218 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
220 zfree(&evsel->priv);
221 perf_evsel__delete(evsel);
224 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
226 evsel->priv = malloc(sizeof(struct syscall_tp));
227 if (evsel->priv != NULL) {
228 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
229 goto out_delete;
231 evsel->handler = handler;
232 return 0;
235 return -ENOMEM;
237 out_delete:
238 zfree(&evsel->priv);
239 return -ENOENT;
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint, falling back
 * to syscalls:@direction, and initialise it with
 * perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL when neither tracepoint could be created
 * or the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel) {
		if (perf_evsel__init_syscall_tp(evsel, handler))
			goto out_delete;
	}

	return evsel;

out_delete:
	perf_evsel__delete_priv(evsel);
	return NULL;
}
/*
 * Read member @name of the struct syscall_tp stored in evsel->priv from
 * @sample, through that member's integer reader.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same as above, but through the member's pointer reader. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
270 struct syscall_arg {
271 unsigned long val;
272 struct thread *thread;
273 struct trace *trace;
274 void *parm;
275 u8 idx;
276 u8 mask;
/*
 * Table mapping small integers to names: value V maps to
 * entries[V - offset] when that index is within [0, nr_entries).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char **entries;
};
/*
 * Define strarray__<array> describing the string table @array, with
 * .offset left at zero.
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/* Same, for tables whose first entry corresponds to the value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
296 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
300 struct strarray *sa = arg->parm;
301 int idx = arg->val - sa->offset;
303 if (idx < 0 || idx >= sa->nr_entries)
304 return scnprintf(bf, size, intfmt, arg->val);
306 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* strarray formatter that prints values without a name as "%d". */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * 	  gets rewritten to support all arches.
 */
/* strarray formatter that prints values without a name as "%#x". */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
331 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
332 struct syscall_arg *arg);
334 #define SCA_FD syscall_arg__scnprintf_fd
336 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
337 struct syscall_arg *arg)
339 int fd = arg->val;
341 if (fd == AT_FDCWD)
342 return scnprintf(bf, size, "CWD");
344 return syscall_arg__scnprintf_fd(bf, size, arg);
347 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* Defined elsewhere in this file. */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
354 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
355 struct syscall_arg *arg)
357 return scnprintf(bf, size, "%#lx", arg->val);
360 #define SCA_HEX syscall_arg__scnprintf_hex
362 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
363 struct syscall_arg *arg)
365 return scnprintf(bf, size, "%d", arg->val);
368 #define SCA_INT syscall_arg__scnprintf_int
370 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
371 struct syscall_arg *arg)
373 int printed = 0, prot = arg->val;
375 if (prot == PROT_NONE)
376 return scnprintf(bf, size, "NONE");
377 #define P_MMAP_PROT(n) \
378 if (prot & PROT_##n) { \
379 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
380 prot &= ~PROT_##n; \
383 P_MMAP_PROT(EXEC);
384 P_MMAP_PROT(READ);
385 P_MMAP_PROT(WRITE);
386 #ifdef PROT_SEM
387 P_MMAP_PROT(SEM);
388 #endif
389 P_MMAP_PROT(GROWSDOWN);
390 P_MMAP_PROT(GROWSUP);
391 #undef P_MMAP_PROT
393 if (prot)
394 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
396 return printed;
399 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
401 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
402 struct syscall_arg *arg)
404 int printed = 0, flags = arg->val;
406 #define P_MMAP_FLAG(n) \
407 if (flags & MAP_##n) { \
408 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
409 flags &= ~MAP_##n; \
412 P_MMAP_FLAG(SHARED);
413 P_MMAP_FLAG(PRIVATE);
414 #ifdef MAP_32BIT
415 P_MMAP_FLAG(32BIT);
416 #endif
417 P_MMAP_FLAG(ANONYMOUS);
418 P_MMAP_FLAG(DENYWRITE);
419 P_MMAP_FLAG(EXECUTABLE);
420 P_MMAP_FLAG(FILE);
421 P_MMAP_FLAG(FIXED);
422 P_MMAP_FLAG(GROWSDOWN);
423 #ifdef MAP_HUGETLB
424 P_MMAP_FLAG(HUGETLB);
425 #endif
426 P_MMAP_FLAG(LOCKED);
427 P_MMAP_FLAG(NONBLOCK);
428 P_MMAP_FLAG(NORESERVE);
429 P_MMAP_FLAG(POPULATE);
430 P_MMAP_FLAG(STACK);
431 #ifdef MAP_UNINITIALIZED
432 P_MMAP_FLAG(UNINITIALIZED);
433 #endif
434 #undef P_MMAP_FLAG
436 if (flags)
437 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
439 return printed;
442 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
444 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
445 struct syscall_arg *arg)
447 int printed = 0, flags = arg->val;
449 #define P_MREMAP_FLAG(n) \
450 if (flags & MREMAP_##n) { \
451 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
452 flags &= ~MREMAP_##n; \
455 P_MREMAP_FLAG(MAYMOVE);
456 #ifdef MREMAP_FIXED
457 P_MREMAP_FLAG(FIXED);
458 #endif
459 #undef P_MREMAP_FLAG
461 if (flags)
462 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
464 return printed;
467 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
469 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
470 struct syscall_arg *arg)
472 int behavior = arg->val;
474 switch (behavior) {
475 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
476 P_MADV_BHV(NORMAL);
477 P_MADV_BHV(RANDOM);
478 P_MADV_BHV(SEQUENTIAL);
479 P_MADV_BHV(WILLNEED);
480 P_MADV_BHV(DONTNEED);
481 P_MADV_BHV(REMOVE);
482 P_MADV_BHV(DONTFORK);
483 P_MADV_BHV(DOFORK);
484 P_MADV_BHV(HWPOISON);
485 #ifdef MADV_SOFT_OFFLINE
486 P_MADV_BHV(SOFT_OFFLINE);
487 #endif
488 P_MADV_BHV(MERGEABLE);
489 P_MADV_BHV(UNMERGEABLE);
490 #ifdef MADV_HUGEPAGE
491 P_MADV_BHV(HUGEPAGE);
492 #endif
493 #ifdef MADV_NOHUGEPAGE
494 P_MADV_BHV(NOHUGEPAGE);
495 #endif
496 #ifdef MADV_DONTDUMP
497 P_MADV_BHV(DONTDUMP);
498 #endif
499 #ifdef MADV_DODUMP
500 P_MADV_BHV(DODUMP);
501 #endif
502 #undef P_MADV_PHV
503 default: break;
506 return scnprintf(bf, size, "%#x", behavior);
509 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
511 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
512 struct syscall_arg *arg)
514 int printed = 0, op = arg->val;
516 if (op == 0)
517 return scnprintf(bf, size, "NONE");
518 #define P_CMD(cmd) \
519 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
520 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
521 op &= ~LOCK_##cmd; \
524 P_CMD(SH);
525 P_CMD(EX);
526 P_CMD(NB);
527 P_CMD(UN);
528 P_CMD(MAND);
529 P_CMD(RW);
530 P_CMD(READ);
531 P_CMD(WRITE);
532 #undef P_OP
534 if (op)
535 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
537 return printed;
540 #define SCA_FLOCK syscall_arg__scnprintf_flock
542 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
544 enum syscall_futex_args {
545 SCF_UADDR = (1 << 0),
546 SCF_OP = (1 << 1),
547 SCF_VAL = (1 << 2),
548 SCF_TIMEOUT = (1 << 3),
549 SCF_UADDR2 = (1 << 4),
550 SCF_VAL3 = (1 << 5),
552 int op = arg->val;
553 int cmd = op & FUTEX_CMD_MASK;
554 size_t printed = 0;
556 switch (cmd) {
557 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
558 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
559 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
560 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
561 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
562 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
563 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
564 P_FUTEX_OP(WAKE_OP); break;
565 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
566 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
567 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
568 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
569 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
570 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
571 default: printed = scnprintf(bf, size, "%#x", cmd); break;
574 if (op & FUTEX_PRIVATE_FLAG)
575 printed += scnprintf(bf + printed, size - printed, "|PRIV");
577 if (op & FUTEX_CLOCK_REALTIME)
578 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
580 return printed;
583 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
585 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
586 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
588 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
589 static DEFINE_STRARRAY(itimers);
591 static const char *keyctl_options[] = {
592 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
593 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
594 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
595 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
596 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
598 static DEFINE_STRARRAY(keyctl_options);
600 static const char *whences[] = { "SET", "CUR", "END",
601 #ifdef SEEK_DATA
602 "DATA",
603 #endif
604 #ifdef SEEK_HOLE
605 "HOLE",
606 #endif
608 static DEFINE_STRARRAY(whences);
610 static const char *fcntl_cmds[] = {
611 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
612 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
613 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
614 "F_GETOWNER_UIDS",
616 static DEFINE_STRARRAY(fcntl_cmds);
618 static const char *rlimit_resources[] = {
619 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
620 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
621 "RTTIME",
623 static DEFINE_STRARRAY(rlimit_resources);
625 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
626 static DEFINE_STRARRAY(sighow);
628 static const char *clockid[] = {
629 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
630 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
631 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
633 static DEFINE_STRARRAY(clockid);
635 static const char *socket_families[] = {
636 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
637 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
638 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
639 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
640 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
641 "ALG", "NFC", "VSOCK",
643 static DEFINE_STRARRAY(socket_families);
645 #ifndef SOCK_TYPE_MASK
646 #define SOCK_TYPE_MASK 0xf
647 #endif
649 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
650 struct syscall_arg *arg)
652 size_t printed;
653 int type = arg->val,
654 flags = type & ~SOCK_TYPE_MASK;
656 type &= SOCK_TYPE_MASK;
658 * Can't use a strarray, MIPS may override for ABI reasons.
660 switch (type) {
661 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
662 P_SK_TYPE(STREAM);
663 P_SK_TYPE(DGRAM);
664 P_SK_TYPE(RAW);
665 P_SK_TYPE(RDM);
666 P_SK_TYPE(SEQPACKET);
667 P_SK_TYPE(DCCP);
668 P_SK_TYPE(PACKET);
669 #undef P_SK_TYPE
670 default:
671 printed = scnprintf(bf, size, "%#x", type);
674 #define P_SK_FLAG(n) \
675 if (flags & SOCK_##n) { \
676 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
677 flags &= ~SOCK_##n; \
680 P_SK_FLAG(CLOEXEC);
681 P_SK_FLAG(NONBLOCK);
682 #undef P_SK_FLAG
684 if (flags)
685 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
687 return printed;
690 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
692 #ifndef MSG_PROBE
693 #define MSG_PROBE 0x10
694 #endif
695 #ifndef MSG_WAITFORONE
696 #define MSG_WAITFORONE 0x10000
697 #endif
698 #ifndef MSG_SENDPAGE_NOTLAST
699 #define MSG_SENDPAGE_NOTLAST 0x20000
700 #endif
701 #ifndef MSG_FASTOPEN
702 #define MSG_FASTOPEN 0x20000000
703 #endif
705 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
706 struct syscall_arg *arg)
708 int printed = 0, flags = arg->val;
710 if (flags == 0)
711 return scnprintf(bf, size, "NONE");
712 #define P_MSG_FLAG(n) \
713 if (flags & MSG_##n) { \
714 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
715 flags &= ~MSG_##n; \
718 P_MSG_FLAG(OOB);
719 P_MSG_FLAG(PEEK);
720 P_MSG_FLAG(DONTROUTE);
721 P_MSG_FLAG(TRYHARD);
722 P_MSG_FLAG(CTRUNC);
723 P_MSG_FLAG(PROBE);
724 P_MSG_FLAG(TRUNC);
725 P_MSG_FLAG(DONTWAIT);
726 P_MSG_FLAG(EOR);
727 P_MSG_FLAG(WAITALL);
728 P_MSG_FLAG(FIN);
729 P_MSG_FLAG(SYN);
730 P_MSG_FLAG(CONFIRM);
731 P_MSG_FLAG(RST);
732 P_MSG_FLAG(ERRQUEUE);
733 P_MSG_FLAG(NOSIGNAL);
734 P_MSG_FLAG(MORE);
735 P_MSG_FLAG(WAITFORONE);
736 P_MSG_FLAG(SENDPAGE_NOTLAST);
737 P_MSG_FLAG(FASTOPEN);
738 P_MSG_FLAG(CMSG_CLOEXEC);
739 #undef P_MSG_FLAG
741 if (flags)
742 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
744 return printed;
747 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
749 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
750 struct syscall_arg *arg)
752 size_t printed = 0;
753 int mode = arg->val;
755 if (mode == F_OK) /* 0 */
756 return scnprintf(bf, size, "F");
757 #define P_MODE(n) \
758 if (mode & n##_OK) { \
759 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
760 mode &= ~n##_OK; \
763 P_MODE(R);
764 P_MODE(W);
765 P_MODE(X);
766 #undef P_MODE
768 if (mode)
769 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
771 return printed;
774 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/* Defined elsewhere in this file. */
static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_FILENAME syscall_arg__scnprintf_filename
781 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
782 struct syscall_arg *arg)
784 int printed = 0, flags = arg->val;
786 if (!(flags & O_CREAT))
787 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
789 if (flags == 0)
790 return scnprintf(bf, size, "RDONLY");
791 #define P_FLAG(n) \
792 if (flags & O_##n) { \
793 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
794 flags &= ~O_##n; \
797 P_FLAG(APPEND);
798 P_FLAG(ASYNC);
799 P_FLAG(CLOEXEC);
800 P_FLAG(CREAT);
801 P_FLAG(DIRECT);
802 P_FLAG(DIRECTORY);
803 P_FLAG(EXCL);
804 P_FLAG(LARGEFILE);
805 P_FLAG(NOATIME);
806 P_FLAG(NOCTTY);
807 #ifdef O_NONBLOCK
808 P_FLAG(NONBLOCK);
809 #elif O_NDELAY
810 P_FLAG(NDELAY);
811 #endif
812 #ifdef O_PATH
813 P_FLAG(PATH);
814 #endif
815 P_FLAG(RDWR);
816 #ifdef O_DSYNC
817 if ((flags & O_SYNC) == O_SYNC)
818 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
819 else {
820 P_FLAG(DSYNC);
822 #else
823 P_FLAG(SYNC);
824 #endif
825 P_FLAG(TRUNC);
826 P_FLAG(WRONLY);
827 #undef P_FLAG
829 if (flags)
830 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
832 return printed;
835 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
837 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
838 struct syscall_arg *arg)
840 int printed = 0, flags = arg->val;
842 if (flags == 0)
843 return 0;
845 #define P_FLAG(n) \
846 if (flags & PERF_FLAG_##n) { \
847 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
848 flags &= ~PERF_FLAG_##n; \
851 P_FLAG(FD_NO_GROUP);
852 P_FLAG(FD_OUTPUT);
853 P_FLAG(PID_CGROUP);
854 P_FLAG(FD_CLOEXEC);
855 #undef P_FLAG
857 if (flags)
858 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
860 return printed;
863 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
865 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
866 struct syscall_arg *arg)
868 int printed = 0, flags = arg->val;
870 if (flags == 0)
871 return scnprintf(bf, size, "NONE");
872 #define P_FLAG(n) \
873 if (flags & EFD_##n) { \
874 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
875 flags &= ~EFD_##n; \
878 P_FLAG(SEMAPHORE);
879 P_FLAG(CLOEXEC);
880 P_FLAG(NONBLOCK);
881 #undef P_FLAG
883 if (flags)
884 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
886 return printed;
889 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
891 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
892 struct syscall_arg *arg)
894 int printed = 0, flags = arg->val;
896 #define P_FLAG(n) \
897 if (flags & O_##n) { \
898 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
899 flags &= ~O_##n; \
902 P_FLAG(CLOEXEC);
903 P_FLAG(NONBLOCK);
904 #undef P_FLAG
906 if (flags)
907 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
909 return printed;
912 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
914 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
916 int sig = arg->val;
918 switch (sig) {
919 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
920 P_SIGNUM(HUP);
921 P_SIGNUM(INT);
922 P_SIGNUM(QUIT);
923 P_SIGNUM(ILL);
924 P_SIGNUM(TRAP);
925 P_SIGNUM(ABRT);
926 P_SIGNUM(BUS);
927 P_SIGNUM(FPE);
928 P_SIGNUM(KILL);
929 P_SIGNUM(USR1);
930 P_SIGNUM(SEGV);
931 P_SIGNUM(USR2);
932 P_SIGNUM(PIPE);
933 P_SIGNUM(ALRM);
934 P_SIGNUM(TERM);
935 P_SIGNUM(CHLD);
936 P_SIGNUM(CONT);
937 P_SIGNUM(STOP);
938 P_SIGNUM(TSTP);
939 P_SIGNUM(TTIN);
940 P_SIGNUM(TTOU);
941 P_SIGNUM(URG);
942 P_SIGNUM(XCPU);
943 P_SIGNUM(XFSZ);
944 P_SIGNUM(VTALRM);
945 P_SIGNUM(PROF);
946 P_SIGNUM(WINCH);
947 P_SIGNUM(IO);
948 P_SIGNUM(PWR);
949 P_SIGNUM(SYS);
950 #ifdef SIGEMT
951 P_SIGNUM(EMT);
952 #endif
953 #ifdef SIGSTKFLT
954 P_SIGNUM(STKFLT);
955 #endif
956 #ifdef SIGSWI
957 P_SIGNUM(SWI);
958 #endif
959 default: break;
962 return scnprintf(bf, size, "%#x", sig);
965 #define SCA_SIGNUM syscall_arg__scnprintf_signum
967 #if defined(__i386__) || defined(__x86_64__)
969 * FIXME: Make this available to all arches.
971 #define TCGETS 0x5401
973 static const char *tioctls[] = {
974 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
975 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
976 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
977 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
978 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
979 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
980 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
981 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
982 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
983 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
984 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
985 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
986 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
987 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
988 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
991 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
992 #endif /* defined(__i386__) || defined(__x86_64__) */
/*
 * Initializer fragment for a struct syscall_fmt entry: format argument
 * number @arg with SCA_STRARRAY using the table strarray__<array>.
 * @name is not used by the expansion; it only labels the argument at the
 * use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
998 static struct syscall_fmt {
999 const char *name;
1000 const char *alias;
1001 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1002 void *arg_parm[6];
1003 bool errmsg;
1004 bool timeout;
1005 bool hexret;
1006 } syscall_fmts[] = {
1007 { .name = "access", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1009 [1] = SCA_ACCMODE, /* mode */ }, },
1010 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
1011 { .name = "brk", .hexret = true,
1012 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
1013 { .name = "chdir", .errmsg = true,
1014 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1015 { .name = "chmod", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1017 { .name = "chroot", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1019 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
1020 { .name = "close", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
1022 { .name = "connect", .errmsg = true, },
1023 { .name = "creat", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1025 { .name = "dup", .errmsg = true,
1026 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1027 { .name = "dup2", .errmsg = true,
1028 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1029 { .name = "dup3", .errmsg = true,
1030 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1031 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
1032 { .name = "eventfd2", .errmsg = true,
1033 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
1034 { .name = "faccessat", .errmsg = true,
1035 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1036 [1] = SCA_FILENAME, /* filename */ }, },
1037 { .name = "fadvise64", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "fallocate", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1041 { .name = "fchdir", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1043 { .name = "fchmod", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1045 { .name = "fchmodat", .errmsg = true,
1046 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1047 [1] = SCA_FILENAME, /* filename */ }, },
1048 { .name = "fchown", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1050 { .name = "fchownat", .errmsg = true,
1051 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1052 [1] = SCA_FILENAME, /* filename */ }, },
1053 { .name = "fcntl", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1055 [1] = SCA_STRARRAY, /* cmd */ },
1056 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
1057 { .name = "fdatasync", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 { .name = "flock", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1061 [1] = SCA_FLOCK, /* cmd */ }, },
1062 { .name = "fsetxattr", .errmsg = true,
1063 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064 { .name = "fstat", .errmsg = true, .alias = "newfstat",
1065 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
1067 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1068 [1] = SCA_FILENAME, /* filename */ }, },
1069 { .name = "fstatfs", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1071 { .name = "fsync", .errmsg = true,
1072 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1073 { .name = "ftruncate", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1075 { .name = "futex", .errmsg = true,
1076 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
1077 { .name = "futimesat", .errmsg = true,
1078 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1079 [1] = SCA_FILENAME, /* filename */ }, },
1080 { .name = "getdents", .errmsg = true,
1081 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1082 { .name = "getdents64", .errmsg = true,
1083 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1084 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1085 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1086 { .name = "getxattr", .errmsg = true,
1087 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1088 { .name = "inotify_add_watch", .errmsg = true,
1089 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1090 { .name = "ioctl", .errmsg = true,
1091 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1092 #if defined(__i386__) || defined(__x86_64__)
1094 * FIXME: Make this available to all arches.
1096 [1] = SCA_STRHEXARRAY, /* cmd */
1097 [2] = SCA_HEX, /* arg */ },
1098 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1099 #else
1100 [2] = SCA_HEX, /* arg */ }, },
1101 #endif
1102 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1103 { .name = "kill", .errmsg = true,
1104 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1105 { .name = "lchown", .errmsg = true,
1106 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1107 { .name = "lgetxattr", .errmsg = true,
1108 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1109 { .name = "linkat", .errmsg = true,
1110 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1111 { .name = "listxattr", .errmsg = true,
1112 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1113 { .name = "llistxattr", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1115 { .name = "lremovexattr", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1117 { .name = "lseek", .errmsg = true,
1118 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1119 [2] = SCA_STRARRAY, /* whence */ },
1120 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1121 { .name = "lsetxattr", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1123 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1124 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1125 { .name = "lsxattr", .errmsg = true,
1126 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1127 { .name = "madvise", .errmsg = true,
1128 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1129 [2] = SCA_MADV_BHV, /* behavior */ }, },
1130 { .name = "mkdir", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1132 { .name = "mkdirat", .errmsg = true,
1133 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1134 [1] = SCA_FILENAME, /* pathname */ }, },
1135 { .name = "mknod", .errmsg = true,
1136 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1137 { .name = "mknodat", .errmsg = true,
1138 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1139 [1] = SCA_FILENAME, /* filename */ }, },
1140 { .name = "mlock", .errmsg = true,
1141 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1142 { .name = "mlockall", .errmsg = true,
1143 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1144 { .name = "mmap", .hexret = true,
1145 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1146 [2] = SCA_MMAP_PROT, /* prot */
1147 [3] = SCA_MMAP_FLAGS, /* flags */
1148 [4] = SCA_FD, /* fd */ }, },
1149 { .name = "mprotect", .errmsg = true,
1150 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1151 [2] = SCA_MMAP_PROT, /* prot */ }, },
1152 { .name = "mq_unlink", .errmsg = true,
1153 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1154 { .name = "mremap", .hexret = true,
1155 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1156 [3] = SCA_MREMAP_FLAGS, /* flags */
1157 [4] = SCA_HEX, /* new_addr */ }, },
1158 { .name = "munlock", .errmsg = true,
1159 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1160 { .name = "munmap", .errmsg = true,
1161 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1162 { .name = "name_to_handle_at", .errmsg = true,
1163 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1164 { .name = "newfstatat", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1166 [1] = SCA_FILENAME, /* filename */ }, },
1167 { .name = "open", .errmsg = true,
1168 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1169 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1170 { .name = "open_by_handle_at", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1172 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1173 { .name = "openat", .errmsg = true,
1174 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1175 [1] = SCA_FILENAME, /* filename */
1176 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1177 { .name = "perf_event_open", .errmsg = true,
1178 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1179 [2] = SCA_INT, /* cpu */
1180 [3] = SCA_FD, /* group_fd */
1181 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1182 { .name = "pipe2", .errmsg = true,
1183 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1184 { .name = "poll", .errmsg = true, .timeout = true, },
1185 { .name = "ppoll", .errmsg = true, .timeout = true, },
1186 { .name = "pread", .errmsg = true, .alias = "pread64",
1187 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1188 { .name = "preadv", .errmsg = true, .alias = "pread",
1189 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1190 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1191 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1192 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1193 { .name = "pwritev", .errmsg = true,
1194 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1195 { .name = "read", .errmsg = true,
1196 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1197 { .name = "readlink", .errmsg = true,
1198 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1199 { .name = "readlinkat", .errmsg = true,
1200 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1201 [1] = SCA_FILENAME, /* pathname */ }, },
1202 { .name = "readv", .errmsg = true,
1203 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1204 { .name = "recvfrom", .errmsg = true,
1205 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1206 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1207 { .name = "recvmmsg", .errmsg = true,
1208 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1209 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1210 { .name = "recvmsg", .errmsg = true,
1211 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1212 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1213 { .name = "removexattr", .errmsg = true,
1214 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1215 { .name = "renameat", .errmsg = true,
1216 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1217 { .name = "rmdir", .errmsg = true,
1218 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1219 { .name = "rt_sigaction", .errmsg = true,
1220 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1221 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1222 { .name = "rt_sigqueueinfo", .errmsg = true,
1223 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1224 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1225 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1226 { .name = "select", .errmsg = true, .timeout = true, },
1227 { .name = "sendmmsg", .errmsg = true,
1228 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1229 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1230 { .name = "sendmsg", .errmsg = true,
1231 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1232 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1233 { .name = "sendto", .errmsg = true,
1234 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1235 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1236 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1237 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1238 { .name = "setxattr", .errmsg = true,
1239 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1240 { .name = "shutdown", .errmsg = true,
1241 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1242 { .name = "socket", .errmsg = true,
1243 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1244 [1] = SCA_SK_TYPE, /* type */ },
1245 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1246 { .name = "socketpair", .errmsg = true,
1247 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1248 [1] = SCA_SK_TYPE, /* type */ },
1249 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1250 { .name = "stat", .errmsg = true, .alias = "newstat",
1251 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1252 { .name = "statfs", .errmsg = true,
1253 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1254 { .name = "swapoff", .errmsg = true,
1255 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1256 { .name = "swapon", .errmsg = true,
1257 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1258 { .name = "symlinkat", .errmsg = true,
1259 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1260 { .name = "tgkill", .errmsg = true,
1261 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1262 { .name = "tkill", .errmsg = true,
1263 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1264 { .name = "truncate", .errmsg = true,
1265 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1266 { .name = "uname", .errmsg = true, .alias = "newuname", },
1267 { .name = "unlinkat", .errmsg = true,
1268 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1269 [1] = SCA_FILENAME, /* pathname */ }, },
1270 { .name = "utime", .errmsg = true,
1271 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1272 { .name = "utimensat", .errmsg = true,
1273 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1274 [1] = SCA_FILENAME, /* filename */ }, },
1275 { .name = "utimes", .errmsg = true,
1276 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1277 { .name = "vmsplice", .errmsg = true,
1278 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1279 { .name = "write", .errmsg = true,
1280 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1281 { .name = "writev", .errmsg = true,
1282 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1285 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1287 const struct syscall_fmt *fmt = fmtp;
1288 return strcmp(name, fmt->name);
1291 static struct syscall_fmt *syscall_fmt__find(const char *name)
1293 const int nmemb = ARRAY_SIZE(syscall_fmts);
1294 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * What is known about one syscall id; entries live in trace->syscalls.table
 * and are filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> (or alias) tracepoint */
	struct event_format *tp_format;
	/* number of entries in @args, after dropping a leading "nr" field */
	int		    nr_args;
	/* linked list of tracepoint fields describing the arguments */
	struct format_field *args;
	/* syscall name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* true for "exit" and "exit_group" */
	bool		    is_exit;
	/* entry in syscall_fmts for this syscall, NULL if there is none */
	struct syscall_fmt  *fmt;
	/* per argument pretty printers, indexed by argument position */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per argument extra parameter handed to the pretty printer */
	void		    **arg_parm;
};
1308 static size_t fprintf_duration(unsigned long t, FILE *fp)
1310 double duration = (double)t / NSEC_PER_MSEC;
1311 size_t printed = fprintf(fp, "(");
1313 if (duration >= 1.0)
1314 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1315 else if (duration >= 0.01)
1316 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1317 else
1318 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1319 return printed + fprintf(fp, "): ");
1323 * filename.ptr: The filename char pointer that will be vfs_getname'd
1324 * filename.entry_str_pos: Where to insert the string translated from
1325 * filename.ptr by the vfs_getname tracepoint/kprobe.
1327 struct thread_trace {
1328 u64 entry_time;
1329 u64 exit_time;
1330 bool entry_pending;
1331 unsigned long nr_events;
1332 unsigned long pfmaj, pfmin;
1333 char *entry_str;
1334 double runtime_ms;
1335 struct {
1336 unsigned long ptr;
1337 short int entry_str_pos;
1338 bool pending_open;
1339 unsigned int namelen;
1340 char *name;
1341 } filename;
1342 struct {
1343 int max;
1344 char **table;
1345 } paths;
1347 struct intlist *syscall_stats;
1350 static struct thread_trace *thread_trace__new(void)
1352 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1354 if (ttrace)
1355 ttrace->paths.max = -1;
1357 ttrace->syscall_stats = intlist__new(NULL);
1359 return ttrace;
1362 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1364 struct thread_trace *ttrace;
1366 if (thread == NULL)
1367 goto fail;
1369 if (thread__priv(thread) == NULL)
1370 thread__set_priv(thread, thread_trace__new());
1372 if (thread__priv(thread) == NULL)
1373 goto fail;
1375 ttrace = thread__priv(thread);
1376 ++ttrace->nr_events;
1378 return ttrace;
1379 fail:
1380 color_fprintf(fp, PERF_COLOR_RED,
1381 "WARNING: not enough memory, dropping samples!\n");
1382 return NULL;
1385 #define TRACE_PFMAJ (1 << 0)
1386 #define TRACE_PFMIN (1 << 1)
1388 static const size_t trace__entry_str_size = 2048;
1390 struct trace {
1391 struct perf_tool tool;
1392 struct {
1393 int machine;
1394 int open_id;
1395 } audit;
1396 struct {
1397 int max;
1398 struct syscall *table;
1399 struct {
1400 struct perf_evsel *sys_enter,
1401 *sys_exit;
1402 } events;
1403 } syscalls;
1404 struct record_opts opts;
1405 struct perf_evlist *evlist;
1406 struct machine *host;
1407 struct thread *current;
1408 u64 base_time;
1409 FILE *output;
1410 unsigned long nr_events;
1411 struct strlist *ev_qualifier;
1412 struct {
1413 size_t nr;
1414 int *entries;
1415 } ev_qualifier_ids;
1416 struct intlist *tid_list;
1417 struct intlist *pid_list;
1418 struct {
1419 size_t nr;
1420 pid_t *entries;
1421 } filter_pids;
1422 double duration_filter;
1423 double runtime_ms;
1424 struct {
1425 u64 vfs_getname,
1426 proc_getname;
1427 } stats;
1428 bool not_ev_qualifier;
1429 bool live;
1430 bool full_time;
1431 bool sched;
1432 bool multiple_threads;
1433 bool summary;
1434 bool summary_only;
1435 bool show_comm;
1436 bool show_tool_stats;
1437 bool trace_syscalls;
1438 bool force;
1439 bool vfs_getname;
1440 int trace_pgfaults;
1443 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1445 struct thread_trace *ttrace = thread__priv(thread);
1447 if (fd > ttrace->paths.max) {
1448 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1450 if (npath == NULL)
1451 return -1;
1453 if (ttrace->paths.max != -1) {
1454 memset(npath + ttrace->paths.max + 1, 0,
1455 (fd - ttrace->paths.max) * sizeof(char *));
1456 } else {
1457 memset(npath, 0, (fd + 1) * sizeof(char *));
1460 ttrace->paths.table = npath;
1461 ttrace->paths.max = fd;
1464 ttrace->paths.table[fd] = strdup(pathname);
1466 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1469 static int thread__read_fd_path(struct thread *thread, int fd)
1471 char linkname[PATH_MAX], pathname[PATH_MAX];
1472 struct stat st;
1473 int ret;
1475 if (thread->pid_ == thread->tid) {
1476 scnprintf(linkname, sizeof(linkname),
1477 "/proc/%d/fd/%d", thread->pid_, fd);
1478 } else {
1479 scnprintf(linkname, sizeof(linkname),
1480 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1483 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1484 return -1;
1486 ret = readlink(linkname, pathname, sizeof(pathname));
1488 if (ret < 0 || ret > st.st_size)
1489 return -1;
1491 pathname[ret] = '\0';
1492 return trace__set_fd_pathname(thread, fd, pathname);
1495 static const char *thread__fd_path(struct thread *thread, int fd,
1496 struct trace *trace)
1498 struct thread_trace *ttrace = thread__priv(thread);
1500 if (ttrace == NULL)
1501 return NULL;
1503 if (fd < 0)
1504 return NULL;
1506 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1507 if (!trace->live)
1508 return NULL;
1509 ++trace->stats.proc_getname;
1510 if (thread__read_fd_path(thread, fd))
1511 return NULL;
1514 return ttrace->paths.table[fd];
1517 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1518 struct syscall_arg *arg)
1520 int fd = arg->val;
1521 size_t printed = scnprintf(bf, size, "%d", fd);
1522 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1524 if (path)
1525 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1527 return printed;
1530 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1531 struct syscall_arg *arg)
1533 int fd = arg->val;
1534 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1535 struct thread_trace *ttrace = thread__priv(arg->thread);
1537 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1538 zfree(&ttrace->paths.table[fd]);
1540 return printed;
1543 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1544 unsigned long ptr)
1546 struct thread_trace *ttrace = thread__priv(thread);
1548 ttrace->filename.ptr = ptr;
1549 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1552 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1553 struct syscall_arg *arg)
1555 unsigned long ptr = arg->val;
1557 if (!arg->trace->vfs_getname)
1558 return scnprintf(bf, size, "%#x", ptr);
1560 thread__set_filename_pos(arg->thread, bf, ptr);
1561 return 0;
1564 static bool trace__filter_duration(struct trace *trace, double t)
1566 return t < (trace->duration_filter * NSEC_PER_MSEC);
1569 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1571 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1573 return fprintf(fp, "%10.3f ", ts);
/*
 * Flags set asynchronously by sig_handler().
 *
 * Objects written from a signal handler and read by the interrupted code
 * must be 'volatile sig_atomic_t': with a plain bool the compiler may keep
 * the value in a register and never observe the handler's store.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: request termination, and remember whether the request
 * came from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1585 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1586 u64 duration, u64 tstamp, FILE *fp)
1588 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1589 printed += fprintf_duration(duration, fp);
1591 if (trace->multiple_threads) {
1592 if (trace->show_comm)
1593 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1594 printed += fprintf(fp, "%d ", thread->tid);
1597 return printed;
1600 static int trace__process_event(struct trace *trace, struct machine *machine,
1601 union perf_event *event, struct perf_sample *sample)
1603 int ret = 0;
1605 switch (event->header.type) {
1606 case PERF_RECORD_LOST:
1607 color_fprintf(trace->output, PERF_COLOR_RED,
1608 "LOST %" PRIu64 " events!\n", event->lost.lost);
1609 ret = machine__process_lost_event(machine, event, sample);
1610 default:
1611 ret = machine__process_event(machine, event, sample);
1612 break;
1615 return ret;
1618 static int trace__tool_process(struct perf_tool *tool,
1619 union perf_event *event,
1620 struct perf_sample *sample,
1621 struct machine *machine)
1623 struct trace *trace = container_of(tool, struct trace, tool);
1624 return trace__process_event(trace, machine, event, sample);
1627 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1629 int err = symbol__init(NULL);
1631 if (err)
1632 return err;
1634 trace->host = machine__new_host();
1635 if (trace->host == NULL)
1636 return -ENOMEM;
1638 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1639 return -errno;
1641 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1642 evlist->threads, trace__tool_process, false,
1643 trace->opts.proc_map_timeout);
1644 if (err)
1645 symbol__exit();
1647 return err;
1650 static int syscall__set_arg_fmts(struct syscall *sc)
1652 struct format_field *field;
1653 int idx = 0;
1655 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1656 if (sc->arg_scnprintf == NULL)
1657 return -1;
1659 if (sc->fmt)
1660 sc->arg_parm = sc->fmt->arg_parm;
1662 for (field = sc->args; field; field = field->next) {
1663 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1664 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1665 else if (field->flags & FIELD_IS_POINTER)
1666 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1667 ++idx;
1670 return 0;
1673 static int trace__read_syscall_info(struct trace *trace, int id)
1675 char tp_name[128];
1676 struct syscall *sc;
1677 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1679 if (name == NULL)
1680 return -1;
1682 if (id > trace->syscalls.max) {
1683 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1685 if (nsyscalls == NULL)
1686 return -1;
1688 if (trace->syscalls.max != -1) {
1689 memset(nsyscalls + trace->syscalls.max + 1, 0,
1690 (id - trace->syscalls.max) * sizeof(*sc));
1691 } else {
1692 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1695 trace->syscalls.table = nsyscalls;
1696 trace->syscalls.max = id;
1699 sc = trace->syscalls.table + id;
1700 sc->name = name;
1702 sc->fmt = syscall_fmt__find(sc->name);
1704 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1705 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1707 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1708 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1709 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1712 if (sc->tp_format == NULL)
1713 return -1;
1715 sc->args = sc->tp_format->format.fields;
1716 sc->nr_args = sc->tp_format->format.nr_fields;
1717 /* drop nr field - not relevant here; does not exist on older kernels */
1718 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1719 sc->args = sc->args->next;
1720 --sc->nr_args;
1723 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1725 return syscall__set_arg_fmts(sc);
1728 static int trace__validate_ev_qualifier(struct trace *trace)
1730 int err = 0, i;
1731 struct str_node *pos;
1733 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1734 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1735 sizeof(trace->ev_qualifier_ids.entries[0]));
1737 if (trace->ev_qualifier_ids.entries == NULL) {
1738 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1739 trace->output);
1740 err = -EINVAL;
1741 goto out;
1744 i = 0;
1746 strlist__for_each(pos, trace->ev_qualifier) {
1747 const char *sc = pos->s;
1748 int id = audit_name_to_syscall(sc, trace->audit.machine);
1750 if (id < 0) {
1751 if (err == 0) {
1752 fputs("Error:\tInvalid syscall ", trace->output);
1753 err = -EINVAL;
1754 } else {
1755 fputs(", ", trace->output);
1758 fputs(sc, trace->output);
1761 trace->ev_qualifier_ids.entries[i++] = id;
1764 if (err < 0) {
1765 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1766 "\nHint:\tand: 'man syscalls'\n", trace->output);
1767 zfree(&trace->ev_qualifier_ids.entries);
1768 trace->ev_qualifier_ids.nr = 0;
1770 out:
1771 return err;
1775 * args is to be interpreted as a series of longs but we need to handle
1776 * 8-byte unaligned accesses. args points to raw_data within the event
1777 * and raw_data is guaranteed to be 8-byte unaligned because it is
1778 * preceded by raw_size which is a u32. So we need to copy args to a temp
1779 * variable to read it. Most notably this avoids extended load instructions
1780 * on unaligned addresses
1783 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1784 unsigned char *args, struct trace *trace,
1785 struct thread *thread)
1787 size_t printed = 0;
1788 unsigned char *p;
1789 unsigned long val;
1791 if (sc->args != NULL) {
1792 struct format_field *field;
1793 u8 bit = 1;
1794 struct syscall_arg arg = {
1795 .idx = 0,
1796 .mask = 0,
1797 .trace = trace,
1798 .thread = thread,
1801 for (field = sc->args; field;
1802 field = field->next, ++arg.idx, bit <<= 1) {
1803 if (arg.mask & bit)
1804 continue;
1806 /* special care for unaligned accesses */
1807 p = args + sizeof(unsigned long) * arg.idx;
1808 memcpy(&val, p, sizeof(val));
1811 * Suppress this argument if its value is zero and
1812 * and we don't have a string associated in an
1813 * strarray for it.
1815 if (val == 0 &&
1816 !(sc->arg_scnprintf &&
1817 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1818 sc->arg_parm[arg.idx]))
1819 continue;
1821 printed += scnprintf(bf + printed, size - printed,
1822 "%s%s: ", printed ? ", " : "", field->name);
1823 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1824 arg.val = val;
1825 if (sc->arg_parm)
1826 arg.parm = sc->arg_parm[arg.idx];
1827 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1828 size - printed, &arg);
1829 } else {
1830 printed += scnprintf(bf + printed, size - printed,
1831 "%ld", val);
1834 } else {
1835 int i = 0;
1837 while (i < 6) {
1838 /* special care for unaligned accesses */
1839 p = args + sizeof(unsigned long) * i;
1840 memcpy(&val, p, sizeof(val));
1841 printed += scnprintf(bf + printed, size - printed,
1842 "%sarg%d: %ld",
1843 printed ? ", " : "", i, val);
1844 ++i;
1848 return printed;
/* Signature shared by the trace__*() per event handlers in this file */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1855 static struct syscall *trace__syscall_info(struct trace *trace,
1856 struct perf_evsel *evsel, int id)
1859 if (id < 0) {
1862 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1863 * before that, leaving at a higher verbosity level till that is
1864 * explained. Reproduced with plain ftrace with:
1866 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1867 * grep "NR -1 " /t/trace_pipe
1869 * After generating some load on the machine.
1871 if (verbose > 1) {
1872 static u64 n;
1873 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1874 id, perf_evsel__name(evsel), ++n);
1876 return NULL;
1879 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1880 trace__read_syscall_info(trace, id))
1881 goto out_cant_read;
1883 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1884 goto out_cant_read;
1886 return &trace->syscalls.table[id];
1888 out_cant_read:
1889 if (verbose) {
1890 fprintf(trace->output, "Problems reading syscall %d", id);
1891 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1892 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1893 fputs(" information\n", trace->output);
1895 return NULL;
1898 static void thread__update_stats(struct thread_trace *ttrace,
1899 int id, struct perf_sample *sample)
1901 struct int_node *inode;
1902 struct stats *stats;
1903 u64 duration = 0;
1905 inode = intlist__findnew(ttrace->syscall_stats, id);
1906 if (inode == NULL)
1907 return;
1909 stats = inode->priv;
1910 if (stats == NULL) {
1911 stats = malloc(sizeof(struct stats));
1912 if (stats == NULL)
1913 return;
1914 init_stats(stats);
1915 inode->priv = stats;
1918 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1919 duration = sample->time - ttrace->entry_time;
1921 update_stats(stats, duration);
1924 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1926 struct thread_trace *ttrace;
1927 u64 duration;
1928 size_t printed;
1930 if (trace->current == NULL)
1931 return 0;
1933 ttrace = thread__priv(trace->current);
1935 if (!ttrace->entry_pending)
1936 return 0;
1938 duration = sample->time - ttrace->entry_time;
1940 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1941 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1942 ttrace->entry_pending = false;
1944 return printed;
1947 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1948 union perf_event *event __maybe_unused,
1949 struct perf_sample *sample)
1951 char *msg;
1952 void *args;
1953 size_t printed = 0;
1954 struct thread *thread;
1955 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1956 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1957 struct thread_trace *ttrace;
1959 if (sc == NULL)
1960 return -1;
1962 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1963 ttrace = thread__trace(thread, trace->output);
1964 if (ttrace == NULL)
1965 goto out_put;
1967 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1969 if (ttrace->entry_str == NULL) {
1970 ttrace->entry_str = malloc(trace__entry_str_size);
1971 if (!ttrace->entry_str)
1972 goto out_put;
1975 if (!trace->summary_only)
1976 trace__printf_interrupted_entry(trace, sample);
1978 ttrace->entry_time = sample->time;
1979 msg = ttrace->entry_str;
1980 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1982 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1983 args, trace, thread);
1985 if (sc->is_exit) {
1986 if (!trace->duration_filter && !trace->summary_only) {
1987 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1988 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1990 } else {
1991 ttrace->entry_pending = true;
1992 /* See trace__vfs_getname & trace__sys_exit */
1993 ttrace->filename.pending_open = false;
1996 if (trace->current != thread) {
1997 thread__put(trace->current);
1998 trace->current = thread__get(thread);
2000 err = 0;
2001 out_put:
2002 thread__put(thread);
2003 return err;
2006 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
2007 union perf_event *event __maybe_unused,
2008 struct perf_sample *sample)
2010 long ret;
2011 u64 duration = 0;
2012 struct thread *thread;
2013 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
2014 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2015 struct thread_trace *ttrace;
2017 if (sc == NULL)
2018 return -1;
2020 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2021 ttrace = thread__trace(thread, trace->output);
2022 if (ttrace == NULL)
2023 goto out_put;
2025 if (trace->summary)
2026 thread__update_stats(ttrace, id, sample);
2028 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
2030 if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) {
2031 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
2032 ttrace->filename.pending_open = false;
2033 ++trace->stats.vfs_getname;
2036 ttrace->exit_time = sample->time;
2038 if (ttrace->entry_time) {
2039 duration = sample->time - ttrace->entry_time;
2040 if (trace__filter_duration(trace, duration))
2041 goto out;
2042 } else if (trace->duration_filter)
2043 goto out;
2045 if (trace->summary_only)
2046 goto out;
2048 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
2050 if (ttrace->entry_pending) {
2051 fprintf(trace->output, "%-70s", ttrace->entry_str);
2052 } else {
2053 fprintf(trace->output, " ... [");
2054 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
2055 fprintf(trace->output, "]: %s()", sc->name);
2058 if (sc->fmt == NULL) {
2059 signed_print:
2060 fprintf(trace->output, ") = %ld", ret);
2061 } else if (ret < 0 && sc->fmt->errmsg) {
2062 char bf[STRERR_BUFSIZE];
2063 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
2064 *e = audit_errno_to_name(-ret);
2066 fprintf(trace->output, ") = -1 %s %s", e, emsg);
2067 } else if (ret == 0 && sc->fmt->timeout)
2068 fprintf(trace->output, ") = 0 Timeout");
2069 else if (sc->fmt->hexret)
2070 fprintf(trace->output, ") = %#lx", ret);
2071 else
2072 goto signed_print;
2074 fputc('\n', trace->output);
2075 out:
2076 ttrace->entry_pending = false;
2077 err = 0;
2078 out_put:
2079 thread__put(thread);
2080 return err;
2083 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2084 union perf_event *event __maybe_unused,
2085 struct perf_sample *sample)
2087 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2088 struct thread_trace *ttrace;
2089 size_t filename_len, entry_str_len, to_move;
2090 ssize_t remaining_space;
2091 char *pos;
2092 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2094 if (!thread)
2095 goto out;
2097 ttrace = thread__priv(thread);
2098 if (!ttrace)
2099 goto out;
2101 filename_len = strlen(filename);
2103 if (ttrace->filename.namelen < filename_len) {
2104 char *f = realloc(ttrace->filename.name, filename_len + 1);
2106 if (f == NULL)
2107 goto out;
2109 ttrace->filename.namelen = filename_len;
2110 ttrace->filename.name = f;
2113 strcpy(ttrace->filename.name, filename);
2114 ttrace->filename.pending_open = true;
2116 if (!ttrace->filename.ptr)
2117 goto out;
2119 entry_str_len = strlen(ttrace->entry_str);
2120 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2121 if (remaining_space <= 0)
2122 goto out;
2124 if (filename_len > (size_t)remaining_space) {
2125 filename += filename_len - remaining_space;
2126 filename_len = remaining_space;
2129 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2130 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2131 memmove(pos + filename_len, pos, to_move);
2132 memcpy(pos, filename, filename_len);
2134 ttrace->filename.ptr = 0;
2135 ttrace->filename.entry_str_pos = 0;
2136 out:
2137 return 0;
2140 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2141 union perf_event *event __maybe_unused,
2142 struct perf_sample *sample)
2144 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2145 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2146 struct thread *thread = machine__findnew_thread(trace->host,
2147 sample->pid,
2148 sample->tid);
2149 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2151 if (ttrace == NULL)
2152 goto out_dump;
2154 ttrace->runtime_ms += runtime_ms;
2155 trace->runtime_ms += runtime_ms;
2156 thread__put(thread);
2157 return 0;
2159 out_dump:
2160 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2161 evsel->name,
2162 perf_evsel__strval(evsel, sample, "comm"),
2163 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2164 runtime,
2165 perf_evsel__intval(evsel, sample, "vruntime"));
2166 thread__put(thread);
2167 return 0;
2170 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2171 union perf_event *event __maybe_unused,
2172 struct perf_sample *sample)
2174 trace__printf_interrupted_entry(trace, sample);
2175 trace__fprintf_tstamp(trace, sample->time, trace->output);
2177 if (trace->trace_syscalls)
2178 fprintf(trace->output, "( ): ");
2180 fprintf(trace->output, "%s:", evsel->name);
2182 if (evsel->tp_format) {
2183 event_format__fprintf(evsel->tp_format, sample->cpu,
2184 sample->raw_data, sample->raw_size,
2185 trace->output);
2188 fprintf(trace->output, ")\n");
2189 return 0;
2192 static void print_location(FILE *f, struct perf_sample *sample,
2193 struct addr_location *al,
2194 bool print_dso, bool print_sym)
2197 if ((verbose || print_dso) && al->map)
2198 fprintf(f, "%s@", al->map->dso->long_name);
2200 if ((verbose || print_sym) && al->sym)
2201 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2202 al->addr - al->sym->start);
2203 else if (al->map)
2204 fprintf(f, "0x%" PRIx64, al->addr);
2205 else
2206 fprintf(f, "0x%" PRIx64, sample->addr);
2209 static int trace__pgfault(struct trace *trace,
2210 struct perf_evsel *evsel,
2211 union perf_event *event,
2212 struct perf_sample *sample)
2214 struct thread *thread;
2215 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
2216 struct addr_location al;
2217 char map_type = 'd';
2218 struct thread_trace *ttrace;
2219 int err = -1;
2221 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2222 ttrace = thread__trace(thread, trace->output);
2223 if (ttrace == NULL)
2224 goto out_put;
2226 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2227 ttrace->pfmaj++;
2228 else
2229 ttrace->pfmin++;
2231 if (trace->summary_only)
2232 goto out;
2234 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
2235 sample->ip, &al);
2237 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2239 fprintf(trace->output, "%sfault [",
2240 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2241 "maj" : "min");
2243 print_location(trace->output, sample, &al, false, true);
2245 fprintf(trace->output, "] => ");
2247 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
2248 sample->addr, &al);
2250 if (!al.map) {
2251 thread__find_addr_location(thread, cpumode,
2252 MAP__FUNCTION, sample->addr, &al);
2254 if (al.map)
2255 map_type = 'x';
2256 else
2257 map_type = '?';
2260 print_location(trace->output, sample, &al, true, false);
2262 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2263 out:
2264 err = 0;
2265 out_put:
2266 thread__put(thread);
2267 return err;
2270 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2272 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2273 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2274 return false;
2276 if (trace->pid_list || trace->tid_list)
2277 return true;
2279 return false;
2282 static int trace__process_sample(struct perf_tool *tool,
2283 union perf_event *event,
2284 struct perf_sample *sample,
2285 struct perf_evsel *evsel,
2286 struct machine *machine __maybe_unused)
2288 struct trace *trace = container_of(tool, struct trace, tool);
2289 int err = 0;
2291 tracepoint_handler handler = evsel->handler;
2293 if (skip_sample(trace, sample))
2294 return 0;
2296 if (!trace->full_time && trace->base_time == 0)
2297 trace->base_time = sample->time;
2299 if (handler) {
2300 ++trace->nr_events;
2301 handler(trace, evsel, event, sample);
2304 return err;
2307 static int parse_target_str(struct trace *trace)
2309 if (trace->opts.target.pid) {
2310 trace->pid_list = intlist__new(trace->opts.target.pid);
2311 if (trace->pid_list == NULL) {
2312 pr_err("Error parsing process id string\n");
2313 return -EINVAL;
2317 if (trace->opts.target.tid) {
2318 trace->tid_list = intlist__new(trace->opts.target.tid);
2319 if (trace->tid_list == NULL) {
2320 pr_err("Error parsing thread id string\n");
2321 return -EINVAL;
2325 return 0;
2328 static int trace__record(struct trace *trace, int argc, const char **argv)
2330 unsigned int rec_argc, i, j;
2331 const char **rec_argv;
2332 const char * const record_args[] = {
2333 "record",
2334 "-R",
2335 "-m", "1024",
2336 "-c", "1",
2339 const char * const sc_args[] = { "-e", };
2340 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2341 const char * const majpf_args[] = { "-e", "major-faults" };
2342 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2343 const char * const minpf_args[] = { "-e", "minor-faults" };
2344 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2346 /* +1 is for the event string below */
2347 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2348 majpf_args_nr + minpf_args_nr + argc;
2349 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2351 if (rec_argv == NULL)
2352 return -ENOMEM;
2354 j = 0;
2355 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2356 rec_argv[j++] = record_args[i];
2358 if (trace->trace_syscalls) {
2359 for (i = 0; i < sc_args_nr; i++)
2360 rec_argv[j++] = sc_args[i];
2362 /* event string may be different for older kernels - e.g., RHEL6 */
2363 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2364 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2365 else if (is_valid_tracepoint("syscalls:sys_enter"))
2366 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2367 else {
2368 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2369 return -1;
2373 if (trace->trace_pgfaults & TRACE_PFMAJ)
2374 for (i = 0; i < majpf_args_nr; i++)
2375 rec_argv[j++] = majpf_args[i];
2377 if (trace->trace_pgfaults & TRACE_PFMIN)
2378 for (i = 0; i < minpf_args_nr; i++)
2379 rec_argv[j++] = minpf_args[i];
2381 for (i = 0; i < (unsigned int)argc; i++)
2382 rec_argv[j++] = argv[i];
2384 return cmd_record(j, rec_argv, NULL);
2387 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2389 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2391 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2392 if (evsel == NULL)
2393 return false;
2395 if (perf_evsel__field(evsel, "pathname") == NULL) {
2396 perf_evsel__delete(evsel);
2397 return false;
2400 evsel->handler = trace__vfs_getname;
2401 perf_evlist__add(evlist, evsel);
2402 return true;
2405 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2406 u64 config)
2408 struct perf_evsel *evsel;
2409 struct perf_event_attr attr = {
2410 .type = PERF_TYPE_SOFTWARE,
2411 .mmap_data = 1,
2414 attr.config = config;
2415 attr.sample_period = 1;
2417 event_attr_init(&attr);
2419 evsel = perf_evsel__new(&attr);
2420 if (!evsel)
2421 return -ENOMEM;
2423 evsel->handler = trace__pgfault;
2424 perf_evlist__add(evlist, evsel);
2426 return 0;
2429 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2431 const u32 type = event->header.type;
2432 struct perf_evsel *evsel;
2434 if (!trace->full_time && trace->base_time == 0)
2435 trace->base_time = sample->time;
2437 if (type != PERF_RECORD_SAMPLE) {
2438 trace__process_event(trace, trace->host, event, sample);
2439 return;
2442 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2443 if (evsel == NULL) {
2444 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2445 return;
2448 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2449 sample->raw_data == NULL) {
2450 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2451 perf_evsel__name(evsel), sample->tid,
2452 sample->cpu, sample->raw_size);
2453 } else {
2454 tracepoint_handler handler = evsel->handler;
2455 handler(trace, evsel, event, sample);
2459 static int trace__add_syscall_newtp(struct trace *trace)
2461 int ret = -1;
2462 struct perf_evlist *evlist = trace->evlist;
2463 struct perf_evsel *sys_enter, *sys_exit;
2465 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2466 if (sys_enter == NULL)
2467 goto out;
2469 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2470 goto out_delete_sys_enter;
2472 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2473 if (sys_exit == NULL)
2474 goto out_delete_sys_enter;
2476 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2477 goto out_delete_sys_exit;
2479 perf_evlist__add(evlist, sys_enter);
2480 perf_evlist__add(evlist, sys_exit);
2482 trace->syscalls.events.sys_enter = sys_enter;
2483 trace->syscalls.events.sys_exit = sys_exit;
2485 ret = 0;
2486 out:
2487 return ret;
2489 out_delete_sys_exit:
2490 perf_evsel__delete_priv(sys_exit);
2491 out_delete_sys_enter:
2492 perf_evsel__delete_priv(sys_enter);
2493 goto out;
2496 static int trace__set_ev_qualifier_filter(struct trace *trace)
2498 int err = -1;
2499 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2500 trace->ev_qualifier_ids.nr,
2501 trace->ev_qualifier_ids.entries);
2503 if (filter == NULL)
2504 goto out_enomem;
2506 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2507 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2509 free(filter);
2510 out:
2511 return err;
2512 out_enomem:
2513 errno = ENOMEM;
2514 goto out;
2517 static int trace__run(struct trace *trace, int argc, const char **argv)
2519 struct perf_evlist *evlist = trace->evlist;
2520 struct perf_evsel *evsel;
2521 int err = -1, i;
2522 unsigned long before;
2523 const bool forks = argc > 0;
2524 bool draining = false;
2526 trace->live = true;
2528 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2529 goto out_error_raw_syscalls;
2531 if (trace->trace_syscalls)
2532 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2534 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2535 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2536 goto out_error_mem;
2539 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2540 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2541 goto out_error_mem;
2543 if (trace->sched &&
2544 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2545 trace__sched_stat_runtime))
2546 goto out_error_sched_stat_runtime;
2548 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2549 if (err < 0) {
2550 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2551 goto out_delete_evlist;
2554 err = trace__symbols_init(trace, evlist);
2555 if (err < 0) {
2556 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2557 goto out_delete_evlist;
2560 perf_evlist__config(evlist, &trace->opts);
2562 signal(SIGCHLD, sig_handler);
2563 signal(SIGINT, sig_handler);
2565 if (forks) {
2566 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2567 argv, false, NULL);
2568 if (err < 0) {
2569 fprintf(trace->output, "Couldn't run the workload!\n");
2570 goto out_delete_evlist;
2574 err = perf_evlist__open(evlist);
2575 if (err < 0)
2576 goto out_error_open;
2579 * Better not use !target__has_task() here because we need to cover the
2580 * case where no threads were specified in the command line, but a
2581 * workload was, and in that case we will fill in the thread_map when
2582 * we fork the workload in perf_evlist__prepare_workload.
2584 if (trace->filter_pids.nr > 0)
2585 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2586 else if (thread_map__pid(evlist->threads, 0) == -1)
2587 err = perf_evlist__set_filter_pid(evlist, getpid());
2589 if (err < 0)
2590 goto out_error_mem;
2592 if (trace->ev_qualifier_ids.nr > 0) {
2593 err = trace__set_ev_qualifier_filter(trace);
2594 if (err < 0)
2595 goto out_errno;
2597 pr_debug("event qualifier tracepoint filter: %s\n",
2598 trace->syscalls.events.sys_exit->filter);
2601 err = perf_evlist__apply_filters(evlist, &evsel);
2602 if (err < 0)
2603 goto out_error_apply_filters;
2605 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2606 if (err < 0)
2607 goto out_error_mmap;
2609 if (!target__none(&trace->opts.target))
2610 perf_evlist__enable(evlist);
2612 if (forks)
2613 perf_evlist__start_workload(evlist);
2615 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2616 evlist->threads->nr > 1 ||
2617 perf_evlist__first(evlist)->attr.inherit;
2618 again:
2619 before = trace->nr_events;
2621 for (i = 0; i < evlist->nr_mmaps; i++) {
2622 union perf_event *event;
2624 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2625 struct perf_sample sample;
2627 ++trace->nr_events;
2629 err = perf_evlist__parse_sample(evlist, event, &sample);
2630 if (err) {
2631 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2632 goto next_event;
2635 trace__handle_event(trace, event, &sample);
2636 next_event:
2637 perf_evlist__mmap_consume(evlist, i);
2639 if (interrupted)
2640 goto out_disable;
2642 if (done && !draining) {
2643 perf_evlist__disable(evlist);
2644 draining = true;
2649 if (trace->nr_events == before) {
2650 int timeout = done ? 100 : -1;
2652 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2653 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2654 draining = true;
2656 goto again;
2658 } else {
2659 goto again;
2662 out_disable:
2663 thread__zput(trace->current);
2665 perf_evlist__disable(evlist);
2667 if (!err) {
2668 if (trace->summary)
2669 trace__fprintf_thread_summary(trace, trace->output);
2671 if (trace->show_tool_stats) {
2672 fprintf(trace->output, "Stats:\n "
2673 " vfs_getname : %" PRIu64 "\n"
2674 " proc_getname: %" PRIu64 "\n",
2675 trace->stats.vfs_getname,
2676 trace->stats.proc_getname);
2680 out_delete_evlist:
2681 perf_evlist__delete(evlist);
2682 trace->evlist = NULL;
2683 trace->live = false;
2684 return err;
2686 char errbuf[BUFSIZ];
2688 out_error_sched_stat_runtime:
2689 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2690 goto out_error;
2692 out_error_raw_syscalls:
2693 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2694 goto out_error;
2696 out_error_mmap:
2697 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2698 goto out_error;
2700 out_error_open:
2701 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2703 out_error:
2704 fprintf(trace->output, "%s\n", errbuf);
2705 goto out_delete_evlist;
2707 out_error_apply_filters:
2708 fprintf(trace->output,
2709 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2710 evsel->filter, perf_evsel__name(evsel), errno,
2711 strerror_r(errno, errbuf, sizeof(errbuf)));
2712 goto out_delete_evlist;
2714 out_error_mem:
2715 fprintf(trace->output, "Not enough memory to run!\n");
2716 goto out_delete_evlist;
2718 out_errno:
2719 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2720 goto out_delete_evlist;
2723 static int trace__replay(struct trace *trace)
2725 const struct perf_evsel_str_handler handlers[] = {
2726 { "probe:vfs_getname", trace__vfs_getname, },
2728 struct perf_data_file file = {
2729 .path = input_name,
2730 .mode = PERF_DATA_MODE_READ,
2731 .force = trace->force,
2733 struct perf_session *session;
2734 struct perf_evsel *evsel;
2735 int err = -1;
2737 trace->tool.sample = trace__process_sample;
2738 trace->tool.mmap = perf_event__process_mmap;
2739 trace->tool.mmap2 = perf_event__process_mmap2;
2740 trace->tool.comm = perf_event__process_comm;
2741 trace->tool.exit = perf_event__process_exit;
2742 trace->tool.fork = perf_event__process_fork;
2743 trace->tool.attr = perf_event__process_attr;
2744 trace->tool.tracing_data = perf_event__process_tracing_data;
2745 trace->tool.build_id = perf_event__process_build_id;
2747 trace->tool.ordered_events = true;
2748 trace->tool.ordering_requires_timestamps = true;
2750 /* add tid to output */
2751 trace->multiple_threads = true;
2753 session = perf_session__new(&file, false, &trace->tool);
2754 if (session == NULL)
2755 return -1;
2757 if (symbol__init(&session->header.env) < 0)
2758 goto out;
2760 trace->host = &session->machines.host;
2762 err = perf_session__set_tracepoints_handlers(session, handlers);
2763 if (err)
2764 goto out;
2766 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2767 "raw_syscalls:sys_enter");
2768 /* older kernels have syscalls tp versus raw_syscalls */
2769 if (evsel == NULL)
2770 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2771 "syscalls:sys_enter");
2773 if (evsel &&
2774 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2775 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2776 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2777 goto out;
2780 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2781 "raw_syscalls:sys_exit");
2782 if (evsel == NULL)
2783 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2784 "syscalls:sys_exit");
2785 if (evsel &&
2786 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2787 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2788 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2789 goto out;
2792 evlist__for_each(session->evlist, evsel) {
2793 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2794 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2795 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2796 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2797 evsel->handler = trace__pgfault;
2800 err = parse_target_str(trace);
2801 if (err != 0)
2802 goto out;
2804 setup_pager();
2806 err = perf_session__process_events(session);
2807 if (err)
2808 pr_err("Failed to process events, error %d", err);
2810 else if (trace->summary)
2811 trace__fprintf_thread_summary(trace, trace->output);
2813 out:
2814 perf_session__delete(session);
2816 return err;
/*
 * Print the heading of the per-thread summary to @fp.
 * Returns the value reported by fprintf() for the heading.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2828 static size_t thread__dump_stats(struct thread_trace *ttrace,
2829 struct trace *trace, FILE *fp)
2831 struct stats *stats;
2832 size_t printed = 0;
2833 struct syscall *sc;
2834 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2836 if (inode == NULL)
2837 return 0;
2839 printed += fprintf(fp, "\n");
2841 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2842 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2843 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2845 /* each int_node is a syscall */
2846 while (inode) {
2847 stats = inode->priv;
2848 if (stats) {
2849 double min = (double)(stats->min) / NSEC_PER_MSEC;
2850 double max = (double)(stats->max) / NSEC_PER_MSEC;
2851 double avg = avg_stats(stats);
2852 double pct;
2853 u64 n = (u64) stats->n;
2855 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2856 avg /= NSEC_PER_MSEC;
2858 sc = &trace->syscalls.table[inode->i];
2859 printed += fprintf(fp, " %-15s", sc->name);
2860 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2861 n, avg * n, min, avg);
2862 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2865 inode = intlist__next(inode);
2868 printed += fprintf(fp, "\n\n");
2870 return printed;
/*
 * Context handed to trace__fprintf_one_thread() through the void * argument
 * of machine__for_each_thread().
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarised */
	size_t printed;		/* running total of fprintf() results */
};
2880 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2882 struct summary_data *data = priv;
2883 FILE *fp = data->fp;
2884 size_t printed = data->printed;
2885 struct trace *trace = data->trace;
2886 struct thread_trace *ttrace = thread__priv(thread);
2887 double ratio;
2889 if (ttrace == NULL)
2890 return 0;
2892 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2894 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2895 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2896 printed += fprintf(fp, "%.1f%%", ratio);
2897 if (ttrace->pfmaj)
2898 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2899 if (ttrace->pfmin)
2900 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2901 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2902 printed += thread__dump_stats(ttrace, trace, fp);
2904 data->printed += printed;
2906 return 0;
2909 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2911 struct summary_data data = {
2912 .fp = fp,
2913 .trace = trace
2915 data.printed = trace__fprintf_threads_header(fp);
2917 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2919 return data.printed;
2922 static int trace__set_duration(const struct option *opt, const char *str,
2923 int unset __maybe_unused)
2925 struct trace *trace = opt->value;
2927 trace->duration_filter = atof(str);
2928 return 0;
2931 static int trace__set_filter_pids(const struct option *opt, const char *str,
2932 int unset __maybe_unused)
2934 int ret = -1;
2935 size_t i;
2936 struct trace *trace = opt->value;
2938 * FIXME: introduce a intarray class, plain parse csv and create a
2939 * { int nr, int entries[] } struct...
2941 struct intlist *list = intlist__new(str);
2943 if (list == NULL)
2944 return -1;
2946 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2947 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2949 if (trace->filter_pids.entries == NULL)
2950 goto out;
2952 trace->filter_pids.entries[0] = getpid();
2954 for (i = 1; i < trace->filter_pids.nr; ++i)
2955 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2957 intlist__delete(list);
2958 ret = 0;
2959 out:
2960 return ret;
2963 static int trace__open_output(struct trace *trace, const char *filename)
2965 struct stat st;
2967 if (!stat(filename, &st) && st.st_size) {
2968 char oldname[PATH_MAX];
2970 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2971 unlink(oldname);
2972 rename(filename, oldname);
2975 trace->output = fopen(filename, "w");
2977 return trace->output == NULL ? -errno : 0;
2980 static int parse_pagefaults(const struct option *opt, const char *str,
2981 int unset __maybe_unused)
2983 int *trace_pgfaults = opt->value;
2985 if (strcmp(str, "all") == 0)
2986 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2987 else if (strcmp(str, "maj") == 0)
2988 *trace_pgfaults |= TRACE_PFMAJ;
2989 else if (strcmp(str, "min") == 0)
2990 *trace_pgfaults |= TRACE_PFMIN;
2991 else
2992 return -1;
2994 return 0;
2997 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2999 struct perf_evsel *evsel;
3001 evlist__for_each(evlist, evsel)
3002 evsel->handler = handler;
3005 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3007 const char *trace_usage[] = {
3008 "perf trace [<options>] [<command>]",
3009 "perf trace [<options>] -- <command> [<options>]",
3010 "perf trace record [<options>] [<command>]",
3011 "perf trace record [<options>] -- <command> [<options>]",
3012 NULL
3014 struct trace trace = {
3015 .audit = {
3016 .machine = audit_detect_machine(),
3017 .open_id = audit_name_to_syscall("open", trace.audit.machine),
3019 .syscalls = {
3020 . max = -1,
3022 .opts = {
3023 .target = {
3024 .uid = UINT_MAX,
3025 .uses_mmap = true,
3027 .user_freq = UINT_MAX,
3028 .user_interval = ULLONG_MAX,
3029 .no_buffering = true,
3030 .mmap_pages = UINT_MAX,
3031 .proc_map_timeout = 500,
3033 .output = stderr,
3034 .show_comm = true,
3035 .trace_syscalls = true,
3037 const char *output_name = NULL;
3038 const char *ev_qualifier_str = NULL;
3039 const struct option trace_options[] = {
3040 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3041 "event selector. use 'perf list' to list available events",
3042 parse_events_option),
3043 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3044 "show the thread COMM next to its id"),
3045 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3046 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3047 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3048 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3049 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3050 "trace events on existing process id"),
3051 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3052 "trace events on existing thread id"),
3053 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3054 "pids to filter (by the kernel)", trace__set_filter_pids),
3055 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3056 "system-wide collection from all CPUs"),
3057 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3058 "list of cpus to monitor"),
3059 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3060 "child tasks do not inherit counters"),
3061 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3062 "number of mmap data pages",
3063 perf_evlist__parse_mmap_pages),
3064 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3065 "user to profile"),
3066 OPT_CALLBACK(0, "duration", &trace, "float",
3067 "show only events with duration > N.M ms",
3068 trace__set_duration),
3069 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3070 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3071 OPT_BOOLEAN('T', "time", &trace.full_time,
3072 "Show full timestamp, not time relative to first start"),
3073 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3074 "Show only syscall summary with statistics"),
3075 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3076 "Show all syscalls and summary with statistics"),
3077 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3078 "Trace pagefaults", parse_pagefaults, "maj"),
3079 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3080 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3081 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3082 "per thread proc mmap processing timeout in ms"),
3083 OPT_END()
3085 const char * const trace_subcommands[] = { "record", NULL };
3086 int err;
3087 char bf[BUFSIZ];
3089 signal(SIGSEGV, sighandler_dump_stack);
3090 signal(SIGFPE, sighandler_dump_stack);
3092 trace.evlist = perf_evlist__new();
3094 if (trace.evlist == NULL) {
3095 pr_err("Not enough memory to run!\n");
3096 err = -ENOMEM;
3097 goto out;
3100 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3101 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3103 if (trace.trace_pgfaults) {
3104 trace.opts.sample_address = true;
3105 trace.opts.sample_time = true;
3108 if (trace.evlist->nr_entries > 0)
3109 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3111 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3112 return trace__record(&trace, argc-1, &argv[1]);
3114 /* summary_only implies summary option, but don't overwrite summary if set */
3115 if (trace.summary_only)
3116 trace.summary = trace.summary_only;
3118 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3119 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3120 pr_err("Please specify something to trace.\n");
3121 return -1;
3124 if (output_name != NULL) {
3125 err = trace__open_output(&trace, output_name);
3126 if (err < 0) {
3127 perror("failed to create output file");
3128 goto out;
3132 if (ev_qualifier_str != NULL) {
3133 const char *s = ev_qualifier_str;
3134 struct strlist_config slist_config = {
3135 .dirname = system_path(STRACE_GROUPS_DIR),
3138 trace.not_ev_qualifier = *s == '!';
3139 if (trace.not_ev_qualifier)
3140 ++s;
3141 trace.ev_qualifier = strlist__new(s, &slist_config);
3142 if (trace.ev_qualifier == NULL) {
3143 fputs("Not enough memory to parse event qualifier",
3144 trace.output);
3145 err = -ENOMEM;
3146 goto out_close;
3149 err = trace__validate_ev_qualifier(&trace);
3150 if (err)
3151 goto out_close;
3154 err = target__validate(&trace.opts.target);
3155 if (err) {
3156 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3157 fprintf(trace.output, "%s", bf);
3158 goto out_close;
3161 err = target__parse_uid(&trace.opts.target);
3162 if (err) {
3163 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3164 fprintf(trace.output, "%s", bf);
3165 goto out_close;
3168 if (!argc && target__none(&trace.opts.target))
3169 trace.opts.target.system_wide = true;
3171 if (input_name)
3172 err = trace__replay(&trace);
3173 else
3174 err = trace__run(&trace, argc, argv);
3176 out_close:
3177 if (output_name != NULL)
3178 fclose(trace.output);
3179 out:
3180 return err;