2 #include "util/color.h"
3 #include "util/evlist.h"
4 #include "util/machine.h"
5 #include "util/thread.h"
6 #include "util/parse-options.h"
7 #include "util/thread_map.h"
8 #include "event-parse.h"
/*
 * Per-syscall formatting hints, keyed by syscall name.
 *
 * MUST be kept sorted by .name: syscall_fmt__find() looks entries up with
 * bsearch() using strcmp on .name.
 *
 *  .errmsg  - on a negative return, print "-1 ERRNAME (strerror text)"
 *  .alias   - alternate tracepoint name to try (e.g. "newstat" for "stat")
 *  .timeout - a zero return means the call timed out, print "0 Timeout"
 *
 * NOTE(review): the struct field list was reconstructed from a garbled
 * extract (only the initializers were legible) — verify against upstream.
 */
static struct syscall_fmt {
	const char *name;
	const char *alias;
	bool	   errmsg;
	bool	   timeout;
} syscall_fmts[] = {
	{ .name	    = "access",	    .errmsg = true, },
	{ .name	    = "arch_prctl", .errmsg = true, .alias = "prctl", },
	{ .name	    = "fstat",	    .errmsg = true, .alias = "newfstat", },
	{ .name	    = "fstatat",    .errmsg = true, .alias = "newfstatat", },
	{ .name	    = "futex",	    .errmsg = true, },
	{ .name	    = "open",	    .errmsg = true, },
	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "ppoll",	    .errmsg = true, .timeout = true, },
	{ .name	    = "read",	    .errmsg = true, },
	{ .name	    = "recvfrom",   .errmsg = true, },
	{ .name	    = "select",	    .errmsg = true, .timeout = true, },
	{ .name	    = "socket",	    .errmsg = true, },
	{ .name	    = "stat",	    .errmsg = true, .alias = "newstat", },
};
34 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
36 const struct syscall_fmt
*fmt
= fmtp
;
37 return strcmp(name
, fmt
->name
);
40 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
42 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
43 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
/*
 * Per-syscall-id state, one slot per id in trace->syscalls.table.
 * NOTE(review): struct header/footer and the .name field were reconstructed
 * from a garbled extract (sc->name is read elsewhere in this file) — verify
 * against upstream for any additional fields.
 */
struct syscall {
	struct event_format *tp_format;	/* "syscalls:sys_enter_<name>" event format, or NULL */
	const char	    *name;	/* from audit_syscall_to_name() */
	struct syscall_fmt  *fmt;	/* formatting hints, or NULL if none */
};
52 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
54 double duration
= (double)t
/ NSEC_PER_MSEC
;
55 size_t printed
= fprintf(fp
, "(");
58 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
59 else if (duration
>= 0.01)
60 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
62 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
63 return printed
+ fprintf(stdout
, "): ");
70 unsigned long nr_events
;
75 static struct thread_trace
*thread_trace__new(void)
77 return zalloc(sizeof(struct thread_trace
));
80 static struct thread_trace
*thread__trace(struct thread
*thread
)
82 struct thread_trace
*ttrace
;
87 if (thread
->priv
== NULL
)
88 thread
->priv
= thread_trace__new();
90 if (thread
->priv
== NULL
)
93 ttrace
= thread
->priv
;
98 color_fprintf(stdout
, PERF_COLOR_RED
,
99 "WARNING: not enough memory, dropping samples!\n");
107 struct syscall
*table
;
109 struct perf_record_opts opts
;
112 unsigned long nr_events
;
114 bool multiple_threads
;
115 double duration_filter
;
119 static bool trace__filter_duration(struct trace
*trace
, double t
)
121 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
124 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
126 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
128 return fprintf(fp
, "%10.3f ", ts
);
/* Event-loop termination flag; presumably set by sig_handler() — verify. */
static bool done = false;
133 static void sig_handler(int sig __maybe_unused
)
138 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
139 u64 duration
, u64 tstamp
, FILE *fp
)
141 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
142 printed
+= fprintf_duration(duration
, fp
);
144 if (trace
->multiple_threads
)
145 printed
+= fprintf(fp
, "%d ", thread
->pid
);
150 static int trace__process_event(struct machine
*machine
, union perf_event
*event
)
154 switch (event
->header
.type
) {
155 case PERF_RECORD_LOST
:
156 color_fprintf(stdout
, PERF_COLOR_RED
,
157 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
158 ret
= machine__process_lost_event(machine
, event
);
160 ret
= machine__process_event(machine
, event
);
167 static int trace__tool_process(struct perf_tool
*tool __maybe_unused
,
168 union perf_event
*event
,
169 struct perf_sample
*sample __maybe_unused
,
170 struct machine
*machine
)
172 return trace__process_event(machine
, event
);
175 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
177 int err
= symbol__init();
182 machine__init(&trace
->host
, "", HOST_KERNEL_ID
);
183 machine__create_kernel_maps(&trace
->host
);
185 if (perf_target__has_task(&trace
->opts
.target
)) {
186 err
= perf_event__synthesize_thread_map(NULL
, evlist
->threads
,
190 err
= perf_event__synthesize_threads(NULL
, trace__tool_process
,
200 static int trace__read_syscall_info(struct trace
*trace
, int id
)
204 const char *name
= audit_syscall_to_name(id
, trace
->audit_machine
);
209 if (id
> trace
->syscalls
.max
) {
210 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
212 if (nsyscalls
== NULL
)
215 if (trace
->syscalls
.max
!= -1) {
216 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
217 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
219 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
222 trace
->syscalls
.table
= nsyscalls
;
223 trace
->syscalls
.max
= id
;
226 sc
= trace
->syscalls
.table
+ id
;
228 sc
->fmt
= syscall_fmt__find(sc
->name
);
230 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
231 sc
->tp_format
= event_format__new("syscalls", tp_name
);
233 if (sc
->tp_format
== NULL
&& sc
->fmt
&& sc
->fmt
->alias
) {
234 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
235 sc
->tp_format
= event_format__new("syscalls", tp_name
);
238 return sc
->tp_format
!= NULL
? 0 : -1;
241 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
247 if (sc
->tp_format
!= NULL
) {
248 struct format_field
*field
;
250 for (field
= sc
->tp_format
->format
.fields
->next
; field
; field
= field
->next
) {
251 printed
+= scnprintf(bf
+ printed
, size
- printed
,
252 "%s%s: %ld", printed
? ", " : "",
253 field
->name
, args
[i
++]);
257 printed
+= scnprintf(bf
+ printed
, size
- printed
,
259 printed
? ", " : "", i
, args
[i
]);
/* Signature shared by all per-tracepoint sample handlers (sys_enter/sys_exit/...). */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);
270 static struct syscall
*trace__syscall_info(struct trace
*trace
,
271 struct perf_evsel
*evsel
,
272 struct perf_sample
*sample
)
274 int id
= perf_evsel__intval(evsel
, sample
, "id");
277 printf("Invalid syscall %d id, skipping...\n", id
);
281 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
282 trace__read_syscall_info(trace
, id
))
285 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
288 return &trace
->syscalls
.table
[id
];
291 printf("Problems reading syscall %d", id
);
292 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
293 printf("(%s)", trace
->syscalls
.table
[id
].name
);
294 puts(" information");
298 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
299 struct perf_sample
*sample
)
304 struct thread
*thread
= machine__findnew_thread(&trace
->host
, sample
->tid
);
305 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
306 struct thread_trace
*ttrace
= thread__trace(thread
);
308 if (ttrace
== NULL
|| sc
== NULL
)
311 args
= perf_evsel__rawptr(evsel
, sample
, "args");
313 printf("Problems reading syscall arguments\n");
317 ttrace
= thread
->priv
;
319 if (ttrace
->entry_str
== NULL
) {
320 ttrace
->entry_str
= malloc(1024);
321 if (!ttrace
->entry_str
)
325 ttrace
->entry_time
= sample
->time
;
326 msg
= ttrace
->entry_str
;
327 printed
+= scnprintf(msg
+ printed
, 1024 - printed
, "%s(", sc
->name
);
329 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, 1024 - printed
, args
);
331 if (!strcmp(sc
->name
, "exit_group") || !strcmp(sc
->name
, "exit")) {
332 if (!trace
->duration_filter
) {
333 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, stdout
);
334 printf("%-70s\n", ttrace
->entry_str
);
337 ttrace
->entry_pending
= true;
342 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
343 struct perf_sample
*sample
)
347 struct thread
*thread
= machine__findnew_thread(&trace
->host
, sample
->tid
);
348 struct thread_trace
*ttrace
= thread__trace(thread
);
349 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, sample
);
351 if (ttrace
== NULL
|| sc
== NULL
)
354 ret
= perf_evsel__intval(evsel
, sample
, "ret");
356 ttrace
= thread
->priv
;
358 ttrace
->exit_time
= sample
->time
;
360 if (ttrace
->entry_time
) {
361 duration
= sample
->time
- ttrace
->entry_time
;
362 if (trace__filter_duration(trace
, duration
))
364 } else if (trace
->duration_filter
)
367 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, stdout
);
369 if (ttrace
->entry_pending
) {
370 printf("%-70s", ttrace
->entry_str
);
373 color_fprintf(stdout
, PERF_COLOR_YELLOW
, "continued");
374 printf("]: %s()", sc
->name
);
377 if (ret
< 0 && sc
->fmt
&& sc
->fmt
->errmsg
) {
379 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
380 *e
= audit_errno_to_name(-ret
);
382 printf(") = -1 %s %s", e
, emsg
);
383 } else if (ret
== 0 && sc
->fmt
&& sc
->fmt
->timeout
)
384 printf(") = 0 Timeout");
386 printf(") = %d", ret
);
390 ttrace
->entry_pending
= false;
395 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
396 struct perf_sample
*sample
)
398 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
399 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
400 struct thread
*thread
= machine__findnew_thread(&trace
->host
, sample
->tid
);
401 struct thread_trace
*ttrace
= thread__trace(thread
);
406 ttrace
->runtime_ms
+= runtime_ms
;
407 trace
->runtime_ms
+= runtime_ms
;
411 printf("%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
413 perf_evsel__strval(evsel
, sample
, "comm"),
414 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
416 perf_evsel__intval(evsel
, sample
, "vruntime"));
420 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
422 struct perf_evlist
*evlist
= perf_evlist__new();
423 struct perf_evsel
*evsel
;
425 unsigned long before
;
426 const bool forks
= argc
> 0;
428 if (evlist
== NULL
) {
429 printf("Not enough memory to run!\n");
433 if (perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_enter", trace__sys_enter
) ||
434 perf_evlist__add_newtp(evlist
, "raw_syscalls", "sys_exit", trace__sys_exit
)) {
435 printf("Couldn't read the raw_syscalls tracepoints information!\n");
436 goto out_delete_evlist
;
440 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
441 trace__sched_stat_runtime
)) {
442 printf("Couldn't read the sched_stat_runtime tracepoint information!\n");
443 goto out_delete_evlist
;
446 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
448 printf("Problems parsing the target to trace, check your options!\n");
449 goto out_delete_evlist
;
452 err
= trace__symbols_init(trace
, evlist
);
454 printf("Problems initializing symbol libraries!\n");
455 goto out_delete_maps
;
458 perf_evlist__config(evlist
, &trace
->opts
);
460 signal(SIGCHLD
, sig_handler
);
461 signal(SIGINT
, sig_handler
);
464 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
467 printf("Couldn't run the workload!\n");
468 goto out_delete_maps
;
472 err
= perf_evlist__open(evlist
);
474 printf("Couldn't create the events: %s\n", strerror(errno
));
475 goto out_delete_maps
;
478 err
= perf_evlist__mmap(evlist
, UINT_MAX
, false);
480 printf("Couldn't mmap the events: %s\n", strerror(errno
));
481 goto out_close_evlist
;
484 perf_evlist__enable(evlist
);
487 perf_evlist__start_workload(evlist
);
489 trace
->multiple_threads
= evlist
->threads
->map
[0] == -1 || evlist
->threads
->nr
> 1;
491 before
= trace
->nr_events
;
493 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
494 union perf_event
*event
;
496 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
497 const u32 type
= event
->header
.type
;
498 tracepoint_handler handler
;
499 struct perf_sample sample
;
503 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
505 printf("Can't parse sample, err = %d, skipping...\n", err
);
509 if (trace
->base_time
== 0)
510 trace
->base_time
= sample
.time
;
512 if (type
!= PERF_RECORD_SAMPLE
) {
513 trace__process_event(&trace
->host
, event
);
517 evsel
= perf_evlist__id2evsel(evlist
, sample
.id
);
519 printf("Unknown tp ID %" PRIu64
", skipping...\n", sample
.id
);
523 if (sample
.raw_data
== NULL
) {
524 printf("%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
525 perf_evsel__name(evsel
), sample
.tid
,
526 sample
.cpu
, sample
.raw_size
);
530 handler
= evsel
->handler
.func
;
531 handler(trace
, evsel
, &sample
);
535 if (trace
->nr_events
== before
) {
537 goto out_unmap_evlist
;
539 poll(evlist
->pollfd
, evlist
->nr_fds
, -1);
543 perf_evlist__disable(evlist
);
548 perf_evlist__munmap(evlist
);
550 perf_evlist__close(evlist
);
552 perf_evlist__delete_maps(evlist
);
554 perf_evlist__delete(evlist
);
/*
 * Print the banner for the per-thread summary table.
 * Returns the number of characters printed.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed  = fprintf(fp, "\n _____________________________________________________________________\n");
	printed += fprintf(fp, " __) Summary of events (__\n\n");
	printed += fprintf(fp, " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n");
	printed += fprintf(fp, " _____________________________________________________________________\n\n");

	return printed;
}
571 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
573 size_t printed
= trace__fprintf_threads_header(fp
);
576 for (nd
= rb_first(&trace
->host
.threads
); nd
; nd
= rb_next(nd
)) {
577 struct thread
*thread
= rb_entry(nd
, struct thread
, rb_node
);
578 struct thread_trace
*ttrace
= thread
->priv
;
585 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
587 color
= PERF_COLOR_NORMAL
;
589 color
= PERF_COLOR_RED
;
590 else if (ratio
> 25.0)
591 color
= PERF_COLOR_GREEN
;
592 else if (ratio
> 5.0)
593 color
= PERF_COLOR_YELLOW
;
595 printed
+= color_fprintf(fp
, color
, "%20s", thread
->comm
);
596 printed
+= fprintf(fp
, " - %-5d :%11lu [", thread
->pid
, ttrace
->nr_events
);
597 printed
+= color_fprintf(fp
, color
, "%5.1f%%", ratio
);
598 printed
+= fprintf(fp
, " ] %10.3f ms\n", ttrace
->runtime_ms
);
604 static int trace__set_duration(const struct option
*opt
, const char *str
,
605 int unset __maybe_unused
)
607 struct trace
*trace
= opt
->value
;
609 trace
->duration_filter
= atof(str
);
613 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
615 const char * const trace_usage
[] = {
616 "perf trace [<options>] [<command>]",
617 "perf trace [<options>] -- <command> [<options>]",
620 struct trace trace
= {
621 .audit_machine
= audit_detect_machine(),
630 .user_freq
= UINT_MAX
,
631 .user_interval
= ULLONG_MAX
,
636 const struct option trace_options
[] = {
637 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
638 "trace events on existing process id"),
639 OPT_STRING(0, "tid", &trace
.opts
.target
.tid
, "tid",
640 "trace events on existing thread id"),
641 OPT_BOOLEAN(0, "all-cpus", &trace
.opts
.target
.system_wide
,
642 "system-wide collection from all CPUs"),
643 OPT_STRING(0, "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
644 "list of cpus to monitor"),
645 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
646 "child tasks do not inherit counters"),
647 OPT_UINTEGER(0, "mmap-pages", &trace
.opts
.mmap_pages
,
648 "number of mmap data pages"),
649 OPT_STRING(0, "uid", &trace
.opts
.target
.uid_str
, "user",
651 OPT_CALLBACK(0, "duration", &trace
, "float",
652 "show only events with duration > N.M ms",
653 trace__set_duration
),
654 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
660 argc
= parse_options(argc
, argv
, trace_options
, trace_usage
, 0);
662 err
= perf_target__validate(&trace
.opts
.target
);
664 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
669 err
= perf_target__parse_uid(&trace
.opts
.target
);
671 perf_target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
676 if (!argc
&& perf_target__none(&trace
.opts
.target
))
677 trace
.opts
.target
.system_wide
= true;
679 err
= trace__run(&trace
, argc
, argv
);
681 if (trace
.sched
&& !err
)
682 trace__fprintf_thread_summary(&trace
, stdout
);