4 * Builtin report command: Analyze the perf.data input file,
5 * look up and read DSOs and symbol information and display
6 * a histogram of results, along various sorting keys.
10 #include "util/util.h"
12 #include "util/color.h"
13 #include <linux/list.h>
14 #include "util/cache.h"
15 #include <linux/rbtree.h>
16 #include "util/symbol.h"
17 #include "util/string.h"
18 #include "util/callchain.h"
19 #include "util/strlist.h"
20 #include "util/values.h"
23 #include "util/debug.h"
24 #include "util/header.h"
26 #include "util/parse-options.h"
27 #include "util/parse-events.h"
29 #include "util/data_map.h"
30 #include "util/thread.h"
31 #include "util/sort.h"
32 #include "util/hist.h"
34 static char const *input_name
= "perf.data";
36 static char *dso_list_str
, *comm_list_str
, *sym_list_str
,
38 static struct strlist
*dso_list
, *comm_list
, *sym_list
;
42 static int full_paths
;
43 static int show_nr_samples
;
45 static int show_threads
;
46 static struct perf_read_values show_threads_values
;
48 static char default_pretty_printing_style
[] = "normal";
49 static char *pretty_printing_style
= default_pretty_printing_style
;
51 static int exclude_other
= 1;
53 static char callchain_default_opt
[] = "fractal,0.5";
55 static struct perf_header
*header
;
57 static u64 sample_type
;
59 struct symbol_conf symbol_conf
;
63 callchain__fprintf_left_margin(FILE *fp
, int left_margin
)
68 ret
= fprintf(fp
, " ");
70 for (i
= 0; i
< left_margin
; i
++)
71 ret
+= fprintf(fp
, " ");
76 static size_t ipchain__fprintf_graph_line(FILE *fp
, int depth
, int depth_mask
,
82 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
84 for (i
= 0; i
< depth
; i
++)
85 if (depth_mask
& (1 << i
))
86 ret
+= fprintf(fp
, "| ");
88 ret
+= fprintf(fp
, " ");
90 ret
+= fprintf(fp
, "\n");
95 ipchain__fprintf_graph(FILE *fp
, struct callchain_list
*chain
, int depth
,
96 int depth_mask
, int count
, u64 total_samples
,
97 int hits
, int left_margin
)
102 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
103 for (i
= 0; i
< depth
; i
++) {
104 if (depth_mask
& (1 << i
))
105 ret
+= fprintf(fp
, "|");
107 ret
+= fprintf(fp
, " ");
108 if (!count
&& i
== depth
- 1) {
111 percent
= hits
* 100.0 / total_samples
;
112 ret
+= percent_color_fprintf(fp
, "--%2.2f%%-- ", percent
);
114 ret
+= fprintf(fp
, "%s", " ");
117 ret
+= fprintf(fp
, "%s\n", chain
->sym
->name
);
119 ret
+= fprintf(fp
, "%p\n", (void *)(long)chain
->ip
);
124 static struct symbol
*rem_sq_bracket
;
125 static struct callchain_list rem_hits
;
127 static void init_rem_hits(void)
129 rem_sq_bracket
= malloc(sizeof(*rem_sq_bracket
) + 6);
130 if (!rem_sq_bracket
) {
131 fprintf(stderr
, "Not enough memory to display remaining hits\n");
135 strcpy(rem_sq_bracket
->name
, "[...]");
136 rem_hits
.sym
= rem_sq_bracket
;
140 __callchain__fprintf_graph(FILE *fp
, struct callchain_node
*self
,
141 u64 total_samples
, int depth
, int depth_mask
,
144 struct rb_node
*node
, *next
;
145 struct callchain_node
*child
;
146 struct callchain_list
*chain
;
147 int new_depth_mask
= depth_mask
;
153 if (callchain_param
.mode
== CHAIN_GRAPH_REL
)
154 new_total
= self
->children_hit
;
156 new_total
= total_samples
;
158 remaining
= new_total
;
160 node
= rb_first(&self
->rb_root
);
164 child
= rb_entry(node
, struct callchain_node
, rb_node
);
165 cumul
= cumul_hits(child
);
169 * The depth mask manages the output of pipes that show
170 * the depth. We don't want to keep the pipes of the current
171 * level for the last child of this depth,
172 * except when we have remaining filtered hits: they will
173 * supersede the last child.
175 next
= rb_next(node
);
176 if (!next
&& (callchain_param
.mode
!= CHAIN_GRAPH_REL
|| !remaining
))
177 new_depth_mask
&= ~(1 << (depth
- 1));
180 * But we keep the older depth mask for the line separator
181 * to keep the level link until we reach the last child
183 ret
+= ipchain__fprintf_graph_line(fp
, depth
, depth_mask
,
186 list_for_each_entry(chain
, &child
->val
, list
) {
187 if (chain
->ip
>= PERF_CONTEXT_MAX
)
189 ret
+= ipchain__fprintf_graph(fp
, chain
, depth
,
195 ret
+= __callchain__fprintf_graph(fp
, child
, new_total
,
197 new_depth_mask
| (1 << depth
),
202 if (callchain_param
.mode
== CHAIN_GRAPH_REL
&&
203 remaining
&& remaining
!= new_total
) {
208 new_depth_mask
&= ~(1 << (depth
- 1));
210 ret
+= ipchain__fprintf_graph(fp
, &rem_hits
, depth
,
211 new_depth_mask
, 0, new_total
,
212 remaining
, left_margin
);
220 callchain__fprintf_graph(FILE *fp
, struct callchain_node
*self
,
221 u64 total_samples
, int left_margin
)
223 struct callchain_list
*chain
;
224 bool printed
= false;
228 list_for_each_entry(chain
, &self
->val
, list
) {
229 if (chain
->ip
>= PERF_CONTEXT_MAX
)
232 if (!i
++ && sort__first_dimension
== SORT_SYM
)
236 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
237 ret
+= fprintf(fp
, "|\n");
238 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
239 ret
+= fprintf(fp
, "---");
244 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
247 ret
+= fprintf(fp
, " %s\n", chain
->sym
->name
);
249 ret
+= fprintf(fp
, " %p\n", (void *)(long)chain
->ip
);
252 ret
+= __callchain__fprintf_graph(fp
, self
, total_samples
, 1, 1, left_margin
);
258 callchain__fprintf_flat(FILE *fp
, struct callchain_node
*self
,
261 struct callchain_list
*chain
;
267 ret
+= callchain__fprintf_flat(fp
, self
->parent
, total_samples
);
270 list_for_each_entry(chain
, &self
->val
, list
) {
271 if (chain
->ip
>= PERF_CONTEXT_MAX
)
274 ret
+= fprintf(fp
, " %s\n", chain
->sym
->name
);
276 ret
+= fprintf(fp
, " %p\n",
277 (void *)(long)chain
->ip
);
284 hist_entry_callchain__fprintf(FILE *fp
, struct hist_entry
*self
,
285 u64 total_samples
, int left_margin
)
287 struct rb_node
*rb_node
;
288 struct callchain_node
*chain
;
291 rb_node
= rb_first(&self
->sorted_chain
);
295 chain
= rb_entry(rb_node
, struct callchain_node
, rb_node
);
296 percent
= chain
->hit
* 100.0 / total_samples
;
297 switch (callchain_param
.mode
) {
299 ret
+= percent_color_fprintf(fp
, " %6.2f%%\n",
301 ret
+= callchain__fprintf_flat(fp
, chain
, total_samples
);
303 case CHAIN_GRAPH_ABS
: /* Fall through */
304 case CHAIN_GRAPH_REL
:
305 ret
+= callchain__fprintf_graph(fp
, chain
, total_samples
,
311 ret
+= fprintf(fp
, "\n");
312 rb_node
= rb_next(rb_node
);
319 hist_entry__fprintf(FILE *fp
, struct hist_entry
*self
, u64 total_samples
)
321 struct sort_entry
*se
;
324 if (exclude_other
&& !self
->parent
)
328 ret
= percent_color_fprintf(fp
,
329 field_sep
? "%.2f" : " %6.2f%%",
330 (self
->count
* 100.0) / total_samples
);
332 ret
= fprintf(fp
, field_sep
? "%lld" : "%12lld ", self
->count
);
334 if (show_nr_samples
) {
336 fprintf(fp
, "%c%lld", *field_sep
, self
->count
);
338 fprintf(fp
, "%11lld", self
->count
);
341 list_for_each_entry(se
, &hist_entry__sort_list
, list
) {
345 fprintf(fp
, "%s", field_sep
?: " ");
346 ret
+= se
->print(fp
, self
, se
->width
? *se
->width
: 0);
349 ret
+= fprintf(fp
, "\n");
354 if (sort__first_dimension
== SORT_COMM
) {
355 se
= list_first_entry(&hist_entry__sort_list
, typeof(*se
),
357 left_margin
= se
->width
? *se
->width
: 0;
358 left_margin
-= thread__comm_len(self
->thread
);
361 hist_entry_callchain__fprintf(fp
, self
, total_samples
,
372 static void dso__calc_col_width(struct dso
*self
)
374 if (!col_width_list_str
&& !field_sep
&&
375 (!dso_list
|| strlist__has_entry(dso_list
, self
->name
))) {
376 unsigned int slen
= strlen(self
->name
);
377 if (slen
> dsos__col_width
)
378 dsos__col_width
= slen
;
381 self
->slen_calculated
= 1;
384 static void thread__comm_adjust(struct thread
*self
)
386 char *comm
= self
->comm
;
388 if (!col_width_list_str
&& !field_sep
&&
389 (!comm_list
|| strlist__has_entry(comm_list
, comm
))) {
390 unsigned int slen
= strlen(comm
);
392 if (slen
> comms__col_width
) {
393 comms__col_width
= slen
;
394 threads__col_width
= slen
+ 6;
399 static int thread__set_comm_adjust(struct thread
*self
, const char *comm
)
401 int ret
= thread__set_comm(self
, comm
);
406 thread__comm_adjust(self
);
411 static int call__match(struct symbol
*sym
)
413 if (sym
->name
&& !regexec(&parent_regex
, sym
->name
, 0, NULL
, 0))
419 static struct symbol
**resolve_callchain(struct thread
*thread
,
420 struct ip_callchain
*chain
,
421 struct symbol
**parent
)
423 u8 cpumode
= PERF_RECORD_MISC_USER
;
424 struct symbol
**syms
= NULL
;
428 syms
= calloc(chain
->nr
, sizeof(*syms
));
430 fprintf(stderr
, "Can't allocate memory for symbols\n");
435 for (i
= 0; i
< chain
->nr
; i
++) {
436 u64 ip
= chain
->ips
[i
];
437 struct addr_location al
;
439 if (ip
>= PERF_CONTEXT_MAX
) {
441 case PERF_CONTEXT_HV
:
442 cpumode
= PERF_RECORD_MISC_HYPERVISOR
; break;
443 case PERF_CONTEXT_KERNEL
:
444 cpumode
= PERF_RECORD_MISC_KERNEL
; break;
445 case PERF_CONTEXT_USER
:
446 cpumode
= PERF_RECORD_MISC_USER
; break;
453 thread__find_addr_location(thread
, cpumode
, MAP__FUNCTION
,
455 if (al
.sym
!= NULL
) {
456 if (sort__has_parent
&& !*parent
&&
469 * collect histogram counts
472 static int hist_entry__add(struct addr_location
*al
,
473 struct ip_callchain
*chain
, u64 count
)
475 struct symbol
**syms
= NULL
, *parent
= NULL
;
477 struct hist_entry
*he
;
479 if ((sort__has_parent
|| callchain
) && chain
)
480 syms
= resolve_callchain(al
->thread
, chain
, &parent
);
482 he
= __hist_entry__add(al
, parent
, count
, &hit
);
491 callchain_init(&he
->callchain
);
492 append_chain(&he
->callchain
, chain
, syms
);
499 static size_t output__fprintf(FILE *fp
, u64 total_samples
)
501 struct hist_entry
*pos
;
502 struct sort_entry
*se
;
506 char *col_width
= col_width_list_str
;
507 int raw_printing_style
;
509 raw_printing_style
= !strcmp(pretty_printing_style
, "raw");
513 fprintf(fp
, "# Samples: %Ld\n", (u64
)total_samples
);
516 fprintf(fp
, "# Overhead");
517 if (show_nr_samples
) {
519 fprintf(fp
, "%cSamples", *field_sep
);
521 fputs(" Samples ", fp
);
523 list_for_each_entry(se
, &hist_entry__sort_list
, list
) {
527 fprintf(fp
, "%c%s", *field_sep
, se
->header
);
530 width
= strlen(se
->header
);
532 if (col_width_list_str
) {
534 *se
->width
= atoi(col_width
);
535 col_width
= strchr(col_width
, ',');
540 width
= *se
->width
= max(*se
->width
, width
);
542 fprintf(fp
, " %*s", width
, se
->header
);
549 fprintf(fp
, "# ........");
551 fprintf(fp
, " ..........");
552 list_for_each_entry(se
, &hist_entry__sort_list
, list
) {
562 width
= strlen(se
->header
);
563 for (i
= 0; i
< width
; i
++)
571 for (nd
= rb_first(&output_hists
); nd
; nd
= rb_next(nd
)) {
572 pos
= rb_entry(nd
, struct hist_entry
, rb_node
);
573 ret
+= hist_entry__fprintf(fp
, pos
, total_samples
);
576 if (sort_order
== default_sort_order
&&
577 parent_pattern
== default_parent_pattern
) {
579 fprintf(fp
, "# (For a higher level overview, try: perf report --sort comm,dso)\n");
584 free(rem_sq_bracket
);
587 perf_read_values_display(fp
, &show_threads_values
,
593 static int validate_chain(struct ip_callchain
*chain
, event_t
*event
)
595 unsigned int chain_size
;
597 chain_size
= event
->header
.size
;
598 chain_size
-= (unsigned long)&event
->ip
.__more_data
- (unsigned long)event
;
600 if (chain
->nr
*sizeof(u64
) > chain_size
)
606 static int process_sample_event(event_t
*event
)
608 u64 ip
= event
->ip
.ip
;
610 void *more_data
= event
->ip
.__more_data
;
611 struct ip_callchain
*chain
= NULL
;
613 struct addr_location al
;
614 struct thread
*thread
= threads__findnew(event
->ip
.pid
);
616 if (sample_type
& PERF_SAMPLE_PERIOD
) {
617 period
= *(u64
*)more_data
;
618 more_data
+= sizeof(u64
);
621 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
623 event
->ip
.pid
, event
->ip
.tid
,
627 if (sample_type
& PERF_SAMPLE_CALLCHAIN
) {
630 chain
= (void *)more_data
;
632 dump_printf("... chain: nr:%Lu\n", chain
->nr
);
634 if (validate_chain(chain
, event
) < 0) {
635 pr_debug("call-chain problem with event, "
641 for (i
= 0; i
< chain
->nr
; i
++)
642 dump_printf("..... %2d: %016Lx\n", i
, chain
->ips
[i
]);
646 if (thread
== NULL
) {
647 pr_debug("problem processing %d event, skipping it.\n",
652 dump_printf(" ... thread: %s:%d\n", thread
->comm
, thread
->pid
);
654 if (comm_list
&& !strlist__has_entry(comm_list
, thread
->comm
))
657 cpumode
= event
->header
.misc
& PERF_RECORD_MISC_CPUMODE_MASK
;
659 thread__find_addr_location(thread
, cpumode
,
660 MAP__FUNCTION
, ip
, &al
, NULL
);
662 * We have to do this here as we may have a dso with no symbol hit that
663 * has a name longer than the ones with symbols sampled.
665 if (al
.map
&& !sort_dso
.elide
&& !al
.map
->dso
->slen_calculated
)
666 dso__calc_col_width(al
.map
->dso
);
669 (!al
.map
|| !al
.map
->dso
||
670 !(strlist__has_entry(dso_list
, al
.map
->dso
->short_name
) ||
671 (al
.map
->dso
->short_name
!= al
.map
->dso
->long_name
&&
672 strlist__has_entry(dso_list
, al
.map
->dso
->long_name
)))))
675 if (sym_list
&& al
.sym
&& !strlist__has_entry(sym_list
, al
.sym
->name
))
678 if (hist_entry__add(&al
, chain
, period
)) {
679 pr_debug("problem incrementing symbol count, skipping event\n");
683 event__stats
.total
+= period
;
688 static int process_comm_event(event_t
*event
)
690 struct thread
*thread
= threads__findnew(event
->comm
.pid
);
692 dump_printf(": %s:%d\n", event
->comm
.comm
, event
->comm
.pid
);
694 if (thread
== NULL
||
695 thread__set_comm_adjust(thread
, event
->comm
.comm
)) {
696 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
703 static int process_read_event(event_t
*event
)
705 struct perf_event_attr
*attr
;
707 attr
= perf_header__find_attr(event
->read
.id
, header
);
710 const char *name
= attr
? __event_name(attr
->type
, attr
->config
)
712 perf_read_values_add_value(&show_threads_values
,
713 event
->read
.pid
, event
->read
.tid
,
719 dump_printf(": %d %d %s %Lu\n", event
->read
.pid
, event
->read
.tid
,
720 attr
? __event_name(attr
->type
, attr
->config
) : "FAIL",
726 static int sample_type_check(u64 type
)
730 if (!(sample_type
& PERF_SAMPLE_CALLCHAIN
)) {
731 if (sort__has_parent
) {
732 fprintf(stderr
, "selected --sort parent, but no"
733 " callchain data. Did you call"
734 " perf record without -g?\n");
738 fprintf(stderr
, "selected -g but no callchain data."
739 " Did you call perf record without"
743 } else if (callchain_param
.mode
!= CHAIN_NONE
&& !callchain
) {
745 if (register_callchain_param(&callchain_param
) < 0) {
746 fprintf(stderr
, "Can't register callchain"
755 static struct perf_file_handler file_handler
= {
756 .process_sample_event
= process_sample_event
,
757 .process_mmap_event
= event__process_mmap
,
758 .process_comm_event
= process_comm_event
,
759 .process_exit_event
= event__process_task
,
760 .process_fork_event
= event__process_task
,
761 .process_lost_event
= event__process_lost
,
762 .process_read_event
= process_read_event
,
763 .sample_type_check
= sample_type_check
,
767 static int __cmd_report(void)
772 idle
= register_idle_thread();
773 thread__comm_adjust(idle
);
776 perf_read_values_init(&show_threads_values
);
778 register_perf_file_handler(&file_handler
);
780 ret
= mmap_dispatch_perf_file(&header
, input_name
, force
,
781 full_paths
, &event__cwdlen
, &event__cwd
);
786 event__print_totals();
791 threads__fprintf(stdout
);
794 dsos__fprintf(stdout
);
797 output__resort(event__stats
.total
);
798 output__fprintf(stdout
, event__stats
.total
);
801 perf_read_values_destroy(&show_threads_values
);
807 parse_callchain_opt(const struct option
*opt __used
, const char *arg
,
818 tok
= strtok((char *)arg
, ",");
822 /* get the output mode */
823 if (!strncmp(tok
, "graph", strlen(arg
)))
824 callchain_param
.mode
= CHAIN_GRAPH_ABS
;
826 else if (!strncmp(tok
, "flat", strlen(arg
)))
827 callchain_param
.mode
= CHAIN_FLAT
;
829 else if (!strncmp(tok
, "fractal", strlen(arg
)))
830 callchain_param
.mode
= CHAIN_GRAPH_REL
;
832 else if (!strncmp(tok
, "none", strlen(arg
))) {
833 callchain_param
.mode
= CHAIN_NONE
;
842 /* get the min percentage */
843 tok
= strtok(NULL
, ",");
847 callchain_param
.min_percent
= strtod(tok
, &endptr
);
852 if (register_callchain_param(&callchain_param
) < 0) {
853 fprintf(stderr
, "Can't register callchain params\n");
859 //static const char * const report_usage[] = {
860 const char * const report_usage
[] = {
861 "perf report [<options>] <command>",
865 static const struct option options
[] = {
866 OPT_STRING('i', "input", &input_name
, "file",
868 OPT_BOOLEAN('v', "verbose", &verbose
,
869 "be more verbose (show symbol address, etc)"),
870 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace
,
871 "dump raw trace in ASCII"),
872 OPT_STRING('k', "vmlinux", &symbol_conf
.vmlinux_name
,
873 "file", "vmlinux pathname"),
874 OPT_BOOLEAN('f', "force", &force
, "don't complain, do it"),
875 OPT_BOOLEAN('m', "modules", &symbol_conf
.use_modules
,
876 "load module symbols - WARNING: use only with -k and LIVE kernel"),
877 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples
,
878 "Show a column with the number of samples"),
879 OPT_BOOLEAN('T', "threads", &show_threads
,
880 "Show per-thread event counters"),
881 OPT_STRING(0, "pretty", &pretty_printing_style
, "key",
882 "pretty printing style key: normal raw"),
883 OPT_STRING('s', "sort", &sort_order
, "key[,key2...]",
884 "sort by key(s): pid, comm, dso, symbol, parent"),
885 OPT_BOOLEAN('P', "full-paths", &full_paths
,
886 "Don't shorten the pathnames taking into account the cwd"),
887 OPT_STRING('p', "parent", &parent_pattern
, "regex",
888 "regex filter to identify parent, see: '--sort parent'"),
889 OPT_BOOLEAN('x', "exclude-other", &exclude_other
,
890 "Only display entries with parent-match"),
891 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL
, "output_type,min_percent",
892 "Display callchains using output_type and min percent threshold. "
893 "Default: fractal,0.5", &parse_callchain_opt
, callchain_default_opt
),
894 OPT_STRING('d', "dsos", &dso_list_str
, "dso[,dso...]",
895 "only consider symbols in these dsos"),
896 OPT_STRING('C', "comms", &comm_list_str
, "comm[,comm...]",
897 "only consider symbols in these comms"),
898 OPT_STRING('S', "symbols", &sym_list_str
, "symbol[,symbol...]",
899 "only consider these symbols"),
900 OPT_STRING('w', "column-widths", &col_width_list_str
,
902 "don't try to adjust column width, use these fixed values"),
903 OPT_STRING('t', "field-separator", &field_sep
, "separator",
904 "separator for columns, no spaces will be added between "
905 "columns '.' is reserved."),
909 static void setup_sorting(void)
911 char *tmp
, *tok
, *str
= strdup(sort_order
);
913 for (tok
= strtok_r(str
, ", ", &tmp
);
914 tok
; tok
= strtok_r(NULL
, ", ", &tmp
)) {
915 if (sort_dimension__add(tok
) < 0) {
916 error("Unknown --sort key: `%s'", tok
);
917 usage_with_options(report_usage
, options
);
924 static void setup_list(struct strlist
**list
, const char *list_str
,
925 struct sort_entry
*se
, const char *list_name
,
929 *list
= strlist__new(true, list_str
);
931 fprintf(stderr
, "problems parsing %s list\n",
935 if (strlist__nr_entries(*list
) == 1) {
936 fprintf(fp
, "# %s: %s\n", list_name
,
937 strlist__entry(*list
, 0)->s
);
943 int cmd_report(int argc
, const char **argv
, const char *prefix __used
)
945 if (symbol__init(&symbol_conf
) < 0)
948 argc
= parse_options(argc
, argv
, options
, report_usage
, 0);
952 if (parent_pattern
!= default_parent_pattern
) {
953 sort_dimension__add("parent");
954 sort_parent
.elide
= 1;
959 * Any (unrecognized) arguments left?
962 usage_with_options(report_usage
, options
);
966 setup_list(&dso_list
, dso_list_str
, &sort_dso
, "dso", stdout
);
967 setup_list(&comm_list
, comm_list_str
, &sort_comm
, "comm", stdout
);
968 setup_list(&sym_list
, sym_list_str
, &sort_sym
, "symbol", stdout
);
970 if (field_sep
&& *field_sep
== '.') {
971 fputs("'.' is the only non valid --field-separator argument\n",
976 return __cmd_report();