2 * perfstat: /usr/bin/time -alike performance counter statistics utility
4 * It summarizes the counter events of all tasks (and child tasks),
5 * covering all CPUs that the command (or workload) executes on.
6 * It only counts the per-task events of the workload started,
7 * independent of how many other tasks run on those CPUs.
9 * Build with: cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c
14 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
16 Performance counter stats for 'ls':
18 163516953 instructions
23 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
25 * Released under the GPLv2 (not later).
27 * Percpu counter support by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
28 * Symbolic event options by: Wu Fengguang <fengguang.wu@intel.com>
44 #include <sys/syscall.h>
45 #include <sys/ioctl.h>
46 #include <sys/prctl.h>
47 #include <sys/types.h>
53 #include <linux/unistd.h>
55 #include "perfcounters.h"
/* Number of counters in use; main() iterates `counter` from 0 up to this. */
57 static int nr_counters
= 0;
/* CPU count; main() assigns it from sysconf(_SC_NPROCESSORS_ONLN). */
58 static int nr_cpus
= 0;
/*
 * Per-counter event type; create_counter() copies event_id[counter] into
 * hw_event.type.
 * NOTE(review): the meaning of the individual default values is defined
 * outside this view (presumably perfcounters.h) - confirm there.
 */
60 static int event_id
[MAX_COUNTERS
] =
61 { -2, -5, -4, -3, 0, 1, 2, 3};
/*
 * Per-counter value copied into hw_event.raw by create_counter(). main()
 * only applies the clock (msecs) formatting when this entry is zero.
 */
63 static int event_raw
[MAX_COUNTERS
];
/*
 * NOTE(review): presumably set by the -s ("system-wide collection") option;
 * neither the assignment nor any use is visible in this view - confirm.
 */
65 static int system_wide
= 0;
/*
 * display_help - print the usage text.
 *
 * Visible pieces: the usage/options banner, a listing generated from the
 * event_symbols table, and the descriptions of the raw-event (rNNN), -s and
 * -c options.
 *
 * NOTE(review): several lines of this function (braces, the openers of the
 * print calls, the branch structure inside the loop) are missing from this
 * view; the comments below describe only what is visible.
 */
67 static void display_help(void)
73 "Usage: perfstat [<events...>] <cmd...>\n\n"
74 "PerfStat Options (up to %d event types can be specified):\n\n",
77 " -e EVENT --event=EVENT # symbolic-name abbreviations");
/*
 * Walk event_symbols. `e` holds the event id of the most recently seen
 * entry and is seeded with PERF_HW_EVENTS_MAX.
 */
79 for (i
= 0, e
= PERF_HW_EVENTS_MAX
; i
< ARRAY_SIZE(event_symbols
); i
++) {
/* Entry with a different event id than the previous one: remember the id. */
80 if (e
!= event_symbols
[i
].event
) {
81 e
= event_symbols
[i
].event
;
/* Format for a new entry: numeric id followed by the symbol. */
83 "\n %2d: %-20s", e
, event_symbols
[i
].symbol
);
/*
 * Appends just the symbol.
 * NOTE(review): presumably the else-branch, listing further symbols for
 * the same event id - confirm against the complete source.
 */
85 printf(" %s", event_symbols
[i
].symbol
);
89 " rNNN: raw event type\n\n"
90 " -s # system-wide collection\n\n"
91 " -c <cmd..> --command=<cmd..> # command+arguments to be timed.\n"
/*
 * process_options - parse the command line with getopt_long().
 *
 * @argc, @argv: forwarded unchanged to getopt_long().
 *
 * The visible code declares the long-option table, calls getopt_long() with
 * the short option string "+:e:c:s", and passes optarg to parse_events().
 *
 * NOTE(review): the option dispatch and the surrounding loop are not visible
 * in this view.
 */
96 static void process_options(int argc
, char *argv
[])
100 /** Options for getopt */
101 static struct option long_options
[] = {
102 {"event", required_argument
, NULL
, 'e'},
/*
 * NOTE(review): --help maps to 'h', but 'h' does not appear in the short
 * option string passed to getopt_long() below, so only the long form
 * would be recognised - confirm this is intended.
 */
103 {"help", no_argument
, NULL
, 'h'},
/*
 * NOTE(review): --command is declared no_argument here, while the short
 * option string uses "c:" (argument required) and the help text shows
 * --command=<cmd..>; the long and short forms disagree.
 */
104 {"command", no_argument
, NULL
, 'c'},
/*
 * Short options: e and c take an argument, s does not. Per the getopt
 * documentation a leading '+' stops parsing at the first non-option
 * argument, and the ':' after it makes a missing option argument be
 * reported as ':'.
 */
107 int c
= getopt_long(argc
, argv
, "+:e:c:s",
108 long_options
, &option_index
);
/* Hands the current option argument to parse_events(). */
119 parse_events(optarg
);
/*
 * 1,000,000-byte array with external linkage.
 * NOTE(review): it is not referenced anywhere in the visible code; its
 * purpose cannot be determined from this view.
 */
138 char fault_here
[1000000];
/*
 * Counter file descriptors indexed [cpu][counter]; filled in by
 * create_counter() from sys_perf_counter_open() and read back in main().
 */
140 static int fd
[MAX_NR_CPUS
][MAX_COUNTERS
];
/*
 * create_counter - open the kernel counter(s) for one counter slot.
 *
 * @counter: index into event_id[], event_raw[] and the second dimension
 *           of fd[][].
 *
 * Builds a zeroed perf_counter_hw_event from the per-counter tables and
 * stores the descriptors returned by sys_perf_counter_open() in fd[][].
 * Two open sequences are visible: one descriptor per CPU, and a single
 * descriptor stored in fd[0][counter] with inherit and disabled set.
 *
 * NOTE(review): the condition choosing between the two sequences
 * (presumably system_wide) and whatever follows the error messages are not
 * visible in this view - confirm against the complete source.
 */
142 static void create_counter(int counter
)
144 struct perf_counter_hw_event hw_event
;
146 memset(&hw_event
, 0, sizeof(hw_event
));
147 hw_event
.type
= event_id
[counter
];
148 hw_event
.raw
= event_raw
[counter
];
149 hw_event
.record_type
= PERF_RECORD_SIMPLE
;
/*
 * Per-CPU sequence: one open per CPU index below nr_cpus, called with
 * (-1, cpu, -1, 0) after the event descriptor.
 * NOTE(review): presumably (pid, cpu, group fd, flags) - confirm against
 * the sys_perf_counter_open() definition.
 */
154 for (cpu
= 0; cpu
< nr_cpus
; cpu
++) {
155 fd
[cpu
][counter
] = sys_perf_counter_open(&hw_event
, -1, cpu
, -1, 0);
/* A negative descriptor is reported on stdout together with strerror(errno). */
156 if (fd
[cpu
][counter
] < 0) {
157 printf("perfstat error: syscall returned with %d (%s)\n",
158 fd
[cpu
][counter
], strerror(errno
));
/*
 * Single-descriptor sequence: the counter is opened with inherit and
 * disabled set, called with (0, -1, -1, 0), and only fd[0][counter] is
 * written.
 */
164 hw_event
.inherit
= 1;
165 hw_event
.disabled
= 1;
167 fd
[0][counter
] = sys_perf_counter_open(&hw_event
, 0, -1, -1, 0);
168 if (fd
[0][counter
] < 0) {
169 printf("perfstat error: syscall returned with %d (%s)\n",
170 fd
[0][counter
], strerror(errno
));
/*
 * main - run a command with counters attached and print the totals.
 *
 * Visible flow: parse options, query the online CPU count, create every
 * counter, enable counters via prctl(), fork and execvp() the command, wait
 * for children, disable counters via prctl(), then print one line per
 * counter followed by the wall-clock time. All statistics go to stderr.
 *
 * NOTE(review): many lines (braces, the assignments of t0/t1, the handling
 * after the error checks) are missing from this view; the comments below
 * describe only the visible statements.
 */
177 int main(int argc
, char *argv
[])
179 unsigned long long t0
, t1
;
185 process_options(argc
, argv
);
/* The CPU count is validated only through assert(). */
188 nr_cpus
= sysconf(_SC_NPROCESSORS_ONLN
);
189 assert(nr_cpus
<= MAX_NR_CPUS
);
190 assert(nr_cpus
>= 0);
194 for (counter
= 0; counter
< nr_counters
; counter
++)
195 create_counter(counter
);
201 * Enable counters and exec the command:
204 prctl(PR_TASK_PERF_COUNTERS_ENABLE
);
/*
 * NOTE(review): a failed fork() is reported with perror(); whether the
 * program stops afterwards is not visible in this view.
 */
206 if ((pid
= fork()) < 0)
207 perror("failed to fork");
209 if (execvp(argv
[0], argv
)) {
/* Keep calling wait() until it reports an error (negative return). */
214 while (wait(&status
) >= 0)
216 prctl(PR_TASK_PERF_COUNTERS_DISABLE
);
221 fprintf(stderr
, "\n");
222 fprintf(stderr
, " Performance counter stats for \'%s\':\n",
224 fprintf(stderr
, "\n");
/* One output line per counter. */
226 for (counter
= 0; counter
< nr_counters
; counter
++) {
228 __u64 count
, single_count
;
/*
 * Sum the value read from every per-CPU descriptor of this counter.
 * NOTE(review): the initialisation of `count` is not visible here -
 * confirm it is zeroed for each counter before this loop.
 * NOTE(review): this reads fd[cpu][counter] for every cpu below nr_cpus,
 * while create_counter()'s inherit sequence only opens fd[0][counter] -
 * confirm nr_cpus is limited accordingly on that path.
 */
231 for (cpu
= 0; cpu
< nr_cpus
; cpu
++) {
232 res
= read(fd
[cpu
][counter
],
233 (char *) &single_count
, sizeof(single_count
));
/* A short or failed read is caught only by this assert(). */
234 assert(res
== sizeof(single_count
));
235 count
+= single_count
;
/*
 * Non-raw CPU-clock and task-clock counters are printed as milliseconds
 * (count / 1000000); every other counter is printed as an event count.
 */
238 if (!event_raw
[counter
] &&
239 (event_id
[counter
] == PERF_COUNT_CPU_CLOCK
||
240 event_id
[counter
] == PERF_COUNT_TASK_CLOCK
)) {
242 double msecs
= (double)count
/ 1000000;
244 fprintf(stderr
, " %14.6f %-20s (msecs)\n",
245 msecs
, event_name(counter
));
/*
 * NOTE(review): "%14Ld" is used with a __u64 argument. The 'L' modifier
 * on an integer conversion is a glibc extension (treated like "ll"),
 * and 'd' is a signed conversion; "%14llu" with a cast, or PRIu64,
 * would be the portable spelling.
 */
247 fprintf(stderr
, " %14Ld %-20s (events)\n",
248 count
, event_name(counter
));
251 fprintf(stderr
, "\n");
253 fprintf(stderr
, "\n");
/* Elapsed time is (t1 - t0) / 1e6, labelled as msecs. */
254 fprintf(stderr
, " Wall-clock time elapsed: %12.6f msecs\n",
255 (double)(t1
-t0
)/1e6
);
256 fprintf(stderr
, "\n");