perf_counter tools: perf stat: make -l default-on
[linux-2.6/libata-dev.git] / Documentation / perf_counter / builtin-stat.c
blob1fde12762ca4d00e704b51afcaaf8cf2d9106de1
1 /*
2 * kerneltop.c: show top kernel functions - performance counters showcase
4 Build with:
6 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
8 Sample output:
10 ------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12 ------------------------------------------------------------------------------
14 weight RIP kernel function
15 ______ ________________ _______________
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
39 Sample output:
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
43 Performance counter stats for 'ls':
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
53 * Improvements and fixes by:
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
59 * Paul Mackerras <paulus@samba.org>
61 * Released under the GPL v2. (and only v2, not any later version)
64 #include "util/util.h"
66 #include <getopt.h>
67 #include <assert.h>
68 #include <fcntl.h>
69 #include <stdio.h>
70 #include <errno.h>
71 #include <time.h>
72 #include <sched.h>
73 #include <pthread.h>
75 #include <sys/syscall.h>
76 #include <sys/ioctl.h>
77 #include <sys/poll.h>
78 #include <sys/prctl.h>
79 #include <sys/wait.h>
80 #include <sys/uio.h>
81 #include <sys/mman.h>
83 #include <linux/unistd.h>
84 #include <linux/types.h>
86 #include "../../include/linux/perf_counter.h"
/*
 * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
 * counters in the current task.
 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/*
 * Read CLOCK_MONOTONIC and yield the reading as a nanosecond count
 * (seconds scaled by 10^9 plus the nanosecond remainder).
 */
#define rdclock()						\
({								\
	struct timespec now;					\
								\
	clock_gettime(CLOCK_MONOTONIC, &now);			\
	now.tv_sec * 1000000000ULL + now.tv_nsec;		\
})

/*
 * Pick up some kernel type conventions:
 * both annotations expand to nothing in this userspace build.
 */
#define __user
#define asmlinkage
/*
 * Per-architecture glue: the perf_counter_open syscall number, a read
 * memory barrier and a busy-wait hint.
 *
 * The macros deliberately expand to a single expression-statement WITHOUT
 * a trailing semicolon, so that "if (x) cpu_relax(); else ..." parses as
 * expected.  __asm__/__volatile__ are used instead of asm/volatile so the
 * macros also expand under strict ISO -std= modes.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()		__asm__ __volatile__("lfence" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()		__asm__ __volatile__("lfence" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()		__asm__ __volatile__("sync" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("" ::: "memory")
#endif
/* Branch-prediction hint: the condition is expected to be false. */
#define unlikely(x)	__builtin_expect(!!(x), 0)

/*
 * Type-checked minimum that evaluates each argument exactly once.
 * The pointer comparison exists only to make the compiler complain when
 * the two arguments have different types.  __typeof__ is used rather than
 * typeof so the macro also works when compiling with an ISO -std= mode.
 */
#define min(x, y) ({				\
	__typeof__(x) _min1 = (x);		\
	__typeof__(y) _min2 = (y);		\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })
135 extern asmlinkage int sys_perf_counter_open(
136 struct perf_counter_hw_event *hw_event_uptr __user,
137 pid_t pid,
138 int cpu,
139 int group_fd,
140 unsigned long flags);
142 #define MAX_COUNTERS 64
143 #define MAX_NR_CPUS 256
145 #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
147 static int system_wide = 0;
149 static int nr_counters = 0;
150 static __u64 event_id[MAX_COUNTERS] = {
151 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
152 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
153 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
154 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
156 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
157 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
158 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
159 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
161 static int default_interval = 100000;
162 static int event_count[MAX_COUNTERS];
163 static int fd[MAX_NR_CPUS][MAX_COUNTERS];
165 static int tid = -1;
166 static int profile_cpu = -1;
167 static int nr_cpus = 0;
168 static int nmi = 1;
169 static int group = 0;
170 static unsigned int page_size;
172 static int zero;
174 static int scale = 1;
176 static const unsigned int default_count[] = {
177 1000000,
178 1000000,
179 10000,
180 10000,
181 1000000,
182 10000,
/*
 * Human-readable names for hardware events.  event_name() indexes this
 * table directly with the event id extracted from the config word, so
 * the order of the entries matters.
 */
static char *hw_event_names[] = {
	"CPU cycles",
	"instructions",
	"cache references",
	"cache misses",
	"branches",
	"branch misses",
	"bus cycles",
};

/*
 * Human-readable names for software events, indexed the same way as
 * hw_event_names[].
 */
static char *sw_event_names[] = {
	"cpu clock ticks",
	"task clock ticks",
	"pagefaults",
	"context switches",
	"CPU migrations",
	"minor faults",
	"major faults",
};
205 struct event_symbol {
206 __u64 event;
207 char *symbol;
210 static struct event_symbol event_symbols[] = {
211 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
212 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
213 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
214 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
215 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
216 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
217 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
218 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
219 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
221 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
222 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
223 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
224 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
225 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
226 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
227 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
228 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
229 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
230 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
/*
 * Extract one bit-field from a counter config word: mask it with
 * PERF_COUNTER_<name>_MASK, then shift it down by PERF_COUNTER_<name>_SHIFT.
 * The argument is parenthesized so that compound expressions such as
 * "a | b" are masked as a whole rather than only their last operand.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
241 static void display_events_help(void)
243 unsigned int i;
244 __u64 e;
246 printf(
247 " -e EVENT --event=EVENT # symbolic-name abbreviations");
249 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
250 int type, id;
252 e = event_symbols[i].event;
253 type = PERF_COUNTER_TYPE(e);
254 id = PERF_COUNTER_ID(e);
256 printf("\n %d:%d: %-20s",
257 type, id, event_symbols[i].symbol);
260 printf("\n"
261 " rNNN: raw PMU events (eventsel+umask)\n\n");
264 static void display_help(void)
266 printf(
267 "Usage: perfstat [<events...>] <cmd...>\n\n"
268 "PerfStat Options (up to %d event types can be specified):\n\n",
269 MAX_COUNTERS);
271 display_events_help();
273 printf(
274 " -l # scale counter values\n"
275 " -a # system-wide collection\n");
276 exit(0);
279 static char *event_name(int ctr)
281 __u64 config = event_id[ctr];
282 int type = PERF_COUNTER_TYPE(config);
283 int id = PERF_COUNTER_ID(config);
284 static char buf[32];
286 if (PERF_COUNTER_RAW(config)) {
287 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
288 return buf;
291 switch (type) {
292 case PERF_TYPE_HARDWARE:
293 if (id < PERF_HW_EVENTS_MAX)
294 return hw_event_names[id];
295 return "unknown-hardware";
297 case PERF_TYPE_SOFTWARE:
298 if (id < PERF_SW_EVENTS_MAX)
299 return sw_event_names[id];
300 return "unknown-software";
302 default:
303 break;
306 return "unknown";
310 * Each event can have multiple symbolic names.
311 * Symbolic names are (almost) exactly matched.
313 static __u64 match_event_symbols(char *str)
315 __u64 config, id;
316 int type;
317 unsigned int i;
319 if (sscanf(str, "r%llx", &config) == 1)
320 return config | PERF_COUNTER_RAW_MASK;
322 if (sscanf(str, "%d:%llu", &type, &id) == 2)
323 return EID(type, id);
325 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
326 if (!strncmp(str, event_symbols[i].symbol,
327 strlen(event_symbols[i].symbol)))
328 return event_symbols[i].event;
331 return ~0ULL;
334 static int parse_events(char *str)
336 __u64 config;
338 again:
339 if (nr_counters == MAX_COUNTERS)
340 return -1;
342 config = match_event_symbols(str);
343 if (config == ~0ULL)
344 return -1;
346 event_id[nr_counters] = config;
347 nr_counters++;
349 str = strstr(str, ",");
350 if (str) {
351 str++;
352 goto again;
355 return 0;
360 * perfstat
363 char fault_here[1000000];
365 static void create_perfstat_counter(int counter)
367 struct perf_counter_hw_event hw_event;
369 memset(&hw_event, 0, sizeof(hw_event));
370 hw_event.config = event_id[counter];
371 hw_event.record_type = 0;
372 hw_event.nmi = 0;
373 if (scale)
374 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
375 PERF_FORMAT_TOTAL_TIME_RUNNING;
377 if (system_wide) {
378 int cpu;
379 for (cpu = 0; cpu < nr_cpus; cpu ++) {
380 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
381 if (fd[cpu][counter] < 0) {
382 printf("perfstat error: syscall returned with %d (%s)\n",
383 fd[cpu][counter], strerror(errno));
384 exit(-1);
387 } else {
388 hw_event.inherit = 1;
389 hw_event.disabled = 1;
391 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
392 if (fd[0][counter] < 0) {
393 printf("perfstat error: syscall returned with %d (%s)\n",
394 fd[0][counter], strerror(errno));
395 exit(-1);
400 int do_perfstat(int argc, char *argv[])
402 unsigned long long t0, t1;
403 int counter;
404 ssize_t res;
405 int status;
406 int pid;
408 if (!system_wide)
409 nr_cpus = 1;
411 for (counter = 0; counter < nr_counters; counter++)
412 create_perfstat_counter(counter);
414 argc -= optind;
415 argv += optind;
417 if (!argc)
418 display_help();
421 * Enable counters and exec the command:
423 t0 = rdclock();
424 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
426 if ((pid = fork()) < 0)
427 perror("failed to fork");
428 if (!pid) {
429 if (execvp(argv[0], argv)) {
430 perror(argv[0]);
431 exit(-1);
434 while (wait(&status) >= 0)
436 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
437 t1 = rdclock();
439 fflush(stdout);
441 fprintf(stderr, "\n");
442 fprintf(stderr, " Performance counter stats for \'%s\':\n",
443 argv[0]);
444 fprintf(stderr, "\n");
446 for (counter = 0; counter < nr_counters; counter++) {
447 int cpu, nv;
448 __u64 count[3], single_count[3];
449 int scaled;
451 count[0] = count[1] = count[2] = 0;
452 nv = scale ? 3 : 1;
453 for (cpu = 0; cpu < nr_cpus; cpu ++) {
454 res = read(fd[cpu][counter],
455 single_count, nv * sizeof(__u64));
456 assert(res == nv * sizeof(__u64));
458 count[0] += single_count[0];
459 if (scale) {
460 count[1] += single_count[1];
461 count[2] += single_count[2];
465 scaled = 0;
466 if (scale) {
467 if (count[2] == 0) {
468 fprintf(stderr, " %14s %-20s\n",
469 "<not counted>", event_name(counter));
470 continue;
472 if (count[2] < count[1]) {
473 scaled = 1;
474 count[0] = (unsigned long long)
475 ((double)count[0] * count[1] / count[2] + 0.5);
479 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
480 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
482 double msecs = (double)count[0] / 1000000;
484 fprintf(stderr, " %14.6f %-20s (msecs)",
485 msecs, event_name(counter));
486 } else {
487 fprintf(stderr, " %14Ld %-20s (events)",
488 count[0], event_name(counter));
490 if (scaled)
491 fprintf(stderr, " (scaled from %.2f%%)",
492 (double) count[2] / count[1] * 100);
493 fprintf(stderr, "\n");
495 fprintf(stderr, "\n");
496 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
497 (double)(t1-t0)/1e6);
498 fprintf(stderr, "\n");
500 return 0;
503 static void process_options(int argc, char **argv)
505 int error = 0, counter;
507 for (;;) {
508 int option_index = 0;
509 /** Options for getopt */
510 static struct option long_options[] = {
511 {"count", required_argument, NULL, 'c'},
512 {"cpu", required_argument, NULL, 'C'},
513 {"delay", required_argument, NULL, 'd'},
514 {"dump_symtab", no_argument, NULL, 'D'},
515 {"event", required_argument, NULL, 'e'},
516 {"filter", required_argument, NULL, 'f'},
517 {"group", required_argument, NULL, 'g'},
518 {"help", no_argument, NULL, 'h'},
519 {"nmi", required_argument, NULL, 'n'},
520 {"munmap_info", no_argument, NULL, 'U'},
521 {"pid", required_argument, NULL, 'p'},
522 {"realtime", required_argument, NULL, 'r'},
523 {"scale", no_argument, NULL, 'l'},
524 {"symbol", required_argument, NULL, 's'},
525 {"stat", no_argument, NULL, 'S'},
526 {"vmlinux", required_argument, NULL, 'x'},
527 {"zero", no_argument, NULL, 'z'},
528 {NULL, 0, NULL, 0 }
530 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
531 long_options, &option_index);
532 if (c == -1)
533 break;
535 switch (c) {
536 case 'a': system_wide = 1; break;
537 case 'c': default_interval = atoi(optarg); break;
538 case 'C':
539 /* CPU and PID are mutually exclusive */
540 if (tid != -1) {
541 printf("WARNING: CPU switch overriding PID\n");
542 sleep(1);
543 tid = -1;
545 profile_cpu = atoi(optarg); break;
547 case 'e': error = parse_events(optarg); break;
549 case 'g': group = atoi(optarg); break;
550 case 'h': display_help(); break;
551 case 'l': scale = 1; break;
552 case 'n': nmi = atoi(optarg); break;
553 case 'p':
554 /* CPU and PID are mutually exclusive */
555 if (profile_cpu != -1) {
556 printf("WARNING: PID switch overriding CPU\n");
557 sleep(1);
558 profile_cpu = -1;
560 tid = atoi(optarg); break;
561 case 'z': zero = 1; break;
562 default: error = 1; break;
565 if (error)
566 display_help();
568 if (!nr_counters) {
569 nr_counters = 8;
572 for (counter = 0; counter < nr_counters; counter++) {
573 if (event_count[counter])
574 continue;
576 event_count[counter] = default_interval;
580 int cmd_stat(int argc, char **argv, const char *prefix)
582 page_size = sysconf(_SC_PAGE_SIZE);
584 process_options(argc, argv);
586 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
587 assert(nr_cpus <= MAX_NR_CPUS);
588 assert(nr_cpus >= 0);
590 return do_perfstat(argc, argv);