perf_counter tools: move helper library to util/*
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / Documentation / perf_counter / builtin-stat.c
blobd7ace631fc4ff10a9b43152335b9cb958fb6f38d
1 /*
2 * kerneltop.c: show top kernel functions - performance counters showcase
4 Build with:
6 cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
8 Sample output:
10 ------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12 ------------------------------------------------------------------------------
14 weight RIP kernel function
15 ______ ________________ _______________
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
39 Sample output:
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
43 Performance counter stats for 'ls':
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
53 * Improvements and fixes by:
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
59 * Paul Mackerras <paulus@samba.org>
61 * Released under the GPL v2. (and only v2, not any later version)
64 #include "util/util.h"
66 #include <getopt.h>
67 #include <assert.h>
68 #include <fcntl.h>
69 #include <stdio.h>
70 #include <errno.h>
71 #include <ctype.h>
72 #include <time.h>
73 #include <sched.h>
74 #include <pthread.h>
76 #include <sys/syscall.h>
77 #include <sys/ioctl.h>
78 #include <sys/poll.h>
79 #include <sys/prctl.h>
80 #include <sys/wait.h>
81 #include <sys/uio.h>
82 #include <sys/mman.h>
84 #include <linux/unistd.h>
85 #include <linux/types.h>
87 #include "../../include/linux/perf_counter.h"
/*
 * prctl() request codes for the per-task performance counters:
 * PR_TASK_PERF_COUNTERS_DISABLE (cheaply) switches off every counter of
 * the calling task, PR_TASK_PERF_COUNTERS_ENABLE is its counterpart.
 */
#define PR_TASK_PERF_COUNTERS_DISABLE	31
#define PR_TASK_PERF_COUNTERS_ENABLE	32

/*
 * rdclock(): sample CLOCK_MONOTONIC and yield the reading in nanoseconds
 * (seconds * 10^9 + nanoseconds) as an unsigned 64-bit quantity.
 */
#define rdclock()						\
({								\
	struct timespec now;					\
								\
	clock_gettime(CLOCK_MONOTONIC, &now);			\
	now.tv_sec * 1000000000ULL + now.tv_nsec;		\
})

/*
 * Kernel-style annotations used by the declarations below; in user space
 * they expand to nothing.
 */
#define __user
#define asmlinkage
/*
 * Per-architecture glue: the perf_counter_open system call number plus
 * the rmb() and cpu_relax() primitives.
 *
 * The macros deliberately carry no trailing semicolon, so that
 * "if (cond) cpu_relax(); else ..." parses as a single statement.
 * __asm__/__volatile__ are used instead of the plain keywords so the
 * macros also expand correctly when compiling in a strict ISO C mode.
 */
#ifdef __x86_64__
#define __NR_perf_counter_open	295
#define rmb()		__asm__ __volatile__("lfence" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")
#endif

#ifdef __i386__
#define __NR_perf_counter_open	333
#define rmb()		__asm__ __volatile__("lfence" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("rep; nop" ::: "memory")
#endif

#ifdef __powerpc__
#define __NR_perf_counter_open	319
#define rmb()		__asm__ __volatile__("sync" ::: "memory")
#define cpu_relax()	__asm__ __volatile__("" ::: "memory")
#endif
/* Branch-prediction hint: tells the compiler that (x) is expected to be false. */
#define unlikely(x)	__builtin_expect(!!(x), 0)

/*
 * Type-checked minimum that evaluates each argument exactly once.
 * The pointer comparison is never used at run time; it only makes the
 * compiler complain when x and y have different types.
 * __typeof__ is used rather than typeof so the macro also works when
 * the compiler runs in a strict ISO C mode.
 */
#define min(x, y) ({				\
	__typeof__(x) _min1 = (x);		\
	__typeof__(y) _min2 = (y);		\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })
136 extern asmlinkage int sys_perf_counter_open(
137 struct perf_counter_hw_event *hw_event_uptr __user,
138 pid_t pid,
139 int cpu,
140 int group_fd,
141 unsigned long flags);
143 #define MAX_COUNTERS 64
144 #define MAX_NR_CPUS 256
146 #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id))
148 static int system_wide = 0;
150 static int nr_counters = 0;
151 static __u64 event_id[MAX_COUNTERS] = {
152 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
153 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
154 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
155 EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
157 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
158 EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
159 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
160 EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
162 static int default_interval = 100000;
163 static int event_count[MAX_COUNTERS];
164 static int fd[MAX_NR_CPUS][MAX_COUNTERS];
166 static int tid = -1;
167 static int profile_cpu = -1;
168 static int nr_cpus = 0;
169 static int nmi = 1;
170 static int group = 0;
171 static unsigned int page_size;
173 static int zero;
175 static int scale;
177 static const unsigned int default_count[] = {
178 1000000,
179 1000000,
180 10000,
181 10000,
182 1000000,
183 10000,
/* Display names for hardware events; event_name() indexes this by event id. */
static char *hw_event_names[] = {
	/* 0 */ "CPU cycles",
	/* 1 */ "instructions",
	/* 2 */ "cache references",
	/* 3 */ "cache misses",
	/* 4 */ "branches",
	/* 5 */ "branch misses",
	/* 6 */ "bus cycles",
};

/* Display names for software events; event_name() indexes this by event id. */
static char *sw_event_names[] = {
	/* 0 */ "cpu clock ticks",
	/* 1 */ "task clock ticks",
	/* 2 */ "pagefaults",
	/* 3 */ "context switches",
	/* 4 */ "CPU migrations",
	/* 5 */ "minor faults",
	/* 6 */ "major faults",
};
206 struct event_symbol {
207 __u64 event;
208 char *symbol;
211 static struct event_symbol event_symbols[] = {
212 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", },
213 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", },
214 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", },
215 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", },
216 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", },
217 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", },
218 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", },
219 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", },
220 {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", },
222 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", },
223 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", },
224 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", },
225 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", },
226 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", },
227 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", },
228 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", },
229 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", },
230 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", },
231 {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", },
/*
 * Extract one bit-field from an event config word: mask it with
 * PERF_COUNTER_<name>_MASK, then shift right by PERF_COUNTER_<name>_SHIFT.
 * The argument is parenthesized so that expressions such as
 * PERF_COUNTER_TYPE(a | b) are masked as a whole.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
242 static void display_events_help(void)
244 unsigned int i;
245 __u64 e;
247 printf(
248 " -e EVENT --event=EVENT # symbolic-name abbreviations");
250 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
251 int type, id;
253 e = event_symbols[i].event;
254 type = PERF_COUNTER_TYPE(e);
255 id = PERF_COUNTER_ID(e);
257 printf("\n %d:%d: %-20s",
258 type, id, event_symbols[i].symbol);
261 printf("\n"
262 " rNNN: raw PMU events (eventsel+umask)\n\n");
265 static void display_help(void)
267 printf(
268 "Usage: perfstat [<events...>] <cmd...>\n\n"
269 "PerfStat Options (up to %d event types can be specified):\n\n",
270 MAX_COUNTERS);
272 display_events_help();
274 printf(
275 " -l # scale counter values\n"
276 " -a # system-wide collection\n");
277 exit(0);
280 static char *event_name(int ctr)
282 __u64 config = event_id[ctr];
283 int type = PERF_COUNTER_TYPE(config);
284 int id = PERF_COUNTER_ID(config);
285 static char buf[32];
287 if (PERF_COUNTER_RAW(config)) {
288 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
289 return buf;
292 switch (type) {
293 case PERF_TYPE_HARDWARE:
294 if (id < PERF_HW_EVENTS_MAX)
295 return hw_event_names[id];
296 return "unknown-hardware";
298 case PERF_TYPE_SOFTWARE:
299 if (id < PERF_SW_EVENTS_MAX)
300 return sw_event_names[id];
301 return "unknown-software";
303 default:
304 break;
307 return "unknown";
311 * Each event can have multiple symbolic names.
312 * Symbolic names are (almost) exactly matched.
314 static __u64 match_event_symbols(char *str)
316 __u64 config, id;
317 int type;
318 unsigned int i;
320 if (sscanf(str, "r%llx", &config) == 1)
321 return config | PERF_COUNTER_RAW_MASK;
323 if (sscanf(str, "%d:%llu", &type, &id) == 2)
324 return EID(type, id);
326 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
327 if (!strncmp(str, event_symbols[i].symbol,
328 strlen(event_symbols[i].symbol)))
329 return event_symbols[i].event;
332 return ~0ULL;
335 static int parse_events(char *str)
337 __u64 config;
339 again:
340 if (nr_counters == MAX_COUNTERS)
341 return -1;
343 config = match_event_symbols(str);
344 if (config == ~0ULL)
345 return -1;
347 event_id[nr_counters] = config;
348 nr_counters++;
350 str = strstr(str, ",");
351 if (str) {
352 str++;
353 goto again;
356 return 0;
361 * perfstat
364 char fault_here[1000000];
366 static void create_perfstat_counter(int counter)
368 struct perf_counter_hw_event hw_event;
370 memset(&hw_event, 0, sizeof(hw_event));
371 hw_event.config = event_id[counter];
372 hw_event.record_type = 0;
373 hw_event.nmi = 0;
374 if (scale)
375 hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
376 PERF_FORMAT_TOTAL_TIME_RUNNING;
378 if (system_wide) {
379 int cpu;
380 for (cpu = 0; cpu < nr_cpus; cpu ++) {
381 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
382 if (fd[cpu][counter] < 0) {
383 printf("perfstat error: syscall returned with %d (%s)\n",
384 fd[cpu][counter], strerror(errno));
385 exit(-1);
388 } else {
389 hw_event.inherit = 1;
390 hw_event.disabled = 1;
392 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
393 if (fd[0][counter] < 0) {
394 printf("perfstat error: syscall returned with %d (%s)\n",
395 fd[0][counter], strerror(errno));
396 exit(-1);
401 int do_perfstat(int argc, char *argv[])
403 unsigned long long t0, t1;
404 int counter;
405 ssize_t res;
406 int status;
407 int pid;
409 if (!system_wide)
410 nr_cpus = 1;
412 for (counter = 0; counter < nr_counters; counter++)
413 create_perfstat_counter(counter);
415 argc -= optind;
416 argv += optind;
418 if (!argc)
419 display_help();
422 * Enable counters and exec the command:
424 t0 = rdclock();
425 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
427 if ((pid = fork()) < 0)
428 perror("failed to fork");
429 if (!pid) {
430 if (execvp(argv[0], argv)) {
431 perror(argv[0]);
432 exit(-1);
435 while (wait(&status) >= 0)
437 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
438 t1 = rdclock();
440 fflush(stdout);
442 fprintf(stderr, "\n");
443 fprintf(stderr, " Performance counter stats for \'%s\':\n",
444 argv[0]);
445 fprintf(stderr, "\n");
447 for (counter = 0; counter < nr_counters; counter++) {
448 int cpu, nv;
449 __u64 count[3], single_count[3];
450 int scaled;
452 count[0] = count[1] = count[2] = 0;
453 nv = scale ? 3 : 1;
454 for (cpu = 0; cpu < nr_cpus; cpu ++) {
455 res = read(fd[cpu][counter],
456 single_count, nv * sizeof(__u64));
457 assert(res == nv * sizeof(__u64));
459 count[0] += single_count[0];
460 if (scale) {
461 count[1] += single_count[1];
462 count[2] += single_count[2];
466 scaled = 0;
467 if (scale) {
468 if (count[2] == 0) {
469 fprintf(stderr, " %14s %-20s\n",
470 "<not counted>", event_name(counter));
471 continue;
473 if (count[2] < count[1]) {
474 scaled = 1;
475 count[0] = (unsigned long long)
476 ((double)count[0] * count[1] / count[2] + 0.5);
480 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
481 event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
483 double msecs = (double)count[0] / 1000000;
485 fprintf(stderr, " %14.6f %-20s (msecs)",
486 msecs, event_name(counter));
487 } else {
488 fprintf(stderr, " %14Ld %-20s (events)",
489 count[0], event_name(counter));
491 if (scaled)
492 fprintf(stderr, " (scaled from %.2f%%)",
493 (double) count[2] / count[1] * 100);
494 fprintf(stderr, "\n");
496 fprintf(stderr, "\n");
497 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
498 (double)(t1-t0)/1e6);
499 fprintf(stderr, "\n");
501 return 0;
504 static void process_options(int argc, char **argv)
506 int error = 0, counter;
508 for (;;) {
509 int option_index = 0;
510 /** Options for getopt */
511 static struct option long_options[] = {
512 {"count", required_argument, NULL, 'c'},
513 {"cpu", required_argument, NULL, 'C'},
514 {"delay", required_argument, NULL, 'd'},
515 {"dump_symtab", no_argument, NULL, 'D'},
516 {"event", required_argument, NULL, 'e'},
517 {"filter", required_argument, NULL, 'f'},
518 {"group", required_argument, NULL, 'g'},
519 {"help", no_argument, NULL, 'h'},
520 {"nmi", required_argument, NULL, 'n'},
521 {"munmap_info", no_argument, NULL, 'U'},
522 {"pid", required_argument, NULL, 'p'},
523 {"realtime", required_argument, NULL, 'r'},
524 {"scale", no_argument, NULL, 'l'},
525 {"symbol", required_argument, NULL, 's'},
526 {"stat", no_argument, NULL, 'S'},
527 {"vmlinux", required_argument, NULL, 'x'},
528 {"zero", no_argument, NULL, 'z'},
529 {NULL, 0, NULL, 0 }
531 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
532 long_options, &option_index);
533 if (c == -1)
534 break;
536 switch (c) {
537 case 'a': system_wide = 1; break;
538 case 'c': default_interval = atoi(optarg); break;
539 case 'C':
540 /* CPU and PID are mutually exclusive */
541 if (tid != -1) {
542 printf("WARNING: CPU switch overriding PID\n");
543 sleep(1);
544 tid = -1;
546 profile_cpu = atoi(optarg); break;
548 case 'e': error = parse_events(optarg); break;
550 case 'g': group = atoi(optarg); break;
551 case 'h': display_help(); break;
552 case 'l': scale = 1; break;
553 case 'n': nmi = atoi(optarg); break;
554 case 'p':
555 /* CPU and PID are mutually exclusive */
556 if (profile_cpu != -1) {
557 printf("WARNING: PID switch overriding CPU\n");
558 sleep(1);
559 profile_cpu = -1;
561 tid = atoi(optarg); break;
562 case 'z': zero = 1; break;
563 default: error = 1; break;
566 if (error)
567 display_help();
569 if (!nr_counters) {
570 nr_counters = 8;
573 for (counter = 0; counter < nr_counters; counter++) {
574 if (event_count[counter])
575 continue;
577 event_count[counter] = default_interval;
581 int cmd_stat(int argc, char **argv, const char *prefix)
583 page_size = sysconf(_SC_PAGE_SIZE);
585 process_options(argc, argv);
587 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
588 assert(nr_cpus <= MAX_NR_CPUS);
589 assert(nr_cpus >= 0);
591 return do_perfstat(argc, argv);