perf_counter tools: fix event_id type
[firewire-audio.git] / Documentation / perf_counter / kerneltop.c
blob7bfb0f0d8005bb83863c66be03ef090f5180ee0c
1 /*
2 * kerneltop.c: show top kernel functions - performance counters showcase
4 Build with:
6 cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c
8 Sample output:
10 ------------------------------------------------------------------------------
11 KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2)
12 ------------------------------------------------------------------------------
14 weight RIP kernel function
15 ______ ________________ _______________
17 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18 33.00 - ffffffff804cb740 : sock_alloc_send_skb
19 31.26 - ffffffff804ce808 : skb_push
20 22.43 - ffffffff80510004 : tcp_established_options
21 19.00 - ffffffff8027d250 : find_get_page
22 15.76 - ffffffff804e4fc9 : eth_type_trans
23 15.20 - ffffffff804d8baa : dst_release
24 14.86 - ffffffff804cf5d8 : skb_release_head_state
25 14.00 - ffffffff802217d5 : read_hpet
26 12.00 - ffffffff804ffb7f : __ip_local_out
27 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28 8.54 - ffffffff805001a3 : ip_queue_xmit
32 * perfstat: /usr/bin/time -alike performance counter statistics utility
34 It summarizes the counter events of all tasks (and child tasks),
35 covering all CPUs that the command (or workload) executes on.
36 It only counts the per-task events of the workload started,
37 independent of how many other tasks run on those CPUs.
39 Sample output:
41 $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
43 Performance counter stats for 'ls':
45 163516953 instructions
46 2295 cache-misses
47 2855182 branch-misses
51 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
53 * Improvements and fixes by:
55 * Arjan van de Ven <arjan@linux.intel.com>
56 * Yanmin Zhang <yanmin.zhang@intel.com>
57 * Wu Fengguang <fengguang.wu@intel.com>
58 * Mike Galbraith <efault@gmx.de>
60 * Released under the GPL v2. (and only v2, not any later version)
63 #define _GNU_SOURCE
64 #include <sys/types.h>
65 #include <sys/stat.h>
66 #include <sys/time.h>
67 #include <unistd.h>
68 #include <stdint.h>
69 #include <stdlib.h>
70 #include <string.h>
71 #include <getopt.h>
72 #include <assert.h>
73 #include <fcntl.h>
74 #include <stdio.h>
75 #include <errno.h>
76 #include <ctype.h>
77 #include <time.h>
79 #include <glib.h>
81 #include <sys/syscall.h>
82 #include <sys/ioctl.h>
83 #include <sys/poll.h>
84 #include <sys/prctl.h>
85 #include <sys/wait.h>
86 #include <sys/uio.h>
88 #include <linux/unistd.h>
90 #include "perfcounters.h"
/* Capacity of the per-counter and per-CPU bookkeeping arrays below. */
#define MAX_COUNTERS		64
#define MAX_NR_CPUS		256

/* Initial contents of event_id[]; used by perfstat when no -e is given. */
#define DEF_PERFSTAT_EVENTS	{ -2, -5, -4, -3, 0, 1, 2, 3}
98 static int run_perfstat = 0;
99 static int system_wide = 0;
101 static int nr_counters = 0;
102 static __s64 event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS;
103 static int event_raw[MAX_COUNTERS];
104 static int event_count[MAX_COUNTERS];
105 static int fd[MAX_NR_CPUS][MAX_COUNTERS];
107 static __u64 count_filter = 100;
109 static int tid = -1;
110 static int profile_cpu = -1;
111 static int nr_cpus = 0;
112 static int nmi = 1;
113 static int group = 0;
115 static char *vmlinux;
117 static char *sym_filter;
118 static unsigned long filter_start;
119 static unsigned long filter_end;
121 static int delay_secs = 2;
122 static int zero;
123 static int dump_symtab;
125 static GList *lines;
/* One line of disassembler output plus the samples attributed to it. */
struct source_line {
	uint64_t	EIP;	/* address parsed from the line; stays 0 if none */
	unsigned long	count;	/* hits recorded by record_precise_ip() */
	char		*line;	/* text of the line, newline stripped */
};
/*
 * Default sampling periods; process_options() indexes this table with the
 * event id of a counter that has no explicit -c value.
 */
const unsigned int default_count[] = {
	[0] = 1000000,
	[1] = 1000000,
	[2] =   10000,
	[3] =   10000,
	[4] = 1000000,
	[5] =   10000,
};
/* Display names for non-negative event ids; event_name() indexes by id. */
static char *hw_event_names[] = {
	[0] = "CPU cycles",
	[1] = "instructions",
	[2] = "cache references",
	[3] = "cache misses",
	[4] = "branches",
	[5] = "branch misses",
	[6] = "bus cycles",
};
/* Display names for negative event ids; event_name() indexes by -id - 1. */
static char *sw_event_names[] = {
	[0] = "cpu clock ticks",
	[1] = "task clock ticks",
	[2] = "pagefaults",
	[3] = "context switches",
	[4] = "CPU migrations",
};
/* Maps one command-line spelling to the event id it selects. */
struct event_symbol {
	int	event;		/* event id stored into event_id[] */
	char	*symbol;	/* name accepted by -e */
};
166 static struct event_symbol event_symbols[] = {
167 {PERF_COUNT_CPU_CYCLES, "cpu-cycles", },
168 {PERF_COUNT_CPU_CYCLES, "cycles", },
169 {PERF_COUNT_INSTRUCTIONS, "instructions", },
170 {PERF_COUNT_CACHE_REFERENCES, "cache-references", },
171 {PERF_COUNT_CACHE_MISSES, "cache-misses", },
172 {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", },
173 {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", },
174 {PERF_COUNT_BRANCH_MISSES, "branch-misses", },
175 {PERF_COUNT_BUS_CYCLES, "bus-cycles", },
176 {PERF_COUNT_CPU_CLOCK, "cpu-ticks", },
177 {PERF_COUNT_CPU_CLOCK, "ticks", },
178 {PERF_COUNT_TASK_CLOCK, "task-ticks", },
179 {PERF_COUNT_PAGE_FAULTS, "page-faults", },
180 {PERF_COUNT_PAGE_FAULTS, "faults", },
181 {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", },
182 {PERF_COUNT_CONTEXT_SWITCHES, "cs", },
183 {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", },
184 {PERF_COUNT_CPU_MIGRATIONS, "migrations", },
187 static void display_events_help(void)
189 unsigned int i;
190 int e;
192 printf(
193 " -e EVENT --event=EVENT # symbolic-name abbreviations");
195 for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) {
196 if (e != event_symbols[i].event) {
197 e = event_symbols[i].event;
198 printf(
199 "\n %2d: %-20s", e, event_symbols[i].symbol);
200 } else
201 printf(" %s", event_symbols[i].symbol);
204 printf("\n"
205 " rNNN: raw PMU events (eventsel+umask)\n\n");
208 static void display_perfstat_help(void)
210 printf(
211 "Usage: perfstat [<events...>] <cmd...>\n\n"
212 "PerfStat Options (up to %d event types can be specified):\n\n",
213 MAX_COUNTERS);
215 display_events_help();
217 printf(
218 " -a # system-wide collection\n");
219 exit(0);
222 static void display_help(void)
224 if (run_perfstat)
225 return display_perfstat_help();
227 printf(
228 "Usage: kerneltop [<options>]\n"
229 " Or: kerneltop -S [<options>] COMMAND [ARGS]\n\n"
230 "KernelTop Options (up to %d event types can be specified at once):\n\n",
231 MAX_COUNTERS);
233 display_events_help();
235 printf(
236 " -S --stat # perfstat COMMAND\n"
237 " -a # system-wide collection (for perfstat)\n\n"
238 " -c CNT --count=CNT # event period to sample\n\n"
239 " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n"
240 " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n"
241 " -d delay --delay=<seconds> # sampling/display delay [default: 2]\n"
242 " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n"
243 " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n"
244 " -x path --vmlinux=<path> # the vmlinux binary, required for -s use\n"
245 " -z --zero # zero counts after display\n"
246 " -D --dump_symtab # dump symbol table to stderr on startup\n"
249 exit(0);
252 static int type_valid(int type)
254 if (type >= PERF_HW_EVENTS_MAX)
255 return 0;
256 if (type <= PERF_SW_EVENTS_MIN)
257 return 0;
259 return 1;
262 static char *event_name(int ctr)
264 __s64 type = event_id[ctr];
265 static char buf[32];
267 if (event_raw[ctr]) {
268 sprintf(buf, "raw 0x%llx", (long long)type);
269 return buf;
271 if (!type_valid(type))
272 return "unknown";
274 if (type >= 0)
275 return hw_event_names[type];
277 return sw_event_names[-type-1];
281 * Each event can have multiple symbolic names.
282 * Symbolic names are (almost) exactly matched.
284 static int match_event_symbols(char *str)
286 unsigned int i;
288 if (isdigit(str[0]) || str[0] == '-')
289 return atoi(str);
291 for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
292 if (!strncmp(str, event_symbols[i].symbol,
293 strlen(event_symbols[i].symbol)))
294 return event_symbols[i].event;
297 return PERF_HW_EVENTS_MAX;
300 static int parse_events(char *str)
302 __s64 type;
303 int raw;
305 again:
306 if (nr_counters == MAX_COUNTERS)
307 return -1;
309 raw = 0;
310 if (*str == 'r') {
311 raw = 1;
312 ++str;
313 type = strtol(str, NULL, 16);
314 } else {
315 type = match_event_symbols(str);
316 if (!type_valid(type))
317 return -1;
320 event_id[nr_counters] = type;
321 event_raw[nr_counters] = raw;
322 nr_counters++;
324 str = strstr(str, ",");
325 if (str) {
326 str++;
327 goto again;
330 return 0;
/*
 * perfstat
 */

/* Large global buffer; nothing in the visible part of this file references it. */
char fault_here[1000000];
340 static void create_perfstat_counter(int counter)
342 struct perf_counter_hw_event hw_event;
344 memset(&hw_event, 0, sizeof(hw_event));
345 hw_event.type = event_id[counter];
346 hw_event.raw = event_raw[counter];
347 hw_event.record_type = PERF_RECORD_SIMPLE;
348 hw_event.nmi = 0;
350 if (system_wide) {
351 int cpu;
352 for (cpu = 0; cpu < nr_cpus; cpu ++) {
353 fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
354 if (fd[cpu][counter] < 0) {
355 printf("perfstat error: syscall returned with %d (%s)\n",
356 fd[cpu][counter], strerror(errno));
357 exit(-1);
360 } else {
361 hw_event.inherit = 1;
362 hw_event.disabled = 1;
364 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
365 if (fd[0][counter] < 0) {
366 printf("perfstat error: syscall returned with %d (%s)\n",
367 fd[0][counter], strerror(errno));
368 exit(-1);
373 int do_perfstat(int argc, char *argv[])
375 unsigned long long t0, t1;
376 int counter;
377 ssize_t res;
378 int status;
379 int pid;
381 if (!system_wide)
382 nr_cpus = 1;
384 for (counter = 0; counter < nr_counters; counter++)
385 create_perfstat_counter(counter);
387 argc -= optind;
388 argv += optind;
391 * Enable counters and exec the command:
393 t0 = rdclock();
394 prctl(PR_TASK_PERF_COUNTERS_ENABLE);
396 if ((pid = fork()) < 0)
397 perror("failed to fork");
398 if (!pid) {
399 if (execvp(argv[0], argv)) {
400 perror(argv[0]);
401 exit(-1);
404 while (wait(&status) >= 0)
406 prctl(PR_TASK_PERF_COUNTERS_DISABLE);
407 t1 = rdclock();
409 fflush(stdout);
411 fprintf(stderr, "\n");
412 fprintf(stderr, " Performance counter stats for \'%s\':\n",
413 argv[0]);
414 fprintf(stderr, "\n");
416 for (counter = 0; counter < nr_counters; counter++) {
417 int cpu;
418 __u64 count, single_count;
420 count = 0;
421 for (cpu = 0; cpu < nr_cpus; cpu ++) {
422 res = read(fd[cpu][counter],
423 (char *) &single_count, sizeof(single_count));
424 assert(res == sizeof(single_count));
425 count += single_count;
428 if (!event_raw[counter] &&
429 (event_id[counter] == PERF_COUNT_CPU_CLOCK ||
430 event_id[counter] == PERF_COUNT_TASK_CLOCK)) {
432 double msecs = (double)count / 1000000;
434 fprintf(stderr, " %14.6f %-20s (msecs)\n",
435 msecs, event_name(counter));
436 } else {
437 fprintf(stderr, " %14Ld %-20s (events)\n",
438 count, event_name(counter));
440 if (!counter)
441 fprintf(stderr, "\n");
443 fprintf(stderr, "\n");
444 fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
445 (double)(t1-t0)/1e6);
446 fprintf(stderr, "\n");
448 return 0;
/*
 * Symbols
 */

/*
 * Address range treated as kernel text by process_event().  It starts out
 * as "everything" and is narrowed by parse_symbols().
 */
static uint64_t min_ip = 0;
static uint64_t max_ip = UINT64_MAX;
458 struct sym_entry {
459 unsigned long long addr;
460 char *sym;
461 unsigned long count[MAX_COUNTERS];
462 int skip;
463 GList *source;
466 #define MAX_SYMS 100000
468 static int sym_table_count;
470 struct sym_entry *sym_filter_entry;
472 static struct sym_entry sym_table[MAX_SYMS];
474 static void show_details(struct sym_entry *sym);
477 * Ordering weight: count-1 * count-2 * ... / count-n
479 static double sym_weight(const struct sym_entry *sym)
481 double weight;
482 int counter;
484 weight = sym->count[0];
486 for (counter = 1; counter < nr_counters-1; counter++)
487 weight *= sym->count[counter];
489 weight /= (sym->count[counter] + 1);
491 return weight;
/*
 * qsort() comparator ordering symbols by descending sym_weight().
 *
 * qsort() requires a negative, zero or positive result; a bare "a < b" never
 * returns a negative value and therefore gives the sort an inconsistent
 * ordering.
 */
static int compare(const void *__sym1, const void *__sym2)
{
	const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
	double w1 = sym_weight(sym1);
	double w2 = sym_weight(sym2);

	if (w1 > w2)
		return -1;	/* heavier symbols sort first */
	if (w1 < w2)
		return 1;
	return 0;
}
501 static time_t last_refresh;
502 static long events;
503 static long userspace_events;
504 static const char CONSOLE_CLEAR[] = "\e[H\e[2J";
506 static struct sym_entry tmp[MAX_SYMS];
508 static void print_sym_table(void)
510 int i, printed;
511 int counter;
512 float events_per_sec = events/delay_secs;
513 float kevents_per_sec = (events-userspace_events)/delay_secs;
515 memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count);
516 qsort(tmp, sym_table_count, sizeof(tmp[0]), compare);
518 write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR));
520 printf(
521 "------------------------------------------------------------------------------\n");
522 printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ",
523 events_per_sec,
524 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)),
525 nmi ? "NMI" : "IRQ");
527 if (nr_counters == 1)
528 printf("%d ", event_count[0]);
530 for (counter = 0; counter < nr_counters; counter++) {
531 if (counter)
532 printf("/");
534 printf("%s", event_name(counter));
537 printf( "], ");
539 if (tid != -1)
540 printf(" (tid: %d", tid);
541 else
542 printf(" (all");
544 if (profile_cpu != -1)
545 printf(", cpu: %d)\n", profile_cpu);
546 else {
547 if (tid != -1)
548 printf(")\n");
549 else
550 printf(", %d CPUs)\n", nr_cpus);
553 printf("------------------------------------------------------------------------------\n\n");
555 if (nr_counters == 1)
556 printf(" events");
557 else
558 printf(" weight events");
560 printf(" RIP kernel function\n"
561 " ______ ______ ________________ _______________\n\n"
564 printed = 0;
565 for (i = 0; i < sym_table_count; i++) {
566 int count;
568 if (nr_counters == 1) {
569 if (printed <= 18 &&
570 tmp[i].count[0] >= count_filter) {
571 printf("%19.2f - %016llx : %s\n",
572 sym_weight(tmp + i), tmp[i].addr, tmp[i].sym);
573 printed++;
575 } else {
576 if (printed <= 18 &&
577 tmp[i].count[0] >= count_filter) {
578 printf("%8.1f %10ld - %016llx : %s\n",
579 sym_weight(tmp + i),
580 tmp[i].count[0],
581 tmp[i].addr, tmp[i].sym);
582 printed++;
586 * Add decay to the counts:
588 for (count = 0; count < nr_counters; count++)
589 sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8;
592 if (sym_filter_entry)
593 show_details(sym_filter_entry);
595 last_refresh = time(NULL);
598 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
600 if (poll(&stdin_poll, 1, 0) == 1) {
601 printf("key pressed - exiting.\n");
602 exit(0);
607 static int read_symbol(FILE *in, struct sym_entry *s)
609 static int filter_match = 0;
610 char *sym, stype;
611 char str[500];
612 int rc, pos;
614 rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str);
615 if (rc == EOF)
616 return -1;
618 assert(rc == 3);
620 /* skip until end of line: */
621 pos = strlen(str);
622 do {
623 rc = fgetc(in);
624 if (rc == '\n' || rc == EOF || pos >= 499)
625 break;
626 str[pos] = rc;
627 pos++;
628 } while (1);
629 str[pos] = 0;
631 sym = str;
633 /* Filter out known duplicates and non-text symbols. */
634 if (!strcmp(sym, "_text"))
635 return 1;
636 if (!min_ip && !strcmp(sym, "_stext"))
637 return 1;
638 if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext"))
639 return 1;
640 if (stype != 'T' && stype != 't')
641 return 1;
642 if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14))
643 return 1;
644 if (strstr(sym, "_text_start") || strstr(sym, "_text_end"))
645 return 1;
647 s->sym = malloc(strlen(str));
648 assert(s->sym);
650 strcpy((char *)s->sym, str);
651 s->skip = 0;
653 /* Tag events to be skipped. */
654 if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym))
655 s->skip = 1;
656 if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym))
657 s->skip = 1;
659 if (filter_match == 1) {
660 filter_end = s->addr;
661 filter_match = -1;
662 if (filter_end - filter_start > 10000) {
663 printf("hm, too large filter symbol <%s> - skipping.\n",
664 sym_filter);
665 printf("symbol filter start: %016lx\n", filter_start);
666 printf(" end: %016lx\n", filter_end);
667 filter_end = filter_start = 0;
668 sym_filter = NULL;
669 sleep(1);
672 if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) {
673 filter_match = 1;
674 filter_start = s->addr;
677 return 0;
680 int compare_addr(const void *__sym1, const void *__sym2)
682 const struct sym_entry *sym1 = __sym1, *sym2 = __sym2;
684 return sym1->addr > sym2->addr;
687 static void sort_symbol_table(void)
689 int i, dups;
691 do {
692 qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr);
693 for (i = 0, dups = 0; i < sym_table_count; i++) {
694 if (sym_table[i].addr == sym_table[i+1].addr) {
695 sym_table[i+1].addr = -1ll;
696 dups++;
699 sym_table_count -= dups;
700 } while(dups);
703 static void parse_symbols(void)
705 struct sym_entry *last;
707 FILE *kallsyms = fopen("/proc/kallsyms", "r");
709 if (!kallsyms) {
710 printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n");
711 exit(-1);
714 while (!feof(kallsyms)) {
715 if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) {
716 sym_table_count++;
717 assert(sym_table_count <= MAX_SYMS);
721 sort_symbol_table();
722 min_ip = sym_table[0].addr;
723 max_ip = sym_table[sym_table_count-1].addr;
724 last = sym_table + sym_table_count++;
726 last->addr = -1ll;
727 last->sym = "<end>";
729 if (filter_end) {
730 int count;
731 for (count=0; count < sym_table_count; count ++) {
732 if (!strcmp(sym_table[count].sym, sym_filter)) {
733 sym_filter_entry = &sym_table[count];
734 break;
738 if (dump_symtab) {
739 int i;
741 for (i = 0; i < sym_table_count; i++)
742 fprintf(stderr, "%llx %s\n",
743 sym_table[i].addr, sym_table[i].sym);
748 * Source lines
751 static void parse_vmlinux(char *filename)
753 FILE *file;
754 char command[PATH_MAX*2];
755 if (!filename)
756 return;
758 sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename);
760 file = popen(command, "r");
761 if (!file)
762 return;
764 while (!feof(file)) {
765 struct source_line *src;
766 size_t dummy = 0;
767 char *c;
769 src = malloc(sizeof(struct source_line));
770 assert(src != NULL);
771 memset(src, 0, sizeof(struct source_line));
773 if (getline(&src->line, &dummy, file) < 0)
774 break;
775 if (!src->line)
776 break;
778 c = strchr(src->line, '\n');
779 if (c)
780 *c = 0;
782 lines = g_list_prepend(lines, src);
784 if (strlen(src->line)>8 && src->line[8] == ':')
785 src->EIP = strtoull(src->line, NULL, 16);
786 if (strlen(src->line)>8 && src->line[16] == ':')
787 src->EIP = strtoull(src->line, NULL, 16);
789 pclose(file);
790 lines = g_list_reverse(lines);
793 static void record_precise_ip(uint64_t ip)
795 struct source_line *line;
796 GList *item;
798 item = g_list_first(lines);
799 while (item) {
800 line = item->data;
801 if (line->EIP == ip)
802 line->count++;
803 if (line->EIP > ip)
804 break;
805 item = g_list_next(item);
809 static void lookup_sym_in_vmlinux(struct sym_entry *sym)
811 struct source_line *line;
812 GList *item;
813 char pattern[PATH_MAX];
814 sprintf(pattern, "<%s>:", sym->sym);
816 item = g_list_first(lines);
817 while (item) {
818 line = item->data;
819 if (strstr(line->line, pattern)) {
820 sym->source = item;
821 break;
823 item = g_list_next(item);
827 void show_lines(GList *item_queue, int item_queue_count)
829 int i;
830 struct source_line *line;
832 for (i = 0; i < item_queue_count; i++) {
833 line = item_queue->data;
834 printf("%8li\t%s\n", line->count, line->line);
835 item_queue = g_list_next(item_queue);
839 #define TRACE_COUNT 3
841 static void show_details(struct sym_entry *sym)
843 struct source_line *line;
844 GList *item;
845 int displayed = 0;
846 GList *item_queue = NULL;
847 int item_queue_count = 0;
849 if (!sym->source)
850 lookup_sym_in_vmlinux(sym);
851 if (!sym->source)
852 return;
854 printf("Showing details for %s\n", sym->sym);
856 item = sym->source;
857 while (item) {
858 line = item->data;
859 if (displayed && strstr(line->line, ">:"))
860 break;
862 if (!item_queue_count)
863 item_queue = item;
864 item_queue_count ++;
866 if (line->count >= count_filter) {
867 show_lines(item_queue, item_queue_count);
868 item_queue_count = 0;
869 item_queue = NULL;
870 } else if (item_queue_count > TRACE_COUNT) {
871 item_queue = g_list_next(item_queue);
872 item_queue_count --;
875 line->count = 0;
876 displayed++;
877 if (displayed > 300)
878 break;
879 item = g_list_next(item);
884 * Binary search in the histogram table and record the hit:
886 static void record_ip(uint64_t ip, int counter)
888 int left_idx, middle_idx, right_idx, idx;
889 unsigned long left, middle, right;
891 record_precise_ip(ip);
893 left_idx = 0;
894 right_idx = sym_table_count-1;
895 assert(ip <= max_ip && ip >= min_ip);
897 while (left_idx + 1 < right_idx) {
898 middle_idx = (left_idx + right_idx) / 2;
900 left = sym_table[ left_idx].addr;
901 middle = sym_table[middle_idx].addr;
902 right = sym_table[ right_idx].addr;
904 if (!(left <= middle && middle <= right)) {
905 printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right);
906 printf("%d %d %d\n", left_idx, middle_idx, right_idx);
908 assert(left <= middle && middle <= right);
909 if (!(left <= ip && ip <= right)) {
910 printf(" left: %016lx\n", left);
911 printf(" ip: %016lx\n", ip);
912 printf("right: %016lx\n", right);
914 assert(left <= ip && ip <= right);
916 * [ left .... target .... middle .... right ]
917 * => right := middle
919 if (ip < middle) {
920 right_idx = middle_idx;
921 continue;
924 * [ left .... middle ... target ... right ]
925 * => left := middle
927 left_idx = middle_idx;
930 idx = left_idx;
932 if (!sym_table[idx].skip)
933 sym_table[idx].count[counter]++;
934 else events--;
937 static void process_event(uint64_t ip, int counter)
939 events++;
941 if (ip < min_ip || ip > max_ip) {
942 userspace_events++;
943 return;
946 record_ip(ip, counter);
949 static void process_options(int argc, char *argv[])
951 int error = 0, counter;
953 if (strstr(argv[0], "perfstat"))
954 run_perfstat = 1;
956 for (;;) {
957 int option_index = 0;
958 /** Options for getopt */
959 static struct option long_options[] = {
960 {"count", required_argument, NULL, 'c'},
961 {"cpu", required_argument, NULL, 'C'},
962 {"delay", required_argument, NULL, 'd'},
963 {"dump_symtab", no_argument, NULL, 'D'},
964 {"event", required_argument, NULL, 'e'},
965 {"filter", required_argument, NULL, 'f'},
966 {"group", required_argument, NULL, 'g'},
967 {"help", no_argument, NULL, 'h'},
968 {"nmi", required_argument, NULL, 'n'},
969 {"pid", required_argument, NULL, 'p'},
970 {"vmlinux", required_argument, NULL, 'x'},
971 {"symbol", required_argument, NULL, 's'},
972 {"stat", no_argument, NULL, 'S'},
973 {"zero", no_argument, NULL, 'z'},
974 {NULL, 0, NULL, 0 }
976 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z",
977 long_options, &option_index);
978 if (c == -1)
979 break;
981 switch (c) {
982 case 'a': system_wide = 1; break;
983 case 'c': event_count[nr_counters] = atoi(optarg); break;
984 case 'C':
985 /* CPU and PID are mutually exclusive */
986 if (tid != -1) {
987 printf("WARNING: CPU switch overriding PID\n");
988 sleep(1);
989 tid = -1;
991 profile_cpu = atoi(optarg); break;
992 case 'd': delay_secs = atoi(optarg); break;
993 case 'D': dump_symtab = 1; break;
995 case 'e': error = parse_events(optarg); break;
997 case 'f': count_filter = atoi(optarg); break;
998 case 'g': group = atoi(optarg); break;
999 case 'h': display_help(); break;
1000 case 'n': nmi = atoi(optarg); break;
1001 case 'p':
1002 /* CPU and PID are mutually exclusive */
1003 if (profile_cpu != -1) {
1004 printf("WARNING: PID switch overriding CPU\n");
1005 sleep(1);
1006 profile_cpu = -1;
1008 tid = atoi(optarg); break;
1009 case 's': sym_filter = strdup(optarg); break;
1010 case 'S': run_perfstat = 1; break;
1011 case 'x': vmlinux = strdup(optarg); break;
1012 case 'z': zero = 1; break;
1013 default: error = 1; break;
1016 if (error)
1017 display_help();
1019 if (!nr_counters) {
1020 if (run_perfstat)
1021 nr_counters = 8;
1022 else {
1023 nr_counters = 1;
1024 event_id[0] = 0;
1028 for (counter = 0; counter < nr_counters; counter++) {
1029 if (event_count[counter])
1030 continue;
1032 if (event_id[counter] < PERF_HW_EVENTS_MAX)
1033 event_count[counter] = default_count[event_id[counter]];
1034 else
1035 event_count[counter] = 100000;
1039 int main(int argc, char *argv[])
1041 struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS];
1042 struct perf_counter_hw_event hw_event;
1043 int i, counter, group_fd;
1044 unsigned int cpu;
1045 uint64_t ip;
1046 ssize_t res;
1047 int ret;
1049 process_options(argc, argv);
1051 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
1052 assert(nr_cpus <= MAX_NR_CPUS);
1053 assert(nr_cpus >= 0);
1055 if (run_perfstat)
1056 return do_perfstat(argc, argv);
1058 if (tid != -1 || profile_cpu != -1)
1059 nr_cpus = 1;
1061 for (i = 0; i < nr_cpus; i++) {
1062 group_fd = -1;
1063 for (counter = 0; counter < nr_counters; counter++) {
1065 cpu = profile_cpu;
1066 if (tid == -1 && profile_cpu == -1)
1067 cpu = i;
1069 memset(&hw_event, 0, sizeof(hw_event));
1070 hw_event.type = event_id[counter];
1071 hw_event.raw = event_raw[counter];
1072 hw_event.irq_period = event_count[counter];
1073 hw_event.record_type = PERF_RECORD_IRQ;
1074 hw_event.nmi = nmi;
1076 fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
1077 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
1078 if (fd[i][counter] < 0) {
1079 printf("kerneltop error: syscall returned with %d (%s)\n",
1080 fd[i][counter], strerror(-fd[i][counter]));
1081 if (fd[i][counter] == -1)
1082 printf("Are you root?\n");
1083 exit(-1);
1085 assert(fd[i][counter] >= 0);
1088 * First counter acts as the group leader:
1090 if (group && group_fd == -1)
1091 group_fd = fd[i][counter];
1093 event_array[i][counter].fd = fd[i][counter];
1094 event_array[i][counter].events = POLLIN;
1098 parse_symbols();
1099 if (vmlinux && sym_filter_entry)
1100 parse_vmlinux(vmlinux);
1102 printf("KernelTop refresh period: %d seconds\n", delay_secs);
1103 last_refresh = time(NULL);
1105 while (1) {
1106 int hits = events;
1108 for (i = 0; i < nr_cpus; i++) {
1109 for (counter = 0; counter < nr_counters; counter++) {
1110 res = read(fd[i][counter], (char *) &ip, sizeof(ip));
1111 if (res > 0) {
1112 assert(res == sizeof(ip));
1114 process_event(ip, counter);
1119 if (time(NULL) >= last_refresh + delay_secs) {
1120 print_sym_table();
1121 events = userspace_events = 0;
1124 if (hits == events)
1125 ret = poll(event_array[0], nr_cpus, 1000);
1126 hits = events;
1129 return 0;