Regen for compat 50 update.
[netbsd-mini2440.git] / usr.sbin / lockstat / main.c
bloba57382de176d0dbbf941b984ae9c6a37d2179bfd
1 /* $NetBSD: main.c,v 1.13 2008/04/28 15:36:01 ad Exp $ */
3 /*-
4 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * TODO:
35 * - Tracking of times for sleep locks is broken.
36 * - Need better analysis and tracking of events.
37 * - Shouldn't have to parse the namelist here. We should use something like
38 * FreeBSD's libelf.
39 * - The way the namelist is searched sucks, is it worth doing something
40 * better?
43 #include <sys/cdefs.h>
44 #ifndef lint
45 __RCSID("$NetBSD: main.c,v 1.13 2008/04/28 15:36:01 ad Exp $");
46 #endif /* not lint */
48 #include <sys/types.h>
49 #include <sys/param.h>
50 #include <sys/time.h>
51 #include <sys/fcntl.h>
52 #include <sys/ioctl.h>
53 #include <sys/wait.h>
54 #include <sys/signal.h>
55 #include <sys/sysctl.h>
57 #include <dev/lockstat.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <string.h>
62 #include <limits.h>
63 #include <unistd.h>
64 #include <err.h>
65 #include <paths.h>
66 #include <util.h>
67 #include <ctype.h>
68 #include <errno.h>
69 #include <stdbool.h>
71 #include "extern.h"
/* Device node used to control tracing and read back the trace buffers. */
#define	_PATH_DEV_LOCKSTAT	"/dev/lockstat"

/* Decimal scale factors used when converting between time units. */
#define	MILLI			1000.0
#define	MICRO			1000000.0
#define	NANO			1000000000.0
#define	PICO			1000000000000.0
80 TAILQ_HEAD(lock_head, lockstruct);
81 typedef struct lock_head locklist_t;
82 TAILQ_HEAD(buf_head, lsbuf);
83 typedef struct buf_head buflist_t;
85 typedef struct lockstruct {
86 TAILQ_ENTRY(lockstruct) chain;
87 buflist_t bufs;
88 buflist_t tosort;
89 uintptr_t lock;
90 double time;
91 uint32_t count;
92 u_int flags;
93 u_int nbufs;
94 char name[NAME_SIZE];
95 } lock_t;
/*
 * One entry of a NULL-terminated lookup table pairing a textual
 * identifier with the flag mask it stands for.
 */
typedef struct name {
	const char	*name;	/* identifier text; NULL marks the end of a table */
	int		mask;	/* flag bits associated with the identifier */
} name_t;
102 const name_t locknames[] = {
103 { "adaptive_mutex", LB_ADAPTIVE_MUTEX },
104 { "spin_mutex", LB_SPIN_MUTEX },
105 { "rwlock", LB_RWLOCK },
106 { "kernel_lock", LB_KERNEL_LOCK },
107 { "preemption", LB_NOPREEMPT },
108 { NULL, 0 }
111 const name_t eventnames[] = {
112 { "spin", LB_SPIN },
113 { "sleep_exclusive", LB_SLEEP1 },
114 { "sleep_shared", LB_SLEEP2 },
115 { NULL, 0 },
118 const name_t alltypes[] = {
119 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX | LB_SPIN },
120 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX | LB_SLEEP1 },
121 { "Spin mutex spin", LB_SPIN_MUTEX | LB_SPIN },
122 { "RW lock sleep (writer)", LB_RWLOCK | LB_SLEEP1 },
123 { "RW lock sleep (reader)", LB_RWLOCK | LB_SLEEP2 },
124 { "RW lock spin", LB_RWLOCK | LB_SPIN },
125 { "Kernel lock spin", LB_KERNEL_LOCK | LB_SPIN },
126 { "Kernel preemption defer", LB_NOPREEMPT | LB_SPIN },
127 { NULL, 0 }
130 locklist_t locklist;
131 locklist_t freelist;
132 locklist_t sortlist;
134 lsbuf_t *bufs;
135 lsdisable_t ld;
136 bool lflag;
137 bool fflag;
138 int nbufs;
139 bool cflag;
140 int lsfd;
141 int displayed;
142 int bin64;
143 double tscale;
144 double cscale;
145 double cpuscale[sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0])];
146 FILE *outfp;
148 void findsym(findsym_t, char *, uintptr_t *, uintptr_t *, bool);
149 void spawn(int, char **);
150 void display(int, const char *name);
151 void listnames(const name_t *);
152 void collapse(bool, bool);
153 int matchname(const name_t *, char *);
154 void makelists(int, int);
155 void nullsig(int);
156 void usage(void);
157 int ncpu(void);
158 lock_t *morelocks(void);
161 main(int argc, char **argv)
163 int eventtype, locktype, ch, nlfd, fd, i;
164 bool sflag, pflag, mflag, Mflag;
165 const char *nlistf, *outf;
166 char *lockname, *funcname;
167 const name_t *name;
168 lsenable_t le;
169 double ms;
170 char *p;
172 nlistf = NULL;
173 outf = NULL;
174 lockname = NULL;
175 funcname = NULL;
176 eventtype = -1;
177 locktype = -1;
178 nbufs = 0;
179 sflag = false;
180 pflag = false;
181 mflag = false;
182 Mflag = false;
184 while ((ch = getopt(argc, argv, "E:F:L:MN:T:b:ceflmo:pst")) != -1)
185 switch (ch) {
186 case 'E':
187 eventtype = matchname(eventnames, optarg);
188 break;
189 case 'F':
190 funcname = optarg;
191 break;
192 case 'L':
193 lockname = optarg;
194 break;
195 case 'N':
196 nlistf = optarg;
197 break;
198 case 'T':
199 locktype = matchname(locknames, optarg);
200 break;
201 case 'b':
202 nbufs = (int)strtol(optarg, &p, 0);
203 if (!isdigit((u_int)*optarg) || *p != '\0')
204 usage();
205 break;
206 case 'c':
207 cflag = true;
208 break;
209 case 'e':
210 listnames(eventnames);
211 break;
212 case 'f':
213 fflag = true;
214 break;
215 case 'l':
216 lflag = true;
217 break;
218 case 'm':
219 mflag = true;
220 break;
221 case 'M':
222 Mflag = true;
223 break;
224 case 'o':
225 outf = optarg;
226 break;
227 case 'p':
228 pflag = true;
229 break;
230 case 's':
231 sflag = true;
232 break;
233 case 't':
234 listnames(locknames);
235 break;
236 default:
237 usage();
239 argc -= optind;
240 argv += optind;
242 if (*argv == NULL)
243 usage();
245 if (outf) {
246 fd = open(outf, O_WRONLY | O_CREAT | O_TRUNC, 0600);
247 if (fd == -1)
248 err(EXIT_FAILURE, "opening %s", outf);
249 outfp = fdopen(fd, "w");
250 } else
251 outfp = stdout;
254 * Find the name list for resolving symbol names, and load it into
255 * memory.
257 if (nlistf == NULL) {
258 nlfd = open(_PATH_KSYMS, O_RDONLY);
259 nlistf = getbootfile();
260 } else
261 nlfd = -1;
262 if (nlfd == -1) {
263 if ((nlfd = open(nlistf, O_RDONLY)) < 0)
264 err(EXIT_FAILURE, "cannot open " _PATH_KSYMS " or %s",
265 nlistf);
267 if (loadsym32(nlfd) != 0) {
268 if (loadsym64(nlfd) != 0)
269 errx(EXIT_FAILURE, "unable to load symbol table");
270 bin64 = 1;
272 close(nlfd);
274 memset(&le, 0, sizeof(le));
275 le.le_nbufs = nbufs;
278 * Set up initial filtering.
280 if (lockname != NULL) {
281 findsym(LOCK_BYNAME, lockname, &le.le_lockstart,
282 &le.le_lockend, true);
283 le.le_flags |= LE_ONE_LOCK;
285 if (!lflag)
286 le.le_flags |= LE_CALLSITE;
287 if (!fflag)
288 le.le_flags |= LE_LOCK;
289 if (funcname != NULL) {
290 if (lflag)
291 usage();
292 findsym(FUNC_BYNAME, funcname, &le.le_csstart, &le.le_csend, true);
293 le.le_flags |= LE_ONE_CALLSITE;
295 le.le_mask = (eventtype & LB_EVENT_MASK) | (locktype & LB_LOCK_MASK);
298 * Start tracing.
300 if ((lsfd = open(_PATH_DEV_LOCKSTAT, O_RDONLY)) < 0)
301 err(EXIT_FAILURE, "cannot open " _PATH_DEV_LOCKSTAT);
302 if (ioctl(lsfd, IOC_LOCKSTAT_GVERSION, &ch) < 0)
303 err(EXIT_FAILURE, "ioctl");
304 if (ch != LS_VERSION)
305 errx(EXIT_FAILURE,
306 "incompatible lockstat interface version (%d, kernel %d)",
307 LS_VERSION, ch);
308 if (ioctl(lsfd, IOC_LOCKSTAT_ENABLE, &le))
309 err(EXIT_FAILURE, "cannot enable tracing");
312 * Execute the traced program.
314 spawn(argc, argv);
317 * Stop tracing, and read the trace buffers from the kernel.
319 if (ioctl(lsfd, IOC_LOCKSTAT_DISABLE, &ld) == -1) {
320 if (errno == EOVERFLOW) {
321 warnx("overflowed available kernel trace buffers");
322 exit(EXIT_FAILURE);
324 err(EXIT_FAILURE, "cannot disable tracing");
326 if ((bufs = malloc(ld.ld_size)) == NULL)
327 err(EXIT_FAILURE, "cannot allocate memory for user buffers");
328 if (read(lsfd, bufs, ld.ld_size) != ld.ld_size)
329 err(EXIT_FAILURE, "reading from " _PATH_DEV_LOCKSTAT);
330 if (close(lsfd))
331 err(EXIT_FAILURE, "close(" _PATH_DEV_LOCKSTAT ")");
334 * Figure out how to scale the results. For internal use we convert
335 * all times from CPU frequency based to picoseconds, and values are
336 * eventually displayed in ms.
338 for (i = 0; i < sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0]); i++)
339 if (ld.ld_freq[i] != 0)
340 cpuscale[i] = PICO / ld.ld_freq[i];
341 ms = ld.ld_time.tv_sec * MILLI + ld.ld_time.tv_nsec / MICRO;
342 if (pflag)
343 cscale = 1.0 / ncpu();
344 else
345 cscale = 1.0;
346 cscale *= (sflag ? MILLI / ms : 1.0);
347 tscale = cscale / NANO;
348 nbufs = (int)(ld.ld_size / sizeof(lsbuf_t));
350 TAILQ_INIT(&locklist);
351 TAILQ_INIT(&sortlist);
352 TAILQ_INIT(&freelist);
354 if ((mflag | Mflag) != 0)
355 collapse(mflag, Mflag);
358 * Display the results.
360 fprintf(outfp, "Elapsed time: %.2f seconds.", ms / MILLI);
361 if (sflag || pflag) {
362 fprintf(outfp, " Displaying ");
363 if (pflag)
364 fprintf(outfp, "per-CPU ");
365 if (sflag)
366 fprintf(outfp, "per-second ");
367 fprintf(outfp, "averages.");
369 putc('\n', outfp);
371 for (name = alltypes; name->name != NULL; name++) {
372 if (eventtype != -1 &&
373 (name->mask & LB_EVENT_MASK) != eventtype)
374 continue;
375 if (locktype != -1 &&
376 (name->mask & LB_LOCK_MASK) != locktype)
377 continue;
379 display(name->mask, name->name);
382 if (displayed == 0)
383 fprintf(outfp, "None of the selected events were recorded.\n");
384 exit(EXIT_SUCCESS);
/*
 * Write the option summary to stderr and exit with EXIT_FAILURE.
 * Does not return.
 */
void
usage(void)
{
	static const char options[] =
	    "-b nbuf\t\tset number of event buffers to allocate\n"
	    "-c\t\treport percentage of total events by count, not time\n"
	    "-E event\t\tdisplay only one type of event\n"
	    "-e\t\tlist event types\n"
	    "-F func\t\tlimit trace to one function\n"
	    "-f\t\ttrace only by function\n"
	    "-L lock\t\tlimit trace to one lock (name, or address)\n"
	    "-l\t\ttrace only by lock\n"
	    "-M\t\tmerge lock addresses within unique objects\n"
	    "-m\t\tmerge call sites within unique functions\n"
	    "-N nlist\tspecify name list file\n"
	    "-o file\t\tsend output to named file, not stdout\n"
	    "-p\t\tshow average count/time per CPU, not total\n"
	    "-s\t\tshow average count/time per second, not total\n"
	    "-T type\t\tdisplay only one type of lock\n"
	    "-t\t\tlist lock types\n";
	const char *prog;

	prog = getprogname();
	fprintf(stderr, "%s: usage:\n%s [options] <command>\n\n", prog, prog);
	fputs(options, stderr);

	exit(EXIT_FAILURE);
}
/*
 * Signal handler that does nothing.  spawn() installs it for SIGINT
 * while waiting for the child.
 */
void
nullsig(int junk)
{

	/* The signal number is not needed; reference it to avoid a warning. */
	(void)junk;
}
422 void
423 listnames(const name_t *name)
426 for (; name->name != NULL; name++)
427 printf("%s\n", name->name);
429 exit(EXIT_SUCCESS);
433 matchname(const name_t *name, char *string)
435 int empty, mask;
436 char *sp;
438 empty = 1;
439 mask = 0;
441 while ((sp = strsep(&string, ",")) != NULL) {
442 if (*sp == '\0')
443 usage();
445 for (; name->name != NULL; name++) {
446 if (strcasecmp(name->name, sp) == 0) {
447 mask |= name->mask;
448 break;
451 if (name->name == NULL)
452 errx(EXIT_FAILURE, "unknown identifier `%s'", sp);
453 empty = 0;
456 if (empty)
457 usage();
459 return mask;
/*
 * Return the number of CPUs in the running system, as reported by the
 * hw.ncpu sysctl.  If the query fails, 1 is returned.
 */
int
ncpu(void)
{
	int mib[2] = { CTL_HW, HW_NCPU };
	int count;
	size_t len = sizeof(count);

	if (sysctl(mib, 2, &count, &len, NULL, (size_t)0) < 0)
		return (1);

	return (count);
}
481 * Call into the ELF parser and look up a symbol by name or by address.
483 void
484 findsym(findsym_t find, char *name, uintptr_t *start, uintptr_t *end, bool chg)
486 uintptr_t tend, sa, ea;
487 char *p;
488 int rv;
490 if (!chg) {
491 sa = *start;
492 start = &sa;
493 end = &ea;
496 if (end == NULL)
497 end = &tend;
499 if (find == LOCK_BYNAME) {
500 if (isdigit((u_int)name[0])) {
501 *start = (uintptr_t)strtoul(name, &p, 0);
502 if (*p == '\0')
503 return;
507 if (bin64)
508 rv = findsym64(find, name, start, end);
509 else
510 rv = findsym32(find, name, start, end);
512 if (find == FUNC_BYNAME || find == LOCK_BYNAME) {
513 if (rv == -1)
514 errx(EXIT_FAILURE, "unable to find symbol `%s'", name);
515 return;
518 if (rv == -1)
519 snprintf(name, NAME_SIZE, "%016lx", (long)*start);
523 * Fork off the child process and wait for it to complete. We trap SIGINT
524 * so that the caller can use Ctrl-C to stop tracing early and still get
525 * useful results.
527 void
528 spawn(int argc, char **argv)
530 pid_t pid;
532 switch (pid = fork()) {
533 case 0:
534 close(lsfd);
535 if (execvp(argv[0], argv) == -1)
536 err(EXIT_FAILURE, "cannot exec");
537 break;
538 case -1:
539 err(EXIT_FAILURE, "cannot fork to exec");
540 break;
541 default:
542 signal(SIGINT, nullsig);
543 wait(NULL);
544 signal(SIGINT, SIG_DFL);
545 break;
550 * Allocate a new block of lock_t structures.
552 lock_t *
553 morelocks(void)
555 const static int batch = 32;
556 lock_t *l, *lp, *max;
558 l = (lock_t *)malloc(sizeof(*l) * batch);
560 for (lp = l, max = l + batch; lp < max; lp++)
561 TAILQ_INSERT_TAIL(&freelist, lp, chain);
563 return l;
567 * Collapse addresses from unique objects.
569 void
570 collapse(bool func, bool lock)
572 lsbuf_t *lb, *max;
574 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) {
575 if (func && lb->lb_callsite != 0) {
576 findsym(FUNC_BYADDR, NULL, &lb->lb_callsite, NULL,
577 true);
579 if (lock && lb->lb_lock != 0) {
580 findsym(LOCK_BYADDR, NULL, &lb->lb_lock, NULL,
581 true);
587 * From the kernel supplied data, construct two dimensional lists of locks
588 * and event buffers, indexed by lock type and sorted by event type.
590 void
591 makelists(int mask, int event)
593 lsbuf_t *lb, *lb2, *max;
594 lock_t *l, *l2;
595 int type;
598 * Recycle lock_t structures from the last run.
600 while ((l = TAILQ_FIRST(&locklist)) != NULL) {
601 TAILQ_REMOVE(&locklist, l, chain);
602 TAILQ_INSERT_HEAD(&freelist, l, chain);
605 type = mask & LB_LOCK_MASK;
607 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) {
608 if ((lb->lb_flags & LB_LOCK_MASK) != type ||
609 lb->lb_counts[event] == 0)
610 continue;
613 * Look for a record descibing this lock, and allocate a
614 * new one if needed.
616 TAILQ_FOREACH(l, &sortlist, chain) {
617 if (l->lock == lb->lb_lock)
618 break;
620 if (l == NULL) {
621 if ((l = TAILQ_FIRST(&freelist)) == NULL)
622 l = morelocks();
623 TAILQ_REMOVE(&freelist, l, chain);
624 l->flags = lb->lb_flags;
625 l->lock = lb->lb_lock;
626 l->nbufs = 0;
627 l->name[0] = '\0';
628 l->count = 0;
629 l->time = 0;
630 TAILQ_INIT(&l->tosort);
631 TAILQ_INIT(&l->bufs);
632 TAILQ_INSERT_TAIL(&sortlist, l, chain);
636 * Scale the time values per buffer and summarise
637 * times+counts per lock.
639 lb->lb_times[event] *= cpuscale[lb->lb_cpu];
640 l->count += lb->lb_counts[event];
641 l->time += lb->lb_times[event];
644 * Merge same lock+callsite pairs from multiple CPUs
645 * together.
647 TAILQ_FOREACH(lb2, &l->tosort, lb_chain.tailq) {
648 if (lb->lb_callsite == lb2->lb_callsite)
649 break;
651 if (lb2 != NULL) {
652 lb2->lb_counts[event] += lb->lb_counts[event];
653 lb2->lb_times[event] += lb->lb_times[event];
654 } else {
655 TAILQ_INSERT_HEAD(&l->tosort, lb, lb_chain.tailq);
656 l->nbufs++;
661 * Now sort the lists.
663 while ((l = TAILQ_FIRST(&sortlist)) != NULL) {
664 TAILQ_REMOVE(&sortlist, l, chain);
667 * Sort the buffers into the per-lock list.
669 while ((lb = TAILQ_FIRST(&l->tosort)) != NULL) {
670 TAILQ_REMOVE(&l->tosort, lb, lb_chain.tailq);
672 lb2 = TAILQ_FIRST(&l->bufs);
673 while (lb2 != NULL) {
674 if (cflag) {
675 if (lb->lb_counts[event] >
676 lb2->lb_counts[event])
677 break;
678 } else if (lb->lb_times[event] >
679 lb2->lb_times[event])
680 break;
681 lb2 = TAILQ_NEXT(lb2, lb_chain.tailq);
683 if (lb2 == NULL)
684 TAILQ_INSERT_TAIL(&l->bufs, lb,
685 lb_chain.tailq);
686 else
687 TAILQ_INSERT_BEFORE(lb2, lb, lb_chain.tailq);
691 * Sort this lock into the per-type list, based on the
692 * totals per lock.
694 l2 = TAILQ_FIRST(&locklist);
695 while (l2 != NULL) {
696 if (cflag) {
697 if (l->count > l2->count)
698 break;
699 } else if (l->time > l2->time)
700 break;
701 l2 = TAILQ_NEXT(l2, chain);
703 if (l2 == NULL)
704 TAILQ_INSERT_TAIL(&locklist, l, chain);
705 else
706 TAILQ_INSERT_BEFORE(l2, l, chain);
711 * Display a summary table for one lock type / event type pair.
713 void
714 display(int mask, const char *name)
716 lock_t *l;
717 lsbuf_t *lb;
718 double pcscale, metric;
719 char fname[NAME_SIZE];
720 int event;
722 event = (mask & LB_EVENT_MASK) - 1;
723 makelists(mask, event);
725 if (TAILQ_EMPTY(&locklist))
726 return;
728 fprintf(outfp, "\n-- %s\n\n"
729 "Total%% Count Time/ms Lock Caller\n"
730 "------ ------- --------- ---------------------- ------------------------------\n",
731 name);
734 * Sum up all events for this type of lock + event.
736 pcscale = 0;
737 TAILQ_FOREACH(l, &locklist, chain) {
738 if (cflag)
739 pcscale += l->count;
740 else
741 pcscale += l->time;
742 displayed++;
744 if (pcscale == 0)
745 pcscale = 100;
746 else
747 pcscale = (100.0 / pcscale);
750 * For each lock, print a summary total, followed by a breakdown by
751 * caller.
753 TAILQ_FOREACH(l, &locklist, chain) {
754 if (cflag)
755 metric = l->count;
756 else
757 metric = l->time;
758 metric *= pcscale;
760 if (l->name[0] == '\0')
761 findsym(LOCK_BYADDR, l->name, &l->lock, NULL, false);
763 if (lflag || l->nbufs > 1)
764 fprintf(outfp, "%6.2f %7d %9.2f %-22s <all>\n",
765 metric, (int)(l->count * cscale),
766 l->time * tscale, l->name);
768 if (lflag)
769 continue;
771 TAILQ_FOREACH(lb, &l->bufs, lb_chain.tailq) {
772 if (cflag)
773 metric = lb->lb_counts[event];
774 else
775 metric = lb->lb_times[event];
776 metric *= pcscale;
778 findsym(FUNC_BYADDR, fname, &lb->lb_callsite, NULL,
779 false);
780 fprintf(outfp, "%6.2f %7d %9.2f %-22s %s\n",
781 metric, (int)(lb->lb_counts[event] * cscale),
782 lb->lb_times[event] * tscale, l->name, fname);