#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"

#include "util/parse-options.h"
#include "util/trace-event.h"

#include "util/debug.h"
#include "util/session.h"

#include <sys/types.h>
#include <sys/prctl.h>
#include <semaphore.h>
#include <limits.h>		/* ULLONG_MAX */

#include <linux/list.h>
#include <linux/hash.h>
/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

static struct list_head lockhash_table[LOCKHASH_SIZE];

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
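
/*
 * Each bucket of lockhash_table is an open-chained list of lock_stat
 * entries keyed by the lock instance address: lockhashentry(addr) picks
 * the bucket, and collisions are chained through lock_stat->hash_entry
 * (see lock_stat_findnew() below).
 */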
#define LOCK_STATE_UNLOCKED	0	       /* initial state */
#define LOCK_STATE_LOCKED	1

struct lock_stat {
	struct list_head	hash_entry;
	struct rb_node		rb;		/* used for sorting */

	/*
	 * FIXME: raw_field_value() returns unsigned long long,
	 * so the address of a lockdep_map has to be handled as 64-bit.
	 * Is there a better solution?
	 */
	void			*addr;		/* address of lockdep_map, used as ID */
	char			*name;		/* for strcpy(), we cannot use const */

	int			state;
	u64			prev_event_time; /* timestamp of the previous event */

	unsigned int		nr_acquired;
	unsigned int		nr_acquire;
	unsigned int		nr_contended;
	unsigned int		nr_release;

	/* these times are in nanoseconds */
	u64			wait_time_total;
	u64			wait_time_min;
	u64			wait_time_max;
};
/* build a simple key function: returns whether `one' is bigger than `two' */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					    struct lock_stat *two)	\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_min)
SINGLE_KEY(wait_time_max)
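
/*
 * For illustration, SINGLE_KEY(nr_acquired) above expands to:
 *
 *	static int lock_stat_key_nr_acquired(struct lock_stat *one,
 *					     struct lock_stat *two)
 *	{
 *		return one->nr_acquired > two->nr_acquired;
 *	}
 */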
struct lock_key {
	/*
	 * name: the key name specified by the user;
	 * this should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	int			(*key)(struct lock_stat *, struct lock_stat *);
};
static const char		*sort_key = "acquired";

static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		result;	/* place to store sorted data */
#define DEF_KEY_LOCK(name, fn_suffix)	\
	{ #name, lock_stat_key_ ## fn_suffix }
struct lock_key keys[] = {
	DEF_KEY_LOCK(acquired, nr_acquired),
	DEF_KEY_LOCK(contended, nr_contended),
	DEF_KEY_LOCK(wait_total, wait_time_total),
	DEF_KEY_LOCK(wait_min, wait_time_min),
	DEF_KEY_LOCK(wait_max, wait_time_max),

	/* more complicated comparisons should go here */
	{ NULL, NULL }
};
static void select_key(void)
{
	int i;

	for (i = 0; keys[i].name; i++) {
		if (!strcmp(keys[i].name, sort_key)) {
			compare = keys[i].key;
			return;
		}
	}

	die("Unknown compare key:%s\n", sort_key);
}
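
/*
 * For example, "perf lock report -k wait_total" sets sort_key to
 * "wait_total", so select_key() makes compare point at
 * lock_stat_key_wait_time_total().
 */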
static void insert_to_result(struct lock_stat *st,
			     int (*bigger)(struct lock_stat *, struct lock_stat *))
{
	struct rb_node **rb = &result.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		if (bigger(st, p))
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &result);
}
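
/*
 * Note the inverted ordering above: bigger entries go to the left, so
 * the leftmost node of the tree is the largest one and pop_from_result()
 * below hands entries back in descending key order.
 */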
/* returns the leftmost element of result, and erases it */
static struct lock_stat *pop_from_result(void)
{
	struct rb_node *node = result.rb_node;

	if (!node)
		return NULL;

	while (node->rb_left)
		node = node->rb_left;

	rb_erase(node, &result);
	return container_of(node, struct lock_stat, rb);
}
static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
{
	struct list_head *entry = lockhashentry(addr);
	struct lock_stat *ret, *new;

	list_for_each_entry(ret, entry, hash_entry) {
		if (ret->addr == addr)
			return ret;
	}

	new = zalloc(sizeof(struct lock_stat));
	if (!new)
		goto alloc_failed;

	new->addr = addr;
	new->name = zalloc(strlen(name) + 1);
	if (!new->name)
		goto alloc_failed;
	strcpy(new->name, name);

	/* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
	new->state = LOCK_STATE_UNLOCKED;
	new->wait_time_min = ULLONG_MAX;

	list_add(&new->hash_entry, entry);
	return new;

alloc_failed:
	die("memory allocation failed\n");
	return NULL;	/* never reached: die() does not return */
}
static char const		*input_name = "perf.data";

static int			profile_cpu = -1;
struct raw_event_sample {
	u32			size;
	char			data[0];
};

struct trace_acquire_event {
	void			*addr;
	char			*name;
};

struct trace_acquired_event {
	void			*addr;
	char			*name;
};

struct trace_contended_event {
	void			*addr;
	char			*name;
};

struct trace_release_event {
	void			*addr;
	char			*name;
};
struct trace_lock_handler {
	void (*acquire_event)(struct trace_acquire_event *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);

	void (*acquired_event)(struct trace_acquired_event *,
			       struct event *,
			       int cpu,
			       u64 timestamp,
			       struct thread *thread);

	void (*contended_event)(struct trace_contended_event *,
				struct event *,
				int cpu,
				u64 timestamp,
				struct thread *thread);

	void (*release_event)(struct trace_release_event *,
			      struct event *,
			      int cpu,
			      u64 timestamp,
			      struct thread *thread);
};
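
/*
 * The handlers above form an ops table so different consumers of the
 * same four tracepoints can be plugged in behind trace_handler;
 * report_lock_ops below is the lock-oriented one, and the TODO further
 * down notes that CPU- and thread-oriented variants could be added the
 * same way.
 */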
static void
report_lock_acquire_event(struct trace_acquire_event *acquire_event,
			  struct event *__event __used,
			  int cpu __used,
			  u64 timestamp,
			  struct thread *thread __used)
{
	struct lock_stat *st;

	st = lock_stat_findnew(acquire_event->addr, acquire_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		break;
	case LOCK_STATE_LOCKED:
		break;
	default:
		BUG_ON(1);
		break;
	}

	st->prev_event_time = timestamp;
}
static void
report_lock_acquired_event(struct trace_acquired_event *acquired_event,
			   struct event *__event __used,
			   int cpu __used,
			   u64 timestamp,
			   struct thread *thread __used)
{
	struct lock_stat *st;

	st = lock_stat_findnew(acquired_event->addr, acquired_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		st->state = LOCK_STATE_LOCKED;
		st->nr_acquired++;
		break;
	case LOCK_STATE_LOCKED:
		break;
	default:
		BUG_ON(1);
		break;
	}

	st->prev_event_time = timestamp;
}
static void
report_lock_contended_event(struct trace_contended_event *contended_event,
			    struct event *__event __used,
			    int cpu __used,
			    u64 timestamp,
			    struct thread *thread __used)
{
	struct lock_stat *st;

	st = lock_stat_findnew(contended_event->addr, contended_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		break;
	case LOCK_STATE_LOCKED:
		st->nr_contended++;
		break;
	default:
		BUG_ON(1);
		break;
	}

	st->prev_event_time = timestamp;
}
static void
report_lock_release_event(struct trace_release_event *release_event,
			  struct event *__event __used,
			  int cpu __used,
			  u64 timestamp,
			  struct thread *thread __used)
{
	struct lock_stat *st;
	u64 hold_time;

	st = lock_stat_findnew(release_event->addr, release_event->name);

	switch (st->state) {
	case LOCK_STATE_UNLOCKED:
		break;
	case LOCK_STATE_LOCKED:
		st->state = LOCK_STATE_UNLOCKED;
		hold_time = timestamp - st->prev_event_time;

		if (timestamp < st->prev_event_time) {
			/* unfortunately, this can happen... */
			goto end;
		}

		if (st->wait_time_min > hold_time)
			st->wait_time_min = hold_time;
		if (st->wait_time_max < hold_time)
			st->wait_time_max = hold_time;
		st->wait_time_total += hold_time;

		st->nr_release++;
		break;
	default:
		BUG_ON(1);
		break;
	}

end:
	st->prev_event_time = timestamp;
}
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
static struct trace_lock_handler report_lock_ops = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
};

static struct trace_lock_handler *trace_handler;
static void
process_lock_acquire_event(void *data,
			   struct event *event __used,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_acquire_event acquire_event;
	u64 tmp;	/* this is required for casting... */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&acquire_event.addr, &tmp, sizeof(void *));
	acquire_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->acquire_event)
		trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
}
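
/*
 * The memcpy() through tmp above is deliberate: raw_field_value()
 * always returns a u64, so the value is copied byte-wise into the
 * pointer-sized addr field instead of being cast directly, which would
 * complain about size mismatches on 32-bit builds (see the FIXME in
 * struct lock_stat). The same pattern repeats in the three handlers
 * below.
 */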
static void
process_lock_acquired_event(void *data,
			    struct event *event __used,
			    int cpu __used,
			    u64 timestamp __used,
			    struct thread *thread __used)
{
	struct trace_acquired_event acquired_event;
	u64 tmp;	/* this is required for casting... */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&acquired_event.addr, &tmp, sizeof(void *));
	acquired_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->acquired_event)
		trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread);
}
static void
process_lock_contended_event(void *data,
			     struct event *event __used,
			     int cpu __used,
			     u64 timestamp __used,
			     struct thread *thread __used)
{
	struct trace_contended_event contended_event;
	u64 tmp;	/* this is required for casting... */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&contended_event.addr, &tmp, sizeof(void *));
	contended_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->contended_event)
		trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread);
}
static void
process_lock_release_event(void *data,
			   struct event *event __used,
			   int cpu __used,
			   u64 timestamp __used,
			   struct thread *thread __used)
{
	struct trace_release_event release_event;
	u64 tmp;	/* this is required for casting... */

	tmp = raw_field_value(event, "lockdep_addr", data);
	memcpy(&release_event.addr, &tmp, sizeof(void *));
	release_event.name = (char *)raw_field_ptr(event, "name", data);

	if (trace_handler->release_event)
		trace_handler->release_event(&release_event, event, cpu, timestamp, thread);
}
static void
process_raw_event(void *data, int cpu,
		  u64 timestamp, struct thread *thread)
{
	struct event *event;
	int type;

	type = trace_parse_common_type(data);
	event = trace_find_event(type);

	if (!strcmp(event->name, "lock_acquire"))
		process_lock_acquire_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "lock_acquired"))
		process_lock_acquired_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "lock_contended"))
		process_lock_contended_event(data, event, cpu, timestamp, thread);
	if (!strcmp(event->name, "lock_release"))
		process_lock_release_event(data, event, cpu, timestamp, thread);
}
struct raw_event_queue {
	u64			timestamp;
	int			cpu;
	void			*data;
	struct thread		*thread;
	struct list_head	list;
};

static LIST_HEAD(raw_event_head);
#define FLUSH_PERIOD	(5 * NSEC_PER_SEC)

static u64 flush_limit = ULLONG_MAX;
static u64 last_flush;
static struct raw_event_queue *last_inserted;
static void flush_raw_event_queue(u64 limit)
{
	struct raw_event_queue *tmp, *iter;

	list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
		if (iter->timestamp > limit)
			return;

		if (iter == last_inserted)
			last_inserted = NULL;

		process_raw_event(iter->data, iter->cpu, iter->timestamp,
				  iter->thread);

		last_flush = iter->timestamp;
		list_del(&iter->list);
		free(iter->data);
		free(iter);
	}
}
static void __queue_raw_event_end(struct raw_event_queue *new)
{
	struct raw_event_queue *iter;

	list_for_each_entry_reverse(iter, &raw_event_head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, &raw_event_head);
}
static void __queue_raw_event_before(struct raw_event_queue *new,
				     struct raw_event_queue *iter)
{
	list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, &raw_event_head);
}
static void __queue_raw_event_after(struct raw_event_queue *new,
				    struct raw_event_queue *iter)
{
	list_for_each_entry_continue(iter, &raw_event_head, list) {
		if (iter->timestamp > new->timestamp) {
			list_add_tail(&new->list, &iter->list);
			return;
		}
	}

	list_add_tail(&new->list, &raw_event_head);
}
/* The queue is ordered by time */
static void __queue_raw_event(struct raw_event_queue *new)
{
	if (!last_inserted) {
		__queue_raw_event_end(new);
		return;
	}

	/*
	 * Most of the time the current event has a timestamp
	 * very close to the last event inserted, unless we just switched
	 * to another event buffer. Having the sort walk a list from the
	 * last inserted event, which is usually close to the current one,
	 * is probably cheaper than an rbtree based sort.
	 */
	if (last_inserted->timestamp >= new->timestamp)
		__queue_raw_event_before(new, last_inserted);
	else
		__queue_raw_event_after(new, last_inserted);
}
static void queue_raw_event(void *data, int raw_size, int cpu,
			    u64 timestamp, struct thread *thread)
{
	struct raw_event_queue *new;

	if (flush_limit == ULLONG_MAX)
		flush_limit = timestamp + FLUSH_PERIOD;

	if (timestamp < last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return;
	}

	new = malloc(sizeof(*new));
	if (!new)
		die("Not enough memory\n");

	new->timestamp = timestamp;
	new->cpu = cpu;
	new->thread = thread;

	new->data = malloc(raw_size);
	if (!new->data)
		die("Not enough memory\n");

	memcpy(new->data, data, raw_size);

	__queue_raw_event(new);
	last_inserted = new;
	/*
	 * We want to have a slice of events covering 2 * FLUSH_PERIOD.
	 * If FLUSH_PERIOD is big enough, it ensures that every event that
	 * occurred in the first half of the timeslice has been buffered and
	 * that none are still in flight (we need that because of the weakly
	 * ordered event recording we have). Then once we reach the
	 * 2 * FLUSH_PERIOD timeslice, we flush the first half to be gentle
	 * with the memory (the second half can still get new events in the
	 * middle, so wait another period to flush it).
	 */
	if (new->timestamp > flush_limit &&
	    new->timestamp - flush_limit > FLUSH_PERIOD) {
		flush_limit += FLUSH_PERIOD;
		flush_raw_event_queue(flush_limit);
	}
}
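
/*
 * Worked example with FLUSH_PERIOD = 5s: the first event, at time t,
 * sets flush_limit to t + 5s. Once an event later than t + 10s shows
 * up, flush_limit advances to t + 10s and every queued event with a
 * timestamp <= t + 10s is processed and freed; anything newer stays
 * buffered so it can still absorb late, out-of-order arrivals.
 */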
static int process_sample_event(event_t *event, struct perf_session *session)
{
	struct thread *thread;
	struct sample_data data;

	bzero(&data, sizeof(struct sample_data));
	event__parse_sample(event, session->sample_type, &data);
	thread = perf_session__findnew(session, data.pid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);

	if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
		return 0;

	queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);

	return 0;
}
/* TODO: various ways to print, coloring, nano or milli sec */
static void print_result(void)
{
	struct lock_stat *st;
	char cut_name[20];

	printf("%18s ", "ID");
	printf("%20s ", "Name");
	printf("%10s ", "acquired");
	printf("%10s ", "contended");

	printf("%15s ", "total wait (ns)");
	printf("%15s ", "max wait (ns)");
	printf("%15s ", "min wait (ns)");

	printf("\n\n");

	while ((st = pop_from_result())) {
		bzero(cut_name, 20);

		printf("%p ", st->addr);

		if (strlen(st->name) < 16) {
			/* output raw name */
			printf("%20s ", st->name);
		} else {
			strncpy(cut_name, st->name, 16);
			cut_name[16] = '.';
			cut_name[17] = '.';
			cut_name[18] = '.';
			cut_name[19] = '\0';
			/* truncate the name to preserve the output layout */
			printf("%20s ", cut_name);
		}

		printf("%10u ", st->nr_acquired);
		printf("%10u ", st->nr_contended);

		printf("%15llu ", st->wait_time_total);
		printf("%15llu ", st->wait_time_max);
		printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
		       0 : st->wait_time_min);

		printf("\n");
	}
}
static void dump_map(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
			printf("%p: %s\n", st->addr, st->name);
		}
	}
}
static struct perf_event_ops eops = {
	.sample			= process_sample_event,
	.comm			= event__process_comm,
};

static struct perf_session *session;

static int read_events(void)
{
	session = perf_session__new(input_name, O_RDONLY, 0);
	if (!session)
		die("Initializing perf session failed\n");

	return perf_session__process_events(session, &eops);
}
static void sort_result(void)
{
	unsigned int i;
	struct lock_stat *st;

	for (i = 0; i < LOCKHASH_SIZE; i++) {
		list_for_each_entry(st, &lockhash_table[i], hash_entry) {
			insert_to_result(st, compare);
		}
	}
}
static void __cmd_report(void)
{
	setup_pager();
	select_key();
	read_events();
	/* process everything that is still queued */
	flush_raw_event_queue(ULLONG_MAX);
	sort_result();
	print_result();
}
static const char * const report_usage[] = {
	"perf lock report [<options>]",
	NULL
};

static const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		   "key for sorting"),
	/* TODO: type */
	OPT_END()
};

static const char * const lock_usage[] = {
	"perf lock [<options>] {record|trace|report}",
	NULL
};

static const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_END()
};

static const char *record_args[] = {
	"record",
	"-a",
	"-R",
	"-f",
	"-m", "1024",
	"-c", "1",
	"-e", "lock:lock_acquire:r",
	"-e", "lock:lock_acquired:r",
	"-e", "lock:lock_contended:r",
	"-e", "lock:lock_release:r",
};
static int __cmd_record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		die("Not enough memory\n");

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}
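
/*
 * Illustration: "perf lock record <cmd>" forwards to the generic record
 * machinery with the event list above appended, roughly as if the user
 * had typed
 *
 *	perf record -a -R -f -m 1024 -c 1 \
 *		-e lock:lock_acquire:r ... -e lock:lock_release:r <cmd>
 */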
int cmd_lock(int argc, const char **argv, const char *prefix __used)
{
	unsigned int i;

	symbol__init();
	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_LIST_HEAD(lockhash_table + i);

	argc = parse_options(argc, argv, lock_options, lock_usage,
			     PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	if (!strncmp(argv[0], "rec", 3)) {
		return __cmd_record(argc, argv);
	} else if (!strncmp(argv[0], "report", 6)) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		__cmd_report();
	} else if (!strcmp(argv[0], "trace")) {
		/* Aliased to 'perf trace' */
		return cmd_trace(argc, argv, prefix);
	} else if (!strcmp(argv[0], "map")) {
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		setup_pager();
		read_events();
		flush_raw_event_queue(ULLONG_MAX);
		dump_map();
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	return 0;
}
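
/*
 * Typical session:
 *
 *	perf lock record ./some_workload	# writes perf.data
 *	perf lock report			# default sort key: "acquired"
 *	perf lock report -k wait_max		# sort by longest wait instead
 *	perf lock map				# dump the address -> name map
 */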
);