2 * builtin-timechart.c - make an svg timechart of system activity
4 * (C) Copyright 2009 Intel Corporation
7 * Arjan van de Ven <arjan@linux.intel.com>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; version 2
17 #include "util/util.h"
19 #include "util/color.h"
20 #include <linux/list.h>
21 #include "util/cache.h"
22 #include <linux/rbtree.h>
23 #include "util/symbol.h"
24 #include "util/string.h"
25 #include "util/callchain.h"
26 #include "util/strlist.h"
29 #include "util/header.h"
30 #include "util/parse-options.h"
31 #include "util/parse-events.h"
32 #include "util/svghelper.h"
34 static char const *input_name
= "perf.data";
35 static char const *output_name
= "output.svg";
38 static unsigned long page_size
;
39 static unsigned long mmap_window
= 32;
40 static u64 sample_type
;
42 static unsigned int numcpus
;
43 static u64 min_freq
; /* Lowest CPU frequency seen */
44 static u64 max_freq
; /* Highest CPU frequency seen */
45 static u64 turbo_frequency
;
47 static u64 first_time
, last_time
;
49 static int power_only
;
52 static struct perf_header
*header
;
61 struct sample_wrapper
;
64 * Datastructure layout:
65 * We keep an list of "pid"s, matching the kernels notion of a task struct.
66 * Each "pid" entry, has a list of "comm"s.
67 * this is because we want to track different programs different, while
68 * exec will reuse the original pid (by design).
69 * Each comm has a list of samples that will be used to draw
84 struct per_pidcomm
*all
;
85 struct per_pidcomm
*current
;
92 struct per_pidcomm
*next
;
106 struct cpu_sample
*samples
;
109 struct sample_wrapper
{
110 struct sample_wrapper
*next
;
113 unsigned char data
[0];
117 #define TYPE_RUNNING 1
118 #define TYPE_WAITING 2
119 #define TYPE_BLOCKED 3
122 struct cpu_sample
*next
;
130 static struct per_pid
*all_data
;
136 struct power_event
*next
;
145 struct wake_event
*next
;
151 static struct power_event
*power_events
;
152 static struct wake_event
*wake_events
;
154 struct sample_wrapper
*all_samples
;
156 static struct per_pid
*find_create_pid(int pid
)
158 struct per_pid
*cursor
= all_data
;
161 if (cursor
->pid
== pid
)
163 cursor
= cursor
->next
;
165 cursor
= malloc(sizeof(struct per_pid
));
166 assert(cursor
!= NULL
);
167 memset(cursor
, 0, sizeof(struct per_pid
));
169 cursor
->next
= all_data
;
174 static void pid_set_comm(int pid
, char *comm
)
177 struct per_pidcomm
*c
;
178 p
= find_create_pid(pid
);
181 if (c
->comm
&& strcmp(c
->comm
, comm
) == 0) {
186 c
->comm
= strdup(comm
);
192 c
= malloc(sizeof(struct per_pidcomm
));
194 memset(c
, 0, sizeof(struct per_pidcomm
));
195 c
->comm
= strdup(comm
);
201 static void pid_fork(int pid
, int ppid
, u64 timestamp
)
203 struct per_pid
*p
, *pp
;
204 p
= find_create_pid(pid
);
205 pp
= find_create_pid(ppid
);
207 if (pp
->current
&& pp
->current
->comm
&& !p
->current
)
208 pid_set_comm(pid
, pp
->current
->comm
);
210 p
->start_time
= timestamp
;
212 p
->current
->start_time
= timestamp
;
213 p
->current
->state_since
= timestamp
;
217 static void pid_exit(int pid
, u64 timestamp
)
220 p
= find_create_pid(pid
);
221 p
->end_time
= timestamp
;
223 p
->current
->end_time
= timestamp
;
227 pid_put_sample(int pid
, int type
, unsigned int cpu
, u64 start
, u64 end
)
230 struct per_pidcomm
*c
;
231 struct cpu_sample
*sample
;
233 p
= find_create_pid(pid
);
236 c
= malloc(sizeof(struct per_pidcomm
));
238 memset(c
, 0, sizeof(struct per_pidcomm
));
244 sample
= malloc(sizeof(struct cpu_sample
));
245 assert(sample
!= NULL
);
246 memset(sample
, 0, sizeof(struct cpu_sample
));
247 sample
->start_time
= start
;
248 sample
->end_time
= end
;
250 sample
->next
= c
->samples
;
254 if (sample
->type
== TYPE_RUNNING
&& end
> start
&& start
> 0) {
255 c
->total_time
+= (end
-start
);
256 p
->total_time
+= (end
-start
);
259 if (c
->start_time
== 0 || c
->start_time
> start
)
260 c
->start_time
= start
;
261 if (p
->start_time
== 0 || p
->start_time
> start
)
262 p
->start_time
= start
;
268 #define MAX_CPUS 4096
270 static u64 cpus_cstate_start_times
[MAX_CPUS
];
271 static int cpus_cstate_state
[MAX_CPUS
];
272 static u64 cpus_pstate_start_times
[MAX_CPUS
];
273 static u64 cpus_pstate_state
[MAX_CPUS
];
276 process_comm_event(event_t
*event
)
278 pid_set_comm(event
->comm
.tid
, event
->comm
.comm
);
282 process_fork_event(event_t
*event
)
284 pid_fork(event
->fork
.pid
, event
->fork
.ppid
, event
->fork
.time
);
289 process_exit_event(event_t
*event
)
291 pid_exit(event
->fork
.pid
, event
->fork
.time
);
299 unsigned char preempt_count
;
305 struct trace_entry te
;
310 #define TASK_COMM_LEN 16
311 struct wakeup_entry
{
312 struct trace_entry te
;
313 char comm
[TASK_COMM_LEN
];
320 * trace_flag_type is an enumeration that holds different
321 * states when a trace occurs. These are:
322 * IRQS_OFF - interrupts were disabled
323 * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
324 * NEED_RESCED - reschedule is requested
325 * HARDIRQ - inside an interrupt handler
326 * SOFTIRQ - inside a softirq handler
328 enum trace_flag_type
{
329 TRACE_FLAG_IRQS_OFF
= 0x01,
330 TRACE_FLAG_IRQS_NOSUPPORT
= 0x02,
331 TRACE_FLAG_NEED_RESCHED
= 0x04,
332 TRACE_FLAG_HARDIRQ
= 0x08,
333 TRACE_FLAG_SOFTIRQ
= 0x10,
338 struct sched_switch
{
339 struct trace_entry te
;
340 char prev_comm
[TASK_COMM_LEN
];
343 long prev_state
; /* Arjan weeps. */
344 char next_comm
[TASK_COMM_LEN
];
349 static void c_state_start(int cpu
, u64 timestamp
, int state
)
351 cpus_cstate_start_times
[cpu
] = timestamp
;
352 cpus_cstate_state
[cpu
] = state
;
355 static void c_state_end(int cpu
, u64 timestamp
)
357 struct power_event
*pwr
;
358 pwr
= malloc(sizeof(struct power_event
));
361 memset(pwr
, 0, sizeof(struct power_event
));
363 pwr
->state
= cpus_cstate_state
[cpu
];
364 pwr
->start_time
= cpus_cstate_start_times
[cpu
];
365 pwr
->end_time
= timestamp
;
368 pwr
->next
= power_events
;
373 static void p_state_change(int cpu
, u64 timestamp
, u64 new_freq
)
375 struct power_event
*pwr
;
376 pwr
= malloc(sizeof(struct power_event
));
378 if (new_freq
> 8000000) /* detect invalid data */
383 memset(pwr
, 0, sizeof(struct power_event
));
385 pwr
->state
= cpus_pstate_state
[cpu
];
386 pwr
->start_time
= cpus_pstate_start_times
[cpu
];
387 pwr
->end_time
= timestamp
;
390 pwr
->next
= power_events
;
392 if (!pwr
->start_time
)
393 pwr
->start_time
= first_time
;
397 cpus_pstate_state
[cpu
] = new_freq
;
398 cpus_pstate_start_times
[cpu
] = timestamp
;
400 if ((u64
)new_freq
> max_freq
)
403 if (new_freq
< min_freq
|| min_freq
== 0)
406 if (new_freq
== max_freq
- 1000)
407 turbo_frequency
= max_freq
;
411 sched_wakeup(int cpu
, u64 timestamp
, int pid
, struct trace_entry
*te
)
413 struct wake_event
*we
;
415 struct wakeup_entry
*wake
= (void *)te
;
417 we
= malloc(sizeof(struct wake_event
));
421 memset(we
, 0, sizeof(struct wake_event
));
422 we
->time
= timestamp
;
425 if ((te
->flags
& TRACE_FLAG_HARDIRQ
) || (te
->flags
& TRACE_FLAG_SOFTIRQ
))
428 we
->wakee
= wake
->pid
;
429 we
->next
= wake_events
;
431 p
= find_create_pid(we
->wakee
);
433 if (p
&& p
->current
&& p
->current
->state
== TYPE_NONE
) {
434 p
->current
->state_since
= timestamp
;
435 p
->current
->state
= TYPE_WAITING
;
437 if (p
&& p
->current
&& p
->current
->state
== TYPE_BLOCKED
) {
438 pid_put_sample(p
->pid
, p
->current
->state
, cpu
, p
->current
->state_since
, timestamp
);
439 p
->current
->state_since
= timestamp
;
440 p
->current
->state
= TYPE_WAITING
;
444 static void sched_switch(int cpu
, u64 timestamp
, struct trace_entry
*te
)
446 struct per_pid
*p
= NULL
, *prev_p
;
447 struct sched_switch
*sw
= (void *)te
;
450 prev_p
= find_create_pid(sw
->prev_pid
);
452 p
= find_create_pid(sw
->next_pid
);
454 if (prev_p
->current
&& prev_p
->current
->state
!= TYPE_NONE
)
455 pid_put_sample(sw
->prev_pid
, TYPE_RUNNING
, cpu
, prev_p
->current
->state_since
, timestamp
);
456 if (p
&& p
->current
) {
457 if (p
->current
->state
!= TYPE_NONE
)
458 pid_put_sample(sw
->next_pid
, p
->current
->state
, cpu
, p
->current
->state_since
, timestamp
);
460 p
->current
->state_since
= timestamp
;
461 p
->current
->state
= TYPE_RUNNING
;
464 if (prev_p
->current
) {
465 prev_p
->current
->state
= TYPE_NONE
;
466 prev_p
->current
->state_since
= timestamp
;
467 if (sw
->prev_state
& 2)
468 prev_p
->current
->state
= TYPE_BLOCKED
;
469 if (sw
->prev_state
== 0)
470 prev_p
->current
->state
= TYPE_WAITING
;
476 process_sample_event(event_t
*event
)
483 struct trace_entry
*te
;
485 if (sample_type
& PERF_SAMPLE_IP
)
488 if (sample_type
& PERF_SAMPLE_TID
) {
489 pid
= event
->sample
.array
[cursor
]>>32;
492 if (sample_type
& PERF_SAMPLE_TIME
) {
493 stamp
= event
->sample
.array
[cursor
++];
495 if (!first_time
|| first_time
> stamp
)
497 if (last_time
< stamp
)
501 if (sample_type
& PERF_SAMPLE_ADDR
)
502 addr
= event
->sample
.array
[cursor
++];
503 if (sample_type
& PERF_SAMPLE_ID
)
505 if (sample_type
& PERF_SAMPLE_STREAM_ID
)
507 if (sample_type
& PERF_SAMPLE_CPU
)
508 cpu
= event
->sample
.array
[cursor
++] & 0xFFFFFFFF;
509 if (sample_type
& PERF_SAMPLE_PERIOD
)
512 te
= (void *)&event
->sample
.array
[cursor
];
514 if (sample_type
& PERF_SAMPLE_RAW
&& te
->size
> 0) {
516 struct power_entry
*pe
;
520 event_str
= perf_header__find_event(te
->type
);
525 if (strcmp(event_str
, "power:power_start") == 0)
526 c_state_start(cpu
, stamp
, pe
->value
);
528 if (strcmp(event_str
, "power:power_end") == 0)
529 c_state_end(cpu
, stamp
);
531 if (strcmp(event_str
, "power:power_frequency") == 0)
532 p_state_change(cpu
, stamp
, pe
->value
);
534 if (strcmp(event_str
, "sched:sched_wakeup") == 0)
535 sched_wakeup(cpu
, stamp
, pid
, te
);
537 if (strcmp(event_str
, "sched:sched_switch") == 0)
538 sched_switch(cpu
, stamp
, te
);
544 * After the last sample we need to wrap up the current C/P state
545 * and close out each CPU for these.
547 static void end_sample_processing(void)
550 struct power_event
*pwr
;
552 for (cpu
= 0; cpu
<= numcpus
; cpu
++) {
553 pwr
= malloc(sizeof(struct power_event
));
556 memset(pwr
, 0, sizeof(struct power_event
));
560 pwr
->state
= cpus_cstate_state
[cpu
];
561 pwr
->start_time
= cpus_cstate_start_times
[cpu
];
562 pwr
->end_time
= last_time
;
565 pwr
->next
= power_events
;
571 pwr
= malloc(sizeof(struct power_event
));
574 memset(pwr
, 0, sizeof(struct power_event
));
576 pwr
->state
= cpus_pstate_state
[cpu
];
577 pwr
->start_time
= cpus_pstate_start_times
[cpu
];
578 pwr
->end_time
= last_time
;
581 pwr
->next
= power_events
;
583 if (!pwr
->start_time
)
584 pwr
->start_time
= first_time
;
586 pwr
->state
= min_freq
;
591 static u64
sample_time(event_t
*event
)
596 if (sample_type
& PERF_SAMPLE_IP
)
598 if (sample_type
& PERF_SAMPLE_TID
)
600 if (sample_type
& PERF_SAMPLE_TIME
)
601 return event
->sample
.array
[cursor
];
607 * We first queue all events, sorted backwards by insertion.
608 * The order will get flipped later.
611 queue_sample_event(event_t
*event
)
613 struct sample_wrapper
*copy
, *prev
;
616 size
= event
->sample
.header
.size
+ sizeof(struct sample_wrapper
) + 8;
622 memset(copy
, 0, size
);
625 copy
->timestamp
= sample_time(event
);
627 memcpy(©
->data
, event
, event
->sample
.header
.size
);
629 /* insert in the right place in the list */
632 /* first sample ever */
637 if (all_samples
->timestamp
< copy
->timestamp
) {
638 /* insert at the head of the list */
639 copy
->next
= all_samples
;
646 if (prev
->next
->timestamp
< copy
->timestamp
) {
647 copy
->next
= prev
->next
;
653 /* insert at the end of the list */
659 static void sort_queued_samples(void)
661 struct sample_wrapper
*cursor
, *next
;
663 cursor
= all_samples
;
668 cursor
->next
= all_samples
;
669 all_samples
= cursor
;
675 * Sort the pid datastructure
677 static void sort_pids(void)
679 struct per_pid
*new_list
, *p
, *cursor
, *prev
;
680 /* sort by ppid first, then by pid, lowest to highest */
689 if (new_list
== NULL
) {
697 if (cursor
->ppid
> p
->ppid
||
698 (cursor
->ppid
== p
->ppid
&& cursor
->pid
> p
->pid
)) {
699 /* must insert before */
701 p
->next
= prev
->next
;
714 cursor
= cursor
->next
;
723 static void draw_c_p_states(void)
725 struct power_event
*pwr
;
729 * two pass drawing so that the P state bars are on top of the C state blocks
732 if (pwr
->type
== CSTATE
)
733 svg_cstate(pwr
->cpu
, pwr
->start_time
, pwr
->end_time
, pwr
->state
);
739 if (pwr
->type
== PSTATE
) {
741 pwr
->state
= min_freq
;
742 svg_pstate(pwr
->cpu
, pwr
->start_time
, pwr
->end_time
, pwr
->state
);
748 static void draw_wakeups(void)
750 struct wake_event
*we
;
752 struct per_pidcomm
*c
;
756 int from
= 0, to
= 0;
757 char *task_from
= NULL
, *task_to
= NULL
;
759 /* locate the column of the waker and wakee */
762 if (p
->pid
== we
->waker
|| p
->pid
== we
->wakee
) {
765 if (c
->Y
&& c
->start_time
<= we
->time
&& c
->end_time
>= we
->time
) {
766 if (p
->pid
== we
->waker
) {
768 task_from
= strdup(c
->comm
);
770 if (p
->pid
== we
->wakee
) {
772 task_to
= strdup(c
->comm
);
779 if (p
->pid
== we
->waker
&& !from
) {
781 task_from
= strdup(c
->comm
);
783 if (p
->pid
== we
->wakee
&& !to
) {
785 task_to
= strdup(c
->comm
);
794 task_from
= malloc(40);
795 sprintf(task_from
, "[%i]", we
->waker
);
798 task_to
= malloc(40);
799 sprintf(task_to
, "[%i]", we
->wakee
);
803 svg_interrupt(we
->time
, to
);
804 else if (from
&& to
&& abs(from
- to
) == 1)
805 svg_wakeline(we
->time
, from
, to
);
807 svg_partial_wakeline(we
->time
, from
, task_from
, to
, task_to
);
815 static void draw_cpu_usage(void)
818 struct per_pidcomm
*c
;
819 struct cpu_sample
*sample
;
826 if (sample
->type
== TYPE_RUNNING
)
827 svg_process(sample
->cpu
, sample
->start_time
, sample
->end_time
, "sample", c
->comm
);
829 sample
= sample
->next
;
837 static void draw_process_bars(void)
840 struct per_pidcomm
*c
;
841 struct cpu_sample
*sample
;
856 svg_box(Y
, c
->start_time
, c
->end_time
, "process");
859 if (sample
->type
== TYPE_RUNNING
)
860 svg_sample(Y
, sample
->cpu
, sample
->start_time
, sample
->end_time
);
861 if (sample
->type
== TYPE_BLOCKED
)
862 svg_box(Y
, sample
->start_time
, sample
->end_time
, "blocked");
863 if (sample
->type
== TYPE_WAITING
)
864 svg_waiting(Y
, sample
->start_time
, sample
->end_time
);
865 sample
= sample
->next
;
870 if (c
->total_time
> 5000000000) /* 5 seconds */
871 sprintf(comm
, "%s:%i (%2.2fs)", c
->comm
, p
->pid
, c
->total_time
/ 1000000000.0);
873 sprintf(comm
, "%s:%i (%3.1fms)", c
->comm
, p
->pid
, c
->total_time
/ 1000000.0);
875 svg_text(Y
, c
->start_time
, comm
);
885 static int determine_display_tasks(u64 threshold
)
888 struct per_pidcomm
*c
;
894 if (p
->start_time
== 1)
895 p
->start_time
= first_time
;
897 /* no exit marker, task kept running to the end */
898 if (p
->end_time
== 0)
899 p
->end_time
= last_time
;
900 if (p
->total_time
>= threshold
&& !power_only
)
908 if (c
->start_time
== 1)
909 c
->start_time
= first_time
;
911 if (c
->total_time
>= threshold
&& !power_only
) {
916 if (c
->end_time
== 0)
917 c
->end_time
= last_time
;
928 #define TIME_THRESH 10000000
930 static void write_svg_file(const char *filename
)
938 count
= determine_display_tasks(TIME_THRESH
);
940 /* We'd like to show at least 15 tasks; be less picky if we have fewer */
942 count
= determine_display_tasks(TIME_THRESH
/ 10);
944 open_svg(filename
, numcpus
, count
, first_time
, last_time
);
949 for (i
= 0; i
< numcpus
; i
++)
950 svg_cpu_box(i
, max_freq
, turbo_frequency
);
961 process_event(event_t
*event
)
964 switch (event
->header
.type
) {
966 case PERF_RECORD_COMM
:
967 return process_comm_event(event
);
968 case PERF_RECORD_FORK
:
969 return process_fork_event(event
);
970 case PERF_RECORD_EXIT
:
971 return process_exit_event(event
);
972 case PERF_RECORD_SAMPLE
:
973 return queue_sample_event(event
);
976 * We dont process them right now but they are fine:
978 case PERF_RECORD_MMAP
:
979 case PERF_RECORD_THROTTLE
:
980 case PERF_RECORD_UNTHROTTLE
:
990 static void process_samples(void)
992 struct sample_wrapper
*cursor
;
995 sort_queued_samples();
997 cursor
= all_samples
;
999 event
= (void *)&cursor
->data
;
1000 cursor
= cursor
->next
;
1001 process_sample_event(event
);
1006 static int __cmd_timechart(void)
1008 int ret
, rc
= EXIT_FAILURE
;
1009 unsigned long offset
= 0;
1010 unsigned long head
, shift
;
1011 struct stat statbuf
;
1017 input
= open(input_name
, O_RDONLY
);
1019 fprintf(stderr
, " failed to open file: %s", input_name
);
1020 if (!strcmp(input_name
, "perf.data"))
1021 fprintf(stderr
, " (try 'perf record' first)");
1022 fprintf(stderr
, "\n");
1026 ret
= fstat(input
, &statbuf
);
1028 perror("failed to stat file");
1032 if (!statbuf
.st_size
) {
1033 fprintf(stderr
, "zero-sized file, nothing to do!\n");
1037 header
= perf_header__read(input
);
1038 head
= header
->data_offset
;
1040 sample_type
= perf_header__sample_type(header
);
1042 shift
= page_size
* (head
/ page_size
);
1047 buf
= (char *)mmap(NULL
, page_size
* mmap_window
, PROT_READ
,
1048 MAP_SHARED
, input
, offset
);
1049 if (buf
== MAP_FAILED
) {
1050 perror("failed to mmap file");
1055 event
= (event_t
*)(buf
+ head
);
1057 size
= event
->header
.size
;
1061 if (head
+ event
->header
.size
>= page_size
* mmap_window
) {
1064 shift
= page_size
* (head
/ page_size
);
1066 ret2
= munmap(buf
, page_size
* mmap_window
);
1074 size
= event
->header
.size
;
1076 if (!size
|| process_event(event
) < 0) {
1078 printf("%p [%p]: skipping unknown header type: %d\n",
1079 (void *)(offset
+ head
),
1080 (void *)(long)(event
->header
.size
),
1081 event
->header
.type
);
1084 * assume we lost track of the stream, check alignment, and
1085 * increment a single u64 in the hope to catch on again 'soon'.
1088 if (unlikely(head
& 7))
1096 if (offset
+ head
>= header
->data_offset
+ header
->data_size
)
1099 if (offset
+ head
< (unsigned long)statbuf
.st_size
)
1109 end_sample_processing();
1113 write_svg_file(output_name
);
1115 printf("Written %2.1f seconds of trace to %s.\n", (last_time
- first_time
) / 1000000000.0, output_name
);
1120 static const char * const timechart_usage
[] = {
1121 "perf timechart [<options>] {record}",
1125 static const char *record_args
[] = {
1132 "-e", "power:power_start",
1133 "-e", "power:power_end",
1134 "-e", "power:power_frequency",
1135 "-e", "sched:sched_wakeup",
1136 "-e", "sched:sched_switch",
1139 static int __cmd_record(int argc
, const char **argv
)
1141 unsigned int rec_argc
, i
, j
;
1142 const char **rec_argv
;
1144 rec_argc
= ARRAY_SIZE(record_args
) + argc
- 1;
1145 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
1147 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
1148 rec_argv
[i
] = strdup(record_args
[i
]);
1150 for (j
= 1; j
< (unsigned int)argc
; j
++, i
++)
1151 rec_argv
[i
] = argv
[j
];
1153 return cmd_record(i
, rec_argv
, NULL
);
1156 static const struct option options
[] = {
1157 OPT_STRING('i', "input", &input_name
, "file",
1159 OPT_STRING('o', "output", &output_name
, "file",
1160 "output file name"),
1161 OPT_INTEGER('w', "width", &svg_page_width
,
1163 OPT_BOOLEAN('p', "power-only", &power_only
,
1164 "output power data only"),
1169 int cmd_timechart(int argc
, const char **argv
, const char *prefix __used
)
1173 page_size
= getpagesize();
1175 argc
= parse_options(argc
, argv
, options
, timechart_usage
,
1176 PARSE_OPT_STOP_AT_NON_OPTION
);
1178 if (argc
&& !strncmp(argv
[0], "rec", 3))
1179 return __cmd_record(argc
, argv
);
1181 usage_with_options(timechart_usage
, options
);
1185 return __cmd_timechart();