#define _FILE_OFFSET_BITS 64

#include <linux/kernel.h>
static int perf_session__open(struct perf_session *self, bool force)
{
	struct stat input_stat;

	if (!strcmp(self->filename, "-")) {
		self->fd = STDIN_FILENO;

		if (perf_header__read(self, self->fd) < 0)
			pr_err("incompatible file format");

		return 0;
	}

	self->fd = open(self->filename, O_RDONLY);
	if (self->fd < 0) {
		pr_err("failed to open file: %s", self->filename);
		if (!strcmp(self->filename, "perf.data"))
			pr_err(" (try 'perf record' first)");
		return -1;
	}

	if (fstat(self->fd, &input_stat) < 0)
		goto out_close;

	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
		pr_err("file %s not owned by current user or root\n",
		       self->filename);
		goto out_close;
	}

	if (!input_stat.st_size) {
		pr_info("zero-sized file (%s), nothing to do!\n",
			self->filename);
		goto out_close;
	}

	if (perf_header__read(self, self->fd) < 0) {
		pr_err("incompatible file format");
		goto out_close;
	}

	self->size = input_stat.st_size;
	return 0;

out_close:
	close(self->fd);
	return -1;
}
void perf_session__update_sample_type(struct perf_session *self)
{
	self->sample_type = perf_header__sample_type(&self->header);
}
int perf_session__create_kernel_maps(struct perf_session *self)
{
	struct rb_root *machines = &self->machines;
	int ret = machines__create_kernel_maps(machines, HOST_KERNEL_ID);

	if (ret >= 0)
		ret = machines__create_guest_kernel_maps(machines);
	return ret;
}
struct perf_session *perf_session__new(const char *filename, int mode, bool force)
{
	size_t len = filename ? strlen(filename) + 1 : 0;
	struct perf_session *self = zalloc(sizeof(*self) + len);

	if (self == NULL)
		goto out;

	if (perf_header__init(&self->header) < 0)
		goto out_free;

	memcpy(self->filename, filename, len);
	self->threads = RB_ROOT;
	self->stats_by_id = RB_ROOT;
	self->last_match = NULL;
	self->mmap_window = 32;
	self->unknown_events = 0;
	self->machines = RB_ROOT;
	self->ordered_samples.flush_limit = ULLONG_MAX;
	INIT_LIST_HEAD(&self->ordered_samples.samples_head);

	if (mode == O_RDONLY) {
		if (perf_session__open(self, force) < 0)
			goto out_delete;
	} else if (mode == O_WRONLY) {
		/*
		 * In O_RDONLY mode this will be performed when reading the
		 * kernel MMAP event, in event__process_mmap().
		 */
		if (perf_session__create_kernel_maps(self) < 0)
			goto out_delete;
	}

	perf_session__update_sample_type(self);
out:
	return self;
out_free:
	free(self);
	return NULL;
out_delete:
	perf_session__delete(self);
	return NULL;
}
void perf_session__delete(struct perf_session *self)
{
	perf_header__exit(&self->header);
	free(self);
}
static bool symbol__match_parent_regex(struct symbol *sym)
{
	if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
		return true;

	return false;
}
struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
						   struct thread *thread,
						   struct ip_callchain *chain,
						   struct symbol **parent)
{
	u8 cpumode = PERF_RECORD_MISC_USER;
	unsigned int i;
	struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));

	if (!syms)
		return NULL;

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];
		struct addr_location al;

		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;
				break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;
				break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;
				break;
			default:
				break;
			}
			continue;
		}

		thread__find_addr_location(thread, self, cpumode,
					   MAP__FUNCTION, thread->pid, ip,
					   &al, NULL);
		if (al.sym != NULL) {
			if (sort__has_parent && !*parent &&
			    symbol__match_parent_regex(al.sym))
				*parent = al.sym;
			if (!symbol_conf.use_callchain)
				break;
			syms[i].map = al.map;
			syms[i].sym = al.sym;
		}
	}

	return syms;
}
static int process_event_stub(event_t *event __used,
			      struct perf_session *session __used)
{
	dump_printf(": unhandled!\n");
	return 0;
}
static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
{
	if (handler->sample == NULL)
		handler->sample = process_event_stub;
	if (handler->mmap == NULL)
		handler->mmap = process_event_stub;
	if (handler->comm == NULL)
		handler->comm = process_event_stub;
	if (handler->fork == NULL)
		handler->fork = process_event_stub;
	if (handler->exit == NULL)
		handler->exit = process_event_stub;
	if (handler->lost == NULL)
		handler->lost = process_event_stub;
	if (handler->read == NULL)
		handler->read = process_event_stub;
	if (handler->throttle == NULL)
		handler->throttle = process_event_stub;
	if (handler->unthrottle == NULL)
		handler->unthrottle = process_event_stub;
	if (handler->attr == NULL)
		handler->attr = process_event_stub;
	if (handler->event_type == NULL)
		handler->event_type = process_event_stub;
	if (handler->tracing_data == NULL)
		handler->tracing_data = process_event_stub;
	if (handler->build_id == NULL)
		handler->build_id = process_event_stub;
}
static const char *event__name[] = {
	[PERF_RECORD_MMAP]		  = "MMAP",
	[PERF_RECORD_LOST]		  = "LOST",
	[PERF_RECORD_COMM]		  = "COMM",
	[PERF_RECORD_EXIT]		  = "EXIT",
	[PERF_RECORD_THROTTLE]		  = "THROTTLE",
	[PERF_RECORD_UNTHROTTLE]	  = "UNTHROTTLE",
	[PERF_RECORD_FORK]		  = "FORK",
	[PERF_RECORD_READ]		  = "READ",
	[PERF_RECORD_SAMPLE]		  = "SAMPLE",
	[PERF_RECORD_HEADER_ATTR]	  = "ATTR",
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = "EVENT_TYPE",
	[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
	[PERF_RECORD_HEADER_BUILD_ID]	  = "BUILD_ID",
};

unsigned long event__total[PERF_RECORD_HEADER_MAX];
void event__print_totals(void)
{
	int i;

	for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
		if (event__name[i] == NULL)
			continue;

		pr_info("%10s events: %10ld\n",
			event__name[i], event__total[i]);
	}
}
/* Byte-swap every 64-bit word of a buffer in place. */
void mem_bswap_64(void *src, int byte_size)
{
	u64 *m = src;

	while (byte_size > 0) {
		*m = bswap_64(*m);
		byte_size -= sizeof(u64);
		++m;
	}
}
static void event__all64_swap(event_t *self)
{
	struct perf_event_header *hdr = &self->header;

	mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
}
static void event__comm_swap(event_t *self)
{
	self->comm.pid = bswap_32(self->comm.pid);
	self->comm.tid = bswap_32(self->comm.tid);
}
static void event__mmap_swap(event_t *self)
{
	self->mmap.pid   = bswap_32(self->mmap.pid);
	self->mmap.tid   = bswap_32(self->mmap.tid);
	self->mmap.start = bswap_64(self->mmap.start);
	self->mmap.len   = bswap_64(self->mmap.len);
	self->mmap.pgoff = bswap_64(self->mmap.pgoff);
}
static void event__task_swap(event_t *self)
{
	self->fork.pid  = bswap_32(self->fork.pid);
	self->fork.tid  = bswap_32(self->fork.tid);
	self->fork.ppid = bswap_32(self->fork.ppid);
	self->fork.ptid = bswap_32(self->fork.ptid);
	self->fork.time = bswap_64(self->fork.time);
}
static void event__read_swap(event_t *self)
{
	self->read.pid		= bswap_32(self->read.pid);
	self->read.tid		= bswap_32(self->read.tid);
	self->read.value	= bswap_64(self->read.value);
	self->read.time_enabled	= bswap_64(self->read.time_enabled);
	self->read.time_running	= bswap_64(self->read.time_running);
	self->read.id		= bswap_64(self->read.id);
}
static void event__attr_swap(event_t *self)
{
	size_t size;

	self->attr.attr.type		= bswap_32(self->attr.attr.type);
	self->attr.attr.size		= bswap_32(self->attr.attr.size);
	self->attr.attr.config		= bswap_64(self->attr.attr.config);
	self->attr.attr.sample_period	= bswap_64(self->attr.attr.sample_period);
	self->attr.attr.sample_type	= bswap_64(self->attr.attr.sample_type);
	self->attr.attr.read_format	= bswap_64(self->attr.attr.read_format);
	self->attr.attr.wakeup_events	= bswap_32(self->attr.attr.wakeup_events);
	self->attr.attr.bp_type		= bswap_32(self->attr.attr.bp_type);
	self->attr.attr.bp_addr		= bswap_64(self->attr.attr.bp_addr);
	self->attr.attr.bp_len		= bswap_64(self->attr.attr.bp_len);

	/* Swap the variable-sized id array that follows the attr. */
	size = self->header.size;
	size -= (void *)&self->attr.id - (void *)self;
	mem_bswap_64(self->attr.id, size);
}
static void event__event_type_swap(event_t *self)
{
	self->event_type.event_type.event_id =
		bswap_64(self->event_type.event_type.event_id);
}
static void event__tracing_data_swap(event_t *self)
{
	self->tracing_data.size = bswap_32(self->tracing_data.size);
}
typedef void (*event__swap_op)(event_t *self);

static event__swap_op event__swap_ops[] = {
	[PERF_RECORD_MMAP]		  = event__mmap_swap,
	[PERF_RECORD_COMM]		  = event__comm_swap,
	[PERF_RECORD_FORK]		  = event__task_swap,
	[PERF_RECORD_EXIT]		  = event__task_swap,
	[PERF_RECORD_LOST]		  = event__all64_swap,
	[PERF_RECORD_READ]		  = event__read_swap,
	[PERF_RECORD_SAMPLE]		  = event__all64_swap,
	[PERF_RECORD_HEADER_ATTR]	  = event__attr_swap,
	[PERF_RECORD_HEADER_EVENT_TYPE]	  = event__event_type_swap,
	[PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
	[PERF_RECORD_HEADER_MAX]	  = NULL,
};
struct sample_queue {
	u64			timestamp;
	struct sample_event	*event;
	struct list_head	list;
};

#define FLUSH_PERIOD	(2 * NSEC_PER_SEC)
static void flush_sample_queue(struct perf_session *s,
			       struct perf_event_ops *ops)
{
	struct list_head *head = &s->ordered_samples.samples_head;
	u64 limit = s->ordered_samples.flush_limit;
	struct sample_queue *tmp, *iter;

	if (!ops->ordered_samples)
		return;

	list_for_each_entry_safe(iter, tmp, head, list) {
		if (iter->timestamp > limit)
			return;

		if (iter == s->ordered_samples.last_inserted)
			s->ordered_samples.last_inserted = NULL;

		ops->sample((event_t *)iter->event, s);

		s->ordered_samples.last_flush = iter->timestamp;
		list_del(&iter->list);
		free(iter->event);
		free(iter);
	}
}
static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
{
	struct sample_queue *iter;

	list_for_each_entry_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}
static void __queue_sample_before(struct sample_queue *new,
				  struct sample_queue *iter,
				  struct list_head *head)
{
	list_for_each_entry_continue_reverse(iter, head, list) {
		if (iter->timestamp < new->timestamp) {
			list_add(&new->list, &iter->list);
			return;
		}
	}

	list_add(&new->list, head);
}
static void __queue_sample_after(struct sample_queue *new,
				 struct sample_queue *iter,
				 struct list_head *head)
{
	list_for_each_entry_continue(iter, head, list) {
		if (iter->timestamp > new->timestamp) {
			list_add_tail(&new->list, &iter->list);
			return;
		}
	}

	list_add_tail(&new->list, head);
}
/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
				 struct perf_session *s)
{
	struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
	struct list_head *head = &s->ordered_samples.samples_head;

	if (!last_inserted) {
		__queue_sample_end(new, head);
		return;
	}

	/*
	 * Most of the time the current event has a timestamp very close to
	 * the last event inserted, unless we just switched to another event
	 * buffer. A sort based on a list and on the last inserted event,
	 * which is close to the current one, is therefore probably more
	 * efficient than an rbtree based sort.
	 */
	if (last_inserted->timestamp >= new->timestamp)
		__queue_sample_before(new, last_inserted, head);
	else
		__queue_sample_after(new, last_inserted, head);
}
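/*
 * Illustrative walk-through of the insertion heuristic above (example
 * timestamps only): with samples queued at 100, 104, 102 and 103, the list
 * stays time-ordered and each insertion scans from the previously inserted
 * node:
 *
 *	queue 100	-> [100]			(empty list, append at end)
 *	queue 104	-> [100, 104]			(104 > 100, scan forward)
 *	queue 102	-> [100, 102, 104]		(102 < 104, scan backward)
 *	queue 103	-> [100, 102, 103, 104]		(103 > 102, scan forward)
 *
 * Because mmap buffers deliver nearly sorted timestamps, these scans
 * usually stop after a comparison or two, which is why the plain list is
 * preferred over an rbtree here.
 */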
static int queue_sample_event(event_t *event, struct sample_data *data,
			      struct perf_session *s,
			      struct perf_event_ops *ops)
{
	u64 timestamp = data->time;
	struct sample_queue *new;
	u64 flush_limit;

	if (s->ordered_samples.flush_limit == ULLONG_MAX)
		s->ordered_samples.flush_limit = timestamp + FLUSH_PERIOD;

	if (timestamp < s->ordered_samples.last_flush) {
		printf("Warning: Timestamp below last timeslice flush\n");
		return -EINVAL;
	}

	new = malloc(sizeof(*new));
	if (!new)
		return -ENOMEM;

	new->timestamp = timestamp;

	new->event = malloc(event->header.size);
	if (!new->event) {
		free(new);
		return -ENOMEM;
	}

	memcpy(new->event, event, event->header.size);

	__queue_sample_event(new, s);
	s->ordered_samples.last_inserted = new;

	/*
	 * We want to keep a slice of events covering 2 * FLUSH_PERIOD.
	 * If FLUSH_PERIOD is big enough, it ensures that every event that
	 * occurred in the first half of the timeslice has been buffered and
	 * that none remain outstanding (we need that because of the weakly
	 * ordered event recording we have). Once we reach the 2 * FLUSH_PERIOD
	 * timeslice, we flush the first half to be gentle with memory (the
	 * second half can still receive new events in the middle, so wait
	 * another period before flushing it).
	 */
	flush_limit = s->ordered_samples.flush_limit;

	if (new->timestamp > flush_limit &&
	    new->timestamp - flush_limit > FLUSH_PERIOD) {
		s->ordered_samples.flush_limit += FLUSH_PERIOD;
		flush_sample_queue(s, ops);
	}

	return 0;
}
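/*
 * Worked timeline for the flush heuristic above, assuming the first queued
 * sample arrives at t = 0s and FLUSH_PERIOD = 2 * NSEC_PER_SEC (2s):
 *
 *	t = 0s		flush_limit is initialized to 0s + 2s = 2s
 *	t = 3s		3s - 2s is not > FLUSH_PERIOD, keep buffering
 *	t = 4.5s	4.5s - 2s > FLUSH_PERIOD, so flush_limit advances to
 *			4s and every queued sample with a timestamp <= 4s is
 *			emitted, in order, by flush_sample_queue()
 *
 * Samples newer than the advanced flush_limit stay queued until a later
 * sample pushes the window forward again.
 */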
static int perf_session__process_sample(event_t *event, struct perf_session *s,
					struct perf_event_ops *ops)
{
	struct sample_data data;

	if (!ops->ordered_samples)
		return ops->sample(event, s);

	bzero(&data, sizeof(struct sample_data));
	event__parse_sample(event, s->sample_type, &data);

	queue_sample_event(event, &data, s, ops);

	return 0;
}
static int perf_session__process_event(struct perf_session *self,
				       event_t *event,
				       struct perf_event_ops *ops,
				       u64 offset, u64 head)
{
	if (event->header.type < PERF_RECORD_HEADER_MAX) {
		dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
			    offset + head, event->header.size,
			    event__name[event->header.type]);
		++event__total[event->header.type];
	}

	if (self->header.needs_swap && event__swap_ops[event->header.type])
		event__swap_ops[event->header.type](event);

	switch (event->header.type) {
	case PERF_RECORD_SAMPLE:
		return perf_session__process_sample(event, self, ops);
	case PERF_RECORD_MMAP:
		return ops->mmap(event, self);
	case PERF_RECORD_COMM:
		return ops->comm(event, self);
	case PERF_RECORD_FORK:
		return ops->fork(event, self);
	case PERF_RECORD_EXIT:
		return ops->exit(event, self);
	case PERF_RECORD_LOST:
		return ops->lost(event, self);
	case PERF_RECORD_READ:
		return ops->read(event, self);
	case PERF_RECORD_THROTTLE:
		return ops->throttle(event, self);
	case PERF_RECORD_UNTHROTTLE:
		return ops->unthrottle(event, self);
	case PERF_RECORD_HEADER_ATTR:
		return ops->attr(event, self);
	case PERF_RECORD_HEADER_EVENT_TYPE:
		return ops->event_type(event, self);
	case PERF_RECORD_HEADER_TRACING_DATA:
		/* setup for reading amidst mmap */
		lseek(self->fd, offset + head, SEEK_SET);
		return ops->tracing_data(event, self);
	case PERF_RECORD_HEADER_BUILD_ID:
		return ops->build_id(event, self);
	default:
		self->unknown_events++;
		return -1;
	}
}
void perf_event_header__bswap(struct perf_event_header *self)
{
	self->type = bswap_32(self->type);
	self->misc = bswap_16(self->misc);
	self->size = bswap_16(self->size);
}
static struct thread *perf_session__register_idle_thread(struct perf_session *self)
{
	struct thread *thread = perf_session__findnew(self, 0);

	if (thread == NULL || thread__set_comm(thread, "swapper")) {
		pr_err("problem inserting idle task.\n");
		thread = NULL;
	}

	return thread;
}
/* Read exactly 'size' bytes, looping over short reads. */
int do_read(int fd, void *buf, size_t size)
{
	void *buf_start = buf;

	while (size) {
		int ret = read(fd, buf, size);

		if (ret <= 0)
			return ret;

		size -= ret;
		buf += ret;
	}

	return buf - buf_start;
}
#define session_done()	(*(volatile int *)(&session_done))
volatile int session_done;
static int __perf_session__process_pipe_events(struct perf_session *self,
					       struct perf_event_ops *ops)
{
	event_t event;
	uint32_t size;
	int skip = 0;
	u64 head = 0;
	void *p;
	int err;

	perf_event_ops__fill_defaults(ops);

more:
	err = do_read(self->fd, &event, sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0)
			goto done;

		pr_err("failed to read event header\n");
		goto out_err;
	}

	if (self->header.needs_swap)
		perf_event_header__bswap(&event.header);

	size = event.header.size;
	if (size == 0)
		size = 8;

	p = &event;
	p += sizeof(struct perf_event_header);

	err = do_read(self->fd, p, size - sizeof(struct perf_event_header));
	if (err <= 0) {
		if (err == 0) {
			pr_err("unexpected end of event stream\n");
			goto done;
		}

		pr_err("failed to read event data\n");
		goto out_err;
	}

	if (size == 0 ||
	    (skip = perf_session__process_event(self, &event, ops,
						0, head)) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    head, event.header.size, event.header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon'.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    head, event.header.size, event.header.type);

	if (skip > 0)
		head += skip;

	if (!session_done())
		goto more;
done:
	err = 0;
out_err:
	return err;
}
int __perf_session__process_events(struct perf_session *self,
				   u64 data_offset, u64 data_size,
				   u64 file_size, struct perf_event_ops *ops)
{
	int err, mmap_prot, mmap_flags;
	u64 head, shift;
	u64 offset = 0;
	size_t page_size;
	event_t *event;
	uint32_t size;
	char *buf;
	struct ui_progress *progress = ui_progress__new("Processing events...",
							self->size);
	if (progress == NULL)
		return -1;

	perf_event_ops__fill_defaults(ops);

	page_size = sysconf(_SC_PAGESIZE);

	head = data_offset;
	shift = page_size * (head / page_size);
	offset += shift;
	head -= shift;

	mmap_prot  = PROT_READ;
	mmap_flags = MAP_SHARED;

	if (self->header.needs_swap) {
		mmap_prot  |= PROT_WRITE;
		mmap_flags = MAP_PRIVATE;
	}
remap:
	buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
		   mmap_flags, self->fd, offset);
	if (buf == MAP_FAILED) {
		pr_err("failed to mmap file\n");
		err = -errno;
		goto out_err;
	}
more:
	event = (event_t *)(buf + head);
	ui_progress__update(progress, offset);

	if (self->header.needs_swap)
		perf_event_header__bswap(&event->header);
	size = event->header.size;
	if (size == 0)
		size = 8;

	if (head + event->header.size >= page_size * self->mmap_window) {
		int munmap_ret;

		shift = page_size * (head / page_size);

		munmap_ret = munmap(buf, page_size * self->mmap_window);
		assert(munmap_ret == 0);

		offset += shift;
		head -= shift;
		goto remap;
	}

	size = event->header.size;

	dump_printf("\n%#Lx [%#x]: event: %d\n",
		    offset + head, event->header.size, event->header.type);

	if (size == 0 ||
	    perf_session__process_event(self, event, ops, offset, head) < 0) {
		dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
			    offset + head, event->header.size,
			    event->header.type);
		/*
		 * assume we lost track of the stream, check alignment, and
		 * increment a single u64 in the hope to catch on again 'soon';
		 * see the worked example after this function.
		 */
		if (unlikely(head & 7))
			head &= ~7ULL;

		size = 8;
	}

	head += size;

	if (offset + head >= data_offset + data_size)
		goto done;

	if (offset + head < file_size)
		goto more;
done:
	err = 0;
	/* do the final flush for ordered samples */
	self->ordered_samples.flush_limit = ULLONG_MAX;
	flush_sample_queue(self, ops);
out_err:
	ui_progress__delete(progress);
	return err;
}
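/*
 * Worked example of the realignment above (illustrative values only):
 * events are read at buf + head, so if an unknown header is hit while
 * head = 0x100d, head is first aligned down to 0x1008 (0x100d & ~7), size
 * is forced to 8, and the scan then advances one u64 at a time -- 0x1010,
 * 0x1018, ... -- until a recognisable event header is found again.
 */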
int perf_session__process_events(struct perf_session *self,
				 struct perf_event_ops *ops)
{
	int err;

	if (perf_session__register_idle_thread(self) == NULL)
		return -ENOMEM;

	if (!symbol_conf.full_paths) {
		char bf[PATH_MAX];

		if (getcwd(bf, sizeof(bf)) == NULL) {
			err = -errno;
			pr_err("failed to get the current directory\n");
			goto out_err;
		}
		self->cwd = strdup(bf);
		if (self->cwd == NULL) {
			err = -ENOMEM;
			goto out_err;
		}
		self->cwdlen = strlen(self->cwd);
	}

	if (strcmp(self->filename, "-"))
		err = __perf_session__process_events(self,
						     self->header.data_offset,
						     self->header.data_size,
						     self->size, ops);
	else
		err = __perf_session__process_pipe_events(self, ops);
out_err:
	return err;
}
bool perf_session__has_traces(struct perf_session *self, const char *msg)
{
	if (!(self->sample_type & PERF_SAMPLE_RAW)) {
		pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
		return false;
	}

	return true;
}
int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
					     const char *symbol_name,
					     u64 addr)
{
	char *bracket;
	enum map_type i;
	struct ref_reloc_sym *ref;

	ref = zalloc(sizeof(struct ref_reloc_sym));
	if (ref == NULL)
		return -ENOMEM;

	ref->name = strdup(symbol_name);
	if (ref->name == NULL) {
		free(ref);
		return -ENOMEM;
	}

	bracket = strchr(ref->name, ']');
	if (bracket)
		*bracket = '\0';

	ref->addr = addr;

	for (i = 0; i < MAP__NR_TYPES; ++i) {
		struct kmap *kmap = map__kmap(maps[i]);

		kmap->ref_reloc_sym = ref;
	}

	return 0;
}