/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
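/*
 * Accessor helpers for the two-dimensional fd tables used below: FD()
 * yields the file descriptor slot for a given (cpu, thread) cell of the
 * evsel's xyarray, and GROUP_FD() yields the per-cpu group leader fd
 * stored at thread index 0.
 */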
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}
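/*
 * Worked example (illustrative, not from the original source): with
 * sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME,
 * three bits of PERF_SAMPLE_MASK are set, so the fixed portion of each
 * sample is 3 * sizeof(u64) = 24 bytes.
 */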
void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx  = idx;
	evsel->attr = *attr;
	INIT_LIST_HEAD(&evsel->node);
	hists__init(&evsel->hists);
}
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	return evsel;
}
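/*
 * Illustrative lifecycle for this API (hypothetical caller, not part of
 * this file; error handling elided):
 *
 *	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
 *
 *	perf_evsel__open_per_thread(evsel, threads, false, NULL);
 *	__perf_evsel__read_on_cpu(evsel, 0, 0, true);
 *	perf_evsel__close(evsel, 1, threads->nr);
 *	perf_evsel__delete(evsel);
 */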
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
	evsel->counts = zalloc((sizeof(*evsel->counts) +
			       (ncpus * sizeof(struct perf_counts_values))));
	return evsel->counts != NULL ? 0 : -ENOMEM;
}
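/*
 * Note: the single zalloc() above sizes one block holding struct
 * perf_counts plus ncpus trailing perf_counts_values entries, which the
 * read paths below address as evsel->counts->cpu[cpu]; this assumes
 * struct perf_counts ends in a flexible per-cpu array.
 */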
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}
void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}
void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}
void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel);
}
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;
		else if (count.run < count.ena)
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}
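/*
 * Scaling example (illustrative numbers, not from the source): if the
 * event was only scheduled on the PMU for run = 500000 ns out of
 * ena = 1000000 ns enabled, a raw val of 1000 is extrapolated to
 * 1000 * 1000000 / 500000 + 0.5 and stored as 2000.
 */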
int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}
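/*
 * The resulting counts->scaled flag: -1 means the event never ran and
 * the aggregate value was zeroed, 1 means the value was extrapolated
 * because the event was multiplexed (run < ena), and 0 means the raw
 * count was usable as-is.
 */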
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}
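/*
 * Note on the out_close unwind: after a failed sys_perf_event_open(),
 * the inner loop closes the fds already opened for the current cpu
 * (the threads below the failing one), then each earlier cpu is unwound
 * in full, leaving every visited slot reset to -1.
 */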
void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel, ncpus, nthreads);
	perf_evsel__free_fd(evsel);
}
static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr	= 1,
	.cpus	= { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};
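/*
 * The dummy maps above supply a single "unspecified" entry (-1) when the
 * caller passes no cpu or thread map, so __perf_evsel__open() can always
 * iterate both dimensions.
 */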
int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group,
		     struct xyarray *group_fd)
{
	if (cpus == NULL) {
		/* Work around old compiler warnings about strict aliasing */
		cpus = &empty_cpu_map.map;
	}

	if (threads == NULL)
		threads = &empty_thread_map.map;

	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}
int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}
static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}
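/*
 * Parsing note: for non-sample records the id fields are appended at the
 * tail of the event, so the code above starts at the last u64 of the
 * record and walks backwards, consuming fields in the reverse of the
 * order in which the kernel wrote them.
 */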
static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *base = event;

	if (offset + size > base + event->header.size)
		return true;

	return false;
}
int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;

	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;
	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}
	data->addr = 0;
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}
	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}
	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}
	if (type & PERF_SAMPLE_CALLCHAIN) {
		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;

		if (sample_overlap(event, array, data->callchain->nr))
			return -EFAULT;

		array += 1 + data->callchain->nr;
	}
	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;
	}

	return 0;
}