tools/perf/util/evsel.c

   1 /*
   2  * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
   3  *
   4  * Parts came from builtin-{top,stat,record}.c, see those files for further
   5  * copyright notes.
   6  *
   7  * Released under the GPL v2. (and only v2, not any later version)
   8  */
   9
  10 #include <byteswap.h>
  11 #include "asm/bug.h"
  12 #include "evsel.h"
  13 #include "evlist.h"
  14 #include "util.h"
  15 #include "cpumap.h"
  16 #include "thread_map.h"
  17
  18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  19 #define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))
  20
  21 int __perf_evsel__sample_size(u64 sample_type)
  22 {
  23         u64 mask = sample_type & PERF_SAMPLE_MASK;
  24         int size = 0;
  25         int i;
  26
  27         for (i = 0; i < 64; i++) {
  28                 if (mask & (1ULL << i))
  29                         size++;
  30         }
  31
  32         size *= sizeof(u64);
  33
  34         return size;
  35 }
  36
  37 void perf_evsel__init(struct perf_evsel *evsel,
  38                       struct perf_event_attr *attr, int idx)
  39 {
  40         evsel->idx         = idx;
  41         evsel->attr        = *attr;
  42         INIT_LIST_HEAD(&evsel->node);
  43         hists__init(&evsel->hists);
  44 }
  45
  46 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
  47 {
  48         struct perf_evsel *evsel = zalloc(sizeof(*evsel));
  49
  50         if (evsel != NULL)
  51                 perf_evsel__init(evsel, attr, idx);
  52
  53         return evsel;
  54 }
  55
  56 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
  57 {
  58         int cpu, thread;
  59         evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
  60
  61         if (evsel->fd) {
  62                 for (cpu = 0; cpu < ncpus; cpu++) {
  63                         for (thread = 0; thread < nthreads; thread++) {
  64                                 FD(evsel, cpu, thread) = -1;
  65                         }
  66                 }
  67         }
  68
  69         return evsel->fd != NULL ? 0 : -ENOMEM;
  70 }
  71
  72 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
  73 {
  74         evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
  75         if (evsel->sample_id == NULL)
  76                 return -ENOMEM;
  77
  78         evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
  79         if (evsel->id == NULL) {
  80                 xyarray__delete(evsel->sample_id);
  81                 evsel->sample_id = NULL;
  82                 return -ENOMEM;
  83         }
  84
  85         return 0;
  86 }
  87
  88 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
  89 {
  90         evsel->counts = zalloc((sizeof(*evsel->counts) +
  91                                 (ncpus * sizeof(struct perf_counts_values))));
  92         return evsel->counts != NULL ? 0 : -ENOMEM;
  93 }
  94
  95 void perf_evsel__free_fd(struct perf_evsel *evsel)
  96 {
  97         xyarray__delete(evsel->fd);
  98         evsel->fd = NULL;
  99 }
 100
 101 void perf_evsel__free_id(struct perf_evsel *evsel)
 102 {
 103         xyarray__delete(evsel->sample_id);
 104         evsel->sample_id = NULL;
 105         free(evsel->id);
 106         evsel->id = NULL;
 107 }
 108
 109 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 110 {
 111         int cpu, thread;
 112
 113         for (cpu = 0; cpu < ncpus; cpu++)
 114                 for (thread = 0; thread < nthreads; ++thread) {
 115                         close(FD(evsel, cpu, thread));
 116                         FD(evsel, cpu, thread) = -1;
 117                 }
 118 }
 119
 120 void perf_evsel__exit(struct perf_evsel *evsel)
 121 {
 122         assert(list_empty(&evsel->node));
 123         xyarray__delete(evsel->fd);
 124         xyarray__delete(evsel->sample_id);
 125         free(evsel->id);
 126 }
 127
 128 void perf_evsel__delete(struct perf_evsel *evsel)
 129 {
 130         perf_evsel__exit(evsel);
 131         close_cgroup(evsel->cgrp);
 132         free(evsel->name);
 133         free(evsel);
 134 }
 135
 136 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 137                               int cpu, int thread, bool scale)
 138 {
 139         struct perf_counts_values count;
 140         size_t nv = scale ? 3 : 1;
 141
 142         if (FD(evsel, cpu, thread) < 0)
 143                 return -EINVAL;
 144
 145         if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
 146                 return -ENOMEM;
 147
 148         if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
 149                 return -errno;
 150
 151         if (scale) {
 152                 if (count.run == 0)
 153                         count.val = 0;
 154                 else if (count.run < count.ena)
 155                         count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
 156         } else
 157                 count.ena = count.run = 0;
 158
 159         evsel->counts->cpu[cpu] = count;
 160         return 0;
 161 }
 162
 163 int __perf_evsel__read(struct perf_evsel *evsel,
 164                        int ncpus, int nthreads, bool scale)
 165 {
 166         size_t nv = scale ? 3 : 1;
 167         int cpu, thread;
 168         struct perf_counts_values *aggr = &evsel->counts->aggr, count;
 169
 170         aggr->val = aggr->ena = aggr->run = 0;
 171
 172         for (cpu = 0; cpu < ncpus; cpu++) {
 173                 for (thread = 0; thread < nthreads; thread++) {
 174                         if (FD(evsel, cpu, thread) < 0)
 175                                 continue;
 176
 177                         if (readn(FD(evsel, cpu, thread),
 178                                   &count, nv * sizeof(u64)) < 0)
 179                                 return -errno;
 180
 181                         aggr->val += count.val;
 182                         if (scale) {
 183                                 aggr->ena += count.ena;
 184                                 aggr->run += count.run;
 185                         }
 186                 }
 187         }
 188
 189         evsel->counts->scaled = 0;
 190         if (scale) {
 191                 if (aggr->run == 0) {
 192                         evsel->counts->scaled = -1;
 193                         aggr->val = 0;
 194                         return 0;
 195                 }
 196
 197                 if (aggr->run < aggr->ena) {
 198                         evsel->counts->scaled = 1;
 199                         aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
 200                 }
 201         } else
 202                 aggr->ena = aggr->run = 0;
 203
 204         return 0;
 205 }
 206
 207 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 208                               struct thread_map *threads, bool group,
 209                               struct xyarray *group_fds)
 210 {
 211         int cpu, thread;
 212         unsigned long flags = 0;
 213         int pid = -1, err;
 214
 215         if (evsel->fd == NULL &&
 216             perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
 217                 return -ENOMEM;
 218
 219         if (evsel->cgrp) {
 220                 flags = PERF_FLAG_PID_CGROUP;
 221                 pid = evsel->cgrp->fd;
 222         }
 223
 224         for (cpu = 0; cpu < cpus->nr; cpu++) {
 225                 int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;
 226
 227                 for (thread = 0; thread < threads->nr; thread++) {
 228
 229                         if (!evsel->cgrp)
 230                                 pid = threads->map[thread];
 231
 232                         FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
 233                                                                      pid,
 234                                                                      cpus->map[cpu],
 235                                                                      group_fd, flags);
 236                         if (FD(evsel, cpu, thread) < 0) {
 237                                 err = -errno;
 238                                 goto out_close;
 239                         }
 240
 241                         if (group && group_fd == -1)
 242                                 group_fd = FD(evsel, cpu, thread);
 243                 }
 244         }
 245
 246         return 0;
 247
 248 out_close:
 249         do {
 250                 while (--thread >= 0) {
 251                         close(FD(evsel, cpu, thread));
 252                         FD(evsel, cpu, thread) = -1;
 253                 }
 254                 thread = threads->nr;
 255         } while (--cpu >= 0);
 256         return err;
 257 }
 258
 259 void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
 260 {
 261         if (evsel->fd == NULL)
 262                 return;
 263
 264         perf_evsel__close_fd(evsel, ncpus, nthreads);
 265         perf_evsel__free_fd(evsel);
 266         evsel->fd = NULL;
 267 }
 268
 269 static struct {
 270         struct cpu_map map;
 271         int cpus[1];
 272 } empty_cpu_map = {
 273         .map.nr = 1,
 274         .cpus   = { -1, },
 275 };
 276
 277 static struct {
 278         struct thread_map map;
 279         int threads[1];
 280 } empty_thread_map = {
 281         .map.nr  = 1,
 282         .threads = { -1, },
 283 };
 284
 285 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 286                      struct thread_map *threads, bool group,
 287                      struct xyarray *group_fd)
 288 {
 289         if (cpus == NULL) {
 290                 /* Work around old compiler warnings about strict aliasing */
 291                 cpus = &empty_cpu_map.map;
 292         }
 293
 294         if (threads == NULL)
 295                 threads = &empty_thread_map.map;
 296
 297         return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
 298 }
 299
 300 int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
 301                              struct cpu_map *cpus, bool group,
 302                              struct xyarray *group_fd)
 303 {
 304         return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
 305                                   group_fd);
 306 }
 307
 308 int perf_evsel__open_per_thread(struct perf_evsel *evsel,
 309                                 struct thread_map *threads, bool group,
 310                                 struct xyarray *group_fd)
 311 {
 312         return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
 313                                   group_fd);
 314 }
 315
 316 static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
 317                                        struct perf_sample *sample)
 318 {
 319         const u64 *array = event->sample.array;
 320
 321         array += ((event->header.size -
 322                    sizeof(event->header)) / sizeof(u64)) - 1;
 323
 324         if (type & PERF_SAMPLE_CPU) {
 325                 u32 *p = (u32 *)array;
 326                 sample->cpu = *p;
 327                 array--;
 328         }
 329
 330         if (type & PERF_SAMPLE_STREAM_ID) {
 331                 sample->stream_id = *array;
 332                 array--;
 333         }
 334
 335         if (type & PERF_SAMPLE_ID) {
 336                 sample->id = *array;
 337                 array--;
 338         }
 339
 340         if (type & PERF_SAMPLE_TIME) {
 341                 sample->time = *array;
 342                 array--;
 343         }
 344
 345         if (type & PERF_SAMPLE_TID) {
 346                 u32 *p = (u32 *)array;
 347                 sample->pid = p[0];
 348                 sample->tid = p[1];
 349         }
 350
 351         return 0;
 352 }
 353
 354 static bool sample_overlap(const union perf_event *event,
 355                            const void *offset, u64 size)
 356 {
 357         const void *base = event;
 358
 359         if (offset + size > base + event->header.size)
 360                 return true;
 361
 362         return false;
 363 }
 364
 365 int perf_event__parse_sample(const union perf_event *event, u64 type,
 366                              int sample_size, bool sample_id_all,
 367                              struct perf_sample *data, bool swapped)
 368 {
 369         const u64 *array;
 370
 371         /*
 372          * used for cross-endian analysis. See git commit 65014ab3
 373          * for why this goofiness is needed.
 374          */
 375         union {
 376                 u64 val64;
 377                 u32 val32[2];
 378         } u;
 379
 380
 381         data->cpu = data->pid = data->tid = -1;
 382         data->stream_id = data->id = data->time = -1ULL;
 383
 384         if (event->header.type != PERF_RECORD_SAMPLE) {
 385                 if (!sample_id_all)
 386                         return 0;
 387                 return perf_event__parse_id_sample(event, type, data);
 388         }
 389
 390         array = event->sample.array;
 391
 392         if (sample_size + sizeof(event->header) > event->header.size)
 393                 return -EFAULT;
 394
 395         if (type & PERF_SAMPLE_IP) {
 396                 data->ip = event->ip.ip;
 397                 array++;
 398         }
 399
 400         if (type & PERF_SAMPLE_TID) {
 401                 u.val64 = *array;
 402                 if (swapped) {
 403                         /* undo swap of u64, then swap on individual u32s */
 404                         u.val64 = bswap_64(u.val64);
 405                         u.val32[0] = bswap_32(u.val32[0]);
 406                         u.val32[1] = bswap_32(u.val32[1]);
 407                 }
 408
 409                 data->pid = u.val32[0];
 410                 data->tid = u.val32[1];
 411                 array++;
 412         }
 413
 414         if (type & PERF_SAMPLE_TIME) {
 415                 data->time = *array;
 416                 array++;
 417         }
 418
 419         data->addr = 0;
 420         if (type & PERF_SAMPLE_ADDR) {
 421                 data->addr = *array;
 422                 array++;
 423         }
 424
 425         data->id = -1ULL;
 426         if (type & PERF_SAMPLE_ID) {
 427                 data->id = *array;
 428                 array++;
 429         }
 430
 431         if (type & PERF_SAMPLE_STREAM_ID) {
 432                 data->stream_id = *array;
 433                 array++;
 434         }
 435
 436         if (type & PERF_SAMPLE_CPU) {
 437
 438                 u.val64 = *array;
 439                 if (swapped) {
 440                         /* undo swap of u64, then swap on individual u32s */
 441                         u.val64 = bswap_64(u.val64);
 442                         u.val32[0] = bswap_32(u.val32[0]);
 443                 }
 444
 445                 data->cpu = u.val32[0];
 446                 array++;
 447         }
 448
 449         if (type & PERF_SAMPLE_PERIOD) {
 450                 data->period = *array;
 451                 array++;
 452         }
 453
 454         if (type & PERF_SAMPLE_READ) {
 455                 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
 456                 return -1;
 457         }
 458
 459         if (type & PERF_SAMPLE_CALLCHAIN) {
 460                 if (sample_overlap(event, array, sizeof(data->callchain->nr)))
 461                         return -EFAULT;
 462
 463                 data->callchain = (struct ip_callchain *)array;
 464
 465                 if (sample_overlap(event, array, data->callchain->nr))
 466                         return -EFAULT;
 467
 468                 array += 1 + data->callchain->nr;
 469         }
 470
 471         if (type & PERF_SAMPLE_RAW) {
 472                 const u64 *pdata;
 473
 474                 u.val64 = *array;
 475                 if (WARN_ONCE(swapped,
 476                               "Endianness of raw data not corrected!\n")) {
 477                         /* undo swap of u64, then swap on individual u32s */
 478                         u.val64 = bswap_64(u.val64);
 479                         u.val32[0] = bswap_32(u.val32[0]);
 480                         u.val32[1] = bswap_32(u.val32[1]);
 481                 }
 482
 483                 if (sample_overlap(event, array, sizeof(u32)))
 484                         return -EFAULT;
 485
 486                 data->raw_size = u.val32[0];
 487                 pdata = (void *) array + sizeof(u32);
 488
 489                 if (sample_overlap(event, pdata, data->raw_size))
 490                         return -EFAULT;
 491
 492                 data->raw_data = (void *) pdata;
 493         }
 494
 495         return 0;
 496 }