2 * perf record: Record the profile of a workload (or a CPU, or a PID) into
3 * the perf.data output file - for later analysis via perf report.
8 #include "util/parse-options.h"
9 #include "util/parse-events.h"
10 #include "util/string.h"
15 #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
16 #define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
18 static int default_interval
= 100000;
19 static int event_count
[MAX_COUNTERS
];
21 static int fd
[MAX_NR_CPUS
][MAX_COUNTERS
];
22 static int nr_cpus
= 0;
23 static unsigned int page_size
;
24 static unsigned int mmap_pages
= 128;
26 static const char *output_name
= "perf.data";
28 static unsigned int realtime_prio
= 0;
29 static int system_wide
= 0;
30 static pid_t target_pid
= -1;
31 static int inherit
= 1;
33 static int append_file
= 0;
35 const unsigned int default_count
[] = {
51 static unsigned int mmap_read_head(struct mmap_data
*md
)
53 struct perf_counter_mmap_page
*pc
= md
->base
;
63 static struct timeval last_read
, this_read
;
65 static void mmap_read(struct mmap_data
*md
)
67 unsigned int head
= mmap_read_head(md
);
68 unsigned int old
= md
->prev
;
69 unsigned char *data
= md
->base
+ page_size
;
74 gettimeofday(&this_read
, NULL
);
77 * If we're further behind than half the buffer, there's a chance
78 * the writer will bite our tail and screw up the events under us.
80 * If we somehow ended up ahead of the head, we got messed up.
82 * In either case, truncate and restart at head.
85 if (diff
> md
->mask
/ 2 || diff
< 0) {
89 timersub(&this_read
, &last_read
, &iv
);
90 msecs
= iv
.tv_sec
*1000 + iv
.tv_usec
/1000;
92 fprintf(stderr
, "WARNING: failed to keep up with mmap data."
93 " Last read %lu msecs ago.\n", msecs
);
96 * head points to a known good entry, start there.
101 last_read
= this_read
;
108 if ((old
& md
->mask
) + size
!= (head
& md
->mask
)) {
109 buf
= &data
[old
& md
->mask
];
110 size
= md
->mask
+ 1 - (old
& md
->mask
);
113 int ret
= write(output
, buf
, size
);
115 perror("failed to write");
123 buf
= &data
[old
& md
->mask
];
127 int ret
= write(output
, buf
, size
);
129 perror("failed to write");
139 static volatile int done
= 0;
141 static void sig_handler(int sig
)
146 static struct pollfd event_array
[MAX_NR_CPUS
* MAX_COUNTERS
];
147 static struct mmap_data mmap_array
[MAX_NR_CPUS
][MAX_COUNTERS
];
153 struct perf_event_header header
;
159 char filename
[PATH_MAX
];
163 struct perf_event_header header
;
169 static void pid_synthesize_comm_event(pid_t pid
, int full
)
171 struct comm_event comm_ev
;
172 char filename
[PATH_MAX
];
178 struct dirent dirent
, *next
;
180 snprintf(filename
, sizeof(filename
), "/proc/%d/stat", pid
);
182 fd
= open(filename
, O_RDONLY
);
184 fprintf(stderr
, "couldn't open %s\n", filename
);
187 if (read(fd
, bf
, sizeof(bf
)) < 0) {
188 fprintf(stderr
, "couldn't read %s\n", filename
);
193 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
194 memset(&comm_ev
, 0, sizeof(comm_ev
));
195 field
= strchr(bf
, '(');
198 sep
= strchr(++field
, ')');
202 memcpy(comm_ev
.comm
, field
, size
++);
205 comm_ev
.header
.type
= PERF_EVENT_COMM
;
206 size
= ALIGN(size
, sizeof(uint64_t));
207 comm_ev
.header
.size
= sizeof(comm_ev
) - (sizeof(comm_ev
.comm
) - size
);
212 ret
= write(output
, &comm_ev
, comm_ev
.header
.size
);
214 perror("failed to write");
220 snprintf(filename
, sizeof(filename
), "/proc/%d/task", pid
);
222 tasks
= opendir(filename
);
223 while (!readdir_r(tasks
, &dirent
, &next
) && next
) {
225 pid
= strtol(dirent
.d_name
, &end
, 10);
231 ret
= write(output
, &comm_ev
, comm_ev
.header
.size
);
233 perror("failed to write");
241 fprintf(stderr
, "couldn't get COMM and pgid, malformed %s\n",
246 static void pid_synthesize_mmap_events(pid_t pid
)
248 char filename
[PATH_MAX
];
251 snprintf(filename
, sizeof(filename
), "/proc/%d/maps", pid
);
253 fp
= fopen(filename
, "r");
255 fprintf(stderr
, "couldn't open %s\n", filename
);
259 char bf
[BUFSIZ
], *pbf
= bf
;
260 struct mmap_event mmap_ev
= {
261 .header
.type
= PERF_EVENT_MMAP
,
265 if (fgets(bf
, sizeof(bf
), fp
) == NULL
)
268 /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
269 n
= hex2u64(pbf
, &mmap_ev
.start
);
273 n
= hex2u64(pbf
, &mmap_ev
.len
);
277 if (*pbf
== 'x') { /* vm_exec */
278 char *execname
= strrchr(bf
, ' ');
280 if (execname
== NULL
|| execname
[1] != '/')
284 size
= strlen(execname
);
285 execname
[size
- 1] = '\0'; /* Remove \n */
286 memcpy(mmap_ev
.filename
, execname
, size
);
287 size
= ALIGN(size
, sizeof(uint64_t));
288 mmap_ev
.len
-= mmap_ev
.start
;
289 mmap_ev
.header
.size
= (sizeof(mmap_ev
) -
290 (sizeof(mmap_ev
.filename
) - size
));
294 if (write(output
, &mmap_ev
, mmap_ev
.header
.size
) < 0) {
295 perror("failed to write");
304 static void synthesize_events(void)
307 struct dirent dirent
, *next
;
309 proc
= opendir("/proc");
311 while (!readdir_r(proc
, &dirent
, &next
) && next
) {
315 pid
= strtol(dirent
.d_name
, &end
, 10);
316 if (*end
) /* only interested in proper numerical dirents */
319 pid_synthesize_comm_event(pid
, 1);
320 pid_synthesize_mmap_events(pid
);
326 static void open_counters(int cpu
, pid_t pid
)
328 struct perf_counter_attr attr
;
329 int counter
, group_fd
;
333 pid_synthesize_comm_event(pid
, 0);
334 pid_synthesize_mmap_events(pid
);
338 for (counter
= 0; counter
< nr_counters
; counter
++) {
340 memset(&attr
, 0, sizeof(attr
));
341 attr
.config
= event_id
[counter
];
342 attr
.sample_period
= event_count
[counter
];
343 attr
.sample_type
= PERF_SAMPLE_IP
| PERF_SAMPLE_TID
;
346 attr
.inherit
= (cpu
< 0) && inherit
;
348 track
= 0; // only the first counter needs these
350 fd
[nr_cpu
][counter
] =
351 sys_perf_counter_open(&attr
, pid
, cpu
, group_fd
, 0);
353 if (fd
[nr_cpu
][counter
] < 0) {
355 printf("kerneltop error: syscall returned with %d (%s)\n",
356 fd
[nr_cpu
][counter
], strerror(err
));
358 printf("Are you root?\n");
361 assert(fd
[nr_cpu
][counter
] >= 0);
362 fcntl(fd
[nr_cpu
][counter
], F_SETFL
, O_NONBLOCK
);
365 * First counter acts as the group leader:
367 if (group
&& group_fd
== -1)
368 group_fd
= fd
[nr_cpu
][counter
];
370 event_array
[nr_poll
].fd
= fd
[nr_cpu
][counter
];
371 event_array
[nr_poll
].events
= POLLIN
;
374 mmap_array
[nr_cpu
][counter
].counter
= counter
;
375 mmap_array
[nr_cpu
][counter
].prev
= 0;
376 mmap_array
[nr_cpu
][counter
].mask
= mmap_pages
*page_size
- 1;
377 mmap_array
[nr_cpu
][counter
].base
= mmap(NULL
, (mmap_pages
+1)*page_size
,
378 PROT_READ
, MAP_SHARED
, fd
[nr_cpu
][counter
], 0);
379 if (mmap_array
[nr_cpu
][counter
].base
== MAP_FAILED
) {
380 printf("kerneltop error: failed to mmap with %d (%s)\n",
381 errno
, strerror(errno
));
388 static int __cmd_record(int argc
, const char **argv
)
396 page_size
= sysconf(_SC_PAGE_SIZE
);
397 nr_cpus
= sysconf(_SC_NPROCESSORS_ONLN
);
398 assert(nr_cpus
<= MAX_NR_CPUS
);
399 assert(nr_cpus
>= 0);
401 if (!stat(output_name
, &st
) && !force
&& !append_file
) {
402 fprintf(stderr
, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
407 flags
= O_CREAT
|O_RDWR
;
413 output
= open(output_name
, flags
, S_IRUSR
|S_IWUSR
);
415 perror("failed to create output file");
420 open_counters(-1, target_pid
!= -1 ? target_pid
: 0);
421 } else for (i
= 0; i
< nr_cpus
; i
++)
422 open_counters(i
, target_pid
);
424 signal(SIGCHLD
, sig_handler
);
425 signal(SIGINT
, sig_handler
);
427 if (target_pid
== -1 && argc
) {
430 perror("failed to fork");
433 if (execvp(argv
[0], (char **)argv
)) {
441 struct sched_param param
;
443 param
.sched_priority
= realtime_prio
;
444 if (sched_setscheduler(0, SCHED_FIFO
, ¶m
)) {
445 printf("Could not set realtime priority.\n");
456 for (i
= 0; i
< nr_cpu
; i
++) {
457 for (counter
= 0; counter
< nr_counters
; counter
++)
458 mmap_read(&mmap_array
[i
][counter
]);
462 ret
= poll(event_array
, nr_poll
, 100);
468 static const char * const record_usage
[] = {
469 "perf record [<options>] [<command>]",
470 "perf record [<options>] -- <command> [<options>]",
474 static char events_help_msg
[EVENTS_HELP_MAX
];
476 static const struct option options
[] = {
477 OPT_CALLBACK('e', "event", NULL
, "event",
478 events_help_msg
, parse_events
),
479 OPT_INTEGER('p', "pid", &target_pid
,
480 "record events on existing pid"),
481 OPT_INTEGER('r', "realtime", &realtime_prio
,
482 "collect data with this RT SCHED_FIFO priority"),
483 OPT_BOOLEAN('a', "all-cpus", &system_wide
,
484 "system-wide collection from all CPUs"),
485 OPT_BOOLEAN('A', "append", &append_file
,
486 "append to the output file to do incremental profiling"),
487 OPT_BOOLEAN('f', "force", &force
,
488 "overwrite existing data file"),
489 OPT_INTEGER('c', "count", &default_interval
,
490 "event period to sample"),
491 OPT_STRING('o', "output", &output_name
, "file",
493 OPT_BOOLEAN('i', "inherit", &inherit
,
494 "child tasks inherit counters"),
495 OPT_INTEGER('m', "mmap-pages", &mmap_pages
,
496 "number of mmap data pages"),
500 int cmd_record(int argc
, const char **argv
, const char *prefix
)
504 create_events_help(events_help_msg
);
506 argc
= parse_options(argc
, argv
, options
, record_usage
, 0);
507 if (!argc
&& target_pid
== -1 && !system_wide
)
508 usage_with_options(record_usage
, options
);
515 for (counter
= 0; counter
< nr_counters
; counter
++) {
516 if (event_count
[counter
])
519 event_count
[counter
] = default_interval
;
522 return __cmd_record(argc
, argv
);