(INLINE_SYSCALL): Don't mark asm input operand as clobbered.
[glibc.git] / elf / sprof.c
blob37472ab5164e25c16f5137d41e053ce7dda273d8
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <ldsodefs.h>
36 #include <sys/gmon.h>
37 #include <sys/gmon_out.h>
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/stat.h>
42 /* Get libc version number. */
43 #include "../version.h"
45 #define PACKAGE _libc_intl_domainname
48 #include <endian.h>
49 #if BYTE_ORDER == BIG_ENDIAN
50 #define byteorder ELFDATA2MSB
51 #define byteorder_name "big-endian"
52 #elif BYTE_ORDER == LITTLE_ENDIAN
53 #define byteorder ELFDATA2LSB
54 #define byteorder_name "little-endian"
55 #else
56 #error "Unknown BYTE_ORDER " BYTE_ORDER
57 #define byteorder ELFDATANONE
58 #endif
61 extern int __profile_frequency (void);
63 /* Name and version of program. */
64 static void print_version (FILE *stream, struct argp_state *state);
65 void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
67 #define OPT_TEST 1
69 /* Definitions of arguments for argp functions. */
70 static const struct argp_option options[] =
72 { NULL, 0, NULL, 0, N_("Output selection:") },
73 { "call-pairs", 'c', NULL, 0,
74 N_("print list of count paths and their number of use") },
75 { "flat-profile", 'p', NULL, 0,
76 N_("generate flat profile with counts and ticks") },
77 { "graph", 'q', NULL, 0, N_("generate call graph") },
79 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
80 { NULL, 0, NULL, 0, NULL }
83 /* Short description of program. */
84 static const char doc[] = N_("Read and display shared object profiling data");
86 /* Strings for arguments in help texts. */
87 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
89 /* Prototype for option handler. */
90 static error_t parse_opt (int key, char *arg, struct argp_state *state);
92 /* Data structure to communicate with argp functions. */
93 static struct argp argp =
95 options, parse_opt, args_doc, doc, NULL, NULL
99 /* Operation modes. */
100 static enum
102 NONE = 0,
103 FLAT_MODE = 1 << 0,
104 CALL_GRAPH_MODE = 1 << 1,
105 CALL_PAIRS = 1 << 2,
107 DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
108 } mode;
110 /* If nonzero the total number of invocations of a function is emitted. */
111 int count_total;
113 /* Nozero for testing. */
114 int do_test;
116 /* Strcuture describing calls. */
117 struct here_fromstruct
119 struct here_cg_arc_record volatile *here;
120 uint16_t link;
123 /* We define a special type to address the elements of the arc table.
124 This is basically the `gmon_cg_arc_record' format but it includes
125 the room for the tag and it uses real types. */
126 struct here_cg_arc_record
128 uintptr_t from_pc;
129 uintptr_t self_pc;
130 uint32_t count;
131 } __attribute__ ((packed));
134 struct known_symbol;
135 struct arc_list
137 size_t idx;
138 uintmax_t count;
140 struct arc_list *next;
143 static struct obstack ob_list;
146 struct known_symbol
148 const char *name;
149 uintptr_t addr;
150 size_t size;
152 uintmax_t ticks;
153 uintmax_t calls;
155 struct arc_list *froms;
156 struct arc_list *tos;
160 struct shobj
162 const char *name; /* User-provided name. */
164 struct link_map *map;
165 const char *dynstrtab; /* Dynamic string table of shared object. */
166 const char *soname; /* Soname of shared object. */
168 uintptr_t lowpc;
169 uintptr_t highpc;
170 unsigned long int kcountsize;
171 size_t expected_size; /* Expected size of profiling file. */
172 size_t tossize;
173 size_t fromssize;
174 size_t fromlimit;
175 unsigned int hashfraction;
176 int s_scale;
178 void *symbol_map;
179 size_t symbol_mapsize;
180 const ElfW(Sym) *symtab;
181 size_t symtab_size;
182 const char *strtab;
184 struct obstack ob_str;
185 struct obstack ob_sym;
189 struct profdata
191 void *addr;
192 off_t size;
194 char *hist;
195 struct gmon_hist_hdr *hist_hdr;
196 uint16_t *kcount;
197 uint32_t narcs; /* Number of arcs in toset. */
198 struct here_cg_arc_record *data;
199 uint16_t *tos;
200 struct here_fromstruct *froms;
203 /* Search tree for symbols. */
204 void *symroot;
205 static struct known_symbol **sortsym;
206 static size_t symidx;
207 static uintmax_t total_ticks;
209 /* Prototypes for local functions. */
210 static struct shobj *load_shobj (const char *name);
211 static void unload_shobj (struct shobj *shobj);
212 static struct profdata *load_profdata (const char *name, struct shobj *shobj);
213 static void unload_profdata (struct profdata *profdata);
214 static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
215 static void count_calls (struct shobj *shobj, struct profdata *profdata);
216 static void read_symbols (struct shobj *shobj);
217 static void add_arcs (struct profdata *profdata);
218 static void generate_flat_profile (struct profdata *profdata);
219 static void generate_call_graph (struct profdata *profdata);
220 static void generate_call_pair_list (struct profdata *profdata);
224 main (int argc, char *argv[])
226 const char *shobj;
227 const char *profdata;
228 struct shobj *shobj_handle;
229 struct profdata *profdata_handle;
230 int remaining;
232 setlocale (LC_ALL, "");
234 /* Initialize the message catalog. */
235 textdomain (_libc_intl_domainname);
237 /* Parse and process arguments. */
238 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
240 if (argc - remaining == 0 || argc - remaining > 2)
242 /* We need exactly two non-option parameter. */
243 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
244 program_invocation_short_name);
245 exit (1);
248 /* Get parameters. */
249 shobj = argv[remaining];
250 if (argc - remaining == 2)
251 profdata = argv[remaining + 1];
252 else
253 /* No filename for the profiling data given. We will determine it
254 from the soname of the shobj, later. */
255 profdata = NULL;
257 /* First see whether we can load the shared object. */
258 shobj_handle = load_shobj (shobj);
259 if (shobj_handle == NULL)
260 exit (1);
262 /* We can now determine the filename for the profiling data, if
263 nececessary. */
264 if (profdata == NULL)
266 char *newp;
268 if (shobj_handle->soname == NULL)
270 unload_shobj (shobj_handle);
272 error (EXIT_FAILURE, 0, _("\
273 no filename for profiling data given and shared object `%s' has no soname"),
274 shobj);
277 newp = (char *) alloca (strlen (shobj_handle->soname)
278 + sizeof ".profile");
279 stpcpy (stpcpy (newp, shobj_handle->soname), ".profile");
280 profdata = newp;
283 /* Now see whether the profiling data file matches the given object. */
284 profdata_handle = load_profdata (profdata, shobj_handle);
285 if (profdata_handle == NULL)
287 unload_shobj (shobj_handle);
289 exit (1);
292 read_symbols (shobj_handle);
294 /* Count the ticks. */
295 count_total_ticks (shobj_handle, profdata_handle);
297 /* Count the calls. */
298 count_calls (shobj_handle, profdata_handle);
300 /* Add the arc information. */
301 add_arcs (profdata_handle);
303 /* If no mode is specified fall back to the default mode. */
304 if (mode == NONE)
305 mode = DEFAULT_MODE;
307 /* Do some work. */
308 if (mode & FLAT_MODE)
309 generate_flat_profile (profdata_handle);
311 if (mode & CALL_GRAPH_MODE)
312 generate_call_graph (profdata_handle);
314 if (mode & CALL_PAIRS)
315 generate_call_pair_list (profdata_handle);
317 /* Free the resources. */
318 unload_shobj (shobj_handle);
319 unload_profdata (profdata_handle);
321 return 0;
325 /* Handle program arguments. */
326 static error_t
327 parse_opt (int key, char *arg, struct argp_state *state)
329 switch (key)
331 case 'c':
332 mode |= CALL_PAIRS;
333 break;
334 case 'p':
335 mode |= FLAT_MODE;
336 break;
337 case 'q':
338 mode |= CALL_GRAPH_MODE;
339 break;
340 case OPT_TEST:
341 do_test = 1;
342 break;
343 default:
344 return ARGP_ERR_UNKNOWN;
346 return 0;
350 /* Print the version information. */
351 static void
352 print_version (FILE *stream, struct argp_state *state)
354 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
355 fprintf (stream, gettext ("\
356 Copyright (C) %s Free Software Foundation, Inc.\n\
357 This is free software; see the source for copying conditions. There is NO\n\
358 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
360 "2001");
361 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
365 /* Note that we must not use `dlopen' etc. The shobj object must not
366 be loaded for use. */
367 static struct shobj *
368 load_shobj (const char *name)
370 struct link_map *map = NULL;
371 struct shobj *result;
372 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
373 ElfW(Addr) mapend = 0;
374 const ElfW(Phdr) *ph;
375 size_t textsize;
376 unsigned int log_hashfraction;
377 ElfW(Ehdr) *ehdr;
378 int fd;
379 ElfW(Shdr) *shdr;
380 void *ptr;
381 size_t pagesize = getpagesize ();
382 const char *shstrtab;
383 int idx;
384 ElfW(Shdr) *symtab_entry;
386 /* Since we use dlopen() we must be prepared to work around the sometimes
387 strange lookup rules for the shared objects. If we have a file foo.so
388 in the current directory and the user specfies foo.so on the command
389 line (without specifying a directory) we should load the file in the
390 current directory even if a normal dlopen() call would read the other
391 file. We do this by adding a directory portion to the name. */
392 if (strchr (name, '/') == NULL)
394 char *load_name = (char *) alloca (strlen (name) + 3);
395 stpcpy (stpcpy (load_name, "./"), name);
397 map = (struct link_map *) dlopen (load_name, RTLD_LAZY);
399 if (map == NULL)
401 map = (struct link_map *) dlopen (name, RTLD_LAZY);
402 if (map == NULL)
404 error (0, errno, _("failed to load shared object `%s'"), name);
405 return NULL;
409 /* Prepare the result. */
410 result = (struct shobj *) calloc (1, sizeof (struct shobj));
411 if (result == NULL)
413 error (0, errno, _("cannot create internal descriptors"));
414 dlclose (map);
415 return NULL;
417 result->name = name;
418 result->map = map;
420 /* Compute the size of the sections which contain program code.
421 This must match the code in dl-profile.c (_dl_start_profile). */
422 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
423 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
425 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
426 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
427 & ~(pagesize - 1));
429 if (start < mapstart)
430 mapstart = start;
431 if (end > mapend)
432 mapend = end;
435 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
436 HISTFRACTION * sizeof (HISTCOUNTER));
437 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
438 HISTFRACTION * sizeof (HISTCOUNTER));
439 if (do_test)
440 printf ("load addr: %0#*" PRIxPTR "\n"
441 "lower bound PC: %0#*" PRIxPTR "\n"
442 "upper bound PC: %0#*" PRIxPTR "\n",
443 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
444 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
445 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
447 textsize = result->highpc - result->lowpc;
448 result->kcountsize = textsize / HISTFRACTION;
449 result->hashfraction = HASHFRACTION;
450 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
451 /* If HASHFRACTION is a power of two, mcount can use shifting
452 instead of integer division. Precompute shift amount. */
453 log_hashfraction = __builtin_ffs (result->hashfraction
454 * sizeof (struct here_fromstruct)) - 1;
455 else
456 log_hashfraction = -1;
457 if (do_test)
458 printf ("hashfraction = %d\ndivider = %Zu\n",
459 result->hashfraction,
460 result->hashfraction * sizeof (struct here_fromstruct));
461 result->tossize = textsize / HASHFRACTION;
462 result->fromlimit = textsize * ARCDENSITY / 100;
463 if (result->fromlimit < MINARCS)
464 result->fromlimit = MINARCS;
465 if (result->fromlimit > MAXARCS)
466 result->fromlimit = MAXARCS;
467 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
469 result->expected_size = (sizeof (struct gmon_hdr)
470 + 4 + sizeof (struct gmon_hist_hdr)
471 + result->kcountsize
472 + 4 + 4
473 + (result->fromssize
474 * sizeof (struct here_cg_arc_record)));
476 if (do_test)
477 printf ("expected size: %Zd\n", result->expected_size);
479 #define SCALE_1_TO_1 0x10000L
481 if (result->kcountsize < result->highpc - result->lowpc)
483 size_t range = result->highpc - result->lowpc;
484 size_t quot = range / result->kcountsize;
486 if (quot >= SCALE_1_TO_1)
487 result->s_scale = 1;
488 else if (quot >= SCALE_1_TO_1 / 256)
489 result->s_scale = SCALE_1_TO_1 / quot;
490 else if (range > ULONG_MAX / 256)
491 result->s_scale = ((SCALE_1_TO_1 * 256)
492 / (range / (result->kcountsize / 256)));
493 else
494 result->s_scale = ((SCALE_1_TO_1 * 256)
495 / ((range * 256) / result->kcountsize));
497 else
498 result->s_scale = SCALE_1_TO_1;
500 if (do_test)
501 printf ("s_scale: %d\n", result->s_scale);
503 /* Determine the dynamic string table. */
504 if (map->l_info[DT_STRTAB] == NULL)
505 result->dynstrtab = NULL;
506 else
507 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
508 if (do_test)
509 printf ("string table: %p\n", result->dynstrtab);
511 /* Determine the soname. */
512 if (map->l_info[DT_SONAME] == NULL)
513 result->soname = NULL;
514 else
515 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
516 if (do_test && result->soname != NULL)
517 printf ("soname: %s\n", result->soname);
519 /* Now we have to load the symbol table.
521 First load the section header table. */
522 ehdr = (ElfW(Ehdr) *) map->l_addr;
524 /* Make sure we are on the right party. */
525 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
526 abort ();
528 /* And we need the shared object file descriptor again. */
529 fd = open (map->l_name, O_RDONLY);
530 if (fd == -1)
531 /* Dooh, this really shouldn't happen. We know the file is available. */
532 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
533 map->l_name);
535 /* Now map the section header. */
536 ptr = mmap (NULL, (ehdr->e_shnum * sizeof (ElfW(Shdr))
537 + (ehdr->e_shoff & (pagesize - 1))), PROT_READ,
538 MAP_SHARED|MAP_FILE, fd, ehdr->e_shoff & ~(pagesize - 1));
539 if (ptr == MAP_FAILED)
540 error (EXIT_FAILURE, errno, _("mapping of section headers failed"));
541 shdr = (ElfW(Shdr) *) ((char *) ptr + (ehdr->e_shoff & (pagesize - 1)));
543 /* Get the section header string table. */
544 ptr = mmap (NULL, (shdr[ehdr->e_shstrndx].sh_size
545 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))),
546 PROT_READ, MAP_SHARED|MAP_FILE, fd,
547 shdr[ehdr->e_shstrndx].sh_offset & ~(pagesize - 1));
548 if (ptr == MAP_FAILED)
549 error (EXIT_FAILURE, errno,
550 _("mapping of section header string table failed"));
551 shstrtab = ((const char *) ptr
552 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1)));
554 /* Search for the ".symtab" section. */
555 symtab_entry = NULL;
556 for (idx = 0; idx < ehdr->e_shnum; ++idx)
557 if (shdr[idx].sh_type == SHT_SYMTAB
558 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
560 symtab_entry = &shdr[idx];
561 break;
564 /* We don't need the section header string table anymore. */
565 munmap (ptr, (shdr[ehdr->e_shstrndx].sh_size
566 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))));
568 if (symtab_entry == NULL)
570 fprintf (stderr, _("\
571 *** The file `%s' is stripped: no detailed analysis possible\n"),
572 name);
573 result->symtab = NULL;
574 result->strtab = NULL;
576 else
578 ElfW(Off) min_offset, max_offset;
579 ElfW(Shdr) *strtab_entry;
581 strtab_entry = &shdr[symtab_entry->sh_link];
583 /* Find the minimum and maximum offsets that include both the symbol
584 table and the string table. */
585 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
587 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
588 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
590 else
592 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
593 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
596 result->symbol_map = mmap (NULL, max_offset - min_offset,
597 PROT_READ, MAP_SHARED|MAP_FILE, fd,
598 min_offset);
599 if (result->symbol_map == NULL)
600 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
602 result->symtab
603 = (const ElfW(Sym) *) ((const char *) result->symbol_map
604 + (symtab_entry->sh_offset - min_offset));
605 result->symtab_size = symtab_entry->sh_size;
606 result->strtab = ((const char *) result->symbol_map
607 + (strtab_entry->sh_offset - min_offset));
608 result->symbol_mapsize = max_offset - min_offset;
611 /* Now we also don't need the section header table anymore. */
612 munmap ((char *) shdr - (ehdr->e_shoff & (pagesize - 1)),
613 (ehdr->e_phnum * sizeof (ElfW(Shdr))
614 + (ehdr->e_shoff & (pagesize - 1))));
616 /* Free the descriptor for the shared object. */
617 close (fd);
619 return result;
623 static void
624 unload_shobj (struct shobj *shobj)
626 munmap (shobj->symbol_map, shobj->symbol_mapsize);
627 dlclose (shobj->map);
631 static struct profdata *
632 load_profdata (const char *name, struct shobj *shobj)
634 struct profdata *result;
635 int fd;
636 struct stat st;
637 void *addr;
638 struct gmon_hdr gmon_hdr;
639 struct gmon_hist_hdr hist_hdr;
640 uint32_t *narcsp;
641 size_t fromlimit;
642 struct here_cg_arc_record *data;
643 struct here_fromstruct *froms;
644 uint16_t *tos;
645 size_t fromidx;
646 size_t idx;
648 fd = open (name, O_RDONLY);
649 if (fd == -1)
651 char *ext_name;
653 if (errno != ENOENT || strchr (name, '/') != NULL)
654 /* The file exists but we are not allowed to read it or the
655 file does not exist and the name includes a path
656 specification.. */
657 return NULL;
659 /* A file with the given name does not exist in the current
660 directory, try it in the default location where the profiling
661 files are created. */
662 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
663 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
664 name = ext_name;
666 fd = open (ext_name, O_RDONLY);
667 if (fd == -1)
669 /* Even this file does not exist. */
670 error (0, errno, _("cannot load profiling data"));
671 return NULL;
675 /* We have found the file, now make sure it is the right one for the
676 data file. */
677 if (fstat (fd, &st) < 0)
679 error (0, errno, _("while stat'ing profiling data file"));
680 close (fd);
681 return NULL;
684 if (st.st_size != shobj->expected_size)
686 error (0, 0,
687 _("profiling data file `%s' does not match shared object `%s'"),
688 name, shobj->name);
689 close (fd);
690 return NULL;
693 /* The data file is most probably the right one for our shared
694 object. Map it now. */
695 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
696 if (addr == MAP_FAILED)
698 error (0, errno, _("failed to mmap the profiling data file"));
699 close (fd);
700 return NULL;
703 /* We don't need the file desriptor anymore. */
704 if (close (fd) < 0)
706 error (0, errno, _("error while closing the profiling data file"));
707 munmap (addr, st.st_size);
708 return NULL;
711 /* Prepare the result. */
712 result = (struct profdata *) calloc (1, sizeof (struct profdata));
713 if (result == NULL)
715 error (0, errno, _("cannot create internal descriptor"));
716 munmap (addr, st.st_size);
717 return NULL;
720 /* Store the address and size so that we can later free the resources. */
721 result->addr = addr;
722 result->size = st.st_size;
724 /* Pointer to data after the header. */
725 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
726 result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist
727 + sizeof (uint32_t));
728 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
729 + sizeof (struct gmon_hist_hdr));
731 /* Compute pointer to array of the arc information. */
732 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
733 + sizeof (uint32_t));
734 result->narcs = *narcsp;
735 result->data = (struct here_cg_arc_record *) ((char *) narcsp
736 + sizeof (uint32_t));
738 /* Create the gmon_hdr we expect or write. */
739 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
740 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
741 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
743 /* Create the hist_hdr we expect or write. */
744 *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
745 *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
746 if (do_test)
747 printf ("low_pc = %p\nhigh_pc = %p\n",
748 *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc);
749 *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
750 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
751 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
752 hist_hdr.dimen_abbrev = 's';
754 /* Test whether the header of the profiling data is ok. */
755 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
756 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
757 || memcmp (result->hist_hdr, &hist_hdr,
758 sizeof (struct gmon_hist_hdr)) != 0
759 || narcsp[-1] != GMON_TAG_CG_ARC)
761 free (result);
762 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
763 name, shobj->name);
764 munmap (addr, st.st_size);
765 return NULL;
768 /* We are pretty sure now that this is a correct input file. Set up
769 the remaining information in the result structure and return. */
770 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
771 if (result->tos == NULL)
773 error (0, errno, _("cannot create internal descriptor"));
774 munmap (addr, st.st_size);
775 free (result);
776 return NULL;
779 result->froms = (struct here_fromstruct *) ((char *) result->tos
780 + shobj->tossize);
781 fromidx = 0;
783 /* Now we have to process all the arc count entries. */
784 fromlimit = shobj->fromlimit;
785 data = result->data;
786 froms = result->froms;
787 tos = result->tos;
788 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
790 size_t to_index;
791 size_t newfromidx;
792 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
793 newfromidx = fromidx++;
794 froms[newfromidx].here = &data[idx];
795 froms[newfromidx].link = tos[to_index];
796 tos[to_index] = newfromidx;
799 return result;
803 static void
804 unload_profdata (struct profdata *profdata)
806 free (profdata->tos);
807 munmap (profdata->addr, profdata->size);
808 free (profdata);
812 static void
813 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
815 volatile uint16_t *kcount = profdata->kcount;
816 size_t maxkidx = shobj->kcountsize;
817 size_t factor = 2 * (65536 / shobj->s_scale);
818 size_t kidx = 0;
819 size_t sidx = 0;
821 while (sidx < symidx)
823 uintptr_t start = sortsym[sidx]->addr;
824 uintptr_t end = start + sortsym[sidx]->size;
826 while (kidx < maxkidx && factor * kidx < start)
827 ++kidx;
828 if (kidx == maxkidx)
829 break;
831 while (kidx < maxkidx && factor * kidx < end)
832 sortsym[sidx]->ticks += kcount[kidx++];
833 if (kidx == maxkidx)
834 break;
836 total_ticks += sortsym[sidx++]->ticks;
841 static size_t
842 find_symbol (uintptr_t addr)
844 size_t sidx = 0;
846 while (sidx < symidx)
848 uintptr_t start = sortsym[sidx]->addr;
849 uintptr_t end = start + sortsym[sidx]->size;
851 if (addr >= start && addr < end)
852 return sidx;
854 if (addr < start)
855 break;
857 ++sidx;
860 return (size_t) -1l;
864 static void
865 count_calls (struct shobj *shobj, struct profdata *profdata)
867 struct here_cg_arc_record *data = profdata->data;
868 uint32_t narcs = profdata->narcs;
869 uint32_t cnt;
871 for (cnt = 0; cnt < narcs; ++cnt)
873 uintptr_t here = data[cnt].self_pc;
874 size_t symbol_idx;
876 /* Find the symbol for this address. */
877 symbol_idx = find_symbol (here);
878 if (symbol_idx != (size_t) -1l)
879 sortsym[symbol_idx]->calls += data[cnt].count;
884 static int
885 symorder (const void *o1, const void *o2)
887 const struct known_symbol *p1 = (const struct known_symbol *) o1;
888 const struct known_symbol *p2 = (const struct known_symbol *) o2;
890 return p1->addr - p2->addr;
894 static void
895 printsym (const void *node, VISIT value, int level)
897 if (value == leaf || value == postorder)
898 sortsym[symidx++] = *(struct known_symbol **) node;
902 static void
903 read_symbols (struct shobj *shobj)
905 int n = 0;
907 /* Initialize the obstacks. */
908 #define obstack_chunk_alloc malloc
909 #define obstack_chunk_free free
910 obstack_init (&shobj->ob_str);
911 obstack_init (&shobj->ob_sym);
912 obstack_init (&ob_list);
914 /* Process the symbols. */
915 if (shobj->symtab != NULL)
917 const ElfW(Sym) *sym = shobj->symtab;
918 const ElfW(Sym) *sym_end
919 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
920 for (; sym < sym_end; sym++)
921 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
922 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
923 && sym->st_size != 0)
925 struct known_symbol **existp;
926 struct known_symbol *newsym
927 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
928 sizeof (*newsym));
929 if (newsym == NULL)
930 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
932 newsym->name = &shobj->strtab[sym->st_name];
933 newsym->addr = sym->st_value;
934 newsym->size = sym->st_size;
935 newsym->ticks = 0;
936 newsym->calls = 0;
938 existp = tfind (newsym, &symroot, symorder);
939 if (existp == NULL)
941 /* New function. */
942 tsearch (newsym, &symroot, symorder);
943 ++n;
945 else
947 /* The function is already defined. See whether we have
948 a better name here. */
949 if ((*existp)->name[0] == '_' && newsym->name[0] != '_')
950 *existp = newsym;
951 else
952 /* We don't need the allocated memory. */
953 obstack_free (&shobj->ob_sym, newsym);
957 else
959 /* Blarg, the binary is stripped. We have to rely on the
960 information contained in the dynamic section of the object. */
961 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
962 l_info[DT_SYMTAB]);
963 const char *strtab = (const char *) D_PTR (shobj->map,
964 l_info[DT_STRTAB]);
966 /* We assume that the string table follows the symbol table,
967 because there is no way in ELF to know the size of the
968 dynamic symbol table without looking at the section headers. */
969 while ((void *) symtab < (void *) strtab)
971 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
972 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
973 && symtab->st_size != 0)
975 struct known_symbol *newsym;
976 struct known_symbol **existp;
978 newsym =
979 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
980 sizeof (*newsym));
981 if (newsym == NULL)
982 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
984 newsym->name = &strtab[symtab->st_name];
985 newsym->addr = symtab->st_value;
986 newsym->size = symtab->st_size;
987 newsym->ticks = 0;
988 newsym->froms = NULL;
989 newsym->tos = NULL;
991 existp = tfind (newsym, &symroot, symorder);
992 if (existp == NULL)
994 /* New function. */
995 tsearch (newsym, &symroot, symorder);
996 ++n;
998 else
1000 /* The function is already defined. See whether we have
1001 a better name here. */
1002 if ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1003 *existp = newsym;
1004 else
1005 /* We don't need the allocated memory. */
1006 obstack_free (&shobj->ob_sym, newsym);
1010 ++symtab;
1014 sortsym = malloc (n * sizeof (struct known_symbol *));
1015 if (sortsym == NULL)
1016 abort ();
1018 twalk (symroot, printsym);
1022 static void
1023 add_arcs (struct profdata *profdata)
1025 uint32_t narcs = profdata->narcs;
1026 struct here_cg_arc_record *data = profdata->data;
1027 uint32_t cnt;
1029 for (cnt = 0; cnt < narcs; ++cnt)
1031 /* First add the incoming arc. */
1032 size_t sym_idx = find_symbol (data[cnt].self_pc);
1034 if (sym_idx != (size_t) -1l)
1036 struct known_symbol *sym = sortsym[sym_idx];
1037 struct arc_list *runp = sym->froms;
1039 while (runp != NULL
1040 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1041 || (data[cnt].from_pc != 0
1042 && (runp->idx == (size_t) -1l
1043 || data[cnt].from_pc < sortsym[runp->idx]->addr
1044 || (data[cnt].from_pc
1045 >= (sortsym[runp->idx]->addr
1046 + sortsym[runp->idx]->size))))))
1047 runp = runp->next;
1049 if (runp == NULL)
1051 /* We need a new entry. */
1052 struct arc_list *newp = (struct arc_list *)
1053 obstack_alloc (&ob_list, sizeof (struct arc_list));
1055 if (data[cnt].from_pc == 0)
1056 newp->idx = (size_t) -1l;
1057 else
1058 newp->idx = find_symbol (data[cnt].from_pc);
1059 newp->count = data[cnt].count;
1060 newp->next = sym->froms;
1061 sym->froms = newp;
1063 else
1064 /* Increment the counter for the found entry. */
1065 runp->count += data[cnt].count;
1068 /* Now add it to the appropriate outgoing list. */
1069 sym_idx = find_symbol (data[cnt].from_pc);
1070 if (sym_idx != (size_t) -1l)
1072 struct known_symbol *sym = sortsym[sym_idx];
1073 struct arc_list *runp = sym->tos;
1075 while (runp != NULL
1076 && (runp->idx == (size_t) -1l
1077 || data[cnt].self_pc < sortsym[runp->idx]->addr
1078 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1079 + sortsym[runp->idx]->size)))
1080 runp = runp->next;
1082 if (runp == NULL)
1084 /* We need a new entry. */
1085 struct arc_list *newp = (struct arc_list *)
1086 obstack_alloc (&ob_list, sizeof (struct arc_list));
1088 newp->idx = find_symbol (data[cnt].self_pc);
1089 newp->count = data[cnt].count;
1090 newp->next = sym->tos;
1091 sym->tos = newp;
1093 else
1094 /* Increment the counter for the found entry. */
1095 runp->count += data[cnt].count;
1101 static int
1102 countorder (const void *p1, const void *p2)
1104 struct known_symbol *s1 = (struct known_symbol *) p1;
1105 struct known_symbol *s2 = (struct known_symbol *) p2;
1107 if (s1->ticks != s2->ticks)
1108 return (int) (s2->ticks - s1->ticks);
1110 if (s1->calls != s2->calls)
1111 return (int) (s2->calls - s1->calls);
1113 return strcmp (s1->name, s2->name);
1117 static double tick_unit;
1118 static uintmax_t cumu_ticks;
1120 static void
1121 printflat (const void *node, VISIT value, int level)
1123 if (value == leaf || value == postorder)
1125 struct known_symbol *s = *(struct known_symbol **) node;
1127 cumu_ticks += s->ticks;
1129 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1130 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1131 tick_unit * cumu_ticks,
1132 tick_unit * s->ticks,
1133 s->calls,
1134 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1135 /* FIXME: don't know about called functions. */
1136 s->name);
1141 /* ARGUSED */
1142 static void
1143 freenoop (void *p)
1148 static void
1149 generate_flat_profile (struct profdata *profdata)
1151 size_t n;
1152 void *data = NULL;
1154 tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate;
1156 printf ("Flat profile:\n\n"
1157 "Each sample counts as %g %s.\n",
1158 tick_unit, profdata->hist_hdr->dimen);
1159 fputs (" % cumulative self self total\n"
1160 " time seconds seconds calls us/call us/call name\n",
1161 stdout);
1163 for (n = 0; n < symidx; ++n)
1164 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1165 tsearch (sortsym[n], &data, countorder);
1167 twalk (data, printflat);
1169 tdestroy (data, freenoop);
1173 static void
1174 generate_call_graph (struct profdata *profdata)
1176 size_t cnt;
1178 puts ("\nindex % time self children called name\n");
1180 for (cnt = 0; cnt < symidx; ++cnt)
1181 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1183 struct arc_list *runp;
1184 size_t n;
1186 /* First print the from-information. */
1187 runp = sortsym[cnt]->froms;
1188 while (runp != NULL)
1190 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1191 (runp->idx != (size_t) -1l
1192 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1193 0.0, /* FIXME: what's time for the children, recursive */
1194 runp->count, sortsym[cnt]->calls,
1195 (runp->idx != (size_t) -1l ?
1196 sortsym[runp->idx]->name : "<UNKNOWN>"));
1198 if (runp->idx != (size_t) -1l)
1199 printf (" [%Zd]", runp->idx);
1200 putchar_unlocked ('\n');
1202 runp = runp->next;
1205 /* Info abount the function itself. */
1206 n = printf ("[%Zu]", cnt);
1207 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1208 (int) (7 - n), " ",
1209 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1210 sortsym[cnt]->ticks * tick_unit,
1211 0.0, /* FIXME: what's time for the children, recursive */
1212 sortsym[cnt]->calls,
1213 sortsym[cnt]->name, cnt);
1215 /* Info about the functions this function calls. */
1216 runp = sortsym[cnt]->tos;
1217 while (runp != NULL)
1219 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1220 (runp->idx != (size_t) -1l
1221 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1222 0.0, /* FIXME: what's time for the children, recursive */
1223 runp->count);
1225 if (runp->idx != (size_t) -1l)
1226 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1227 sortsym[runp->idx]->calls,
1228 sortsym[runp->idx]->name,
1229 runp->idx);
1230 else
1231 fputs ("??? <UNKNOWN>\n\n", stdout);
1233 runp = runp->next;
1236 fputs ("-----------------------------------------------\n", stdout);
1241 static void
1242 generate_call_pair_list (struct profdata *profdata)
1244 size_t cnt;
1246 for (cnt = 0; cnt < symidx; ++cnt)
1247 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1249 struct arc_list *runp;
1251 /* First print the incoming arcs. */
1252 runp = sortsym[cnt]->froms;
1253 while (runp != NULL)
1255 if (runp->idx == (size_t) -1l)
1256 printf ("\
1257 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1258 sortsym[cnt]->name, runp->count);
1259 runp = runp->next;
1262 /* Next the outgoing arcs. */
1263 runp = sortsym[cnt]->tos;
1264 while (runp != NULL)
1266 printf ("%-34s %-34s %9" PRIdMAX "\n",
1267 sortsym[cnt]->name,
1268 (runp->idx != (size_t) -1l
1269 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1270 runp->count);
1271 runp = runp->next;