Update.
[glibc.git] / elf / sprof.c
blob6b5ccc108ff151131cdbf5c5050e8dd19e323fbe
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997,1998,1999,2000,2001,2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <ldsodefs.h>
36 #include <sys/gmon.h>
37 #include <sys/gmon_out.h>
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/stat.h>
42 /* Get libc version number. */
43 #include "../version.h"
45 #define PACKAGE _libc_intl_domainname
48 #include <endian.h>
49 #if BYTE_ORDER == BIG_ENDIAN
50 #define byteorder ELFDATA2MSB
51 #define byteorder_name "big-endian"
52 #elif BYTE_ORDER == LITTLE_ENDIAN
53 #define byteorder ELFDATA2LSB
54 #define byteorder_name "little-endian"
55 #else
56 #error "Unknown BYTE_ORDER " BYTE_ORDER
57 #define byteorder ELFDATANONE
58 #endif
61 extern int __profile_frequency (void);
63 /* Name and version of program. */
64 static void print_version (FILE *stream, struct argp_state *state);
65 void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
67 #define OPT_TEST 1
69 /* Definitions of arguments for argp functions. The keys 'c', 'p' and 'q' each select one output mode and OPT_TEST (a hidden option) turns on test output; all of them are handled in parse_opt. */
70 static const struct argp_option options[] =
72 { NULL, 0, NULL, 0, N_("Output selection:") },
73 { "call-pairs", 'c', NULL, 0,
74 N_("print list of count paths and their number of use") },
75 { "flat-profile", 'p', NULL, 0,
76 N_("generate flat profile with counts and ticks") },
77 { "graph", 'q', NULL, 0, N_("generate call graph") },
79 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
80 { NULL, 0, NULL, 0, NULL }
83 /* Short description of program. */
84 static const char doc[] = N_("Read and display shared object profiling data");
86 /* Strings for arguments in help texts. */
87 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
89 /* Prototype for option handler. */
90 static error_t parse_opt (int key, char *arg, struct argp_state *state);
92 /* Data structure to communicate with argp functions. */
93 static struct argp argp =
95 options, parse_opt, args_doc, doc, NULL, NULL
99 /* Operation modes. The values are bit flags so that several outputs can be requested at once; parse_opt ORs them into `mode' and main falls back to DEFAULT_MODE when none was selected. */
100 static enum
102 NONE = 0,
103 FLAT_MODE = 1 << 0,
104 CALL_GRAPH_MODE = 1 << 1,
105 CALL_PAIRS = 1 << 2,
107 DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
108 } mode;
110 /* If nonzero the total number of invocations of a function is emitted. */
111 int count_total;
113 /* Nonzero for testing. */
114 int do_test;
116 /* Structure describing calls. Each entry points at one arc record of the profiling data and links to another entry by index. */
117 struct here_fromstruct
119 struct here_cg_arc_record volatile *here; /* Arc record this entry stands for. */
120 uint16_t link; /* Index of the entry previously stored in the same `tos' slot. */
123 /* We define a special type to address the elements of the arc table.
124 This is basically the `gmon_cg_arc_record' format but it includes
125 the room for the tag and it uses real types. */
126 struct here_cg_arc_record
128 uintptr_t from_pc;
129 uintptr_t self_pc;
130 uint32_t count;
131 } __attribute__ ((packed));
134 struct known_symbol;
/* One element of a symbol's list of incoming or outgoing call arcs. */
135 struct arc_list
137 size_t idx; /* Index into sortsym of the symbol at the other end, or (size_t) -1 if it is unknown. */
138 uintmax_t count; /* Accumulated call count of all arcs merged into this element. */
140 struct arc_list *next; /* Next list element or NULL. */
143 static struct obstack ob_list;
/* Per-symbol record built by read_symbols and filled in by the counting passes. */
146 struct known_symbol
148 const char *name; /* Points into the string table of the object. */
149 uintptr_t addr; /* st_value of the symbol. */
150 size_t size; /* st_size of the symbol. */
152 uintmax_t ticks; /* Histogram ticks attributed to the symbol (count_total_ticks). */
153 uintmax_t calls; /* Sum of the arc counts targeting the symbol (count_calls). */
155 struct arc_list *froms; /* Incoming arcs (add_arcs). */
156 struct arc_list *tos; /* Outgoing arcs (add_arcs). */
/* Description of the shared object under analysis; created by load_shobj. */
160 struct shobj
162 const char *name; /* User-provided name. */
164 struct link_map *map; /* Result of the dlopen call in load_shobj. */
165 const char *dynstrtab; /* Dynamic string table of shared object. */
166 const char *soname; /* Soname of shared object. */
168 uintptr_t lowpc; /* Rounded-down start of the executable PT_LOAD segments plus load address. */
169 uintptr_t highpc; /* Rounded-up end of the executable PT_LOAD segments plus load address. */
170 unsigned long int kcountsize; /* (highpc - lowpc) / HISTFRACTION. */
171 size_t expected_size; /* Expected size of profiling file. */
172 size_t tossize; /* (highpc - lowpc) / HASHFRACTION. */
173 size_t fromssize; /* fromlimit * sizeof (struct here_fromstruct). */
174 size_t fromlimit; /* textsize * ARCDENSITY / 100, clamped to MINARCS..MAXARCS. */
175 unsigned int hashfraction; /* Copy of HASHFRACTION. */
176 int s_scale; /* Histogram scale computed in load_shobj. */
178 void *symbol_map; /* File mapping covering the symbol and string table; stays NULL for stripped objects. */
179 size_t symbol_mapsize; /* Length of the mapping at symbol_map. */
180 const ElfW(Sym) *symtab; /* Start of .symtab inside symbol_map, or NULL. */
181 size_t symtab_size; /* Size of .symtab in bytes. */
182 const char *strtab; /* String table linked to .symtab, or NULL. */
184 struct obstack ob_str;
185 struct obstack ob_sym; /* Storage for the struct known_symbol records. */
/* Description of the mapped profiling data file; created by load_profdata. */
189 struct profdata
191 void *addr; /* Start of the file mapping. */
192 off_t size; /* Length of the file mapping. */
194 char *hist; /* First byte after the struct gmon_hdr; the histogram tag is read from here. */
195 struct gmon_hist_hdr *hist_hdr; /* Histogram header following the 32-bit tag. */
196 uint16_t *kcount; /* Histogram counters following hist_hdr. */
197 uint32_t narcs; /* Number of arcs in toset. */
198 struct here_cg_arc_record *data; /* Arc records following the arc count. */
199 uint16_t *tos; /* calloc'ed block holding the tos table followed by froms. */
200 struct here_fromstruct *froms; /* Points tossize bytes into the block at tos. */
203 /* Search tree for symbols. */
204 void *symroot;
205 static struct known_symbol **sortsym;
206 static size_t symidx;
207 static uintmax_t total_ticks;
209 /* Prototypes for local functions. */
210 static struct shobj *load_shobj (const char *name);
211 static void unload_shobj (struct shobj *shobj);
212 static struct profdata *load_profdata (const char *name, struct shobj *shobj);
213 static void unload_profdata (struct profdata *profdata);
214 static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
215 static void count_calls (struct shobj *shobj, struct profdata *profdata);
216 static void read_symbols (struct shobj *shobj);
217 static void add_arcs (struct profdata *profdata);
218 static void generate_flat_profile (struct profdata *profdata);
219 static void generate_call_graph (struct profdata *profdata);
220 static void generate_call_pair_list (struct profdata *profdata);
224 main (int argc, char *argv[])
226 const char *shobj;
227 const char *profdata;
228 struct shobj *shobj_handle;
229 struct profdata *profdata_handle;
230 int remaining;
232 setlocale (LC_ALL, "");
234 /* Initialize the message catalog. */
235 textdomain (_libc_intl_domainname);
237 /* Parse and process arguments. */
238 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
240 if (argc - remaining == 0 || argc - remaining > 2)
242 /* We need exactly two non-option parameter. */
243 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
244 program_invocation_short_name);
245 exit (1);
248 /* Get parameters. */
249 shobj = argv[remaining];
250 if (argc - remaining == 2)
251 profdata = argv[remaining + 1];
252 else
253 /* No filename for the profiling data given. We will determine it
254 from the soname of the shobj, later. */
255 profdata = NULL;
257 /* First see whether we can load the shared object. */
258 shobj_handle = load_shobj (shobj);
259 if (shobj_handle == NULL)
260 exit (1);
262 /* We can now determine the filename for the profiling data, if
263 nececessary. */
264 if (profdata == NULL)
266 char *newp;
267 const char *soname;
268 size_t soname_len;
270 soname = shobj_handle->soname ?: basename (shobj);
271 soname_len = strlen (soname);
272 newp = (char *) alloca (soname_len + sizeof ".profile");
273 stpcpy (mempcpy (newp, soname, soname_len), ".profile");
274 profdata = newp;
277 /* Now see whether the profiling data file matches the given object. */
278 profdata_handle = load_profdata (profdata, shobj_handle);
279 if (profdata_handle == NULL)
281 unload_shobj (shobj_handle);
283 exit (1);
286 read_symbols (shobj_handle);
288 /* Count the ticks. */
289 count_total_ticks (shobj_handle, profdata_handle);
291 /* Count the calls. */
292 count_calls (shobj_handle, profdata_handle);
294 /* Add the arc information. */
295 add_arcs (profdata_handle);
297 /* If no mode is specified fall back to the default mode. */
298 if (mode == NONE)
299 mode = DEFAULT_MODE;
301 /* Do some work. */
302 if (mode & FLAT_MODE)
303 generate_flat_profile (profdata_handle);
305 if (mode & CALL_GRAPH_MODE)
306 generate_call_graph (profdata_handle);
308 if (mode & CALL_PAIRS)
309 generate_call_pair_list (profdata_handle);
311 /* Free the resources. */
312 unload_shobj (shobj_handle);
313 unload_profdata (profdata_handle);
315 return 0;
319 /* Handle program arguments. */
320 static error_t
321 parse_opt (int key, char *arg, struct argp_state *state)
323 switch (key)
325 case 'c':
326 mode |= CALL_PAIRS;
327 break;
328 case 'p':
329 mode |= FLAT_MODE;
330 break;
331 case 'q':
332 mode |= CALL_GRAPH_MODE;
333 break;
334 case OPT_TEST:
335 do_test = 1;
336 break;
337 default:
338 return ARGP_ERR_UNKNOWN;
340 return 0;
344 /* Print the version information to STREAM. Installed as argp_program_version_hook; STATE is not used. The copyright year is passed as an argument so that the translatable message need not change with it. */
345 static void
346 print_version (FILE *stream, struct argp_state *state)
348 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
349 fprintf (stream, gettext ("\
350 Copyright (C) %s Free Software Foundation, Inc.\n\
351 This is free software; see the source for copying conditions. There is NO\n\
352 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
354 "2002");
355 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
359 /* Note that we must not use `dlopen' etc. The shobj object must not
360 be loaded for use. */
361 static struct shobj *
362 load_shobj (const char *name)
364 struct link_map *map = NULL;
365 struct shobj *result;
366 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
367 ElfW(Addr) mapend = 0;
368 const ElfW(Phdr) *ph;
369 size_t textsize;
370 unsigned int log_hashfraction;
371 ElfW(Ehdr) *ehdr;
372 int fd;
373 ElfW(Shdr) *shdr;
374 void *ptr;
375 size_t pagesize = getpagesize ();
376 const char *shstrtab;
377 int idx;
378 ElfW(Shdr) *symtab_entry;
380 /* Since we use dlopen() we must be prepared to work around the sometimes
381 strange lookup rules for the shared objects. If we have a file foo.so
382 in the current directory and the user specfies foo.so on the command
383 line (without specifying a directory) we should load the file in the
384 current directory even if a normal dlopen() call would read the other
385 file. We do this by adding a directory portion to the name. */
386 if (strchr (name, '/') == NULL)
388 char *load_name = (char *) alloca (strlen (name) + 3);
389 stpcpy (stpcpy (load_name, "./"), name);
391 map = (struct link_map *) dlopen (load_name, RTLD_LAZY | __RTLD_SPROF);
393 if (map == NULL)
395 map = (struct link_map *) dlopen (name, RTLD_LAZY | __RTLD_SPROF);
396 if (map == NULL)
398 error (0, errno, _("failed to load shared object `%s'"), name);
399 return NULL;
403 /* Prepare the result. */
404 result = (struct shobj *) calloc (1, sizeof (struct shobj));
405 if (result == NULL)
407 error (0, errno, _("cannot create internal descriptors"));
408 dlclose (map);
409 return NULL;
411 result->name = name;
412 result->map = map;
414 /* Compute the size of the sections which contain program code.
415 This must match the code in dl-profile.c (_dl_start_profile). */
416 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
417 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
419 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
420 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
421 & ~(pagesize - 1));
423 if (start < mapstart)
424 mapstart = start;
425 if (end > mapend)
426 mapend = end;
429 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
430 HISTFRACTION * sizeof (HISTCOUNTER));
431 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
432 HISTFRACTION * sizeof (HISTCOUNTER));
433 if (do_test)
434 printf ("load addr: %0#*" PRIxPTR "\n"
435 "lower bound PC: %0#*" PRIxPTR "\n"
436 "upper bound PC: %0#*" PRIxPTR "\n",
437 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
438 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
439 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
441 textsize = result->highpc - result->lowpc;
442 result->kcountsize = textsize / HISTFRACTION;
443 result->hashfraction = HASHFRACTION;
444 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
445 /* If HASHFRACTION is a power of two, mcount can use shifting
446 instead of integer division. Precompute shift amount. */
447 log_hashfraction = __builtin_ffs (result->hashfraction
448 * sizeof (struct here_fromstruct)) - 1;
449 else
450 log_hashfraction = -1;
451 if (do_test)
452 printf ("hashfraction = %d\ndivider = %Zu\n",
453 result->hashfraction,
454 result->hashfraction * sizeof (struct here_fromstruct));
455 result->tossize = textsize / HASHFRACTION;
456 result->fromlimit = textsize * ARCDENSITY / 100;
457 if (result->fromlimit < MINARCS)
458 result->fromlimit = MINARCS;
459 if (result->fromlimit > MAXARCS)
460 result->fromlimit = MAXARCS;
461 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
463 result->expected_size = (sizeof (struct gmon_hdr)
464 + 4 + sizeof (struct gmon_hist_hdr)
465 + result->kcountsize
466 + 4 + 4
467 + (result->fromssize
468 * sizeof (struct here_cg_arc_record)));
470 if (do_test)
471 printf ("expected size: %Zd\n", result->expected_size);
473 #define SCALE_1_TO_1 0x10000L
475 if (result->kcountsize < result->highpc - result->lowpc)
477 size_t range = result->highpc - result->lowpc;
478 size_t quot = range / result->kcountsize;
480 if (quot >= SCALE_1_TO_1)
481 result->s_scale = 1;
482 else if (quot >= SCALE_1_TO_1 / 256)
483 result->s_scale = SCALE_1_TO_1 / quot;
484 else if (range > ULONG_MAX / 256)
485 result->s_scale = ((SCALE_1_TO_1 * 256)
486 / (range / (result->kcountsize / 256)));
487 else
488 result->s_scale = ((SCALE_1_TO_1 * 256)
489 / ((range * 256) / result->kcountsize));
491 else
492 result->s_scale = SCALE_1_TO_1;
494 if (do_test)
495 printf ("s_scale: %d\n", result->s_scale);
497 /* Determine the dynamic string table. */
498 if (map->l_info[DT_STRTAB] == NULL)
499 result->dynstrtab = NULL;
500 else
501 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
502 if (do_test)
503 printf ("string table: %p\n", result->dynstrtab);
505 /* Determine the soname. */
506 if (map->l_info[DT_SONAME] == NULL)
507 result->soname = NULL;
508 else
509 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
510 if (do_test && result->soname != NULL)
511 printf ("soname: %s\n", result->soname);
513 /* Now we have to load the symbol table.
515 First load the section header table. */
516 ehdr = (ElfW(Ehdr) *) map->l_addr;
518 /* Make sure we are on the right party. */
519 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
520 abort ();
522 /* And we need the shared object file descriptor again. */
523 fd = open (map->l_name, O_RDONLY);
524 if (fd == -1)
525 /* Dooh, this really shouldn't happen. We know the file is available. */
526 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
527 map->l_name);
529 /* Now map the section header. */
530 ptr = mmap (NULL, (ehdr->e_shnum * sizeof (ElfW(Shdr))
531 + (ehdr->e_shoff & (pagesize - 1))), PROT_READ,
532 MAP_SHARED|MAP_FILE, fd, ehdr->e_shoff & ~(pagesize - 1));
533 if (ptr == MAP_FAILED)
534 error (EXIT_FAILURE, errno, _("mapping of section headers failed"));
535 shdr = (ElfW(Shdr) *) ((char *) ptr + (ehdr->e_shoff & (pagesize - 1)));
537 /* Get the section header string table. */
538 ptr = mmap (NULL, (shdr[ehdr->e_shstrndx].sh_size
539 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))),
540 PROT_READ, MAP_SHARED|MAP_FILE, fd,
541 shdr[ehdr->e_shstrndx].sh_offset & ~(pagesize - 1));
542 if (ptr == MAP_FAILED)
543 error (EXIT_FAILURE, errno,
544 _("mapping of section header string table failed"));
545 shstrtab = ((const char *) ptr
546 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1)));
548 /* Search for the ".symtab" section. */
549 symtab_entry = NULL;
550 for (idx = 0; idx < ehdr->e_shnum; ++idx)
551 if (shdr[idx].sh_type == SHT_SYMTAB
552 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
554 symtab_entry = &shdr[idx];
555 break;
558 /* We don't need the section header string table anymore. */
559 munmap (ptr, (shdr[ehdr->e_shstrndx].sh_size
560 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))));
562 if (symtab_entry == NULL)
564 fprintf (stderr, _("\
565 *** The file `%s' is stripped: no detailed analysis possible\n"),
566 name);
567 result->symtab = NULL;
568 result->strtab = NULL;
570 else
572 ElfW(Off) min_offset, max_offset;
573 ElfW(Shdr) *strtab_entry;
575 strtab_entry = &shdr[symtab_entry->sh_link];
577 /* Find the minimum and maximum offsets that include both the symbol
578 table and the string table. */
579 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
581 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
582 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
584 else
586 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
587 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
590 result->symbol_map = mmap (NULL, max_offset - min_offset,
591 PROT_READ, MAP_SHARED|MAP_FILE, fd,
592 min_offset);
593 if (result->symbol_map == NULL)
594 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
596 result->symtab
597 = (const ElfW(Sym) *) ((const char *) result->symbol_map
598 + (symtab_entry->sh_offset - min_offset));
599 result->symtab_size = symtab_entry->sh_size;
600 result->strtab = ((const char *) result->symbol_map
601 + (strtab_entry->sh_offset - min_offset));
602 result->symbol_mapsize = max_offset - min_offset;
605 /* Now we also don't need the section header table anymore. */
606 munmap ((char *) shdr - (ehdr->e_shoff & (pagesize - 1)),
607 (ehdr->e_phnum * sizeof (ElfW(Shdr))
608 + (ehdr->e_shoff & (pagesize - 1))));
610 /* Free the descriptor for the shared object. */
611 close (fd);
613 return result;
617 static void
618 unload_shobj (struct shobj *shobj)
620 munmap (shobj->symbol_map, shobj->symbol_mapsize);
621 dlclose (shobj->map);
625 static struct profdata *
626 load_profdata (const char *name, struct shobj *shobj)
628 struct profdata *result;
629 int fd;
630 struct stat st;
631 void *addr;
632 struct gmon_hdr gmon_hdr;
633 struct gmon_hist_hdr hist_hdr;
634 uint32_t *narcsp;
635 size_t fromlimit;
636 struct here_cg_arc_record *data;
637 struct here_fromstruct *froms;
638 uint16_t *tos;
639 size_t fromidx;
640 size_t idx;
642 fd = open (name, O_RDONLY);
643 if (fd == -1)
645 char *ext_name;
647 if (errno != ENOENT || strchr (name, '/') != NULL)
648 /* The file exists but we are not allowed to read it or the
649 file does not exist and the name includes a path
650 specification.. */
651 return NULL;
653 /* A file with the given name does not exist in the current
654 directory, try it in the default location where the profiling
655 files are created. */
656 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
657 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
658 name = ext_name;
660 fd = open (ext_name, O_RDONLY);
661 if (fd == -1)
663 /* Even this file does not exist. */
664 error (0, errno, _("cannot load profiling data"));
665 return NULL;
669 /* We have found the file, now make sure it is the right one for the
670 data file. */
671 if (fstat (fd, &st) < 0)
673 error (0, errno, _("while stat'ing profiling data file"));
674 close (fd);
675 return NULL;
678 if ((size_t) st.st_size != shobj->expected_size)
680 error (0, 0,
681 _("profiling data file `%s' does not match shared object `%s'"),
682 name, shobj->name);
683 close (fd);
684 return NULL;
687 /* The data file is most probably the right one for our shared
688 object. Map it now. */
689 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
690 if (addr == MAP_FAILED)
692 error (0, errno, _("failed to mmap the profiling data file"));
693 close (fd);
694 return NULL;
697 /* We don't need the file desriptor anymore. */
698 if (close (fd) < 0)
700 error (0, errno, _("error while closing the profiling data file"));
701 munmap (addr, st.st_size);
702 return NULL;
705 /* Prepare the result. */
706 result = (struct profdata *) calloc (1, sizeof (struct profdata));
707 if (result == NULL)
709 error (0, errno, _("cannot create internal descriptor"));
710 munmap (addr, st.st_size);
711 return NULL;
714 /* Store the address and size so that we can later free the resources. */
715 result->addr = addr;
716 result->size = st.st_size;
718 /* Pointer to data after the header. */
719 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
720 result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist
721 + sizeof (uint32_t));
722 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
723 + sizeof (struct gmon_hist_hdr));
725 /* Compute pointer to array of the arc information. */
726 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
727 + sizeof (uint32_t));
728 result->narcs = *narcsp;
729 result->data = (struct here_cg_arc_record *) ((char *) narcsp
730 + sizeof (uint32_t));
732 /* Create the gmon_hdr we expect or write. */
733 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
734 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
735 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
737 /* Create the hist_hdr we expect or write. */
738 *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
739 *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
740 if (do_test)
741 printf ("low_pc = %p\nhigh_pc = %p\n",
742 *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc);
743 *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
744 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
745 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
746 hist_hdr.dimen_abbrev = 's';
748 /* Test whether the header of the profiling data is ok. */
749 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
750 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
751 || memcmp (result->hist_hdr, &hist_hdr,
752 sizeof (struct gmon_hist_hdr)) != 0
753 || narcsp[-1] != GMON_TAG_CG_ARC)
755 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
756 name, shobj->name);
757 if (do_test)
759 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0)
760 puts ("gmon_hdr differs");
761 if (*(uint32_t *) result->hist != GMON_TAG_TIME_HIST)
762 puts ("result->hist differs");
763 if (memcmp (result->hist_hdr, &hist_hdr,
764 sizeof (struct gmon_hist_hdr)) != 0)
765 puts ("hist_hdr differs");
766 if (narcsp[-1] != GMON_TAG_CG_ARC)
767 puts ("narcsp[-1] differs");
769 free (result);
770 munmap (addr, st.st_size);
771 return NULL;
774 /* We are pretty sure now that this is a correct input file. Set up
775 the remaining information in the result structure and return. */
776 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
777 if (result->tos == NULL)
779 error (0, errno, _("cannot create internal descriptor"));
780 munmap (addr, st.st_size);
781 free (result);
782 return NULL;
785 result->froms = (struct here_fromstruct *) ((char *) result->tos
786 + shobj->tossize);
787 fromidx = 0;
789 /* Now we have to process all the arc count entries. */
790 fromlimit = shobj->fromlimit;
791 data = result->data;
792 froms = result->froms;
793 tos = result->tos;
794 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
796 size_t to_index;
797 size_t newfromidx;
798 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
799 newfromidx = fromidx++;
800 froms[newfromidx].here = &data[idx];
801 froms[newfromidx].link = tos[to_index];
802 tos[to_index] = newfromidx;
805 return result;
809 static void
810 unload_profdata (struct profdata *profdata)
812 free (profdata->tos);
813 munmap (profdata->addr, profdata->size);
814 free (profdata);
818 static void
819 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
821 volatile uint16_t *kcount = profdata->kcount;
822 size_t maxkidx = shobj->kcountsize;
823 size_t factor = 2 * (65536 / shobj->s_scale);
824 size_t kidx = 0;
825 size_t sidx = 0;
827 while (sidx < symidx)
829 uintptr_t start = sortsym[sidx]->addr;
830 uintptr_t end = start + sortsym[sidx]->size;
832 while (kidx < maxkidx && factor * kidx < start)
833 ++kidx;
834 if (kidx == maxkidx)
835 break;
837 while (kidx < maxkidx && factor * kidx < end)
838 sortsym[sidx]->ticks += kcount[kidx++];
839 if (kidx == maxkidx)
840 break;
842 total_ticks += sortsym[sidx++]->ticks;
847 static size_t
848 find_symbol (uintptr_t addr)
850 size_t sidx = 0;
852 while (sidx < symidx)
854 uintptr_t start = sortsym[sidx]->addr;
855 uintptr_t end = start + sortsym[sidx]->size;
857 if (addr >= start && addr < end)
858 return sidx;
860 if (addr < start)
861 break;
863 ++sidx;
866 return (size_t) -1l;
870 static void
871 count_calls (struct shobj *shobj, struct profdata *profdata)
873 struct here_cg_arc_record *data = profdata->data;
874 uint32_t narcs = profdata->narcs;
875 uint32_t cnt;
877 for (cnt = 0; cnt < narcs; ++cnt)
879 uintptr_t here = data[cnt].self_pc;
880 size_t symbol_idx;
882 /* Find the symbol for this address. */
883 symbol_idx = find_symbol (here);
884 if (symbol_idx != (size_t) -1l)
885 sortsym[symbol_idx]->calls += data[cnt].count;
890 static int
891 symorder (const void *o1, const void *o2)
893 const struct known_symbol *p1 = (const struct known_symbol *) o1;
894 const struct known_symbol *p2 = (const struct known_symbol *) o2;
896 return p1->addr - p2->addr;
900 static void
901 printsym (const void *node, VISIT value, int level)
903 if (value == leaf || value == postorder)
904 sortsym[symidx++] = *(struct known_symbol **) node;
908 static void
909 read_symbols (struct shobj *shobj)
911 int n = 0;
913 /* Initialize the obstacks. */
914 #define obstack_chunk_alloc malloc
915 #define obstack_chunk_free free
916 obstack_init (&shobj->ob_str);
917 obstack_init (&shobj->ob_sym);
918 obstack_init (&ob_list);
920 /* Process the symbols. */
921 if (shobj->symtab != NULL)
923 const ElfW(Sym) *sym = shobj->symtab;
924 const ElfW(Sym) *sym_end
925 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
926 for (; sym < sym_end; sym++)
927 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
928 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
929 && sym->st_size != 0)
931 struct known_symbol **existp;
932 struct known_symbol *newsym
933 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
934 sizeof (*newsym));
935 if (newsym == NULL)
936 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
938 newsym->name = &shobj->strtab[sym->st_name];
939 newsym->addr = sym->st_value;
940 newsym->size = sym->st_size;
941 newsym->ticks = 0;
942 newsym->calls = 0;
944 existp = tfind (newsym, &symroot, symorder);
945 if (existp == NULL)
947 /* New function. */
948 tsearch (newsym, &symroot, symorder);
949 ++n;
951 else
953 /* The function is already defined. See whether we have
954 a better name here. */
955 if ((*existp)->name[0] == '_' && newsym->name[0] != '_')
956 *existp = newsym;
957 else
958 /* We don't need the allocated memory. */
959 obstack_free (&shobj->ob_sym, newsym);
963 else
965 /* Blarg, the binary is stripped. We have to rely on the
966 information contained in the dynamic section of the object. */
967 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
968 l_info[DT_SYMTAB]);
969 const char *strtab = (const char *) D_PTR (shobj->map,
970 l_info[DT_STRTAB]);
972 /* We assume that the string table follows the symbol table,
973 because there is no way in ELF to know the size of the
974 dynamic symbol table without looking at the section headers. */
975 while ((void *) symtab < (void *) strtab)
977 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
978 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
979 && symtab->st_size != 0)
981 struct known_symbol *newsym;
982 struct known_symbol **existp;
984 newsym =
985 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
986 sizeof (*newsym));
987 if (newsym == NULL)
988 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
990 newsym->name = &strtab[symtab->st_name];
991 newsym->addr = symtab->st_value;
992 newsym->size = symtab->st_size;
993 newsym->ticks = 0;
994 newsym->froms = NULL;
995 newsym->tos = NULL;
997 existp = tfind (newsym, &symroot, symorder);
998 if (existp == NULL)
1000 /* New function. */
1001 tsearch (newsym, &symroot, symorder);
1002 ++n;
1004 else
1006 /* The function is already defined. See whether we have
1007 a better name here. */
1008 if ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1009 *existp = newsym;
1010 else
1011 /* We don't need the allocated memory. */
1012 obstack_free (&shobj->ob_sym, newsym);
1016 ++symtab;
1020 sortsym = malloc (n * sizeof (struct known_symbol *));
1021 if (sortsym == NULL)
1022 abort ();
1024 twalk (symroot, printsym);
1028 static void
1029 add_arcs (struct profdata *profdata)
1031 uint32_t narcs = profdata->narcs;
1032 struct here_cg_arc_record *data = profdata->data;
1033 uint32_t cnt;
1035 for (cnt = 0; cnt < narcs; ++cnt)
1037 /* First add the incoming arc. */
1038 size_t sym_idx = find_symbol (data[cnt].self_pc);
1040 if (sym_idx != (size_t) -1l)
1042 struct known_symbol *sym = sortsym[sym_idx];
1043 struct arc_list *runp = sym->froms;
1045 while (runp != NULL
1046 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1047 || (data[cnt].from_pc != 0
1048 && (runp->idx == (size_t) -1l
1049 || data[cnt].from_pc < sortsym[runp->idx]->addr
1050 || (data[cnt].from_pc
1051 >= (sortsym[runp->idx]->addr
1052 + sortsym[runp->idx]->size))))))
1053 runp = runp->next;
1055 if (runp == NULL)
1057 /* We need a new entry. */
1058 struct arc_list *newp = (struct arc_list *)
1059 obstack_alloc (&ob_list, sizeof (struct arc_list));
1061 if (data[cnt].from_pc == 0)
1062 newp->idx = (size_t) -1l;
1063 else
1064 newp->idx = find_symbol (data[cnt].from_pc);
1065 newp->count = data[cnt].count;
1066 newp->next = sym->froms;
1067 sym->froms = newp;
1069 else
1070 /* Increment the counter for the found entry. */
1071 runp->count += data[cnt].count;
1074 /* Now add it to the appropriate outgoing list. */
1075 sym_idx = find_symbol (data[cnt].from_pc);
1076 if (sym_idx != (size_t) -1l)
1078 struct known_symbol *sym = sortsym[sym_idx];
1079 struct arc_list *runp = sym->tos;
1081 while (runp != NULL
1082 && (runp->idx == (size_t) -1l
1083 || data[cnt].self_pc < sortsym[runp->idx]->addr
1084 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1085 + sortsym[runp->idx]->size)))
1086 runp = runp->next;
1088 if (runp == NULL)
1090 /* We need a new entry. */
1091 struct arc_list *newp = (struct arc_list *)
1092 obstack_alloc (&ob_list, sizeof (struct arc_list));
1094 newp->idx = find_symbol (data[cnt].self_pc);
1095 newp->count = data[cnt].count;
1096 newp->next = sym->tos;
1097 sym->tos = newp;
1099 else
1100 /* Increment the counter for the found entry. */
1101 runp->count += data[cnt].count;
1107 static int
1108 countorder (const void *p1, const void *p2)
1110 struct known_symbol *s1 = (struct known_symbol *) p1;
1111 struct known_symbol *s2 = (struct known_symbol *) p2;
1113 if (s1->ticks != s2->ticks)
1114 return (int) (s2->ticks - s1->ticks);
1116 if (s1->calls != s2->calls)
1117 return (int) (s2->calls - s1->calls);
1119 return strcmp (s1->name, s2->name);
1123 static double tick_unit;
1124 static uintmax_t cumu_ticks;
1126 static void
1127 printflat (const void *node, VISIT value, int level)
1129 if (value == leaf || value == postorder)
1131 struct known_symbol *s = *(struct known_symbol **) node;
1133 cumu_ticks += s->ticks;
1135 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1136 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1137 tick_unit * cumu_ticks,
1138 tick_unit * s->ticks,
1139 s->calls,
1140 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1141 /* FIXME: don't know about called functions. */
1142 s->name);
/* Do nothing.  Passed to tdestroy in generate_flat_profile, so that
   destroying that tree leaves the symbols stored in it untouched.  */
static void
freenoop (void *p)
{
  (void) p;
}
1154 static void
1155 generate_flat_profile (struct profdata *profdata)
1157 size_t n;
1158 void *data = NULL;
1160 tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate;
1162 printf ("Flat profile:\n\n"
1163 "Each sample counts as %g %s.\n",
1164 tick_unit, profdata->hist_hdr->dimen);
1165 fputs (" % cumulative self self total\n"
1166 " time seconds seconds calls us/call us/call name\n",
1167 stdout);
1169 for (n = 0; n < symidx; ++n)
1170 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1171 tsearch (sortsym[n], &data, countorder);
1173 twalk (data, printflat);
1175 tdestroy (data, freenoop);
1179 static void
1180 generate_call_graph (struct profdata *profdata)
1182 size_t cnt;
1184 puts ("\nindex % time self children called name\n");
1186 for (cnt = 0; cnt < symidx; ++cnt)
1187 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1189 struct arc_list *runp;
1190 size_t n;
1192 /* First print the from-information. */
1193 runp = sortsym[cnt]->froms;
1194 while (runp != NULL)
1196 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1197 (runp->idx != (size_t) -1l
1198 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1199 0.0, /* FIXME: what's time for the children, recursive */
1200 runp->count, sortsym[cnt]->calls,
1201 (runp->idx != (size_t) -1l ?
1202 sortsym[runp->idx]->name : "<UNKNOWN>"));
1204 if (runp->idx != (size_t) -1l)
1205 printf (" [%Zd]", runp->idx);
1206 putchar_unlocked ('\n');
1208 runp = runp->next;
1211 /* Info abount the function itself. */
1212 n = printf ("[%Zu]", cnt);
1213 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1214 (int) (7 - n), " ",
1215 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1216 sortsym[cnt]->ticks * tick_unit,
1217 0.0, /* FIXME: what's time for the children, recursive */
1218 sortsym[cnt]->calls,
1219 sortsym[cnt]->name, cnt);
1221 /* Info about the functions this function calls. */
1222 runp = sortsym[cnt]->tos;
1223 while (runp != NULL)
1225 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1226 (runp->idx != (size_t) -1l
1227 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1228 0.0, /* FIXME: what's time for the children, recursive */
1229 runp->count);
1231 if (runp->idx != (size_t) -1l)
1232 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1233 sortsym[runp->idx]->calls,
1234 sortsym[runp->idx]->name,
1235 runp->idx);
1236 else
1237 fputs ("??? <UNKNOWN>\n\n", stdout);
1239 runp = runp->next;
1242 fputs ("-----------------------------------------------\n", stdout);
1247 static void
1248 generate_call_pair_list (struct profdata *profdata)
1250 size_t cnt;
1252 for (cnt = 0; cnt < symidx; ++cnt)
1253 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1255 struct arc_list *runp;
1257 /* First print the incoming arcs. */
1258 runp = sortsym[cnt]->froms;
1259 while (runp != NULL)
1261 if (runp->idx == (size_t) -1l)
1262 printf ("\
1263 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1264 sortsym[cnt]->name, runp->count);
1265 runp = runp->next;
1268 /* Next the outgoing arcs. */
1269 runp = sortsym[cnt]->tos;
1270 while (runp != NULL)
1272 printf ("%-34s %-34s %9" PRIdMAX "\n",
1273 sortsym[cnt]->name,
1274 (runp->idx != (size_t) -1l
1275 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1276 runp->count);
1277 runp = runp->next;