Update.
[glibc.git] / elf / sprof.c
blobbf4ce0efe6b55ea5a5230bd196bd43de12c9d046
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <ldsodefs.h>
36 #include <sys/gmon.h>
37 #include <sys/gmon_out.h>
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/stat.h>
42 /* Get libc version number. */
43 #include "../version.h"
45 #define PACKAGE _libc_intl_domainname
48 #include <endian.h>
49 #if BYTE_ORDER == BIG_ENDIAN
50 #define byteorder ELFDATA2MSB
51 #define byteorder_name "big-endian"
52 #elif BYTE_ORDER == LITTLE_ENDIAN
53 #define byteorder ELFDATA2LSB
54 #define byteorder_name "little-endian"
55 #else
56 #error "Unknown BYTE_ORDER " BYTE_ORDER
57 #define byteorder ELFDATANONE
58 #endif
61 extern int __profile_frequency (void);
63 /* Name and version of program. */
64 static void print_version (FILE *stream, struct argp_state *state);
65 void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
67 #define OPT_TEST 1
69 /* Definitions of arguments for argp functions. */
70 static const struct argp_option options[] =
72 { NULL, 0, NULL, 0, N_("Output selection:") },
73 { "call-pairs", 'c', NULL, 0,
74 N_("print list of count paths and their number of use") },
75 { "flat-profile", 'p', NULL, 0,
76 N_("generate flat profile with counts and ticks") },
77 { "graph", 'q', NULL, 0, N_("generate call graph") },
79 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
80 { NULL, 0, NULL, 0, NULL }
83 /* Short description of program. */
84 static const char doc[] = N_("Read and display shared object profiling data");
86 /* Strings for arguments in help texts. */
87 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
89 /* Prototype for option handler. */
90 static error_t parse_opt (int key, char *arg, struct argp_state *state);
92 /* Data structure to communicate with argp functions. */
93 static struct argp argp =
95 options, parse_opt, args_doc, doc, NULL, NULL
/* Selected output modes.  Each mode is one bit so that several of them
   can be requested at once; NONE means nothing was selected on the
   command line.  */
static enum
{
  NONE = 0x0,
  FLAT_MODE = 0x1,
  CALL_GRAPH_MODE = 0x2,
  CALL_PAIRS = 0x4,

  /* What is produced when no mode was selected.  */
  DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
} mode;
110 /* If nonzero the total number of invocations of a function is emitted. */
111 int count_total;
113 /* Nonzero for testing. */
114 int do_test;
/* Structure describing calls: one element of the `froms' table.  */
struct here_fromstruct
{
  /* The arc record this element refers to.  */
  volatile struct here_cg_arc_record *here;
  /* Index of the next element in the same chain.  */
  uint16_t link;
};
/* Type used to address the elements of the arc table.  It is basically
   the `gmon_cg_arc_record' format, but it includes the room for the tag
   and uses real types instead of byte arrays.  The structure is packed:
   no padding may be inserted between or after the members.  */
struct __attribute__ ((packed)) here_cg_arc_record
{
  uintptr_t from_pc;    /* Address the call was made from.  */
  uintptr_t self_pc;    /* Address that was called.  */
  uint32_t count;       /* Number of times the arc was taken.  */
};
struct known_symbol;

/* Element of the singly linked lists of incoming and outgoing arcs
   which hang off every known symbol.  */
struct arc_list
{
  /* Index into `sortsym' of the symbol at the other end of the arc, or
     (size_t) -1 if that symbol is not known.  */
  size_t idx;
  /* Accumulated call count for this arc.  */
  uintmax_t count;

  /* Next element, NULL at the end of the list.  */
  struct arc_list *next;
};
143 static struct obstack ob_list;
/* Everything recorded about one symbol of the shared object.  */
struct known_symbol
{
  const char *name;     /* Points into the string table.  */
  uintptr_t addr;       /* Symbol value (st_value).  */
  size_t size;          /* Symbol size (st_size).  */

  uintmax_t ticks;      /* Histogram ticks attributed to the symbol.  */
  uintmax_t calls;      /* Sum of the counts of all arcs ending here.  */

  struct arc_list *froms;       /* Incoming arcs.  */
  struct arc_list *tos;         /* Outgoing arcs.  */
};
/* Descriptor of the shared object under examination, as filled in by
   load_shobj.  */
struct shobj
{
  const char *name;             /* User-provided name.  */

  struct link_map *map;         /* Handle returned by dlopen.  */
  const char *dynstrtab;        /* Dynamic string table of shared object.  */
  const char *soname;           /* Soname of shared object.  */

  /* Rounded PC range covered by the executable segments.  */
  uintptr_t lowpc;
  uintptr_t highpc;
  unsigned long int kcountsize; /* Text size divided by HISTFRACTION.  */
  size_t expected_size;         /* Expected size of profiling file.  */
  size_t tossize;               /* Text size divided by HASHFRACTION.  */
  size_t fromssize;             /* fromlimit elements of here_fromstruct.  */
  size_t fromlimit;             /* Arc limit, within MINARCS..MAXARCS.  */
  unsigned int hashfraction;
  int s_scale;

  /* File mapping which contains the symbol table and its string table,
     NULL if the object is stripped.  */
  void *symbol_map;
  size_t symbol_mapsize;
  const ElfW(Sym) *symtab;
  size_t symtab_size;           /* Size of the symbol table in bytes.  */
  const char *strtab;

  struct obstack ob_str;
  struct obstack ob_sym;        /* Storage for the known_symbol records.  */
};
/* Descriptor of a mapped profiling data file, as filled in by
   load_profdata.  */
struct profdata
{
  /* Start and length of the file mapping; needed to unmap it again.  */
  void *addr;
  off_t size;

  char *hist;                           /* Data following the gmon header.  */
  struct gmon_hist_hdr *hist_hdr;       /* Histogram header after the tag.  */
  uint16_t *kcount;                     /* Histogram counters.  */
  uint32_t narcs;                       /* Number of arcs in toset.  */
  struct here_cg_arc_record *data;      /* The arc records in the file.  */
  /* One allocation: the `tos' table, directly followed by `froms'.  */
  uint16_t *tos;
  struct here_fromstruct *froms;
};
203 /* Search tree for symbols. */
204 void *symroot;
205 static struct known_symbol **sortsym;
206 static size_t symidx;
207 static uintmax_t total_ticks;
209 /* Prototypes for local functions. */
210 static struct shobj *load_shobj (const char *name);
211 static void unload_shobj (struct shobj *shobj);
212 static struct profdata *load_profdata (const char *name, struct shobj *shobj);
213 static void unload_profdata (struct profdata *profdata);
214 static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
215 static void count_calls (struct shobj *shobj, struct profdata *profdata);
216 static void read_symbols (struct shobj *shobj);
217 static void add_arcs (struct profdata *profdata);
218 static void generate_flat_profile (struct profdata *profdata);
219 static void generate_call_graph (struct profdata *profdata);
220 static void generate_call_pair_list (struct profdata *profdata);
224 main (int argc, char *argv[])
226 const char *shobj;
227 const char *profdata;
228 struct shobj *shobj_handle;
229 struct profdata *profdata_handle;
230 int remaining;
232 setlocale (LC_ALL, "");
234 /* Initialize the message catalog. */
235 textdomain (_libc_intl_domainname);
237 /* Parse and process arguments. */
238 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
240 if (argc - remaining == 0 || argc - remaining > 2)
242 /* We need exactly two non-option parameter. */
243 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
244 program_invocation_short_name);
245 exit (1);
248 /* Get parameters. */
249 shobj = argv[remaining];
250 if (argc - remaining == 2)
251 profdata = argv[remaining + 1];
252 else
253 /* No filename for the profiling data given. We will determine it
254 from the soname of the shobj, later. */
255 profdata = NULL;
257 /* First see whether we can load the shared object. */
258 shobj_handle = load_shobj (shobj);
259 if (shobj_handle == NULL)
260 exit (1);
262 /* We can now determine the filename for the profiling data, if
263 nececessary. */
264 if (profdata == NULL)
266 char *newp;
268 if (shobj_handle->soname == NULL)
270 unload_shobj (shobj_handle);
272 error (EXIT_FAILURE, 0, _("\
273 no filename for profiling data given and shared object `%s' has no soname"),
274 shobj);
277 newp = (char *) alloca (strlen (shobj_handle->soname)
278 + sizeof ".profile");
279 stpcpy (stpcpy (newp, shobj_handle->soname), ".profile");
280 profdata = newp;
283 /* Now see whether the profiling data file matches the given object. */
284 profdata_handle = load_profdata (profdata, shobj_handle);
285 if (profdata_handle == NULL)
287 unload_shobj (shobj_handle);
289 exit (1);
292 read_symbols (shobj_handle);
294 /* Count the ticks. */
295 count_total_ticks (shobj_handle, profdata_handle);
297 /* Count the calls. */
298 count_calls (shobj_handle, profdata_handle);
300 /* Add the arc information. */
301 add_arcs (profdata_handle);
303 /* If no mode is specified fall back to the default mode. */
304 if (mode == NONE)
305 mode = DEFAULT_MODE;
307 /* Do some work. */
308 if (mode & FLAT_MODE)
309 generate_flat_profile (profdata_handle);
311 if (mode & CALL_GRAPH_MODE)
312 generate_call_graph (profdata_handle);
314 if (mode & CALL_PAIRS)
315 generate_call_pair_list (profdata_handle);
317 /* Free the resources. */
318 unload_shobj (shobj_handle);
319 unload_profdata (profdata_handle);
321 return 0;
325 /* Handle program arguments. */
326 static error_t
327 parse_opt (int key, char *arg, struct argp_state *state)
329 switch (key)
331 case 'c':
332 mode |= CALL_PAIRS;
333 break;
334 case 'p':
335 mode |= FLAT_MODE;
336 break;
337 case 'q':
338 mode |= CALL_GRAPH_MODE;
339 break;
340 case OPT_TEST:
341 do_test = 1;
342 break;
343 default:
344 return ARGP_ERR_UNKNOWN;
346 return 0;
350 /* Print the version information. */
351 static void
352 print_version (FILE *stream, struct argp_state *state)
354 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
355 fprintf (stream, gettext ("\
356 Copyright (C) %s Free Software Foundation, Inc.\n\
357 This is free software; see the source for copying conditions. There is NO\n\
358 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
360 "1999");
361 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
365 /* Note that we must not use `dlopen' etc. The shobj object must not
366 be loaded for use. */
367 static struct shobj *
368 load_shobj (const char *name)
370 struct link_map *map = NULL;
371 struct shobj *result;
372 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
373 ElfW(Addr) mapend = 0;
374 const ElfW(Phdr) *ph;
375 size_t textsize;
376 unsigned int log_hashfraction;
377 ElfW(Ehdr) *ehdr;
378 int fd;
379 ElfW(Shdr) *shdr;
380 void *ptr;
381 size_t pagesize = getpagesize ();
382 const char *shstrtab;
383 int idx;
384 ElfW(Shdr) *symtab_entry;
386 /* Since we use dlopen() we must be prepared to work around the sometimes
387 strange lookup rules for the shared objects. If we have a file foo.so
388 in the current directory and the user specfies foo.so on the command
389 line (without specifying a directory) we should load the file in the
390 current directory even if a normal dlopen() call would read the other
391 file. We do this by adding a directory portion to the name. */
392 if (strchr (name, '/') == NULL)
394 char *load_name = (char *) alloca (strlen (name) + 3);
395 stpcpy (stpcpy (load_name, "./"), name);
397 map = (struct link_map *) dlopen (load_name, RTLD_LAZY);
399 if (map == NULL)
401 map = (struct link_map *) dlopen (name, RTLD_LAZY);
402 if (map == NULL)
404 error (0, errno, _("failed to load shared object `%s'"), name);
405 return NULL;
409 /* Prepare the result. */
410 result = (struct shobj *) calloc (1, sizeof (struct shobj));
411 if (result == NULL)
413 error (0, errno, _("cannot create internal descriptors"));
414 dlclose (map);
415 return NULL;
417 result->name = name;
418 result->map = map;
420 /* Compute the size of the sections which contain program code.
421 This must match the code in dl-profile.c (_dl_start_profile). */
422 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
423 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
425 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
426 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
427 & ~(pagesize - 1));
429 if (start < mapstart)
430 mapstart = start;
431 if (end > mapend)
432 mapend = end;
435 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
436 HISTFRACTION * sizeof (HISTCOUNTER));
437 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
438 HISTFRACTION * sizeof (HISTCOUNTER));
439 if (do_test)
440 printf ("load addr: %0#*" PRIxPTR "\n"
441 "lower bound PC: %0#*" PRIxPTR "\n"
442 "upper bound PC: %0#*" PRIxPTR "\n",
443 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
444 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
445 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
447 textsize = result->highpc - result->lowpc;
448 result->kcountsize = textsize / HISTFRACTION;
449 result->hashfraction = HASHFRACTION;
450 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
451 /* If HASHFRACTION is a power of two, mcount can use shifting
452 instead of integer division. Precompute shift amount. */
453 log_hashfraction = __builtin_ffs (result->hashfraction
454 * sizeof (struct here_fromstruct)) - 1;
455 else
456 log_hashfraction = -1;
457 if (do_test)
458 printf ("hashfraction = %d\ndivider = %Zu\n",
459 result->hashfraction,
460 result->hashfraction * sizeof (struct here_fromstruct));
461 result->tossize = textsize / HASHFRACTION;
462 result->fromlimit = textsize * ARCDENSITY / 100;
463 if (result->fromlimit < MINARCS)
464 result->fromlimit = MINARCS;
465 if (result->fromlimit > MAXARCS)
466 result->fromlimit = MAXARCS;
467 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
469 result->expected_size = (sizeof (struct gmon_hdr)
470 + 4 + sizeof (struct gmon_hist_hdr)
471 + result->kcountsize
472 + 4 + 4
473 + (result->fromssize
474 * sizeof (struct here_cg_arc_record)));
476 if (do_test)
477 printf ("expected size: %Zd\n", result->expected_size);
479 #define SCALE_1_TO_1 0x10000L
481 if (result->kcountsize < result->highpc - result->lowpc)
483 size_t range = result->highpc - result->lowpc;
484 size_t quot = range / result->kcountsize;
486 if (quot >= SCALE_1_TO_1)
487 result->s_scale = 1;
488 else if (quot >= SCALE_1_TO_1 / 256)
489 result->s_scale = SCALE_1_TO_1 / quot;
490 else if (range > ULONG_MAX / 256)
491 result->s_scale = ((SCALE_1_TO_1 * 256)
492 / (range / (result->kcountsize / 256)));
493 else
494 result->s_scale = ((SCALE_1_TO_1 * 256)
495 / ((range * 256) / result->kcountsize));
497 else
498 result->s_scale = SCALE_1_TO_1;
500 if (do_test)
501 printf ("s_scale: %d\n", result->s_scale);
503 /* Determine the dynamic string table. */
504 if (map->l_info[DT_STRTAB] == NULL)
505 result->dynstrtab = NULL;
506 else
507 result->dynstrtab = (const char *) (map->l_addr
508 + map->l_info[DT_STRTAB]->d_un.d_ptr);
509 if (do_test)
510 printf ("string table: %p\n", result->dynstrtab);
512 /* Determine the soname. */
513 if (map->l_info[DT_SONAME] == NULL)
514 result->soname = NULL;
515 else
516 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
517 if (do_test)
518 printf ("soname: %s\n", result->soname);
520 /* Now we have to load the symbol table.
522 First load the section header table. */
523 ehdr = (ElfW(Ehdr) *) map->l_addr;
525 /* Make sure we are on the right party. */
526 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
527 abort ();
529 /* And we need the shared object file descriptor again. */
530 fd = open (map->l_name, O_RDONLY);
531 if (fd == -1)
532 /* Dooh, this really shouldn't happen. We know the file is available. */
533 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
534 map->l_name);
536 /* Now map the section header. */
537 ptr = mmap (NULL, (ehdr->e_shnum * sizeof (ElfW(Shdr))
538 + (ehdr->e_shoff & (pagesize - 1))), PROT_READ,
539 MAP_SHARED|MAP_FILE, fd, ehdr->e_shoff & ~(pagesize - 1));
540 if (ptr == MAP_FAILED)
541 error (EXIT_FAILURE, errno, _("mapping of section headers failed"));
542 shdr = (ElfW(Shdr) *) ((char *) ptr + (ehdr->e_shoff & (pagesize - 1)));
544 /* Get the section header string table. */
545 ptr = mmap (NULL, (shdr[ehdr->e_shstrndx].sh_size
546 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))),
547 PROT_READ, MAP_SHARED|MAP_FILE, fd,
548 shdr[ehdr->e_shstrndx].sh_offset & ~(pagesize - 1));
549 if (ptr == MAP_FAILED)
550 error (EXIT_FAILURE, errno,
551 _("mapping of section header string table failed"));
552 shstrtab = ((const char *) ptr
553 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1)));
555 /* Search for the ".symtab" section. */
556 symtab_entry = NULL;
557 for (idx = 0; idx < ehdr->e_shnum; ++idx)
558 if (shdr[idx].sh_type == SHT_SYMTAB
559 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
561 symtab_entry = &shdr[idx];
562 break;
565 /* We don't need the section header string table anymore. */
566 munmap (ptr, (shdr[ehdr->e_shstrndx].sh_size
567 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))));
569 if (symtab_entry == NULL)
571 fprintf (stderr, _("\
572 *** The file `%s' is stripped: no detailed analysis possible\n"),
573 name);
574 result->symtab = NULL;
575 result->strtab = NULL;
577 else
579 ElfW(Off) min_offset, max_offset;
580 ElfW(Shdr) *strtab_entry;
582 strtab_entry = &shdr[symtab_entry->sh_link];
584 /* Find the minimum and maximum offsets that include both the symbol
585 table and the string table. */
586 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
588 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
589 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
591 else
593 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
594 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
597 result->symbol_map = mmap (NULL, max_offset - min_offset,
598 PROT_READ, MAP_SHARED|MAP_FILE, fd,
599 min_offset);
600 if (result->symbol_map == NULL)
601 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
603 result->symtab
604 = (const ElfW(Sym) *) ((const char *) result->symbol_map
605 + (symtab_entry->sh_offset - min_offset));
606 result->symtab_size = symtab_entry->sh_size;
607 result->strtab = ((const char *) result->symbol_map
608 + (strtab_entry->sh_offset - min_offset));
609 result->symbol_mapsize = max_offset - min_offset;
612 /* Now we also don't need the section header table anymore. */
613 munmap ((char *) shdr - (ehdr->e_shoff & (pagesize - 1)),
614 (ehdr->e_phnum * sizeof (ElfW(Shdr))
615 + (ehdr->e_shoff & (pagesize - 1))));
617 /* Free the descriptor for the shared object. */
618 close (fd);
620 return result;
624 static void
625 unload_shobj (struct shobj *shobj)
627 munmap (shobj->symbol_map, shobj->symbol_mapsize);
628 dlclose (shobj->map);
632 static struct profdata *
633 load_profdata (const char *name, struct shobj *shobj)
635 struct profdata *result;
636 int fd;
637 struct stat st;
638 void *addr;
639 struct gmon_hdr gmon_hdr;
640 struct gmon_hist_hdr hist_hdr;
641 uint32_t *narcsp;
642 size_t fromlimit;
643 struct here_cg_arc_record *data;
644 struct here_fromstruct *froms;
645 uint16_t *tos;
646 size_t fromidx;
647 size_t idx;
649 fd = open (name, O_RDONLY);
650 if (fd == -1)
652 char *ext_name;
654 if (errno != ENOENT || strchr (name, '/') != NULL)
655 /* The file exists but we are not allowed to read it or the
656 file does not exist and the name includes a path
657 specification.. */
658 return NULL;
660 /* A file with the given name does not exist in the current
661 directory, try it in the default location where the profiling
662 files are created. */
663 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
664 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
665 name = ext_name;
667 fd = open (ext_name, O_RDONLY);
668 if (fd == -1)
670 /* Even this file does not exist. */
671 error (0, errno, _("cannot load profiling data"));
672 return NULL;
676 /* We have found the file, now make sure it is the right one for the
677 data file. */
678 if (fstat (fd, &st) < 0)
680 error (0, errno, _("while stat'ing profiling data file"));
681 close (fd);
682 return NULL;
685 if (st.st_size != shobj->expected_size)
687 error (0, 0,
688 _("profiling data file `%s' does not match shared object `%s'"),
689 name, shobj->name);
690 close (fd);
691 return NULL;
694 /* The data file is most probably the right one for our shared
695 object. Map it now. */
696 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
697 if (addr == MAP_FAILED)
699 error (0, errno, _("failed to mmap the profiling data file"));
700 close (fd);
701 return NULL;
704 /* We don't need the file desriptor anymore. */
705 if (close (fd) < 0)
707 error (0, errno, _("error while closing the profiling data file"));
708 munmap (addr, st.st_size);
709 return NULL;
712 /* Prepare the result. */
713 result = (struct profdata *) calloc (1, sizeof (struct profdata));
714 if (result == NULL)
716 error (0, errno, _("cannot create internal descriptor"));
717 munmap (addr, st.st_size);
718 return NULL;
721 /* Store the address and size so that we can later free the resources. */
722 result->addr = addr;
723 result->size = st.st_size;
725 /* Pointer to data after the header. */
726 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
727 result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist
728 + sizeof (uint32_t));
729 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
730 + sizeof (struct gmon_hist_hdr));
732 /* Compute pointer to array of the arc information. */
733 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
734 + sizeof (uint32_t));
735 result->narcs = *narcsp;
736 result->data = (struct here_cg_arc_record *) ((char *) narcsp
737 + sizeof (uint32_t));
739 /* Create the gmon_hdr we expect or write. */
740 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
741 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
742 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
744 /* Create the hist_hdr we expect or write. */
745 *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
746 *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
747 if (do_test)
748 printf ("low_pc = %p\nhigh_pc = %p\n",
749 *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc);
750 *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
751 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
752 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
753 hist_hdr.dimen_abbrev = 's';
755 /* Test whether the header of the profiling data is ok. */
756 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
757 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
758 || memcmp (result->hist_hdr, &hist_hdr,
759 sizeof (struct gmon_hist_hdr)) != 0
760 || narcsp[-1] != GMON_TAG_CG_ARC)
762 free (result);
763 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
764 name, shobj->name);
765 munmap (addr, st.st_size);
766 return NULL;
769 /* We are pretty sure now that this is a correct input file. Set up
770 the remaining information in the result structure and return. */
771 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
772 if (result->tos == NULL)
774 error (0, errno, _("cannot create internal descriptor"));
775 munmap (addr, st.st_size);
776 free (result);
777 return NULL;
780 result->froms = (struct here_fromstruct *) ((char *) result->tos
781 + shobj->tossize);
782 fromidx = 0;
784 /* Now we have to process all the arc count entries. */
785 fromlimit = shobj->fromlimit;
786 data = result->data;
787 froms = result->froms;
788 tos = result->tos;
789 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
791 size_t to_index;
792 size_t newfromidx;
793 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
794 newfromidx = fromidx++;
795 froms[newfromidx].here = &data[idx];
796 froms[newfromidx].link = tos[to_index];
797 tos[to_index] = newfromidx;
800 return result;
804 static void
805 unload_profdata (struct profdata *profdata)
807 free (profdata->tos);
808 munmap (profdata->addr, profdata->size);
809 free (profdata);
813 static void
814 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
816 volatile uint16_t *kcount = profdata->kcount;
817 size_t maxkidx = shobj->kcountsize;
818 size_t factor = 2 * (65536 / shobj->s_scale);
819 size_t kidx = 0;
820 size_t sidx = 0;
822 while (sidx < symidx)
824 uintptr_t start = sortsym[sidx]->addr;
825 uintptr_t end = start + sortsym[sidx]->size;
827 while (kidx < maxkidx && factor * kidx < start)
828 ++kidx;
829 if (kidx == maxkidx)
830 break;
832 while (kidx < maxkidx && factor * kidx < end)
833 sortsym[sidx]->ticks += kcount[kidx++];
834 if (kidx == maxkidx)
835 break;
837 total_ticks += sortsym[sidx++]->ticks;
842 static size_t
843 find_symbol (uintptr_t addr)
845 size_t sidx = 0;
847 while (sidx < symidx)
849 uintptr_t start = sortsym[sidx]->addr;
850 uintptr_t end = start + sortsym[sidx]->size;
852 if (addr >= start && addr < end)
853 return sidx;
855 if (addr < start)
856 break;
858 ++sidx;
861 return (size_t) -1l;
865 static void
866 count_calls (struct shobj *shobj, struct profdata *profdata)
868 struct here_cg_arc_record *data = profdata->data;
869 uint32_t narcs = profdata->narcs;
870 uint32_t cnt;
872 for (cnt = 0; cnt < narcs; ++cnt)
874 uintptr_t here = data[cnt].self_pc;
875 size_t symbol_idx;
877 /* Find the symbol for this address. */
878 symbol_idx = find_symbol (here);
879 if (symbol_idx != (size_t) -1l)
880 sortsym[symbol_idx]->calls += data[cnt].count;
885 static int
886 symorder (const void *o1, const void *o2)
888 const struct known_symbol *p1 = (const struct known_symbol *) o1;
889 const struct known_symbol *p2 = (const struct known_symbol *) o2;
891 return p1->addr - p2->addr;
895 static void
896 printsym (const void *node, VISIT value, int level)
898 if (value == leaf || value == postorder)
899 sortsym[symidx++] = *(struct known_symbol **) node;
903 static void
904 read_symbols (struct shobj *shobj)
906 void *load_addr = (void *) shobj->map->l_addr;
907 int n = 0;
909 /* Initialize the obstacks. */
910 #define obstack_chunk_alloc malloc
911 #define obstack_chunk_free free
912 obstack_init (&shobj->ob_str);
913 obstack_init (&shobj->ob_sym);
914 obstack_init (&ob_list);
916 /* Process the symbols. */
917 if (shobj->symtab)
919 const ElfW(Sym) *sym = shobj->symtab;
920 const ElfW(Sym) *sym_end
921 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
922 for (; sym < sym_end; sym++)
923 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
924 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
925 && sym->st_size != 0)
927 struct known_symbol **existp;
928 struct known_symbol *newsym
929 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
930 sizeof (*newsym));
931 if (newsym == NULL)
932 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
934 newsym->name = &shobj->strtab[sym->st_name];
935 newsym->addr = sym->st_value;
936 newsym->size = sym->st_size;
937 newsym->ticks = 0;
938 newsym->calls = 0;
940 existp = tfind (newsym, &symroot, symorder);
941 if (existp == NULL)
943 /* New function. */
944 tsearch (newsym, &symroot, symorder);
945 ++n;
947 else
949 /* The function is already defined. See whether we have
950 a better name here. */
951 if ((*existp)->name[0] == '_' && newsym->name[0] != '_')
952 *existp = newsym;
953 else
954 /* We don't need the allocated memory. */
955 obstack_free (&shobj->ob_sym, newsym);
959 else
961 /* Blarg, the binary is stripped. We have to rely on the
962 information contained in the dynamic section of the object. */
963 const ElfW(Sym) *symtab = (load_addr
964 + shobj->map->l_info[DT_SYMTAB]->d_un.d_ptr);
965 const char *strtab = (load_addr
966 + shobj->map->l_info[DT_STRTAB]->d_un.d_ptr);
968 /* We assume that the string table follows the symbol table,
969 because there is no way in ELF to know the size of the
970 dynamic symbol table!! */
971 while ((void *) symtab < (void *) strtab)
973 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
974 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
975 && symtab->st_size != 0)
977 struct known_symbol *newsym;
978 struct known_symbol **existp;
980 newsym =
981 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
982 sizeof (*newsym));
983 if (newsym == NULL)
984 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
986 newsym->name = &strtab[symtab->st_name];
987 newsym->addr = symtab->st_value;
988 newsym->size = symtab->st_size;
989 newsym->ticks = 0;
990 newsym->froms = NULL;
991 newsym->tos = NULL;
993 existp = tfind (newsym, &symroot, symorder);
994 if (existp == NULL)
996 /* New function. */
997 tsearch (newsym, &symroot, symorder);
998 ++n;
1000 else
1002 /* The function is already defined. See whether we have
1003 a better name here. */
1004 if ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1005 *existp = newsym;
1006 else
1007 /* We don't need the allocated memory. */
1008 obstack_free (&shobj->ob_sym, newsym);
1013 ++symtab;
1016 sortsym = malloc (n * sizeof (struct known_symbol *));
1017 if (sortsym == NULL)
1018 abort ();
1020 twalk (symroot, printsym);
1024 static void
1025 add_arcs (struct profdata *profdata)
1027 uint32_t narcs = profdata->narcs;
1028 struct here_cg_arc_record *data = profdata->data;
1029 uint32_t cnt;
1031 for (cnt = 0; cnt < narcs; ++cnt)
1033 /* First add the incoming arc. */
1034 size_t sym_idx = find_symbol (data[cnt].self_pc);
1036 if (sym_idx != (size_t) -1l)
1038 struct known_symbol *sym = sortsym[sym_idx];
1039 struct arc_list *runp = sym->froms;
1041 while (runp != NULL
1042 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1043 || (data[cnt].from_pc != 0
1044 && (runp->idx == (size_t) -1l
1045 || data[cnt].from_pc < sortsym[runp->idx]->addr
1046 || (data[cnt].from_pc
1047 >= (sortsym[runp->idx]->addr
1048 + sortsym[runp->idx]->size))))))
1049 runp = runp->next;
1051 if (runp == NULL)
1053 /* We need a new entry. */
1054 struct arc_list *newp = (struct arc_list *)
1055 obstack_alloc (&ob_list, sizeof (struct arc_list));
1057 if (data[cnt].from_pc == 0)
1058 newp->idx = (size_t) -1l;
1059 else
1060 newp->idx = find_symbol (data[cnt].from_pc);
1061 newp->count = data[cnt].count;
1062 newp->next = sym->froms;
1063 sym->froms = newp;
1065 else
1066 /* Increment the counter for the found entry. */
1067 runp->count += data[cnt].count;
1070 /* Now add it to the appropriate outgoing list. */
1071 sym_idx = find_symbol (data[cnt].from_pc);
1072 if (sym_idx != (size_t) -1l)
1074 struct known_symbol *sym = sortsym[sym_idx];
1075 struct arc_list *runp = sym->tos;
1077 while (runp != NULL
1078 && (runp->idx == (size_t) -1l
1079 || data[cnt].self_pc < sortsym[runp->idx]->addr
1080 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1081 + sortsym[runp->idx]->size)))
1082 runp = runp->next;
1084 if (runp == NULL)
1086 /* We need a new entry. */
1087 struct arc_list *newp = (struct arc_list *)
1088 obstack_alloc (&ob_list, sizeof (struct arc_list));
1090 newp->idx = find_symbol (data[cnt].self_pc);
1091 newp->count = data[cnt].count;
1092 newp->next = sym->tos;
1093 sym->tos = newp;
1095 else
1096 /* Increment the counter for the found entry. */
1097 runp->count += data[cnt].count;
1103 static int
1104 countorder (const void *p1, const void *p2)
1106 struct known_symbol *s1 = (struct known_symbol *) p1;
1107 struct known_symbol *s2 = (struct known_symbol *) p2;
1109 if (s1->ticks != s2->ticks)
1110 return (int) (s2->ticks - s1->ticks);
1112 if (s1->calls != s2->calls)
1113 return (int) (s2->calls - s1->calls);
1115 return strcmp (s1->name, s2->name);
1119 static double tick_unit;
1120 static uintmax_t cumu_ticks;
1122 static void
1123 printflat (const void *node, VISIT value, int level)
1125 if (value == leaf || value == postorder)
1127 struct known_symbol *s = *(struct known_symbol **) node;
1129 cumu_ticks += s->ticks;
1131 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1132 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1133 tick_unit * cumu_ticks,
1134 tick_unit * s->ticks,
1135 s->calls,
1136 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1137 /* FIXME: don't know about called functions. */
1138 s->name);
/* Free function for tdestroy which deliberately does nothing: the
   objects stored in the tree are not owned by the tree.  */
static void
freenoop (void *p)
{
  (void) p;
}
1150 static void
1151 generate_flat_profile (struct profdata *profdata)
1153 size_t n;
1154 void *data = NULL;
1156 tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate;
1158 printf ("Flat profile:\n\n"
1159 "Each sample counts as %g %s.\n",
1160 tick_unit, profdata->hist_hdr->dimen);
1161 fputs (" % cumulative self self total\n"
1162 " time seconds seconds calls us/call us/call name\n",
1163 stdout);
1165 for (n = 0; n < symidx; ++n)
1166 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1167 tsearch (sortsym[n], &data, countorder);
1169 twalk (data, printflat);
1171 tdestroy (data, freenoop);
1175 static void
1176 generate_call_graph (struct profdata *profdata)
1178 size_t cnt;
1180 puts ("\nindex % time self children called name\n");
1182 for (cnt = 0; cnt < symidx; ++cnt)
1183 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1185 struct arc_list *runp;
1186 size_t n;
1188 /* First print the from-information. */
1189 runp = sortsym[cnt]->froms;
1190 while (runp != NULL)
1192 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1193 (runp->idx != (size_t) -1l
1194 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1195 0.0, /* FIXME: what's time for the children, recursive */
1196 runp->count, sortsym[cnt]->calls,
1197 (runp->idx != (size_t) -1l ?
1198 sortsym[runp->idx]->name : "<UNKNOWN>"));
1200 if (runp->idx != (size_t) -1l)
1201 printf (" [%Zd]", runp->idx);
1202 putchar_unlocked ('\n');
1204 runp = runp->next;
1207 /* Info abount the function itself. */
1208 n = printf ("[%Zu]", cnt);
1209 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1210 (int) (7 - n), " ",
1211 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1212 sortsym[cnt]->ticks * tick_unit,
1213 0.0, /* FIXME: what's time for the children, recursive */
1214 sortsym[cnt]->calls,
1215 sortsym[cnt]->name, cnt);
1217 /* Info about the functions this function calls. */
1218 runp = sortsym[cnt]->tos;
1219 while (runp != NULL)
1221 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1222 (runp->idx != (size_t) -1l
1223 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1224 0.0, /* FIXME: what's time for the children, recursive */
1225 runp->count);
1227 if (runp->idx != (size_t) -1l)
1228 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1229 sortsym[runp->idx]->calls,
1230 sortsym[runp->idx]->name,
1231 runp->idx);
1232 else
1233 fputs ("??? <UNKNOWN>\n\n", stdout);
1235 runp = runp->next;
1238 fputs ("-----------------------------------------------\n", stdout);
1243 static void
1244 generate_call_pair_list (struct profdata *profdata)
1246 size_t cnt;
1248 for (cnt = 0; cnt < symidx; ++cnt)
1249 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1251 struct arc_list *runp;
1253 /* First print the incoming arcs. */
1254 runp = sortsym[cnt]->froms;
1255 while (runp != NULL)
1257 if (runp->idx == (size_t) -1l)
1258 printf ("\
1259 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1260 sortsym[cnt]->name, runp->count);
1261 runp = runp->next;
1264 /* Next the outgoing arcs. */
1265 runp = sortsym[cnt]->tos;
1266 while (runp != NULL)
1268 printf ("%-34s %-34s %9" PRIdMAX "\n",
1269 sortsym[cnt]->name,
1270 (runp->idx != (size_t) -1l
1271 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1272 runp->count);
1273 runp = runp->next;