Update.
[glibc.git] / elf / sprof.c
blob1a4b0681a76082c39b50a5ca96dd5a5aa59e4a20
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997-2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdbool.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <ldsodefs.h>
37 #include <sys/gmon.h>
38 #include <sys/gmon_out.h>
39 #include <sys/mman.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
43 /* Get libc version number. */
44 #include "../version.h"
46 #define PACKAGE _libc_intl_domainname
49 #include <endian.h>
/* Select the ELF data encoding (and a printable name for it) that
   corresponds to the byte order of the machine we are compiled for.  */
#if BYTE_ORDER == BIG_ENDIAN
# define byteorder	ELFDATA2MSB
# define byteorder_name	"big-endian"
#elif BYTE_ORDER == LITTLE_ENDIAN
# define byteorder	ELFDATA2LSB
# define byteorder_name	"little-endian"
#else
# error "Unknown BYTE_ORDER " BYTE_ORDER
# define byteorder	ELFDATANONE
#endif
62 extern int __profile_frequency (void);
/* Version output is delegated to print_version (defined further down)
   by installing it as argp's version hook.  */
static void print_version (FILE *stream, struct argp_state *state);

void (*argp_program_version_hook) (FILE *, struct argp_state *)
  = print_version;
68 #define OPT_TEST 1
70 /* Definitions of arguments for argp functions. */
71 static const struct argp_option options[] =
73 { NULL, 0, NULL, 0, N_("Output selection:") },
74 { "call-pairs", 'c', NULL, 0,
75 N_("print list of count paths and their number of use") },
76 { "flat-profile", 'p', NULL, 0,
77 N_("generate flat profile with counts and ticks") },
78 { "graph", 'q', NULL, 0, N_("generate call graph") },
80 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
81 { NULL, 0, NULL, 0, NULL }
84 /* Short description of program. */
85 static const char doc[] = N_("Read and display shared object profiling data");
87 /* Strings for arguments in help texts. */
88 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
90 /* Prototype for option handler. */
91 static error_t parse_opt (int key, char *arg, struct argp_state *state);
93 /* Data structure to communicate with argp functions. */
94 static struct argp argp =
96 options, parse_opt, args_doc, doc, NULL, NULL
/* Output modes.  Each mode is a separate bit so that several reports
   can be requested in one run; NONE means the user selected nothing
   and DEFAULT_MODE is used instead.  */
static enum
{
  NONE = 0x0,
  FLAT_MODE = 0x1,
  CALL_GRAPH_MODE = 0x2,
  CALL_PAIRS = 0x4,

  DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
} mode;
111 /* If nonzero the total number of invocations of a function is emitted. */
112 int count_total;
114 /* Nonzero for testing. */
115 int do_test;
/* Structure describing calls: one element of the `froms' table built
   by load_profdata.  HERE points at the arc record in the profiling
   data, LINK holds the index of the next element of the same chain.  */
struct here_fromstruct
{
  struct here_cg_arc_record volatile *here;
  uint16_t link;
};
/* Type used to address the elements of the arc table.  It mirrors the
   `gmon_cg_arc_record' format but includes the room for the tag and
   uses real integer types.  The structure is packed, so the field
   order and sizes must not be changed.  */
struct here_cg_arc_record
{
  uintptr_t from_pc;		/* Address the call came from.  */
  uintptr_t self_pc;		/* Address that was called.  */
  uint32_t count;		/* Number of times the arc was used.  */
} __attribute__ ((packed));
struct known_symbol;

/* Singly linked list element describing one aggregated call arc.
   IDX is an index into `sortsym', or (size_t) -1 when the other end
   of the arc could not be attributed to a symbol.  */
struct arc_list
{
  size_t idx;
  uintmax_t count;		/* Accumulated use count of the arc.  */

  struct arc_list *next;
};

/* Obstack from which all arc_list elements are allocated.  */
static struct obstack ob_list;
/* Everything recorded about one symbol of the shared object.  */
struct known_symbol
{
  const char *name;		/* Points into the object's string table.  */
  uintptr_t addr;		/* Symbol value (st_value).  */
  size_t size;			/* Symbol size (st_size).  */
  bool weak;			/* Binding is STB_WEAK.  */
  bool hidden;			/* Visibility is not STV_DEFAULT.  */

  uintmax_t ticks;		/* Histogram ticks attributed to it.  */
  uintmax_t calls;		/* Sum of the counts of arcs ending here.  */

  struct arc_list *froms;	/* Incoming arcs.  */
  struct arc_list *tos;		/* Outgoing arcs.  */
};
/* Descriptor of the shared object under examination, filled in by
   load_shobj.  */
struct shobj
{
  const char *name;		/* User-provided name.  */

  struct link_map *map;		/* Handle returned by dlopen.  */
  const char *dynstrtab;	/* Dynamic string table of shared object.  */
  const char *soname;		/* Soname of shared object.  */

  /* Geometry of the profiling data derived from the text segments.  */
  uintptr_t lowpc;
  uintptr_t highpc;
  unsigned long int kcountsize;	/* Size of the histogram in bytes.  */
  size_t expected_size;		/* Expected size of profiling file.  */
  size_t tossize;
  size_t fromssize;
  size_t fromlimit;
  unsigned int hashfraction;
  int s_scale;

  /* File mapping holding the symbol table and its string table.  */
  void *symbol_map;
  size_t symbol_mapsize;
  const ElfW(Sym) *symtab;
  size_t symtab_size;		/* Size of the symbol table in bytes.  */
  const char *strtab;

  struct obstack ob_str;
  struct obstack ob_sym;	/* Storage for the known_symbol records.  */
};
/* Descriptor of a mapped profiling data file, filled in by
   load_profdata.  */
struct profdata
{
  /* The file mapping itself; needed to release it again.  */
  void *addr;
  off_t size;

  char *hist;			/* First byte after the gmon header.  */
  struct gmon_hist_hdr *hist_hdr;
  uint16_t *kcount;		/* Histogram counters.  */
  uint32_t narcs;		/* Number of arcs in toset.  */
  struct here_cg_arc_record *data;	/* Arc records in the file.  */
  uint16_t *tos;
  struct here_fromstruct *froms;
};
/* Search tree for symbols (tsearch root), ordered by symorder.  */
void *symroot;

/* Array with the symbols of the tree in walk order, filled by
   read_symbols, and the number of entries stored in it so far.  */
static struct known_symbol **sortsym;
static size_t symidx;

/* Sum of the ticks attributed to symbols by count_total_ticks.  */
static uintmax_t total_ticks;
212 /* Prototypes for local functions. */
213 static struct shobj *load_shobj (const char *name);
214 static void unload_shobj (struct shobj *shobj);
215 static struct profdata *load_profdata (const char *name, struct shobj *shobj);
216 static void unload_profdata (struct profdata *profdata);
217 static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
218 static void count_calls (struct shobj *shobj, struct profdata *profdata);
219 static void read_symbols (struct shobj *shobj);
220 static void add_arcs (struct profdata *profdata);
221 static void generate_flat_profile (struct profdata *profdata);
222 static void generate_call_graph (struct profdata *profdata);
223 static void generate_call_pair_list (struct profdata *profdata);
227 main (int argc, char *argv[])
229 const char *shobj;
230 const char *profdata;
231 struct shobj *shobj_handle;
232 struct profdata *profdata_handle;
233 int remaining;
235 setlocale (LC_ALL, "");
237 /* Initialize the message catalog. */
238 textdomain (_libc_intl_domainname);
240 /* Parse and process arguments. */
241 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
243 if (argc - remaining == 0 || argc - remaining > 2)
245 /* We need exactly two non-option parameter. */
246 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
247 program_invocation_short_name);
248 exit (1);
251 /* Get parameters. */
252 shobj = argv[remaining];
253 if (argc - remaining == 2)
254 profdata = argv[remaining + 1];
255 else
256 /* No filename for the profiling data given. We will determine it
257 from the soname of the shobj, later. */
258 profdata = NULL;
260 /* First see whether we can load the shared object. */
261 shobj_handle = load_shobj (shobj);
262 if (shobj_handle == NULL)
263 exit (1);
265 /* We can now determine the filename for the profiling data, if
266 nececessary. */
267 if (profdata == NULL)
269 char *newp;
270 const char *soname;
271 size_t soname_len;
273 soname = shobj_handle->soname ?: basename (shobj);
274 soname_len = strlen (soname);
275 newp = (char *) alloca (soname_len + sizeof ".profile");
276 stpcpy (mempcpy (newp, soname, soname_len), ".profile");
277 profdata = newp;
280 /* Now see whether the profiling data file matches the given object. */
281 profdata_handle = load_profdata (profdata, shobj_handle);
282 if (profdata_handle == NULL)
284 unload_shobj (shobj_handle);
286 exit (1);
289 read_symbols (shobj_handle);
291 /* Count the ticks. */
292 count_total_ticks (shobj_handle, profdata_handle);
294 /* Count the calls. */
295 count_calls (shobj_handle, profdata_handle);
297 /* Add the arc information. */
298 add_arcs (profdata_handle);
300 /* If no mode is specified fall back to the default mode. */
301 if (mode == NONE)
302 mode = DEFAULT_MODE;
304 /* Do some work. */
305 if (mode & FLAT_MODE)
306 generate_flat_profile (profdata_handle);
308 if (mode & CALL_GRAPH_MODE)
309 generate_call_graph (profdata_handle);
311 if (mode & CALL_PAIRS)
312 generate_call_pair_list (profdata_handle);
314 /* Free the resources. */
315 unload_shobj (shobj_handle);
316 unload_profdata (profdata_handle);
318 return 0;
322 /* Handle program arguments. */
323 static error_t
324 parse_opt (int key, char *arg, struct argp_state *state)
326 switch (key)
328 case 'c':
329 mode |= CALL_PAIRS;
330 break;
331 case 'p':
332 mode |= FLAT_MODE;
333 break;
334 case 'q':
335 mode |= CALL_GRAPH_MODE;
336 break;
337 case OPT_TEST:
338 do_test = 1;
339 break;
340 default:
341 return ARGP_ERR_UNKNOWN;
343 return 0;
347 /* Print the version information. */
348 static void
349 print_version (FILE *stream, struct argp_state *state)
351 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
352 fprintf (stream, gettext ("\
353 Copyright (C) %s Free Software Foundation, Inc.\n\
354 This is free software; see the source for copying conditions. There is NO\n\
355 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
357 "2004");
358 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
362 /* Note that we must not use `dlopen' etc. The shobj object must not
363 be loaded for use. */
364 static struct shobj *
365 load_shobj (const char *name)
367 struct link_map *map = NULL;
368 struct shobj *result;
369 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
370 ElfW(Addr) mapend = 0;
371 const ElfW(Phdr) *ph;
372 size_t textsize;
373 unsigned int log_hashfraction;
374 ElfW(Ehdr) *ehdr;
375 int fd;
376 ElfW(Shdr) *shdr;
377 void *ptr;
378 size_t pagesize = getpagesize ();
379 const char *shstrtab;
380 int idx;
381 ElfW(Shdr) *symtab_entry;
383 /* Since we use dlopen() we must be prepared to work around the sometimes
384 strange lookup rules for the shared objects. If we have a file foo.so
385 in the current directory and the user specfies foo.so on the command
386 line (without specifying a directory) we should load the file in the
387 current directory even if a normal dlopen() call would read the other
388 file. We do this by adding a directory portion to the name. */
389 if (strchr (name, '/') == NULL)
391 char *load_name = (char *) alloca (strlen (name) + 3);
392 stpcpy (stpcpy (load_name, "./"), name);
394 map = (struct link_map *) dlopen (load_name, RTLD_LAZY | __RTLD_SPROF);
396 if (map == NULL)
398 map = (struct link_map *) dlopen (name, RTLD_LAZY | __RTLD_SPROF);
399 if (map == NULL)
401 error (0, errno, _("failed to load shared object `%s'"), name);
402 return NULL;
406 /* Prepare the result. */
407 result = (struct shobj *) calloc (1, sizeof (struct shobj));
408 if (result == NULL)
410 error (0, errno, _("cannot create internal descriptors"));
411 dlclose (map);
412 return NULL;
414 result->name = name;
415 result->map = map;
417 /* Compute the size of the sections which contain program code.
418 This must match the code in dl-profile.c (_dl_start_profile). */
419 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
420 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
422 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
423 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
424 & ~(pagesize - 1));
426 if (start < mapstart)
427 mapstart = start;
428 if (end > mapend)
429 mapend = end;
432 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
433 HISTFRACTION * sizeof (HISTCOUNTER));
434 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
435 HISTFRACTION * sizeof (HISTCOUNTER));
436 if (do_test)
437 printf ("load addr: %0#*" PRIxPTR "\n"
438 "lower bound PC: %0#*" PRIxPTR "\n"
439 "upper bound PC: %0#*" PRIxPTR "\n",
440 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
441 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
442 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
444 textsize = result->highpc - result->lowpc;
445 result->kcountsize = textsize / HISTFRACTION;
446 result->hashfraction = HASHFRACTION;
447 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
448 /* If HASHFRACTION is a power of two, mcount can use shifting
449 instead of integer division. Precompute shift amount. */
450 log_hashfraction = __builtin_ffs (result->hashfraction
451 * sizeof (struct here_fromstruct)) - 1;
452 else
453 log_hashfraction = -1;
454 if (do_test)
455 printf ("hashfraction = %d\ndivider = %Zu\n",
456 result->hashfraction,
457 result->hashfraction * sizeof (struct here_fromstruct));
458 result->tossize = textsize / HASHFRACTION;
459 result->fromlimit = textsize * ARCDENSITY / 100;
460 if (result->fromlimit < MINARCS)
461 result->fromlimit = MINARCS;
462 if (result->fromlimit > MAXARCS)
463 result->fromlimit = MAXARCS;
464 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
466 result->expected_size = (sizeof (struct gmon_hdr)
467 + 4 + sizeof (struct gmon_hist_hdr)
468 + result->kcountsize
469 + 4 + 4
470 + (result->fromssize
471 * sizeof (struct here_cg_arc_record)));
473 if (do_test)
474 printf ("expected size: %Zd\n", result->expected_size);
476 #define SCALE_1_TO_1 0x10000L
478 if (result->kcountsize < result->highpc - result->lowpc)
480 size_t range = result->highpc - result->lowpc;
481 size_t quot = range / result->kcountsize;
483 if (quot >= SCALE_1_TO_1)
484 result->s_scale = 1;
485 else if (quot >= SCALE_1_TO_1 / 256)
486 result->s_scale = SCALE_1_TO_1 / quot;
487 else if (range > ULONG_MAX / 256)
488 result->s_scale = ((SCALE_1_TO_1 * 256)
489 / (range / (result->kcountsize / 256)));
490 else
491 result->s_scale = ((SCALE_1_TO_1 * 256)
492 / ((range * 256) / result->kcountsize));
494 else
495 result->s_scale = SCALE_1_TO_1;
497 if (do_test)
498 printf ("s_scale: %d\n", result->s_scale);
500 /* Determine the dynamic string table. */
501 if (map->l_info[DT_STRTAB] == NULL)
502 result->dynstrtab = NULL;
503 else
504 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
505 if (do_test)
506 printf ("string table: %p\n", result->dynstrtab);
508 /* Determine the soname. */
509 if (map->l_info[DT_SONAME] == NULL)
510 result->soname = NULL;
511 else
512 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
513 if (do_test && result->soname != NULL)
514 printf ("soname: %s\n", result->soname);
516 /* Now we have to load the symbol table.
518 First load the section header table. */
519 ehdr = (ElfW(Ehdr) *) map->l_map_start;
521 /* Make sure we are on the right party. */
522 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
523 abort ();
525 /* And we need the shared object file descriptor again. */
526 fd = open (map->l_name, O_RDONLY);
527 if (fd == -1)
528 /* Dooh, this really shouldn't happen. We know the file is available. */
529 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
530 map->l_name);
532 /* Now map the section header. */
533 ptr = mmap (NULL, (ehdr->e_shnum * sizeof (ElfW(Shdr))
534 + (ehdr->e_shoff & (pagesize - 1))), PROT_READ,
535 MAP_SHARED|MAP_FILE, fd, ehdr->e_shoff & ~(pagesize - 1));
536 if (ptr == MAP_FAILED)
537 error (EXIT_FAILURE, errno, _("mapping of section headers failed"));
538 shdr = (ElfW(Shdr) *) ((char *) ptr + (ehdr->e_shoff & (pagesize - 1)));
540 /* Get the section header string table. */
541 ptr = mmap (NULL, (shdr[ehdr->e_shstrndx].sh_size
542 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))),
543 PROT_READ, MAP_SHARED|MAP_FILE, fd,
544 shdr[ehdr->e_shstrndx].sh_offset & ~(pagesize - 1));
545 if (ptr == MAP_FAILED)
546 error (EXIT_FAILURE, errno,
547 _("mapping of section header string table failed"));
548 shstrtab = ((const char *) ptr
549 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1)));
551 /* Search for the ".symtab" section. */
552 symtab_entry = NULL;
553 for (idx = 0; idx < ehdr->e_shnum; ++idx)
554 if (shdr[idx].sh_type == SHT_SYMTAB
555 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
557 symtab_entry = &shdr[idx];
558 break;
561 /* We don't need the section header string table anymore. */
562 munmap (ptr, (shdr[ehdr->e_shstrndx].sh_size
563 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))));
565 if (symtab_entry == NULL)
567 fprintf (stderr, _("\
568 *** The file `%s' is stripped: no detailed analysis possible\n"),
569 name);
570 result->symtab = NULL;
571 result->strtab = NULL;
573 else
575 ElfW(Off) min_offset, max_offset;
576 ElfW(Shdr) *strtab_entry;
578 strtab_entry = &shdr[symtab_entry->sh_link];
580 /* Find the minimum and maximum offsets that include both the symbol
581 table and the string table. */
582 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
584 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
585 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
587 else
589 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
590 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
593 result->symbol_map = mmap (NULL, max_offset - min_offset,
594 PROT_READ, MAP_SHARED|MAP_FILE, fd,
595 min_offset);
596 if (result->symbol_map == NULL)
597 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
599 result->symtab
600 = (const ElfW(Sym) *) ((const char *) result->symbol_map
601 + (symtab_entry->sh_offset - min_offset));
602 result->symtab_size = symtab_entry->sh_size;
603 result->strtab = ((const char *) result->symbol_map
604 + (strtab_entry->sh_offset - min_offset));
605 result->symbol_mapsize = max_offset - min_offset;
608 /* Now we also don't need the section header table anymore. */
609 munmap ((char *) shdr - (ehdr->e_shoff & (pagesize - 1)),
610 (ehdr->e_phnum * sizeof (ElfW(Shdr))
611 + (ehdr->e_shoff & (pagesize - 1))));
613 /* Free the descriptor for the shared object. */
614 close (fd);
616 return result;
620 static void
621 unload_shobj (struct shobj *shobj)
623 munmap (shobj->symbol_map, shobj->symbol_mapsize);
624 dlclose (shobj->map);
628 static struct profdata *
629 load_profdata (const char *name, struct shobj *shobj)
631 struct profdata *result;
632 int fd;
633 struct stat st;
634 void *addr;
635 struct gmon_hdr gmon_hdr;
636 struct gmon_hist_hdr hist_hdr;
637 uint32_t *narcsp;
638 size_t fromlimit;
639 struct here_cg_arc_record *data;
640 struct here_fromstruct *froms;
641 uint16_t *tos;
642 size_t fromidx;
643 size_t idx;
645 fd = open (name, O_RDONLY);
646 if (fd == -1)
648 char *ext_name;
650 if (errno != ENOENT || strchr (name, '/') != NULL)
651 /* The file exists but we are not allowed to read it or the
652 file does not exist and the name includes a path
653 specification.. */
654 return NULL;
656 /* A file with the given name does not exist in the current
657 directory, try it in the default location where the profiling
658 files are created. */
659 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
660 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
661 name = ext_name;
663 fd = open (ext_name, O_RDONLY);
664 if (fd == -1)
666 /* Even this file does not exist. */
667 error (0, errno, _("cannot load profiling data"));
668 return NULL;
672 /* We have found the file, now make sure it is the right one for the
673 data file. */
674 if (fstat (fd, &st) < 0)
676 error (0, errno, _("while stat'ing profiling data file"));
677 close (fd);
678 return NULL;
681 if ((size_t) st.st_size != shobj->expected_size)
683 error (0, 0,
684 _("profiling data file `%s' does not match shared object `%s'"),
685 name, shobj->name);
686 close (fd);
687 return NULL;
690 /* The data file is most probably the right one for our shared
691 object. Map it now. */
692 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
693 if (addr == MAP_FAILED)
695 error (0, errno, _("failed to mmap the profiling data file"));
696 close (fd);
697 return NULL;
700 /* We don't need the file desriptor anymore. */
701 if (close (fd) < 0)
703 error (0, errno, _("error while closing the profiling data file"));
704 munmap (addr, st.st_size);
705 return NULL;
708 /* Prepare the result. */
709 result = (struct profdata *) calloc (1, sizeof (struct profdata));
710 if (result == NULL)
712 error (0, errno, _("cannot create internal descriptor"));
713 munmap (addr, st.st_size);
714 return NULL;
717 /* Store the address and size so that we can later free the resources. */
718 result->addr = addr;
719 result->size = st.st_size;
721 /* Pointer to data after the header. */
722 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
723 result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist
724 + sizeof (uint32_t));
725 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
726 + sizeof (struct gmon_hist_hdr));
728 /* Compute pointer to array of the arc information. */
729 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
730 + sizeof (uint32_t));
731 result->narcs = *narcsp;
732 result->data = (struct here_cg_arc_record *) ((char *) narcsp
733 + sizeof (uint32_t));
735 /* Create the gmon_hdr we expect or write. */
736 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
737 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
738 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
740 /* Create the hist_hdr we expect or write. */
741 *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
742 *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
743 if (do_test)
744 printf ("low_pc = %p\nhigh_pc = %p\n",
745 *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc);
746 *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
747 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
748 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
749 hist_hdr.dimen_abbrev = 's';
751 /* Test whether the header of the profiling data is ok. */
752 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
753 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
754 || memcmp (result->hist_hdr, &hist_hdr,
755 sizeof (struct gmon_hist_hdr)) != 0
756 || narcsp[-1] != GMON_TAG_CG_ARC)
758 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
759 name, shobj->name);
760 if (do_test)
762 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0)
763 puts ("gmon_hdr differs");
764 if (*(uint32_t *) result->hist != GMON_TAG_TIME_HIST)
765 puts ("result->hist differs");
766 if (memcmp (result->hist_hdr, &hist_hdr,
767 sizeof (struct gmon_hist_hdr)) != 0)
768 puts ("hist_hdr differs");
769 if (narcsp[-1] != GMON_TAG_CG_ARC)
770 puts ("narcsp[-1] differs");
772 free (result);
773 munmap (addr, st.st_size);
774 return NULL;
777 /* We are pretty sure now that this is a correct input file. Set up
778 the remaining information in the result structure and return. */
779 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
780 if (result->tos == NULL)
782 error (0, errno, _("cannot create internal descriptor"));
783 munmap (addr, st.st_size);
784 free (result);
785 return NULL;
788 result->froms = (struct here_fromstruct *) ((char *) result->tos
789 + shobj->tossize);
790 fromidx = 0;
792 /* Now we have to process all the arc count entries. */
793 fromlimit = shobj->fromlimit;
794 data = result->data;
795 froms = result->froms;
796 tos = result->tos;
797 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
799 size_t to_index;
800 size_t newfromidx;
801 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
802 newfromidx = fromidx++;
803 froms[newfromidx].here = &data[idx];
804 froms[newfromidx].link = tos[to_index];
805 tos[to_index] = newfromidx;
808 return result;
812 static void
813 unload_profdata (struct profdata *profdata)
815 free (profdata->tos);
816 munmap (profdata->addr, profdata->size);
817 free (profdata);
821 static void
822 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
824 volatile uint16_t *kcount = profdata->kcount;
825 size_t maxkidx = shobj->kcountsize;
826 size_t factor = 2 * (65536 / shobj->s_scale);
827 size_t kidx = 0;
828 size_t sidx = 0;
830 while (sidx < symidx)
832 uintptr_t start = sortsym[sidx]->addr;
833 uintptr_t end = start + sortsym[sidx]->size;
835 while (kidx < maxkidx && factor * kidx < start)
836 ++kidx;
837 if (kidx == maxkidx)
838 break;
840 while (kidx < maxkidx && factor * kidx < end)
841 sortsym[sidx]->ticks += kcount[kidx++];
842 if (kidx == maxkidx)
843 break;
845 total_ticks += sortsym[sidx++]->ticks;
850 static size_t
851 find_symbol (uintptr_t addr)
853 size_t sidx = 0;
855 while (sidx < symidx)
857 uintptr_t start = sortsym[sidx]->addr;
858 uintptr_t end = start + sortsym[sidx]->size;
860 if (addr >= start && addr < end)
861 return sidx;
863 if (addr < start)
864 break;
866 ++sidx;
869 return (size_t) -1l;
873 static void
874 count_calls (struct shobj *shobj, struct profdata *profdata)
876 struct here_cg_arc_record *data = profdata->data;
877 uint32_t narcs = profdata->narcs;
878 uint32_t cnt;
880 for (cnt = 0; cnt < narcs; ++cnt)
882 uintptr_t here = data[cnt].self_pc;
883 size_t symbol_idx;
885 /* Find the symbol for this address. */
886 symbol_idx = find_symbol (here);
887 if (symbol_idx != (size_t) -1l)
888 sortsym[symbol_idx]->calls += data[cnt].count;
893 static int
894 symorder (const void *o1, const void *o2)
896 const struct known_symbol *p1 = (const struct known_symbol *) o1;
897 const struct known_symbol *p2 = (const struct known_symbol *) o2;
899 return p1->addr - p2->addr;
903 static void
904 printsym (const void *node, VISIT value, int level)
906 if (value == leaf || value == postorder)
907 sortsym[symidx++] = *(struct known_symbol **) node;
911 static void
912 read_symbols (struct shobj *shobj)
914 int n = 0;
916 /* Initialize the obstacks. */
917 #define obstack_chunk_alloc malloc
918 #define obstack_chunk_free free
919 obstack_init (&shobj->ob_str);
920 obstack_init (&shobj->ob_sym);
921 obstack_init (&ob_list);
923 /* Process the symbols. */
924 if (shobj->symtab != NULL)
926 const ElfW(Sym) *sym = shobj->symtab;
927 const ElfW(Sym) *sym_end
928 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
929 for (; sym < sym_end; sym++)
930 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
931 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
932 && sym->st_size != 0)
934 struct known_symbol **existp;
935 struct known_symbol *newsym
936 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
937 sizeof (*newsym));
938 if (newsym == NULL)
939 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
941 newsym->name = &shobj->strtab[sym->st_name];
942 newsym->addr = sym->st_value;
943 newsym->size = sym->st_size;
944 newsym->weak = ELFW(ST_BIND) (sym->st_info) == STB_WEAK;
945 newsym->hidden = (ELFW(ST_VISIBILITY) (sym->st_other)
946 != STV_DEFAULT);
947 newsym->ticks = 0;
948 newsym->calls = 0;
950 existp = tfind (newsym, &symroot, symorder);
951 if (existp == NULL)
953 /* New function. */
954 tsearch (newsym, &symroot, symorder);
955 ++n;
957 else
959 /* The function is already defined. See whether we have
960 a better name here. */
961 if (((*existp)->hidden && !newsym->hidden)
962 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
963 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
964 && ((*existp)->weak && !newsym->weak)))
965 *existp = newsym;
966 else
967 /* We don't need the allocated memory. */
968 obstack_free (&shobj->ob_sym, newsym);
972 else
974 /* Blarg, the binary is stripped. We have to rely on the
975 information contained in the dynamic section of the object. */
976 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
977 l_info[DT_SYMTAB]);
978 const char *strtab = (const char *) D_PTR (shobj->map,
979 l_info[DT_STRTAB]);
981 /* We assume that the string table follows the symbol table,
982 because there is no way in ELF to know the size of the
983 dynamic symbol table without looking at the section headers. */
984 while ((void *) symtab < (void *) strtab)
986 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
987 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
988 && symtab->st_size != 0)
990 struct known_symbol *newsym;
991 struct known_symbol **existp;
993 newsym =
994 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
995 sizeof (*newsym));
996 if (newsym == NULL)
997 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
999 newsym->name = &strtab[symtab->st_name];
1000 newsym->addr = symtab->st_value;
1001 newsym->size = symtab->st_size;
1002 newsym->weak = ELFW(ST_BIND) (symtab->st_info) == STB_WEAK;
1003 newsym->hidden = (ELFW(ST_VISIBILITY) (symtab->st_other)
1004 != STV_DEFAULT);
1005 newsym->ticks = 0;
1006 newsym->froms = NULL;
1007 newsym->tos = NULL;
1009 existp = tfind (newsym, &symroot, symorder);
1010 if (existp == NULL)
1012 /* New function. */
1013 tsearch (newsym, &symroot, symorder);
1014 ++n;
1016 else
1018 /* The function is already defined. See whether we have
1019 a better name here. */
1020 if (((*existp)->hidden && !newsym->hidden)
1021 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1022 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1023 && ((*existp)->weak && !newsym->weak)))
1024 *existp = newsym;
1025 else
1026 /* We don't need the allocated memory. */
1027 obstack_free (&shobj->ob_sym, newsym);
1031 ++symtab;
1035 sortsym = malloc (n * sizeof (struct known_symbol *));
1036 if (sortsym == NULL)
1037 abort ();
1039 twalk (symroot, printsym);
1043 static void
1044 add_arcs (struct profdata *profdata)
1046 uint32_t narcs = profdata->narcs;
1047 struct here_cg_arc_record *data = profdata->data;
1048 uint32_t cnt;
1050 for (cnt = 0; cnt < narcs; ++cnt)
1052 /* First add the incoming arc. */
1053 size_t sym_idx = find_symbol (data[cnt].self_pc);
1055 if (sym_idx != (size_t) -1l)
1057 struct known_symbol *sym = sortsym[sym_idx];
1058 struct arc_list *runp = sym->froms;
1060 while (runp != NULL
1061 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1062 || (data[cnt].from_pc != 0
1063 && (runp->idx == (size_t) -1l
1064 || data[cnt].from_pc < sortsym[runp->idx]->addr
1065 || (data[cnt].from_pc
1066 >= (sortsym[runp->idx]->addr
1067 + sortsym[runp->idx]->size))))))
1068 runp = runp->next;
1070 if (runp == NULL)
1072 /* We need a new entry. */
1073 struct arc_list *newp = (struct arc_list *)
1074 obstack_alloc (&ob_list, sizeof (struct arc_list));
1076 if (data[cnt].from_pc == 0)
1077 newp->idx = (size_t) -1l;
1078 else
1079 newp->idx = find_symbol (data[cnt].from_pc);
1080 newp->count = data[cnt].count;
1081 newp->next = sym->froms;
1082 sym->froms = newp;
1084 else
1085 /* Increment the counter for the found entry. */
1086 runp->count += data[cnt].count;
1089 /* Now add it to the appropriate outgoing list. */
1090 sym_idx = find_symbol (data[cnt].from_pc);
1091 if (sym_idx != (size_t) -1l)
1093 struct known_symbol *sym = sortsym[sym_idx];
1094 struct arc_list *runp = sym->tos;
1096 while (runp != NULL
1097 && (runp->idx == (size_t) -1l
1098 || data[cnt].self_pc < sortsym[runp->idx]->addr
1099 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1100 + sortsym[runp->idx]->size)))
1101 runp = runp->next;
1103 if (runp == NULL)
1105 /* We need a new entry. */
1106 struct arc_list *newp = (struct arc_list *)
1107 obstack_alloc (&ob_list, sizeof (struct arc_list));
1109 newp->idx = find_symbol (data[cnt].self_pc);
1110 newp->count = data[cnt].count;
1111 newp->next = sym->tos;
1112 sym->tos = newp;
1114 else
1115 /* Increment the counter for the found entry. */
1116 runp->count += data[cnt].count;
1122 static int
1123 countorder (const void *p1, const void *p2)
1125 struct known_symbol *s1 = (struct known_symbol *) p1;
1126 struct known_symbol *s2 = (struct known_symbol *) p2;
1128 if (s1->ticks != s2->ticks)
1129 return (int) (s2->ticks - s1->ticks);
1131 if (s1->calls != s2->calls)
1132 return (int) (s2->calls - s1->calls);
1134 return strcmp (s1->name, s2->name);
1138 static double tick_unit;
1139 static uintmax_t cumu_ticks;
1141 static void
1142 printflat (const void *node, VISIT value, int level)
1144 if (value == leaf || value == postorder)
1146 struct known_symbol *s = *(struct known_symbol **) node;
1148 cumu_ticks += s->ticks;
1150 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1151 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1152 tick_unit * cumu_ticks,
1153 tick_unit * s->ticks,
1154 s->calls,
1155 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1156 /* FIXME: don't know about called functions. */
1157 s->name);
/* ARGSUSED */
/* Node destructor for tdestroy that does nothing: the tree nodes refer
   to symbol records which are not owned by the tree.  */
static void
freenoop (void *p)
{
  (void) p;
}
1169 static void
1170 generate_flat_profile (struct profdata *profdata)
1172 size_t n;
1173 void *data = NULL;
1175 tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate;
1177 printf ("Flat profile:\n\n"
1178 "Each sample counts as %g %s.\n",
1179 tick_unit, profdata->hist_hdr->dimen);
1180 fputs (" % cumulative self self total\n"
1181 " time seconds seconds calls us/call us/call name\n",
1182 stdout);
1184 for (n = 0; n < symidx; ++n)
1185 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1186 tsearch (sortsym[n], &data, countorder);
1188 twalk (data, printflat);
1190 tdestroy (data, freenoop);
1194 static void
1195 generate_call_graph (struct profdata *profdata)
1197 size_t cnt;
1199 puts ("\nindex % time self children called name\n");
1201 for (cnt = 0; cnt < symidx; ++cnt)
1202 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1204 struct arc_list *runp;
1205 size_t n;
1207 /* First print the from-information. */
1208 runp = sortsym[cnt]->froms;
1209 while (runp != NULL)
1211 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1212 (runp->idx != (size_t) -1l
1213 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1214 0.0, /* FIXME: what's time for the children, recursive */
1215 runp->count, sortsym[cnt]->calls,
1216 (runp->idx != (size_t) -1l ?
1217 sortsym[runp->idx]->name : "<UNKNOWN>"));
1219 if (runp->idx != (size_t) -1l)
1220 printf (" [%Zd]", runp->idx);
1221 putchar_unlocked ('\n');
1223 runp = runp->next;
1226 /* Info abount the function itself. */
1227 n = printf ("[%Zu]", cnt);
1228 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1229 (int) (7 - n), " ",
1230 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1231 sortsym[cnt]->ticks * tick_unit,
1232 0.0, /* FIXME: what's time for the children, recursive */
1233 sortsym[cnt]->calls,
1234 sortsym[cnt]->name, cnt);
1236 /* Info about the functions this function calls. */
1237 runp = sortsym[cnt]->tos;
1238 while (runp != NULL)
1240 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1241 (runp->idx != (size_t) -1l
1242 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1243 0.0, /* FIXME: what's time for the children, recursive */
1244 runp->count);
1246 if (runp->idx != (size_t) -1l)
1247 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1248 sortsym[runp->idx]->calls,
1249 sortsym[runp->idx]->name,
1250 runp->idx);
1251 else
1252 fputs ("??? <UNKNOWN>\n\n", stdout);
1254 runp = runp->next;
1257 fputs ("-----------------------------------------------\n", stdout);
1262 static void
1263 generate_call_pair_list (struct profdata *profdata)
1265 size_t cnt;
1267 for (cnt = 0; cnt < symidx; ++cnt)
1268 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1270 struct arc_list *runp;
1272 /* First print the incoming arcs. */
1273 runp = sortsym[cnt]->froms;
1274 while (runp != NULL)
1276 if (runp->idx == (size_t) -1l)
1277 printf ("\
1278 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1279 sortsym[cnt]->name, runp->count);
1280 runp = runp->next;
1283 /* Next the outgoing arcs. */
1284 runp = sortsym[cnt]->tos;
1285 while (runp != NULL)
1287 printf ("%-34s %-34s %9" PRIdMAX "\n",
1288 sortsym[cnt]->name,
1289 (runp->idx != (size_t) -1l
1290 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1291 runp->count);
1292 runp = runp->next;