Update.
[glibc.git] / elf / sprof.c
blob458a0905c0fa2eda71822102baf2fe36d61359eb
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997-2002, 2003, 2004 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <ldsodefs.h>
36 #include <sys/gmon.h>
37 #include <sys/gmon_out.h>
38 #include <sys/mman.h>
39 #include <sys/param.h>
40 #include <sys/stat.h>
42 /* Get libc version number. */
43 #include "../version.h"
45 #define PACKAGE _libc_intl_domainname
48 #include <endian.h>
/* Pick the ELF data-encoding constant, and a printable name for it,
   according to the BYTE_ORDER value supplied by <endian.h>.  */
#if BYTE_ORDER == BIG_ENDIAN
# define byteorder ELFDATA2MSB
# define byteorder_name "big-endian"
#elif BYTE_ORDER == LITTLE_ENDIAN
# define byteorder ELFDATA2LSB
# define byteorder_name "little-endian"
#else
# error "Unknown BYTE_ORDER " BYTE_ORDER
# define byteorder ELFDATANONE
#endif
61 extern int __profile_frequency (void);
63 /* Name and version of program. */
64 static void print_version (FILE *stream, struct argp_state *state);
65 void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
67 #define OPT_TEST 1
69 /* Definitions of arguments for argp functions. */
70 static const struct argp_option options[] =
72 { NULL, 0, NULL, 0, N_("Output selection:") },
73 { "call-pairs", 'c', NULL, 0,
74 N_("print list of count paths and their number of use") },
75 { "flat-profile", 'p', NULL, 0,
76 N_("generate flat profile with counts and ticks") },
77 { "graph", 'q', NULL, 0, N_("generate call graph") },
79 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
80 { NULL, 0, NULL, 0, NULL }
83 /* Short description of program. */
84 static const char doc[] = N_("Read and display shared object profiling data");
86 /* Strings for arguments in help texts. */
87 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
89 /* Prototype for option handler. */
90 static error_t parse_opt (int key, char *arg, struct argp_state *state);
92 /* Data structure to communicate with argp functions. */
93 static struct argp argp =
95 options, parse_opt, args_doc, doc, NULL, NULL
/* Operation modes.  Every report kind occupies its own bit so that the
   option handler can OR several of them together.  */
static enum
{
  NONE = 0x0,
  FLAT_MODE = 0x1,
  CALL_GRAPH_MODE = 0x2,
  CALL_PAIRS = 0x4,

  /* What main falls back to when no report was requested.  */
  DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
} mode;
110 /* If nonzero the total number of invocations of a function is emitted. */
111 int count_total;
113 /* Nonzero for testing. */
114 int do_test;
/* Structure describing calls.  One entry is filled in per arc record
   while the profiling data is loaded.  */
struct here_fromstruct
{
  /* The arc record this entry stands for.  */
  volatile struct here_cg_arc_record *here;
  /* Value the `tos' slot held before this entry was stored there.  */
  uint16_t link;
};
/* Element type used to address the arc table.  This is basically the
   `gmon_cg_arc_record' format, but it includes the room for the tag
   and it uses real types.  The structure is packed, so its members
   follow each other without padding.  */
struct here_cg_arc_record
{
  uintptr_t from_pc;
  uintptr_t self_pc;
  uint32_t count;
} __attribute__ ((packed));
struct known_symbol;

/* One node of a singly linked list of call arcs attached to a
   symbol.  */
struct arc_list
{
  /* Index into sortsym of the symbol at the other end of the arc, or
     (size_t) -1 if that symbol is not known.  */
  size_t idx;
  /* Accumulated count of all arc records merged into this node.  */
  uintmax_t count;

  struct arc_list *next;
};
143 static struct obstack ob_list;
/* Everything recorded about one function symbol of the shared
   object.  */
struct known_symbol
{
  const char *name;   /* Points into the string table of the object.  */
  uintptr_t addr;     /* st_value of the ELF symbol.  */
  size_t size;        /* st_size of the ELF symbol.  */
  int weak;           /* Nonzero if the symbol binding is STB_WEAK.  */

  uintmax_t ticks;    /* Histogram ticks attributed to the symbol.  */
  uintmax_t calls;    /* Sum of the counts of all arcs ending here.  */

  struct arc_list *froms;   /* Incoming arcs.  */
  struct arc_list *tos;     /* Outgoing arcs.  */
};
/* Descriptor of the shared object under inspection, filled in by
   load_shobj.  */
struct shobj
{
  const char *name;		/* User-provided name.  */

  struct link_map *map;
  const char *dynstrtab;	/* Dynamic string table of shared object.  */
  const char *soname;		/* Soname of shared object.  */

  /* Bounds of the executable segments and the sizes of the profiling
     tables derived from them.  */
  uintptr_t lowpc;
  uintptr_t highpc;
  unsigned long int kcountsize;
  size_t expected_size;		/* Expected size of profiling file.  */
  size_t tossize;
  size_t fromssize;
  size_t fromlimit;
  unsigned int hashfraction;
  int s_scale;

  /* File mapping that holds the symbol and string tables; both
     pointers below are NULL when the object is stripped.  */
  void *symbol_map;
  size_t symbol_mapsize;
  const ElfW(Sym) *symtab;
  size_t symtab_size;
  const char *strtab;

  struct obstack ob_str;
  struct obstack ob_sym;
};
/* Descriptor of a mapped profiling data file, filled in by
   load_profdata.  */
struct profdata
{
  /* Start and length of the file mapping, kept for munmap.  */
  void *addr;
  off_t size;

  char *hist;			/* First byte after the gmon header.  */
  struct gmon_hist_hdr *hist_hdr;
  uint16_t *kcount;		/* Histogram counters.  */
  uint32_t narcs;		/* Number of arcs in toset.  */
  struct here_cg_arc_record *data;
  uint16_t *tos;		/* Allocated together with `froms'.  */
  struct here_fromstruct *froms;
};
/* Search tree for symbols.  */
void *symroot;
/* Symbols copied out of the tree by an in-order walk, plus the number
   of slots of that array that are in use.  */
static struct known_symbol **sortsym;
static size_t symidx;
/* Sum of the ticks of all symbols.  */
static uintmax_t total_ticks;
210 /* Prototypes for local functions. */
211 static struct shobj *load_shobj (const char *name);
212 static void unload_shobj (struct shobj *shobj);
213 static struct profdata *load_profdata (const char *name, struct shobj *shobj);
214 static void unload_profdata (struct profdata *profdata);
215 static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
216 static void count_calls (struct shobj *shobj, struct profdata *profdata);
217 static void read_symbols (struct shobj *shobj);
218 static void add_arcs (struct profdata *profdata);
219 static void generate_flat_profile (struct profdata *profdata);
220 static void generate_call_graph (struct profdata *profdata);
221 static void generate_call_pair_list (struct profdata *profdata);
225 main (int argc, char *argv[])
227 const char *shobj;
228 const char *profdata;
229 struct shobj *shobj_handle;
230 struct profdata *profdata_handle;
231 int remaining;
233 setlocale (LC_ALL, "");
235 /* Initialize the message catalog. */
236 textdomain (_libc_intl_domainname);
238 /* Parse and process arguments. */
239 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
241 if (argc - remaining == 0 || argc - remaining > 2)
243 /* We need exactly two non-option parameter. */
244 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
245 program_invocation_short_name);
246 exit (1);
249 /* Get parameters. */
250 shobj = argv[remaining];
251 if (argc - remaining == 2)
252 profdata = argv[remaining + 1];
253 else
254 /* No filename for the profiling data given. We will determine it
255 from the soname of the shobj, later. */
256 profdata = NULL;
258 /* First see whether we can load the shared object. */
259 shobj_handle = load_shobj (shobj);
260 if (shobj_handle == NULL)
261 exit (1);
263 /* We can now determine the filename for the profiling data, if
264 nececessary. */
265 if (profdata == NULL)
267 char *newp;
268 const char *soname;
269 size_t soname_len;
271 soname = shobj_handle->soname ?: basename (shobj);
272 soname_len = strlen (soname);
273 newp = (char *) alloca (soname_len + sizeof ".profile");
274 stpcpy (mempcpy (newp, soname, soname_len), ".profile");
275 profdata = newp;
278 /* Now see whether the profiling data file matches the given object. */
279 profdata_handle = load_profdata (profdata, shobj_handle);
280 if (profdata_handle == NULL)
282 unload_shobj (shobj_handle);
284 exit (1);
287 read_symbols (shobj_handle);
289 /* Count the ticks. */
290 count_total_ticks (shobj_handle, profdata_handle);
292 /* Count the calls. */
293 count_calls (shobj_handle, profdata_handle);
295 /* Add the arc information. */
296 add_arcs (profdata_handle);
298 /* If no mode is specified fall back to the default mode. */
299 if (mode == NONE)
300 mode = DEFAULT_MODE;
302 /* Do some work. */
303 if (mode & FLAT_MODE)
304 generate_flat_profile (profdata_handle);
306 if (mode & CALL_GRAPH_MODE)
307 generate_call_graph (profdata_handle);
309 if (mode & CALL_PAIRS)
310 generate_call_pair_list (profdata_handle);
312 /* Free the resources. */
313 unload_shobj (shobj_handle);
314 unload_profdata (profdata_handle);
316 return 0;
320 /* Handle program arguments. */
321 static error_t
322 parse_opt (int key, char *arg, struct argp_state *state)
324 switch (key)
326 case 'c':
327 mode |= CALL_PAIRS;
328 break;
329 case 'p':
330 mode |= FLAT_MODE;
331 break;
332 case 'q':
333 mode |= CALL_GRAPH_MODE;
334 break;
335 case OPT_TEST:
336 do_test = 1;
337 break;
338 default:
339 return ARGP_ERR_UNKNOWN;
341 return 0;
345 /* Print the version information. */
346 static void
347 print_version (FILE *stream, struct argp_state *state)
349 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
350 fprintf (stream, gettext ("\
351 Copyright (C) %s Free Software Foundation, Inc.\n\
352 This is free software; see the source for copying conditions. There is NO\n\
353 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
355 "2004");
356 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
360 /* Note that we must not use `dlopen' etc. The shobj object must not
361 be loaded for use. */
362 static struct shobj *
363 load_shobj (const char *name)
365 struct link_map *map = NULL;
366 struct shobj *result;
367 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
368 ElfW(Addr) mapend = 0;
369 const ElfW(Phdr) *ph;
370 size_t textsize;
371 unsigned int log_hashfraction;
372 ElfW(Ehdr) *ehdr;
373 int fd;
374 ElfW(Shdr) *shdr;
375 void *ptr;
376 size_t pagesize = getpagesize ();
377 const char *shstrtab;
378 int idx;
379 ElfW(Shdr) *symtab_entry;
381 /* Since we use dlopen() we must be prepared to work around the sometimes
382 strange lookup rules for the shared objects. If we have a file foo.so
383 in the current directory and the user specfies foo.so on the command
384 line (without specifying a directory) we should load the file in the
385 current directory even if a normal dlopen() call would read the other
386 file. We do this by adding a directory portion to the name. */
387 if (strchr (name, '/') == NULL)
389 char *load_name = (char *) alloca (strlen (name) + 3);
390 stpcpy (stpcpy (load_name, "./"), name);
392 map = (struct link_map *) dlopen (load_name, RTLD_LAZY | __RTLD_SPROF);
394 if (map == NULL)
396 map = (struct link_map *) dlopen (name, RTLD_LAZY | __RTLD_SPROF);
397 if (map == NULL)
399 error (0, errno, _("failed to load shared object `%s'"), name);
400 return NULL;
404 /* Prepare the result. */
405 result = (struct shobj *) calloc (1, sizeof (struct shobj));
406 if (result == NULL)
408 error (0, errno, _("cannot create internal descriptors"));
409 dlclose (map);
410 return NULL;
412 result->name = name;
413 result->map = map;
415 /* Compute the size of the sections which contain program code.
416 This must match the code in dl-profile.c (_dl_start_profile). */
417 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
418 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
420 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
421 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
422 & ~(pagesize - 1));
424 if (start < mapstart)
425 mapstart = start;
426 if (end > mapend)
427 mapend = end;
430 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
431 HISTFRACTION * sizeof (HISTCOUNTER));
432 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
433 HISTFRACTION * sizeof (HISTCOUNTER));
434 if (do_test)
435 printf ("load addr: %0#*" PRIxPTR "\n"
436 "lower bound PC: %0#*" PRIxPTR "\n"
437 "upper bound PC: %0#*" PRIxPTR "\n",
438 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
439 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
440 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
442 textsize = result->highpc - result->lowpc;
443 result->kcountsize = textsize / HISTFRACTION;
444 result->hashfraction = HASHFRACTION;
445 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
446 /* If HASHFRACTION is a power of two, mcount can use shifting
447 instead of integer division. Precompute shift amount. */
448 log_hashfraction = __builtin_ffs (result->hashfraction
449 * sizeof (struct here_fromstruct)) - 1;
450 else
451 log_hashfraction = -1;
452 if (do_test)
453 printf ("hashfraction = %d\ndivider = %Zu\n",
454 result->hashfraction,
455 result->hashfraction * sizeof (struct here_fromstruct));
456 result->tossize = textsize / HASHFRACTION;
457 result->fromlimit = textsize * ARCDENSITY / 100;
458 if (result->fromlimit < MINARCS)
459 result->fromlimit = MINARCS;
460 if (result->fromlimit > MAXARCS)
461 result->fromlimit = MAXARCS;
462 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
464 result->expected_size = (sizeof (struct gmon_hdr)
465 + 4 + sizeof (struct gmon_hist_hdr)
466 + result->kcountsize
467 + 4 + 4
468 + (result->fromssize
469 * sizeof (struct here_cg_arc_record)));
471 if (do_test)
472 printf ("expected size: %Zd\n", result->expected_size);
474 #define SCALE_1_TO_1 0x10000L
476 if (result->kcountsize < result->highpc - result->lowpc)
478 size_t range = result->highpc - result->lowpc;
479 size_t quot = range / result->kcountsize;
481 if (quot >= SCALE_1_TO_1)
482 result->s_scale = 1;
483 else if (quot >= SCALE_1_TO_1 / 256)
484 result->s_scale = SCALE_1_TO_1 / quot;
485 else if (range > ULONG_MAX / 256)
486 result->s_scale = ((SCALE_1_TO_1 * 256)
487 / (range / (result->kcountsize / 256)));
488 else
489 result->s_scale = ((SCALE_1_TO_1 * 256)
490 / ((range * 256) / result->kcountsize));
492 else
493 result->s_scale = SCALE_1_TO_1;
495 if (do_test)
496 printf ("s_scale: %d\n", result->s_scale);
498 /* Determine the dynamic string table. */
499 if (map->l_info[DT_STRTAB] == NULL)
500 result->dynstrtab = NULL;
501 else
502 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
503 if (do_test)
504 printf ("string table: %p\n", result->dynstrtab);
506 /* Determine the soname. */
507 if (map->l_info[DT_SONAME] == NULL)
508 result->soname = NULL;
509 else
510 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
511 if (do_test && result->soname != NULL)
512 printf ("soname: %s\n", result->soname);
514 /* Now we have to load the symbol table.
516 First load the section header table. */
517 ehdr = (ElfW(Ehdr) *) map->l_map_start;
519 /* Make sure we are on the right party. */
520 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
521 abort ();
523 /* And we need the shared object file descriptor again. */
524 fd = open (map->l_name, O_RDONLY);
525 if (fd == -1)
526 /* Dooh, this really shouldn't happen. We know the file is available. */
527 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
528 map->l_name);
530 /* Now map the section header. */
531 ptr = mmap (NULL, (ehdr->e_shnum * sizeof (ElfW(Shdr))
532 + (ehdr->e_shoff & (pagesize - 1))), PROT_READ,
533 MAP_SHARED|MAP_FILE, fd, ehdr->e_shoff & ~(pagesize - 1));
534 if (ptr == MAP_FAILED)
535 error (EXIT_FAILURE, errno, _("mapping of section headers failed"));
536 shdr = (ElfW(Shdr) *) ((char *) ptr + (ehdr->e_shoff & (pagesize - 1)));
538 /* Get the section header string table. */
539 ptr = mmap (NULL, (shdr[ehdr->e_shstrndx].sh_size
540 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))),
541 PROT_READ, MAP_SHARED|MAP_FILE, fd,
542 shdr[ehdr->e_shstrndx].sh_offset & ~(pagesize - 1));
543 if (ptr == MAP_FAILED)
544 error (EXIT_FAILURE, errno,
545 _("mapping of section header string table failed"));
546 shstrtab = ((const char *) ptr
547 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1)));
549 /* Search for the ".symtab" section. */
550 symtab_entry = NULL;
551 for (idx = 0; idx < ehdr->e_shnum; ++idx)
552 if (shdr[idx].sh_type == SHT_SYMTAB
553 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
555 symtab_entry = &shdr[idx];
556 break;
559 /* We don't need the section header string table anymore. */
560 munmap (ptr, (shdr[ehdr->e_shstrndx].sh_size
561 + (shdr[ehdr->e_shstrndx].sh_offset & (pagesize - 1))));
563 if (symtab_entry == NULL)
565 fprintf (stderr, _("\
566 *** The file `%s' is stripped: no detailed analysis possible\n"),
567 name);
568 result->symtab = NULL;
569 result->strtab = NULL;
571 else
573 ElfW(Off) min_offset, max_offset;
574 ElfW(Shdr) *strtab_entry;
576 strtab_entry = &shdr[symtab_entry->sh_link];
578 /* Find the minimum and maximum offsets that include both the symbol
579 table and the string table. */
580 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
582 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
583 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
585 else
587 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
588 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
591 result->symbol_map = mmap (NULL, max_offset - min_offset,
592 PROT_READ, MAP_SHARED|MAP_FILE, fd,
593 min_offset);
594 if (result->symbol_map == NULL)
595 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
597 result->symtab
598 = (const ElfW(Sym) *) ((const char *) result->symbol_map
599 + (symtab_entry->sh_offset - min_offset));
600 result->symtab_size = symtab_entry->sh_size;
601 result->strtab = ((const char *) result->symbol_map
602 + (strtab_entry->sh_offset - min_offset));
603 result->symbol_mapsize = max_offset - min_offset;
606 /* Now we also don't need the section header table anymore. */
607 munmap ((char *) shdr - (ehdr->e_shoff & (pagesize - 1)),
608 (ehdr->e_phnum * sizeof (ElfW(Shdr))
609 + (ehdr->e_shoff & (pagesize - 1))));
611 /* Free the descriptor for the shared object. */
612 close (fd);
614 return result;
618 static void
619 unload_shobj (struct shobj *shobj)
621 munmap (shobj->symbol_map, shobj->symbol_mapsize);
622 dlclose (shobj->map);
626 static struct profdata *
627 load_profdata (const char *name, struct shobj *shobj)
629 struct profdata *result;
630 int fd;
631 struct stat st;
632 void *addr;
633 struct gmon_hdr gmon_hdr;
634 struct gmon_hist_hdr hist_hdr;
635 uint32_t *narcsp;
636 size_t fromlimit;
637 struct here_cg_arc_record *data;
638 struct here_fromstruct *froms;
639 uint16_t *tos;
640 size_t fromidx;
641 size_t idx;
643 fd = open (name, O_RDONLY);
644 if (fd == -1)
646 char *ext_name;
648 if (errno != ENOENT || strchr (name, '/') != NULL)
649 /* The file exists but we are not allowed to read it or the
650 file does not exist and the name includes a path
651 specification.. */
652 return NULL;
654 /* A file with the given name does not exist in the current
655 directory, try it in the default location where the profiling
656 files are created. */
657 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
658 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
659 name = ext_name;
661 fd = open (ext_name, O_RDONLY);
662 if (fd == -1)
664 /* Even this file does not exist. */
665 error (0, errno, _("cannot load profiling data"));
666 return NULL;
670 /* We have found the file, now make sure it is the right one for the
671 data file. */
672 if (fstat (fd, &st) < 0)
674 error (0, errno, _("while stat'ing profiling data file"));
675 close (fd);
676 return NULL;
679 if ((size_t) st.st_size != shobj->expected_size)
681 error (0, 0,
682 _("profiling data file `%s' does not match shared object `%s'"),
683 name, shobj->name);
684 close (fd);
685 return NULL;
688 /* The data file is most probably the right one for our shared
689 object. Map it now. */
690 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
691 if (addr == MAP_FAILED)
693 error (0, errno, _("failed to mmap the profiling data file"));
694 close (fd);
695 return NULL;
698 /* We don't need the file desriptor anymore. */
699 if (close (fd) < 0)
701 error (0, errno, _("error while closing the profiling data file"));
702 munmap (addr, st.st_size);
703 return NULL;
706 /* Prepare the result. */
707 result = (struct profdata *) calloc (1, sizeof (struct profdata));
708 if (result == NULL)
710 error (0, errno, _("cannot create internal descriptor"));
711 munmap (addr, st.st_size);
712 return NULL;
715 /* Store the address and size so that we can later free the resources. */
716 result->addr = addr;
717 result->size = st.st_size;
719 /* Pointer to data after the header. */
720 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
721 result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist
722 + sizeof (uint32_t));
723 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
724 + sizeof (struct gmon_hist_hdr));
726 /* Compute pointer to array of the arc information. */
727 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
728 + sizeof (uint32_t));
729 result->narcs = *narcsp;
730 result->data = (struct here_cg_arc_record *) ((char *) narcsp
731 + sizeof (uint32_t));
733 /* Create the gmon_hdr we expect or write. */
734 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
735 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
736 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
738 /* Create the hist_hdr we expect or write. */
739 *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
740 *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
741 if (do_test)
742 printf ("low_pc = %p\nhigh_pc = %p\n",
743 *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc);
744 *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
745 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
746 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
747 hist_hdr.dimen_abbrev = 's';
749 /* Test whether the header of the profiling data is ok. */
750 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
751 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
752 || memcmp (result->hist_hdr, &hist_hdr,
753 sizeof (struct gmon_hist_hdr)) != 0
754 || narcsp[-1] != GMON_TAG_CG_ARC)
756 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
757 name, shobj->name);
758 if (do_test)
760 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0)
761 puts ("gmon_hdr differs");
762 if (*(uint32_t *) result->hist != GMON_TAG_TIME_HIST)
763 puts ("result->hist differs");
764 if (memcmp (result->hist_hdr, &hist_hdr,
765 sizeof (struct gmon_hist_hdr)) != 0)
766 puts ("hist_hdr differs");
767 if (narcsp[-1] != GMON_TAG_CG_ARC)
768 puts ("narcsp[-1] differs");
770 free (result);
771 munmap (addr, st.st_size);
772 return NULL;
775 /* We are pretty sure now that this is a correct input file. Set up
776 the remaining information in the result structure and return. */
777 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
778 if (result->tos == NULL)
780 error (0, errno, _("cannot create internal descriptor"));
781 munmap (addr, st.st_size);
782 free (result);
783 return NULL;
786 result->froms = (struct here_fromstruct *) ((char *) result->tos
787 + shobj->tossize);
788 fromidx = 0;
790 /* Now we have to process all the arc count entries. */
791 fromlimit = shobj->fromlimit;
792 data = result->data;
793 froms = result->froms;
794 tos = result->tos;
795 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
797 size_t to_index;
798 size_t newfromidx;
799 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
800 newfromidx = fromidx++;
801 froms[newfromidx].here = &data[idx];
802 froms[newfromidx].link = tos[to_index];
803 tos[to_index] = newfromidx;
806 return result;
810 static void
811 unload_profdata (struct profdata *profdata)
813 free (profdata->tos);
814 munmap (profdata->addr, profdata->size);
815 free (profdata);
819 static void
820 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
822 volatile uint16_t *kcount = profdata->kcount;
823 size_t maxkidx = shobj->kcountsize;
824 size_t factor = 2 * (65536 / shobj->s_scale);
825 size_t kidx = 0;
826 size_t sidx = 0;
828 while (sidx < symidx)
830 uintptr_t start = sortsym[sidx]->addr;
831 uintptr_t end = start + sortsym[sidx]->size;
833 while (kidx < maxkidx && factor * kidx < start)
834 ++kidx;
835 if (kidx == maxkidx)
836 break;
838 while (kidx < maxkidx && factor * kidx < end)
839 sortsym[sidx]->ticks += kcount[kidx++];
840 if (kidx == maxkidx)
841 break;
843 total_ticks += sortsym[sidx++]->ticks;
848 static size_t
849 find_symbol (uintptr_t addr)
851 size_t sidx = 0;
853 while (sidx < symidx)
855 uintptr_t start = sortsym[sidx]->addr;
856 uintptr_t end = start + sortsym[sidx]->size;
858 if (addr >= start && addr < end)
859 return sidx;
861 if (addr < start)
862 break;
864 ++sidx;
867 return (size_t) -1l;
871 static void
872 count_calls (struct shobj *shobj, struct profdata *profdata)
874 struct here_cg_arc_record *data = profdata->data;
875 uint32_t narcs = profdata->narcs;
876 uint32_t cnt;
878 for (cnt = 0; cnt < narcs; ++cnt)
880 uintptr_t here = data[cnt].self_pc;
881 size_t symbol_idx;
883 /* Find the symbol for this address. */
884 symbol_idx = find_symbol (here);
885 if (symbol_idx != (size_t) -1l)
886 sortsym[symbol_idx]->calls += data[cnt].count;
891 static int
892 symorder (const void *o1, const void *o2)
894 const struct known_symbol *p1 = (const struct known_symbol *) o1;
895 const struct known_symbol *p2 = (const struct known_symbol *) o2;
897 return p1->addr - p2->addr;
901 static void
902 printsym (const void *node, VISIT value, int level)
904 if (value == leaf || value == postorder)
905 sortsym[symidx++] = *(struct known_symbol **) node;
909 static void
910 read_symbols (struct shobj *shobj)
912 int n = 0;
914 /* Initialize the obstacks. */
915 #define obstack_chunk_alloc malloc
916 #define obstack_chunk_free free
917 obstack_init (&shobj->ob_str);
918 obstack_init (&shobj->ob_sym);
919 obstack_init (&ob_list);
921 /* Process the symbols. */
922 if (shobj->symtab != NULL)
924 const ElfW(Sym) *sym = shobj->symtab;
925 const ElfW(Sym) *sym_end
926 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
927 for (; sym < sym_end; sym++)
928 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
929 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
930 && sym->st_size != 0)
932 struct known_symbol **existp;
933 struct known_symbol *newsym
934 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
935 sizeof (*newsym));
936 if (newsym == NULL)
937 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
939 newsym->name = &shobj->strtab[sym->st_name];
940 newsym->addr = sym->st_value;
941 newsym->size = sym->st_size;
942 newsym->weak = ELFW(ST_BIND) (sym->st_info) == STB_WEAK;
943 newsym->ticks = 0;
944 newsym->calls = 0;
946 existp = tfind (newsym, &symroot, symorder);
947 if (existp == NULL)
949 /* New function. */
950 tsearch (newsym, &symroot, symorder);
951 ++n;
953 else
955 /* The function is already defined. See whether we have
956 a better name here. */
957 if (((*existp)->name[0] == '_' && newsym->name[0] != '_')
958 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
959 && (*existp)->weak && !newsym->weak))
960 *existp = newsym;
961 else
962 /* We don't need the allocated memory. */
963 obstack_free (&shobj->ob_sym, newsym);
967 else
969 /* Blarg, the binary is stripped. We have to rely on the
970 information contained in the dynamic section of the object. */
971 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
972 l_info[DT_SYMTAB]);
973 const char *strtab = (const char *) D_PTR (shobj->map,
974 l_info[DT_STRTAB]);
976 /* We assume that the string table follows the symbol table,
977 because there is no way in ELF to know the size of the
978 dynamic symbol table without looking at the section headers. */
979 while ((void *) symtab < (void *) strtab)
981 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
982 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
983 && symtab->st_size != 0)
985 struct known_symbol *newsym;
986 struct known_symbol **existp;
988 newsym =
989 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
990 sizeof (*newsym));
991 if (newsym == NULL)
992 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
994 newsym->name = &strtab[symtab->st_name];
995 newsym->addr = symtab->st_value;
996 newsym->size = symtab->st_size;
997 newsym->weak = ELFW(ST_BIND) (symtab->st_info) == STB_WEAK;
998 newsym->ticks = 0;
999 newsym->froms = NULL;
1000 newsym->tos = NULL;
1002 existp = tfind (newsym, &symroot, symorder);
1003 if (existp == NULL)
1005 /* New function. */
1006 tsearch (newsym, &symroot, symorder);
1007 ++n;
1009 else
1011 /* The function is already defined. See whether we have
1012 a better name here. */
1013 if (((*existp)->name[0] == '_' && newsym->name[0] != '_')
1014 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1015 && (*existp)->weak && !newsym->weak))
1016 *existp = newsym;
1017 else
1018 /* We don't need the allocated memory. */
1019 obstack_free (&shobj->ob_sym, newsym);
1023 ++symtab;
1027 sortsym = malloc (n * sizeof (struct known_symbol *));
1028 if (sortsym == NULL)
1029 abort ();
1031 twalk (symroot, printsym);
1035 static void
1036 add_arcs (struct profdata *profdata)
1038 uint32_t narcs = profdata->narcs;
1039 struct here_cg_arc_record *data = profdata->data;
1040 uint32_t cnt;
1042 for (cnt = 0; cnt < narcs; ++cnt)
1044 /* First add the incoming arc. */
1045 size_t sym_idx = find_symbol (data[cnt].self_pc);
1047 if (sym_idx != (size_t) -1l)
1049 struct known_symbol *sym = sortsym[sym_idx];
1050 struct arc_list *runp = sym->froms;
1052 while (runp != NULL
1053 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1054 || (data[cnt].from_pc != 0
1055 && (runp->idx == (size_t) -1l
1056 || data[cnt].from_pc < sortsym[runp->idx]->addr
1057 || (data[cnt].from_pc
1058 >= (sortsym[runp->idx]->addr
1059 + sortsym[runp->idx]->size))))))
1060 runp = runp->next;
1062 if (runp == NULL)
1064 /* We need a new entry. */
1065 struct arc_list *newp = (struct arc_list *)
1066 obstack_alloc (&ob_list, sizeof (struct arc_list));
1068 if (data[cnt].from_pc == 0)
1069 newp->idx = (size_t) -1l;
1070 else
1071 newp->idx = find_symbol (data[cnt].from_pc);
1072 newp->count = data[cnt].count;
1073 newp->next = sym->froms;
1074 sym->froms = newp;
1076 else
1077 /* Increment the counter for the found entry. */
1078 runp->count += data[cnt].count;
1081 /* Now add it to the appropriate outgoing list. */
1082 sym_idx = find_symbol (data[cnt].from_pc);
1083 if (sym_idx != (size_t) -1l)
1085 struct known_symbol *sym = sortsym[sym_idx];
1086 struct arc_list *runp = sym->tos;
1088 while (runp != NULL
1089 && (runp->idx == (size_t) -1l
1090 || data[cnt].self_pc < sortsym[runp->idx]->addr
1091 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1092 + sortsym[runp->idx]->size)))
1093 runp = runp->next;
1095 if (runp == NULL)
1097 /* We need a new entry. */
1098 struct arc_list *newp = (struct arc_list *)
1099 obstack_alloc (&ob_list, sizeof (struct arc_list));
1101 newp->idx = find_symbol (data[cnt].self_pc);
1102 newp->count = data[cnt].count;
1103 newp->next = sym->tos;
1104 sym->tos = newp;
1106 else
1107 /* Increment the counter for the found entry. */
1108 runp->count += data[cnt].count;
1114 static int
1115 countorder (const void *p1, const void *p2)
1117 struct known_symbol *s1 = (struct known_symbol *) p1;
1118 struct known_symbol *s2 = (struct known_symbol *) p2;
1120 if (s1->ticks != s2->ticks)
1121 return (int) (s2->ticks - s1->ticks);
1123 if (s1->calls != s2->calls)
1124 return (int) (s2->calls - s1->calls);
1126 return strcmp (s1->name, s2->name);
1130 static double tick_unit;
1131 static uintmax_t cumu_ticks;
1133 static void
1134 printflat (const void *node, VISIT value, int level)
1136 if (value == leaf || value == postorder)
1138 struct known_symbol *s = *(struct known_symbol **) node;
1140 cumu_ticks += s->ticks;
1142 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1143 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1144 tick_unit * cumu_ticks,
1145 tick_unit * s->ticks,
1146 s->calls,
1147 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1148 /* FIXME: don't know about called functions. */
1149 s->name);
/* Node destructor for tdestroy which deliberately does nothing: the
   tree nodes only point at symbols that stay in use.  */
static void
freenoop (void *p)
{
  (void) p;
}
1161 static void
1162 generate_flat_profile (struct profdata *profdata)
1164 size_t n;
1165 void *data = NULL;
1167 tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate;
1169 printf ("Flat profile:\n\n"
1170 "Each sample counts as %g %s.\n",
1171 tick_unit, profdata->hist_hdr->dimen);
1172 fputs (" % cumulative self self total\n"
1173 " time seconds seconds calls us/call us/call name\n",
1174 stdout);
1176 for (n = 0; n < symidx; ++n)
1177 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1178 tsearch (sortsym[n], &data, countorder);
1180 twalk (data, printflat);
1182 tdestroy (data, freenoop);
1186 static void
1187 generate_call_graph (struct profdata *profdata)
1189 size_t cnt;
1191 puts ("\nindex % time self children called name\n");
1193 for (cnt = 0; cnt < symidx; ++cnt)
1194 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1196 struct arc_list *runp;
1197 size_t n;
1199 /* First print the from-information. */
1200 runp = sortsym[cnt]->froms;
1201 while (runp != NULL)
1203 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1204 (runp->idx != (size_t) -1l
1205 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1206 0.0, /* FIXME: what's time for the children, recursive */
1207 runp->count, sortsym[cnt]->calls,
1208 (runp->idx != (size_t) -1l ?
1209 sortsym[runp->idx]->name : "<UNKNOWN>"));
1211 if (runp->idx != (size_t) -1l)
1212 printf (" [%Zd]", runp->idx);
1213 putchar_unlocked ('\n');
1215 runp = runp->next;
1218 /* Info abount the function itself. */
1219 n = printf ("[%Zu]", cnt);
1220 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1221 (int) (7 - n), " ",
1222 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1223 sortsym[cnt]->ticks * tick_unit,
1224 0.0, /* FIXME: what's time for the children, recursive */
1225 sortsym[cnt]->calls,
1226 sortsym[cnt]->name, cnt);
1228 /* Info about the functions this function calls. */
1229 runp = sortsym[cnt]->tos;
1230 while (runp != NULL)
1232 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1233 (runp->idx != (size_t) -1l
1234 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1235 0.0, /* FIXME: what's time for the children, recursive */
1236 runp->count);
1238 if (runp->idx != (size_t) -1l)
1239 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1240 sortsym[runp->idx]->calls,
1241 sortsym[runp->idx]->name,
1242 runp->idx);
1243 else
1244 fputs ("??? <UNKNOWN>\n\n", stdout);
1246 runp = runp->next;
1249 fputs ("-----------------------------------------------\n", stdout);
1254 static void
1255 generate_call_pair_list (struct profdata *profdata)
1257 size_t cnt;
1259 for (cnt = 0; cnt < symidx; ++cnt)
1260 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1262 struct arc_list *runp;
1264 /* First print the incoming arcs. */
1265 runp = sortsym[cnt]->froms;
1266 while (runp != NULL)
1268 if (runp->idx == (size_t) -1l)
1269 printf ("\
1270 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1271 sortsym[cnt]->name, runp->count);
1272 runp = runp->next;
1275 /* Next the outgoing arcs. */
1276 runp = sortsym[cnt]->tos;
1277 while (runp != NULL)
1279 printf ("%-34s %-34s %9" PRIdMAX "\n",
1280 sortsym[cnt]->name,
1281 (runp->idx != (size_t) -1l
1282 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1283 runp->count);
1284 runp = runp->next;