2.3.5-5
[glibc.git] / elf / sprof.c
blob9567e4689fdbd2ecbdd3704e012e216b4f596c91
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdbool.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <ldsodefs.h>
37 #include <sys/gmon.h>
38 #include <sys/gmon_out.h>
39 #include <sys/mman.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
43 /* Get libc version number. */
44 #include "../version.h"
46 #define PACKAGE _libc_intl_domainname
49 #include <endian.h>
50 #if BYTE_ORDER == BIG_ENDIAN
51 # define byteorder ELFDATA2MSB
52 # define byteorder_name "big-endian"
53 #elif BYTE_ORDER == LITTLE_ENDIAN
54 # define byteorder ELFDATA2LSB
55 # define byteorder_name "little-endian"
56 #else
57 # error "Unknown BYTE_ORDER " BYTE_ORDER
58 # define byteorder ELFDATANONE
59 #endif
61 #ifndef PATH_MAX
62 # define PATH_MAX 1024
63 #endif
/* Defined outside this file.  Its result is stored as the expected
   profiling rate when a profiling data header is validated.  */
extern int __profile_frequency (void);

/* Version output is produced by our own routine, installed through the
   argp version hook.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
72 #define OPT_TEST 1
74 /* Definitions of arguments for argp functions. */
75 static const struct argp_option options[] =
77 { NULL, 0, NULL, 0, N_("Output selection:") },
78 { "call-pairs", 'c', NULL, 0,
79 N_("print list of count paths and their number of use") },
80 { "flat-profile", 'p', NULL, 0,
81 N_("generate flat profile with counts and ticks") },
82 { "graph", 'q', NULL, 0, N_("generate call graph") },
84 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
85 { NULL, 0, NULL, 0, NULL }
88 /* Short description of program. */
89 static const char doc[] = N_("Read and display shared object profiling data.\v\
90 For bug reporting instructions, please see:\n\
91 <http://www.gnu.org/software/libc/bugs.html>.\n");
93 /* Strings for arguments in help texts. */
94 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
96 /* Prototype for option handler. */
97 static error_t parse_opt (int key, char *arg, struct argp_state *state);
99 /* Data structure to communicate with argp functions. */
100 static struct argp argp =
102 options, parse_opt, args_doc, doc
/* Output modes.  Each mode is a separate bit so that several of them can
   be requested in one run.  */
static enum
{
  NONE = 0,
  FLAT_MODE = 1 << 0,           /* Selected by -p.  */
  CALL_GRAPH_MODE = 1 << 1,     /* Selected by -q.  */
  CALL_PAIRS = 1 << 2,          /* Selected by -c.  */

  /* Used by main when no mode option was given.  */
  DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
} mode;

/* Nonzero for testing; set by the hidden `--test' option.  */
static int do_test;
/* Structure describing calls: one element of the `froms' table built
   while the profiling data is loaded.  */
struct here_fromstruct
{
  /* The arc record in the mapped data file this entry refers to.  */
  volatile struct here_cg_arc_record *here;
  /* Previous content of the `tos' slot this entry was chained into.  */
  uint16_t link;
};
/* Type used to address the elements of the arc table.  It is basically
   the `gmon_cg_arc_record' format, but it includes the room for the tag
   and it uses real types.  The structure is packed, so the member order
   and sizes define the layout the records are read with.  */
struct here_cg_arc_record
{
  uintptr_t from_pc;    /* Address the call came from; 0 is special-cased
                           when the arcs are attributed to symbols.  */
  uintptr_t self_pc;    /* Address of the called code.  */
  uint32_t count;       /* Number of times the arc was used.  */
} __attribute__ ((packed));
struct known_symbol;

/* Element of the singly linked per-symbol lists of incoming and outgoing
   arcs.  */
struct arc_list
{
  /* Index into `sortsym' of the symbol at the other end of the arc, or
     (size_t) -1 if there is none.  */
  size_t idx;
  /* Accumulated use count of the arc.  */
  uintmax_t count;

  struct arc_list *next;
};

/* Memory pool from which the arc list elements are allocated.  */
static struct obstack ob_list;
/* Everything recorded about one function symbol of the shared object.  */
struct known_symbol
{
  const char *name;     /* Points into the string table of the object.  */
  uintptr_t addr;       /* Value of the symbol.  */
  size_t size;          /* Size of the symbol.  */
  bool weak;            /* Binding is STB_WEAK.  */
  bool hidden;          /* Visibility is not STV_DEFAULT.  */

  uintmax_t ticks;      /* Histogram ticks attributed to the symbol.  */
  uintmax_t calls;      /* Sum of the counts of all arcs ending here.  */

  struct arc_list *froms;       /* Incoming arcs.  */
  struct arc_list *tos;         /* Outgoing arcs.  */
};
/* Description of the shared object which is analyzed.  Filled in by
   load_shobj.  */
struct shobj
{
  const char *name;             /* User-provided name.  */

  struct link_map *map;         /* Handle returned by dlopen.  */
  const char *dynstrtab;        /* Dynamic string table of shared object.  */
  const char *soname;           /* Soname of shared object.  */

  /* Rounded bounds of the executable segments, load address included.  */
  uintptr_t lowpc;
  uintptr_t highpc;
  unsigned long int kcountsize; /* Text size divided by HISTFRACTION.  */
  size_t expected_size;         /* Expected size of profiling file.  */
  size_t tossize;               /* Text size divided by HASHFRACTION.  */
  size_t fromssize;             /* fromlimit times the size of a
                                   `struct here_fromstruct'.  */
  size_t fromlimit;             /* Arc limit, between MINARCS and MAXARCS.  */
  unsigned int hashfraction;    /* Always HASHFRACTION.  */
  int s_scale;                  /* Histogram scale factor.  */

  /* File mapping which covers the symbol table and its string table,
     together with pointers into it.  All are zero/NULL if no symbol
     table section could be found.  */
  void *symbol_map;
  size_t symbol_mapsize;
  const ElfW(Sym) *symtab;
  size_t symtab_size;           /* In bytes.  */
  const char *strtab;

  /* Memory pools; the symbol records live in ob_sym.  */
  struct obstack ob_str;
  struct obstack ob_sym;
};
/* Description of a loaded profiling data file.  Filled in by
   load_profdata.  */
struct profdata
{
  /* Start and length of the file mapping, needed to unmap it again.  */
  void *addr;
  off_t size;

  char *hist;                           /* Data following the gmon header;
                                           starts with a 32-bit tag.  */
  struct gmon_hist_hdr *hist_hdr;       /* Histogram header after the tag.  */
  uint16_t *kcount;                     /* Histogram counters.  */
  uint32_t narcs;                       /* Number of arcs in toset.  */
  struct here_cg_arc_record *data;      /* Arc records in the mapping.  */
  /* Tables allocated in one chunk; `froms' follows `tos'.  */
  uint16_t *tos;
  struct here_fromstruct *froms;
};
/* Root of the search tree which holds the symbols, ordered by address.  */
static void *symroot;
/* Array of all symbols in the order the tree walk delivers them, and the
   number of elements stored in it so far.  */
static struct known_symbol **sortsym;
static size_t symidx;
/* Sum of the ticks which could be attributed to symbols.  */
static uintmax_t total_ticks;

/* Loading and releasing of the two input files.  */
static struct shobj *load_shobj (const char *name);
static void unload_shobj (struct shobj *shobj);
static struct profdata *load_profdata (const char *name, struct shobj *shobj);
static void unload_profdata (struct profdata *profdata);

/* Evaluation of the loaded data.  */
static void read_symbols (struct shobj *shobj);
static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
static void count_calls (struct shobj *shobj, struct profdata *profdata);
static void add_arcs (struct profdata *profdata);

/* Output routines, one for each mode.  */
static void generate_flat_profile (struct profdata *profdata);
static void generate_call_graph (struct profdata *profdata);
static void generate_call_pair_list (struct profdata *profdata);
230 main (int argc, char *argv[])
232 const char *shobj;
233 const char *profdata;
234 struct shobj *shobj_handle;
235 struct profdata *profdata_handle;
236 int remaining;
238 setlocale (LC_ALL, "");
240 /* Initialize the message catalog. */
241 textdomain (_libc_intl_domainname);
243 /* Parse and process arguments. */
244 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
246 if (argc - remaining == 0 || argc - remaining > 2)
248 /* We need exactly two non-option parameter. */
249 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
250 program_invocation_short_name);
251 exit (1);
254 /* Get parameters. */
255 shobj = argv[remaining];
256 if (argc - remaining == 2)
257 profdata = argv[remaining + 1];
258 else
259 /* No filename for the profiling data given. We will determine it
260 from the soname of the shobj, later. */
261 profdata = NULL;
263 /* First see whether we can load the shared object. */
264 shobj_handle = load_shobj (shobj);
265 if (shobj_handle == NULL)
266 exit (1);
268 /* We can now determine the filename for the profiling data, if
269 nececessary. */
270 if (profdata == NULL)
272 char *newp;
273 const char *soname;
274 size_t soname_len;
276 soname = shobj_handle->soname ?: basename (shobj);
277 soname_len = strlen (soname);
278 newp = (char *) alloca (soname_len + sizeof ".profile");
279 stpcpy (mempcpy (newp, soname, soname_len), ".profile");
280 profdata = newp;
283 /* Now see whether the profiling data file matches the given object. */
284 profdata_handle = load_profdata (profdata, shobj_handle);
285 if (profdata_handle == NULL)
287 unload_shobj (shobj_handle);
289 exit (1);
292 read_symbols (shobj_handle);
294 /* Count the ticks. */
295 count_total_ticks (shobj_handle, profdata_handle);
297 /* Count the calls. */
298 count_calls (shobj_handle, profdata_handle);
300 /* Add the arc information. */
301 add_arcs (profdata_handle);
303 /* If no mode is specified fall back to the default mode. */
304 if (mode == NONE)
305 mode = DEFAULT_MODE;
307 /* Do some work. */
308 if (mode & FLAT_MODE)
309 generate_flat_profile (profdata_handle);
311 if (mode & CALL_GRAPH_MODE)
312 generate_call_graph (profdata_handle);
314 if (mode & CALL_PAIRS)
315 generate_call_pair_list (profdata_handle);
317 /* Free the resources. */
318 unload_shobj (shobj_handle);
319 unload_profdata (profdata_handle);
321 return 0;
325 /* Handle program arguments. */
326 static error_t
327 parse_opt (int key, char *arg, struct argp_state *state)
329 switch (key)
331 case 'c':
332 mode |= CALL_PAIRS;
333 break;
334 case 'p':
335 mode |= FLAT_MODE;
336 break;
337 case 'q':
338 mode |= CALL_GRAPH_MODE;
339 break;
340 case OPT_TEST:
341 do_test = 1;
342 break;
343 default:
344 return ARGP_ERR_UNKNOWN;
346 return 0;
350 /* Print the version information. */
351 static void
352 print_version (FILE *stream, struct argp_state *state)
354 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
355 fprintf (stream, gettext ("\
356 Copyright (C) %s Free Software Foundation, Inc.\n\
357 This is free software; see the source for copying conditions. There is NO\n\
358 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
360 "2005");
361 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
365 /* Note that we must not use `dlopen' etc. The shobj object must not
366 be loaded for use. */
367 static struct shobj *
368 load_shobj (const char *name)
370 struct link_map *map = NULL;
371 struct shobj *result;
372 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
373 ElfW(Addr) mapend = 0;
374 const ElfW(Phdr) *ph;
375 size_t textsize;
376 unsigned int log_hashfraction;
377 ElfW(Ehdr) *ehdr;
378 int fd;
379 ElfW(Shdr) *shdr;
380 size_t pagesize = getpagesize ();
382 /* Since we use dlopen() we must be prepared to work around the sometimes
383 strange lookup rules for the shared objects. If we have a file foo.so
384 in the current directory and the user specfies foo.so on the command
385 line (without specifying a directory) we should load the file in the
386 current directory even if a normal dlopen() call would read the other
387 file. We do this by adding a directory portion to the name. */
388 if (strchr (name, '/') == NULL)
390 char *load_name = (char *) alloca (strlen (name) + 3);
391 stpcpy (stpcpy (load_name, "./"), name);
393 map = (struct link_map *) dlopen (load_name, RTLD_LAZY | __RTLD_SPROF);
395 if (map == NULL)
397 map = (struct link_map *) dlopen (name, RTLD_LAZY | __RTLD_SPROF);
398 if (map == NULL)
400 error (0, errno, _("failed to load shared object `%s'"), name);
401 return NULL;
405 /* Prepare the result. */
406 result = (struct shobj *) calloc (1, sizeof (struct shobj));
407 if (result == NULL)
409 error (0, errno, _("cannot create internal descriptors"));
410 dlclose (map);
411 return NULL;
413 result->name = name;
414 result->map = map;
416 /* Compute the size of the sections which contain program code.
417 This must match the code in dl-profile.c (_dl_start_profile). */
418 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
419 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
421 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
422 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
423 & ~(pagesize - 1));
425 if (start < mapstart)
426 mapstart = start;
427 if (end > mapend)
428 mapend = end;
431 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
432 HISTFRACTION * sizeof (HISTCOUNTER));
433 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
434 HISTFRACTION * sizeof (HISTCOUNTER));
435 if (do_test)
436 printf ("load addr: %0#*" PRIxPTR "\n"
437 "lower bound PC: %0#*" PRIxPTR "\n"
438 "upper bound PC: %0#*" PRIxPTR "\n",
439 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
440 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
441 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
443 textsize = result->highpc - result->lowpc;
444 result->kcountsize = textsize / HISTFRACTION;
445 result->hashfraction = HASHFRACTION;
446 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
447 /* If HASHFRACTION is a power of two, mcount can use shifting
448 instead of integer division. Precompute shift amount. */
449 log_hashfraction = __builtin_ffs (result->hashfraction
450 * sizeof (struct here_fromstruct)) - 1;
451 else
452 log_hashfraction = -1;
453 if (do_test)
454 printf ("hashfraction = %d\ndivider = %Zu\n",
455 result->hashfraction,
456 result->hashfraction * sizeof (struct here_fromstruct));
457 result->tossize = textsize / HASHFRACTION;
458 result->fromlimit = textsize * ARCDENSITY / 100;
459 if (result->fromlimit < MINARCS)
460 result->fromlimit = MINARCS;
461 if (result->fromlimit > MAXARCS)
462 result->fromlimit = MAXARCS;
463 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
465 result->expected_size = (sizeof (struct gmon_hdr)
466 + 4 + sizeof (struct gmon_hist_hdr)
467 + result->kcountsize
468 + 4 + 4
469 + (result->fromssize
470 * sizeof (struct here_cg_arc_record)));
472 if (do_test)
473 printf ("expected size: %Zd\n", result->expected_size);
475 #define SCALE_1_TO_1 0x10000L
477 if (result->kcountsize < result->highpc - result->lowpc)
479 size_t range = result->highpc - result->lowpc;
480 size_t quot = range / result->kcountsize;
482 if (quot >= SCALE_1_TO_1)
483 result->s_scale = 1;
484 else if (quot >= SCALE_1_TO_1 / 256)
485 result->s_scale = SCALE_1_TO_1 / quot;
486 else if (range > ULONG_MAX / 256)
487 result->s_scale = ((SCALE_1_TO_1 * 256)
488 / (range / (result->kcountsize / 256)));
489 else
490 result->s_scale = ((SCALE_1_TO_1 * 256)
491 / ((range * 256) / result->kcountsize));
493 else
494 result->s_scale = SCALE_1_TO_1;
496 if (do_test)
497 printf ("s_scale: %d\n", result->s_scale);
499 /* Determine the dynamic string table. */
500 if (map->l_info[DT_STRTAB] == NULL)
501 result->dynstrtab = NULL;
502 else
503 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
504 if (do_test)
505 printf ("string table: %p\n", result->dynstrtab);
507 /* Determine the soname. */
508 if (map->l_info[DT_SONAME] == NULL)
509 result->soname = NULL;
510 else
511 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
512 if (do_test && result->soname != NULL)
513 printf ("soname: %s\n", result->soname);
515 /* Now we have to load the symbol table.
517 First load the section header table. */
518 ehdr = (ElfW(Ehdr) *) map->l_map_start;
520 /* Make sure we are on the right party. */
521 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
522 abort ();
524 /* And we need the shared object file descriptor again. */
525 fd = open (map->l_name, O_RDONLY);
526 if (fd == -1)
527 /* Dooh, this really shouldn't happen. We know the file is available. */
528 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
529 map->l_name);
531 /* Map the section header. */
532 size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr));
533 shdr = (ElfW(Shdr) *) alloca (size);
534 if (pread (fd, shdr, size, ehdr->e_shoff) != size)
535 error (EXIT_FAILURE, errno, _("reading of section headers failed"));
537 /* Get the section header string table. */
538 char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size);
539 if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size,
540 shdr[ehdr->e_shstrndx].sh_offset)
541 != shdr[ehdr->e_shstrndx].sh_size)
542 error (EXIT_FAILURE, errno,
543 _("reading of section header string table failed"));
545 /* Search for the ".symtab" section. */
546 ElfW(Shdr) *symtab_entry = NULL;
547 ElfW(Shdr) *debuglink_entry = NULL;
548 for (int idx = 0; idx < ehdr->e_shnum; ++idx)
549 if (shdr[idx].sh_type == SHT_SYMTAB
550 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
552 symtab_entry = &shdr[idx];
553 break;
555 else if (shdr[idx].sh_type == SHT_PROGBITS
556 && strcmp (shstrtab + shdr[idx].sh_name, ".gnu_debuglink") == 0)
557 debuglink_entry = &shdr[idx];
559 /* Get the file name of the debuginfo file if necessary. */
560 int symfd = fd;
561 if (symtab_entry == NULL && debuglink_entry != NULL)
563 size_t size = debuglink_entry->sh_size;
564 char *debuginfo_fname = (char *) alloca (size + 1);
565 debuginfo_fname[size] = '\0';
566 if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset)
567 != size)
569 fprintf (stderr, _("*** Cannot read debuginfo file name: %m\n"));
570 goto no_debuginfo;
573 static const char procpath[] = "/proc/self/fd/%d";
574 char origprocname[sizeof (procpath) + sizeof (int) * 3];
575 snprintf (origprocname, sizeof (origprocname), procpath, fd);
576 char *origlink = (char *) alloca (PATH_MAX + 1);
577 origlink[PATH_MAX] = '\0';
578 if (readlink (origprocname, origlink, PATH_MAX) == -1)
579 goto no_debuginfo;
581 /* Try to find the actual file. There are three places:
582 1. the same directory the DSO is in
583 2. in a subdir named .debug of the directory the DSO is in
584 3. in /usr/lib/debug/PATH-OF-DSO
586 char *realname = canonicalize_file_name (origlink);
587 char *cp = NULL;
588 if (realname == NULL || (cp = strrchr (realname, '/')) == NULL)
589 error (EXIT_FAILURE, errno, _("cannot determine file name"));
591 /* Leave the last slash in place. */
592 *++cp = '\0';
594 /* First add the debuginfo file name only. */
595 static const char usrlibdebug[]= "/usr/lib/debug/";
596 char *workbuf = (char *) alloca (sizeof (usrlibdebug)
597 + (cp - realname)
598 + strlen (debuginfo_fname));
599 strcpy (stpcpy (workbuf, realname), debuginfo_fname);
601 int fd2 = open (workbuf, O_RDONLY);
602 if (fd2 == -1)
604 strcpy (stpcpy (stpcpy (workbuf, realname), ".debug/"),
605 debuginfo_fname);
606 fd2 = open (workbuf, O_RDONLY);
607 if (fd2 == -1)
609 strcpy (stpcpy (stpcpy (workbuf, usrlibdebug), realname),
610 debuginfo_fname);
611 fd2 = open (workbuf, O_RDONLY);
615 if (fd2 != -1)
617 ElfW(Ehdr) ehdr2;
619 /* Read the ELF header. */
620 if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2))
621 error (EXIT_FAILURE, errno,
622 _("reading of ELF header failed"));
624 /* Map the section header. */
625 size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr));
626 ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size);
627 if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size)
628 error (EXIT_FAILURE, errno,
629 _("reading of section headers failed"));
631 /* Get the section header string table. */
632 shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size);
633 if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size,
634 shdr2[ehdr2.e_shstrndx].sh_offset)
635 != shdr2[ehdr2.e_shstrndx].sh_size)
636 error (EXIT_FAILURE, errno,
637 _("reading of section header string table failed"));
639 /* Search for the ".symtab" section. */
640 for (int idx = 0; idx < ehdr2.e_shnum; ++idx)
641 if (shdr2[idx].sh_type == SHT_SYMTAB
642 && strcmp (shstrtab + shdr2[idx].sh_name, ".symtab") == 0)
644 symtab_entry = &shdr2[idx];
645 shdr = shdr2;
646 symfd = fd2;
647 break;
650 if (fd2 != symfd)
651 close (fd2);
655 no_debuginfo:
656 if (symtab_entry == NULL)
658 fprintf (stderr, _("\
659 *** The file `%s' is stripped: no detailed analysis possible\n"),
660 name);
661 result->symtab = NULL;
662 result->strtab = NULL;
664 else
666 ElfW(Off) min_offset, max_offset;
667 ElfW(Shdr) *strtab_entry;
669 strtab_entry = &shdr[symtab_entry->sh_link];
671 /* Find the minimum and maximum offsets that include both the symbol
672 table and the string table. */
673 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
675 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
676 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
678 else
680 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
681 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
684 result->symbol_map = mmap (NULL, max_offset - min_offset,
685 PROT_READ, MAP_SHARED|MAP_FILE, symfd,
686 min_offset);
687 if (result->symbol_map == MAP_FAILED)
688 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
690 result->symtab
691 = (const ElfW(Sym) *) ((const char *) result->symbol_map
692 + (symtab_entry->sh_offset - min_offset));
693 result->symtab_size = symtab_entry->sh_size;
694 result->strtab = ((const char *) result->symbol_map
695 + (strtab_entry->sh_offset - min_offset));
696 result->symbol_mapsize = max_offset - min_offset;
699 /* Free the descriptor for the shared object. */
700 close (fd);
701 if (symfd != fd)
702 close (symfd);
704 return result;
708 static void
709 unload_shobj (struct shobj *shobj)
711 munmap (shobj->symbol_map, shobj->symbol_mapsize);
712 dlclose (shobj->map);
716 static struct profdata *
717 load_profdata (const char *name, struct shobj *shobj)
719 struct profdata *result;
720 int fd;
721 struct stat st;
722 void *addr;
723 struct gmon_hdr gmon_hdr;
724 struct gmon_hist_hdr hist_hdr;
725 uint32_t *narcsp;
726 size_t fromlimit;
727 struct here_cg_arc_record *data;
728 struct here_fromstruct *froms;
729 uint16_t *tos;
730 size_t fromidx;
731 size_t idx;
733 fd = open (name, O_RDONLY);
734 if (fd == -1)
736 char *ext_name;
738 if (errno != ENOENT || strchr (name, '/') != NULL)
739 /* The file exists but we are not allowed to read it or the
740 file does not exist and the name includes a path
741 specification.. */
742 return NULL;
744 /* A file with the given name does not exist in the current
745 directory, try it in the default location where the profiling
746 files are created. */
747 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
748 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
749 name = ext_name;
751 fd = open (ext_name, O_RDONLY);
752 if (fd == -1)
754 /* Even this file does not exist. */
755 error (0, errno, _("cannot load profiling data"));
756 return NULL;
760 /* We have found the file, now make sure it is the right one for the
761 data file. */
762 if (fstat (fd, &st) < 0)
764 error (0, errno, _("while stat'ing profiling data file"));
765 close (fd);
766 return NULL;
769 if ((size_t) st.st_size != shobj->expected_size)
771 error (0, 0,
772 _("profiling data file `%s' does not match shared object `%s'"),
773 name, shobj->name);
774 close (fd);
775 return NULL;
778 /* The data file is most probably the right one for our shared
779 object. Map it now. */
780 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
781 if (addr == MAP_FAILED)
783 error (0, errno, _("failed to mmap the profiling data file"));
784 close (fd);
785 return NULL;
788 /* We don't need the file desriptor anymore. */
789 if (close (fd) < 0)
791 error (0, errno, _("error while closing the profiling data file"));
792 munmap (addr, st.st_size);
793 return NULL;
796 /* Prepare the result. */
797 result = (struct profdata *) calloc (1, sizeof (struct profdata));
798 if (result == NULL)
800 error (0, errno, _("cannot create internal descriptor"));
801 munmap (addr, st.st_size);
802 return NULL;
805 /* Store the address and size so that we can later free the resources. */
806 result->addr = addr;
807 result->size = st.st_size;
809 /* Pointer to data after the header. */
810 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
811 result->hist_hdr = (struct gmon_hist_hdr *) ((char *) result->hist
812 + sizeof (uint32_t));
813 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
814 + sizeof (struct gmon_hist_hdr));
816 /* Compute pointer to array of the arc information. */
817 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
818 + sizeof (uint32_t));
819 result->narcs = *narcsp;
820 result->data = (struct here_cg_arc_record *) ((char *) narcsp
821 + sizeof (uint32_t));
823 /* Create the gmon_hdr we expect or write. */
824 memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
825 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
826 *(int32_t *) gmon_hdr.version = GMON_SHOBJ_VERSION;
828 /* Create the hist_hdr we expect or write. */
829 *(char **) hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
830 *(char **) hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
831 if (do_test)
832 printf ("low_pc = %p\nhigh_pc = %p\n",
833 *(char **) hist_hdr.low_pc, *(char **) hist_hdr.high_pc);
834 *(int32_t *) hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
835 *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
836 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
837 hist_hdr.dimen_abbrev = 's';
839 /* Test whether the header of the profiling data is ok. */
840 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
841 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
842 || memcmp (result->hist_hdr, &hist_hdr,
843 sizeof (struct gmon_hist_hdr)) != 0
844 || narcsp[-1] != GMON_TAG_CG_ARC)
846 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
847 name, shobj->name);
848 if (do_test)
850 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0)
851 puts ("gmon_hdr differs");
852 if (*(uint32_t *) result->hist != GMON_TAG_TIME_HIST)
853 puts ("result->hist differs");
854 if (memcmp (result->hist_hdr, &hist_hdr,
855 sizeof (struct gmon_hist_hdr)) != 0)
856 puts ("hist_hdr differs");
857 if (narcsp[-1] != GMON_TAG_CG_ARC)
858 puts ("narcsp[-1] differs");
860 free (result);
861 munmap (addr, st.st_size);
862 return NULL;
865 /* We are pretty sure now that this is a correct input file. Set up
866 the remaining information in the result structure and return. */
867 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
868 if (result->tos == NULL)
870 error (0, errno, _("cannot create internal descriptor"));
871 munmap (addr, st.st_size);
872 free (result);
873 return NULL;
876 result->froms = (struct here_fromstruct *) ((char *) result->tos
877 + shobj->tossize);
878 fromidx = 0;
880 /* Now we have to process all the arc count entries. */
881 fromlimit = shobj->fromlimit;
882 data = result->data;
883 froms = result->froms;
884 tos = result->tos;
885 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
887 size_t to_index;
888 size_t newfromidx;
889 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
890 newfromidx = fromidx++;
891 froms[newfromidx].here = &data[idx];
892 froms[newfromidx].link = tos[to_index];
893 tos[to_index] = newfromidx;
896 return result;
900 static void
901 unload_profdata (struct profdata *profdata)
903 free (profdata->tos);
904 munmap (profdata->addr, profdata->size);
905 free (profdata);
909 static void
910 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
912 volatile uint16_t *kcount = profdata->kcount;
913 size_t maxkidx = shobj->kcountsize;
914 size_t factor = 2 * (65536 / shobj->s_scale);
915 size_t kidx = 0;
916 size_t sidx = 0;
918 while (sidx < symidx)
920 uintptr_t start = sortsym[sidx]->addr;
921 uintptr_t end = start + sortsym[sidx]->size;
923 while (kidx < maxkidx && factor * kidx < start)
924 ++kidx;
925 if (kidx == maxkidx)
926 break;
928 while (kidx < maxkidx && factor * kidx < end)
929 sortsym[sidx]->ticks += kcount[kidx++];
930 if (kidx == maxkidx)
931 break;
933 total_ticks += sortsym[sidx++]->ticks;
938 static size_t
939 find_symbol (uintptr_t addr)
941 size_t sidx = 0;
943 while (sidx < symidx)
945 uintptr_t start = sortsym[sidx]->addr;
946 uintptr_t end = start + sortsym[sidx]->size;
948 if (addr >= start && addr < end)
949 return sidx;
951 if (addr < start)
952 break;
954 ++sidx;
957 return (size_t) -1l;
961 static void
962 count_calls (struct shobj *shobj, struct profdata *profdata)
964 struct here_cg_arc_record *data = profdata->data;
965 uint32_t narcs = profdata->narcs;
966 uint32_t cnt;
968 for (cnt = 0; cnt < narcs; ++cnt)
970 uintptr_t here = data[cnt].self_pc;
971 size_t symbol_idx;
973 /* Find the symbol for this address. */
974 symbol_idx = find_symbol (here);
975 if (symbol_idx != (size_t) -1l)
976 sortsym[symbol_idx]->calls += data[cnt].count;
981 static int
982 symorder (const void *o1, const void *o2)
984 const struct known_symbol *p1 = (const struct known_symbol *) o1;
985 const struct known_symbol *p2 = (const struct known_symbol *) o2;
987 return p1->addr - p2->addr;
991 static void
992 printsym (const void *node, VISIT value, int level)
994 if (value == leaf || value == postorder)
995 sortsym[symidx++] = *(struct known_symbol **) node;
999 static void
1000 read_symbols (struct shobj *shobj)
1002 int n = 0;
1004 /* Initialize the obstacks. */
1005 #define obstack_chunk_alloc malloc
1006 #define obstack_chunk_free free
1007 obstack_init (&shobj->ob_str);
1008 obstack_init (&shobj->ob_sym);
1009 obstack_init (&ob_list);
1011 /* Process the symbols. */
1012 if (shobj->symtab != NULL)
1014 const ElfW(Sym) *sym = shobj->symtab;
1015 const ElfW(Sym) *sym_end
1016 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
1017 for (; sym < sym_end; sym++)
1018 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
1019 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
1020 && sym->st_size != 0)
1022 struct known_symbol **existp;
1023 struct known_symbol *newsym
1024 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
1025 sizeof (*newsym));
1026 if (newsym == NULL)
1027 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
1029 newsym->name = &shobj->strtab[sym->st_name];
1030 newsym->addr = sym->st_value;
1031 newsym->size = sym->st_size;
1032 newsym->weak = ELFW(ST_BIND) (sym->st_info) == STB_WEAK;
1033 newsym->hidden = (ELFW(ST_VISIBILITY) (sym->st_other)
1034 != STV_DEFAULT);
1035 newsym->ticks = 0;
1036 newsym->calls = 0;
1038 existp = tfind (newsym, &symroot, symorder);
1039 if (existp == NULL)
1041 /* New function. */
1042 tsearch (newsym, &symroot, symorder);
1043 ++n;
1045 else
1047 /* The function is already defined. See whether we have
1048 a better name here. */
1049 if (((*existp)->hidden && !newsym->hidden)
1050 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1051 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1052 && ((*existp)->weak && !newsym->weak)))
1053 *existp = newsym;
1054 else
1055 /* We don't need the allocated memory. */
1056 obstack_free (&shobj->ob_sym, newsym);
1060 else
1062 /* Blarg, the binary is stripped. We have to rely on the
1063 information contained in the dynamic section of the object. */
1064 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
1065 l_info[DT_SYMTAB]);
1066 const char *strtab = (const char *) D_PTR (shobj->map,
1067 l_info[DT_STRTAB]);
1069 /* We assume that the string table follows the symbol table,
1070 because there is no way in ELF to know the size of the
1071 dynamic symbol table without looking at the section headers. */
1072 while ((void *) symtab < (void *) strtab)
1074 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
1075 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
1076 && symtab->st_size != 0)
1078 struct known_symbol *newsym;
1079 struct known_symbol **existp;
1081 newsym =
1082 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
1083 sizeof (*newsym));
1084 if (newsym == NULL)
1085 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
1087 newsym->name = &strtab[symtab->st_name];
1088 newsym->addr = symtab->st_value;
1089 newsym->size = symtab->st_size;
1090 newsym->weak = ELFW(ST_BIND) (symtab->st_info) == STB_WEAK;
1091 newsym->hidden = (ELFW(ST_VISIBILITY) (symtab->st_other)
1092 != STV_DEFAULT);
1093 newsym->ticks = 0;
1094 newsym->froms = NULL;
1095 newsym->tos = NULL;
1097 existp = tfind (newsym, &symroot, symorder);
1098 if (existp == NULL)
1100 /* New function. */
1101 tsearch (newsym, &symroot, symorder);
1102 ++n;
1104 else
1106 /* The function is already defined. See whether we have
1107 a better name here. */
1108 if (((*existp)->hidden && !newsym->hidden)
1109 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1110 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1111 && ((*existp)->weak && !newsym->weak)))
1112 *existp = newsym;
1113 else
1114 /* We don't need the allocated memory. */
1115 obstack_free (&shobj->ob_sym, newsym);
1119 ++symtab;
1123 sortsym = malloc (n * sizeof (struct known_symbol *));
1124 if (sortsym == NULL)
1125 abort ();
1127 twalk (symroot, printsym);
1131 static void
1132 add_arcs (struct profdata *profdata)
1134 uint32_t narcs = profdata->narcs;
1135 struct here_cg_arc_record *data = profdata->data;
1136 uint32_t cnt;
1138 for (cnt = 0; cnt < narcs; ++cnt)
1140 /* First add the incoming arc. */
1141 size_t sym_idx = find_symbol (data[cnt].self_pc);
1143 if (sym_idx != (size_t) -1l)
1145 struct known_symbol *sym = sortsym[sym_idx];
1146 struct arc_list *runp = sym->froms;
1148 while (runp != NULL
1149 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1150 || (data[cnt].from_pc != 0
1151 && (runp->idx == (size_t) -1l
1152 || data[cnt].from_pc < sortsym[runp->idx]->addr
1153 || (data[cnt].from_pc
1154 >= (sortsym[runp->idx]->addr
1155 + sortsym[runp->idx]->size))))))
1156 runp = runp->next;
1158 if (runp == NULL)
1160 /* We need a new entry. */
1161 struct arc_list *newp = (struct arc_list *)
1162 obstack_alloc (&ob_list, sizeof (struct arc_list));
1164 if (data[cnt].from_pc == 0)
1165 newp->idx = (size_t) -1l;
1166 else
1167 newp->idx = find_symbol (data[cnt].from_pc);
1168 newp->count = data[cnt].count;
1169 newp->next = sym->froms;
1170 sym->froms = newp;
1172 else
1173 /* Increment the counter for the found entry. */
1174 runp->count += data[cnt].count;
1177 /* Now add it to the appropriate outgoing list. */
1178 sym_idx = find_symbol (data[cnt].from_pc);
1179 if (sym_idx != (size_t) -1l)
1181 struct known_symbol *sym = sortsym[sym_idx];
1182 struct arc_list *runp = sym->tos;
1184 while (runp != NULL
1185 && (runp->idx == (size_t) -1l
1186 || data[cnt].self_pc < sortsym[runp->idx]->addr
1187 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1188 + sortsym[runp->idx]->size)))
1189 runp = runp->next;
1191 if (runp == NULL)
1193 /* We need a new entry. */
1194 struct arc_list *newp = (struct arc_list *)
1195 obstack_alloc (&ob_list, sizeof (struct arc_list));
1197 newp->idx = find_symbol (data[cnt].self_pc);
1198 newp->count = data[cnt].count;
1199 newp->next = sym->tos;
1200 sym->tos = newp;
1202 else
1203 /* Increment the counter for the found entry. */
1204 runp->count += data[cnt].count;
1210 static int
1211 countorder (const void *p1, const void *p2)
1213 struct known_symbol *s1 = (struct known_symbol *) p1;
1214 struct known_symbol *s2 = (struct known_symbol *) p2;
1216 if (s1->ticks != s2->ticks)
1217 return (int) (s2->ticks - s1->ticks);
1219 if (s1->calls != s2->calls)
1220 return (int) (s2->calls - s1->calls);
1222 return strcmp (s1->name, s2->name);
1226 static double tick_unit;
1227 static uintmax_t cumu_ticks;
1229 static void
1230 printflat (const void *node, VISIT value, int level)
1232 if (value == leaf || value == postorder)
1234 struct known_symbol *s = *(struct known_symbol **) node;
1236 cumu_ticks += s->ticks;
1238 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1239 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1240 tick_unit * cumu_ticks,
1241 tick_unit * s->ticks,
1242 s->calls,
1243 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1244 /* FIXME: don't know about called functions. */
1245 s->name);
/* ARGSUSED */
/* Node destructor for tdestroy that deliberately does nothing: the
   tree built for the flat profile stores the sortsym entries
   themselves, so tearing the tree down must not release them.  */
static void
freenoop (void *p)
{
  /* Intentionally empty; P is not owned by the tree.  */
  (void) p;
}
1257 static void
1258 generate_flat_profile (struct profdata *profdata)
1260 size_t n;
1261 void *data = NULL;
1263 tick_unit = 1.0 / *(uint32_t *) profdata->hist_hdr->prof_rate;
1265 printf ("Flat profile:\n\n"
1266 "Each sample counts as %g %s.\n",
1267 tick_unit, profdata->hist_hdr->dimen);
1268 fputs (" % cumulative self self total\n"
1269 " time seconds seconds calls us/call us/call name\n",
1270 stdout);
1272 for (n = 0; n < symidx; ++n)
1273 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1274 tsearch (sortsym[n], &data, countorder);
1276 twalk (data, printflat);
1278 tdestroy (data, freenoop);
1282 static void
1283 generate_call_graph (struct profdata *profdata)
1285 size_t cnt;
1287 puts ("\nindex % time self children called name\n");
1289 for (cnt = 0; cnt < symidx; ++cnt)
1290 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1292 struct arc_list *runp;
1293 size_t n;
1295 /* First print the from-information. */
1296 runp = sortsym[cnt]->froms;
1297 while (runp != NULL)
1299 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1300 (runp->idx != (size_t) -1l
1301 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1302 0.0, /* FIXME: what's time for the children, recursive */
1303 runp->count, sortsym[cnt]->calls,
1304 (runp->idx != (size_t) -1l ?
1305 sortsym[runp->idx]->name : "<UNKNOWN>"));
1307 if (runp->idx != (size_t) -1l)
1308 printf (" [%Zd]", runp->idx);
1309 putchar_unlocked ('\n');
1311 runp = runp->next;
1314 /* Info abount the function itself. */
1315 n = printf ("[%Zu]", cnt);
1316 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1317 (int) (7 - n), " ",
1318 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1319 sortsym[cnt]->ticks * tick_unit,
1320 0.0, /* FIXME: what's time for the children, recursive */
1321 sortsym[cnt]->calls,
1322 sortsym[cnt]->name, cnt);
1324 /* Info about the functions this function calls. */
1325 runp = sortsym[cnt]->tos;
1326 while (runp != NULL)
1328 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1329 (runp->idx != (size_t) -1l
1330 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1331 0.0, /* FIXME: what's time for the children, recursive */
1332 runp->count);
1334 if (runp->idx != (size_t) -1l)
1335 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1336 sortsym[runp->idx]->calls,
1337 sortsym[runp->idx]->name,
1338 runp->idx);
1339 else
1340 fputs ("??? <UNKNOWN>\n\n", stdout);
1342 runp = runp->next;
1345 fputs ("-----------------------------------------------\n", stdout);
1350 static void
1351 generate_call_pair_list (struct profdata *profdata)
1353 size_t cnt;
1355 for (cnt = 0; cnt < symidx; ++cnt)
1356 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1358 struct arc_list *runp;
1360 /* First print the incoming arcs. */
1361 runp = sortsym[cnt]->froms;
1362 while (runp != NULL)
1364 if (runp->idx == (size_t) -1l)
1365 printf ("\
1366 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1367 sortsym[cnt]->name, runp->count);
1368 runp = runp->next;
1371 /* Next the outgoing arcs. */
1372 runp = sortsym[cnt]->tos;
1373 while (runp != NULL)
1375 printf ("%-34s %-34s %9" PRIdMAX "\n",
1376 sortsym[cnt]->name,
1377 (runp->idx != (size_t) -1l
1378 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1379 runp->count);
1380 runp = runp->next;