Fix unwind info in x86 memcmp-ssse3.
[glibc.git] / elf / sprof.c
blob96d854fb70c735d5717ee691e193fddf8613361b
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997-2008, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
21 #include <argp.h>
22 #include <dlfcn.h>
23 #include <elf.h>
24 #include <error.h>
25 #include <fcntl.h>
26 #include <inttypes.h>
27 #include <libintl.h>
28 #include <locale.h>
29 #include <obstack.h>
30 #include <search.h>
31 #include <stdbool.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <ldsodefs.h>
37 #include <sys/gmon.h>
38 #include <sys/gmon_out.h>
39 #include <sys/mman.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
43 /* Get libc version number. */
44 #include "../version.h"
46 #define PACKAGE _libc_intl_domainname
49 #include <endian.h>
50 #if BYTE_ORDER == BIG_ENDIAN
51 # define byteorder ELFDATA2MSB
52 # define byteorder_name "big-endian"
53 #elif BYTE_ORDER == LITTLE_ENDIAN
54 # define byteorder ELFDATA2LSB
55 # define byteorder_name "little-endian"
56 #else
57 # error "Unknown BYTE_ORDER " BYTE_ORDER
58 # define byteorder ELFDATANONE
59 #endif
61 #ifndef PATH_MAX
62 # define PATH_MAX 1024
63 #endif
66 extern int __profile_frequency (void);
68 /* Name and version of program. */
69 static void print_version (FILE *stream, struct argp_state *state);
70 void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
72 #define OPT_TEST 1
74 /* Definitions of arguments for argp functions. */
75 static const struct argp_option options[] =
77 { NULL, 0, NULL, 0, N_("Output selection:") },
78 { "call-pairs", 'c', NULL, 0,
79 N_("print list of count paths and their number of use") },
80 { "flat-profile", 'p', NULL, 0,
81 N_("generate flat profile with counts and ticks") },
82 { "graph", 'q', NULL, 0, N_("generate call graph") },
84 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
85 { NULL, 0, NULL, 0, NULL }
88 /* Short description of program. */
89 static const char doc[] = N_("Read and display shared object profiling data.");
90 //For bug reporting instructions, please see:\n
91 //<http://www.gnu.org/software/libc/bugs.html>.\n");
93 /* Strings for arguments in help texts. */
94 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
96 /* Prototype for option handler. */
97 static error_t parse_opt (int key, char *arg, struct argp_state *state);
99 /* Function to print some extra text in the help message. */
100 static char *more_help (int key, const char *text, void *input);
102 /* Data structure to communicate with argp functions. */
103 static struct argp argp =
105 options, parse_opt, args_doc, doc, NULL, more_help
/* Bit set of the reports requested on the command line.  It stays NONE
   until an output option is seen; main then substitutes DEFAULT_MODE.  */
static enum
{
  NONE = 0x0,
  FLAT_MODE = 0x1,		/* Selected by -p.  */
  CALL_GRAPH_MODE = 0x2,	/* Selected by -q.  */
  CALL_PAIRS = 0x4,		/* Selected by -c.  */

  DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
} mode;
120 /* Nonzero for testing. */
121 static int do_test;
/* Structure describing calls: one element of the `froms' chain table
   built by load_profdata.  */
struct here_fromstruct
{
  /* The arc record this element refers to.  */
  volatile struct here_cg_arc_record *here;
  /* Value taken from the `tos' slot when this element was chained in.  */
  uint16_t link;
};
/* Element type of the arc table found in the profiling data file.
   This is basically the `gmon_cg_arc_record' format but it includes
   the room for the tag and it uses real types.  The structure is
   packed so that no padding is inserted between the members.  */
struct here_cg_arc_record
{
  uintptr_t from_pc;	/* Address the call was made from.  */
  uintptr_t self_pc;	/* Address of the callee.  */
  uint32_t count;	/* Number of times the arc was taken.  */
} __attribute__ ((packed));
struct known_symbol;

/* Node of a singly linked list of call arcs attached to a symbol.  */
struct arc_list
{
  /* NOTE(review): presumably an index into the sorted symbol table;
     confirm against add_arcs.  */
  size_t idx;
  /* Counter associated with this arc.  */
  uintmax_t count;

  /* Next list element or NULL.  */
  struct arc_list *next;
};
150 static struct obstack ob_list;
/* Everything sprof records about one symbol of the shared object.  */
struct known_symbol
{
  const char *name;	/* Points into the object's string table.  */
  uintptr_t addr;	/* The symbol's st_value.  */
  size_t size;		/* The symbol's st_size.  */
  bool weak;		/* True if the binding is STB_WEAK.  */
  bool hidden;		/* True if the visibility is not STV_DEFAULT.  */

  uintmax_t ticks;	/* Histogram ticks attributed to the symbol.  */
  uintmax_t calls;	/* Sum of the counts of arcs ending here.  */

  /* Arc lists of the symbol.  NOTE(review): presumably filled by
     add_arcs with callers and callees respectively; confirm there.  */
  struct arc_list *froms;
  struct arc_list *tos;
};
/* Descriptor of the shared object being analyzed.  It is created and
   filled in by load_shobj.  */
struct shobj
{
  const char *name;		/* User-provided name.  */

  struct link_map *map;		/* Handle returned by dlopen.  */
  const char *dynstrtab;	/* Dynamic string table of shared object.  */
  const char *soname;		/* Soname of shared object, or NULL.  */

  /* Bounds of the executable segments, including the load address,
     rounded to the histogram granularity.  */
  uintptr_t lowpc;
  uintptr_t highpc;
  unsigned long int kcountsize;	/* Size of the histogram in bytes.  */
  size_t expected_size;		/* Expected size of profiling file.  */
  size_t tossize;		/* Size of the `tos' table in bytes.  */
  size_t fromssize;		/* Size of the `froms' table in bytes.  */
  size_t fromlimit;		/* Maximum number of arcs.  */
  unsigned int hashfraction;	/* Set to HASHFRACTION.  */
  int s_scale;			/* Histogram scale factor.  */

  /* Mapping of the file region which covers the symbol table and its
     string table.  These stay NULL/0 for a stripped object.  */
  void *symbol_map;
  size_t symbol_mapsize;
  const ElfW(Sym) *symtab;
  size_t symtab_size;		/* Size of the symbol table in bytes.  */
  const char *strtab;

  /* Obstacks initialized by read_symbols; `ob_sym' holds the
     known_symbol records.  */
  struct obstack ob_str;
  struct obstack ob_sym;
};
/* Histogram header using real types.  load_profdata verifies at run
   time that the size and the member offsets agree with
   `struct gmon_hist_hdr'.  */
struct real_gmon_hist_hdr
{
  char *low_pc;		/* Lower bound of the profiled range.  */
  char *high_pc;	/* Upper bound of the profiled range.  */
  int32_t hist_size;	/* Number of histogram counters.  */
  int32_t prof_rate;	/* Profiling clock rate.  */
  char dimen[15];	/* Name of the unit, e.g. "seconds".  */
  char dimen_abbrev;	/* Abbreviation of the unit, e.g. 's'.  */
};
/* Descriptor of a loaded profiling data file; see load_profdata.  */
struct profdata
{
  void *addr;			/* Start of the mapped file.  */
  off_t size;			/* Size of the mapping.  */

  char *hist;			/* Data following the gmon header.  */
  struct real_gmon_hist_hdr *hist_hdr;	/* Histogram header.  */
  uint16_t *kcount;		/* Histogram counters.  */
  uint32_t narcs;		/* Number of arcs in toset.  */
  struct here_cg_arc_record *data;	/* Arc records in the file.  */
  uint16_t *tos;		/* Allocated table; `froms' follows it.  */
  struct here_fromstruct *froms;	/* Points into the `tos' block.  */
};
223 /* Search tree for symbols. */
224 static void *symroot;
225 static struct known_symbol **sortsym;
226 static size_t symidx;
227 static uintmax_t total_ticks;
229 /* Prototypes for local functions. */
230 static struct shobj *load_shobj (const char *name);
231 static void unload_shobj (struct shobj *shobj);
232 static struct profdata *load_profdata (const char *name, struct shobj *shobj);
233 static void unload_profdata (struct profdata *profdata);
234 static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
235 static void count_calls (struct shobj *shobj, struct profdata *profdata);
236 static void read_symbols (struct shobj *shobj);
237 static void add_arcs (struct profdata *profdata);
238 static void generate_flat_profile (struct profdata *profdata);
239 static void generate_call_graph (struct profdata *profdata);
240 static void generate_call_pair_list (struct profdata *profdata);
244 main (int argc, char *argv[])
246 const char *shobj;
247 const char *profdata;
248 struct shobj *shobj_handle;
249 struct profdata *profdata_handle;
250 int remaining;
252 setlocale (LC_ALL, "");
254 /* Initialize the message catalog. */
255 textdomain (_libc_intl_domainname);
257 /* Parse and process arguments. */
258 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
260 if (argc - remaining == 0 || argc - remaining > 2)
262 /* We need exactly two non-option parameter. */
263 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
264 program_invocation_short_name);
265 exit (1);
268 /* Get parameters. */
269 shobj = argv[remaining];
270 if (argc - remaining == 2)
271 profdata = argv[remaining + 1];
272 else
273 /* No filename for the profiling data given. We will determine it
274 from the soname of the shobj, later. */
275 profdata = NULL;
277 /* First see whether we can load the shared object. */
278 shobj_handle = load_shobj (shobj);
279 if (shobj_handle == NULL)
280 exit (1);
282 /* We can now determine the filename for the profiling data, if
283 nececessary. */
284 if (profdata == NULL)
286 char *newp;
287 const char *soname;
288 size_t soname_len;
290 soname = shobj_handle->soname ?: basename (shobj);
291 soname_len = strlen (soname);
292 newp = (char *) alloca (soname_len + sizeof ".profile");
293 stpcpy (mempcpy (newp, soname, soname_len), ".profile");
294 profdata = newp;
297 /* Now see whether the profiling data file matches the given object. */
298 profdata_handle = load_profdata (profdata, shobj_handle);
299 if (profdata_handle == NULL)
301 unload_shobj (shobj_handle);
303 exit (1);
306 read_symbols (shobj_handle);
308 /* Count the ticks. */
309 count_total_ticks (shobj_handle, profdata_handle);
311 /* Count the calls. */
312 count_calls (shobj_handle, profdata_handle);
314 /* Add the arc information. */
315 add_arcs (profdata_handle);
317 /* If no mode is specified fall back to the default mode. */
318 if (mode == NONE)
319 mode = DEFAULT_MODE;
321 /* Do some work. */
322 if (mode & FLAT_MODE)
323 generate_flat_profile (profdata_handle);
325 if (mode & CALL_GRAPH_MODE)
326 generate_call_graph (profdata_handle);
328 if (mode & CALL_PAIRS)
329 generate_call_pair_list (profdata_handle);
331 /* Free the resources. */
332 unload_shobj (shobj_handle);
333 unload_profdata (profdata_handle);
335 return 0;
339 /* Handle program arguments. */
340 static error_t
341 parse_opt (int key, char *arg, struct argp_state *state)
343 switch (key)
345 case 'c':
346 mode |= CALL_PAIRS;
347 break;
348 case 'p':
349 mode |= FLAT_MODE;
350 break;
351 case 'q':
352 mode |= CALL_GRAPH_MODE;
353 break;
354 case OPT_TEST:
355 do_test = 1;
356 break;
357 default:
358 return ARGP_ERR_UNKNOWN;
360 return 0;
/* Help filter for argp.  For ARGP_KEY_HELP_EXTRA a newly allocated
   copy of the (translated) bug reporting note is returned; for every
   other KEY the TEXT passed in is returned unchanged.  INPUT is not
   used.  */
static char *
more_help (int key, const char *text, void *input)
{
  if (key != ARGP_KEY_HELP_EXTRA)
    return (char *) text;

  /* We print some extra information.  */
  return strdup (gettext ("For bug reporting instructions, please see:\n"
                          "<http://www.gnu.org/software/libc/bugs.html>.\n"));
}
381 /* Print the version information. */
382 static void
383 print_version (FILE *stream, struct argp_state *state)
385 fprintf (stream, "sprof (GNU %s) %s\n", PACKAGE, VERSION);
386 fprintf (stream, gettext ("\
387 Copyright (C) %s Free Software Foundation, Inc.\n\
388 This is free software; see the source for copying conditions. There is NO\n\
389 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
391 "2009");
392 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
396 /* Note that we must not use `dlopen' etc. The shobj object must not
397 be loaded for use. */
398 static struct shobj *
399 load_shobj (const char *name)
401 struct link_map *map = NULL;
402 struct shobj *result;
403 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
404 ElfW(Addr) mapend = 0;
405 const ElfW(Phdr) *ph;
406 size_t textsize;
407 unsigned int log_hashfraction;
408 ElfW(Ehdr) *ehdr;
409 int fd;
410 ElfW(Shdr) *shdr;
411 size_t pagesize = getpagesize ();
413 /* Since we use dlopen() we must be prepared to work around the sometimes
414 strange lookup rules for the shared objects. If we have a file foo.so
415 in the current directory and the user specfies foo.so on the command
416 line (without specifying a directory) we should load the file in the
417 current directory even if a normal dlopen() call would read the other
418 file. We do this by adding a directory portion to the name. */
419 if (strchr (name, '/') == NULL)
421 char *load_name = (char *) alloca (strlen (name) + 3);
422 stpcpy (stpcpy (load_name, "./"), name);
424 map = (struct link_map *) dlopen (load_name, RTLD_LAZY | __RTLD_SPROF);
426 if (map == NULL)
428 map = (struct link_map *) dlopen (name, RTLD_LAZY | __RTLD_SPROF);
429 if (map == NULL)
431 error (0, errno, _("failed to load shared object `%s'"), name);
432 return NULL;
436 /* Prepare the result. */
437 result = (struct shobj *) calloc (1, sizeof (struct shobj));
438 if (result == NULL)
440 error (0, errno, _("cannot create internal descriptors"));
441 dlclose (map);
442 return NULL;
444 result->name = name;
445 result->map = map;
447 /* Compute the size of the sections which contain program code.
448 This must match the code in dl-profile.c (_dl_start_profile). */
449 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
450 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
452 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
453 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
454 & ~(pagesize - 1));
456 if (start < mapstart)
457 mapstart = start;
458 if (end > mapend)
459 mapend = end;
462 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
463 HISTFRACTION * sizeof (HISTCOUNTER));
464 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
465 HISTFRACTION * sizeof (HISTCOUNTER));
466 if (do_test)
467 printf ("load addr: %0#*" PRIxPTR "\n"
468 "lower bound PC: %0#*" PRIxPTR "\n"
469 "upper bound PC: %0#*" PRIxPTR "\n",
470 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
471 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
472 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
474 textsize = result->highpc - result->lowpc;
475 result->kcountsize = textsize / HISTFRACTION;
476 result->hashfraction = HASHFRACTION;
477 if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
478 /* If HASHFRACTION is a power of two, mcount can use shifting
479 instead of integer division. Precompute shift amount. */
480 log_hashfraction = __builtin_ffs (result->hashfraction
481 * sizeof (struct here_fromstruct)) - 1;
482 else
483 log_hashfraction = -1;
484 if (do_test)
485 printf ("hashfraction = %d\ndivider = %Zu\n",
486 result->hashfraction,
487 result->hashfraction * sizeof (struct here_fromstruct));
488 result->tossize = textsize / HASHFRACTION;
489 result->fromlimit = textsize * ARCDENSITY / 100;
490 if (result->fromlimit < MINARCS)
491 result->fromlimit = MINARCS;
492 if (result->fromlimit > MAXARCS)
493 result->fromlimit = MAXARCS;
494 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
496 result->expected_size = (sizeof (struct gmon_hdr)
497 + 4 + sizeof (struct gmon_hist_hdr)
498 + result->kcountsize
499 + 4 + 4
500 + (result->fromssize
501 * sizeof (struct here_cg_arc_record)));
503 if (do_test)
504 printf ("expected size: %Zd\n", result->expected_size);
506 #define SCALE_1_TO_1 0x10000L
508 if (result->kcountsize < result->highpc - result->lowpc)
510 size_t range = result->highpc - result->lowpc;
511 size_t quot = range / result->kcountsize;
513 if (quot >= SCALE_1_TO_1)
514 result->s_scale = 1;
515 else if (quot >= SCALE_1_TO_1 / 256)
516 result->s_scale = SCALE_1_TO_1 / quot;
517 else if (range > ULONG_MAX / 256)
518 result->s_scale = ((SCALE_1_TO_1 * 256)
519 / (range / (result->kcountsize / 256)));
520 else
521 result->s_scale = ((SCALE_1_TO_1 * 256)
522 / ((range * 256) / result->kcountsize));
524 else
525 result->s_scale = SCALE_1_TO_1;
527 if (do_test)
528 printf ("s_scale: %d\n", result->s_scale);
530 /* Determine the dynamic string table. */
531 if (map->l_info[DT_STRTAB] == NULL)
532 result->dynstrtab = NULL;
533 else
534 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
535 if (do_test)
536 printf ("string table: %p\n", result->dynstrtab);
538 /* Determine the soname. */
539 if (map->l_info[DT_SONAME] == NULL)
540 result->soname = NULL;
541 else
542 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
543 if (do_test && result->soname != NULL)
544 printf ("soname: %s\n", result->soname);
546 /* Now we have to load the symbol table.
548 First load the section header table. */
549 ehdr = (ElfW(Ehdr) *) map->l_map_start;
551 /* Make sure we are on the right party. */
552 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
553 abort ();
555 /* And we need the shared object file descriptor again. */
556 fd = open (map->l_name, O_RDONLY);
557 if (fd == -1)
558 /* Dooh, this really shouldn't happen. We know the file is available. */
559 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
560 map->l_name);
562 /* Map the section header. */
563 size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr));
564 shdr = (ElfW(Shdr) *) alloca (size);
565 if (pread (fd, shdr, size, ehdr->e_shoff) != size)
566 error (EXIT_FAILURE, errno, _("reading of section headers failed"));
568 /* Get the section header string table. */
569 char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size);
570 if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size,
571 shdr[ehdr->e_shstrndx].sh_offset)
572 != shdr[ehdr->e_shstrndx].sh_size)
573 error (EXIT_FAILURE, errno,
574 _("reading of section header string table failed"));
576 /* Search for the ".symtab" section. */
577 ElfW(Shdr) *symtab_entry = NULL;
578 ElfW(Shdr) *debuglink_entry = NULL;
579 for (int idx = 0; idx < ehdr->e_shnum; ++idx)
580 if (shdr[idx].sh_type == SHT_SYMTAB
581 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
583 symtab_entry = &shdr[idx];
584 break;
586 else if (shdr[idx].sh_type == SHT_PROGBITS
587 && strcmp (shstrtab + shdr[idx].sh_name, ".gnu_debuglink") == 0)
588 debuglink_entry = &shdr[idx];
590 /* Get the file name of the debuginfo file if necessary. */
591 int symfd = fd;
592 if (symtab_entry == NULL && debuglink_entry != NULL)
594 size_t size = debuglink_entry->sh_size;
595 char *debuginfo_fname = (char *) alloca (size + 1);
596 debuginfo_fname[size] = '\0';
597 if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset)
598 != size)
600 fprintf (stderr, _("*** Cannot read debuginfo file name: %m\n"));
601 goto no_debuginfo;
604 static const char procpath[] = "/proc/self/fd/%d";
605 char origprocname[sizeof (procpath) + sizeof (int) * 3];
606 snprintf (origprocname, sizeof (origprocname), procpath, fd);
607 char *origlink = (char *) alloca (PATH_MAX + 1);
608 origlink[PATH_MAX] = '\0';
609 if (readlink (origprocname, origlink, PATH_MAX) == -1)
610 goto no_debuginfo;
612 /* Try to find the actual file. There are three places:
613 1. the same directory the DSO is in
614 2. in a subdir named .debug of the directory the DSO is in
615 3. in /usr/lib/debug/PATH-OF-DSO
617 char *realname = canonicalize_file_name (origlink);
618 char *cp = NULL;
619 if (realname == NULL || (cp = strrchr (realname, '/')) == NULL)
620 error (EXIT_FAILURE, errno, _("cannot determine file name"));
622 /* Leave the last slash in place. */
623 *++cp = '\0';
625 /* First add the debuginfo file name only. */
626 static const char usrlibdebug[]= "/usr/lib/debug/";
627 char *workbuf = (char *) alloca (sizeof (usrlibdebug)
628 + (cp - realname)
629 + strlen (debuginfo_fname));
630 strcpy (stpcpy (workbuf, realname), debuginfo_fname);
632 int fd2 = open (workbuf, O_RDONLY);
633 if (fd2 == -1)
635 strcpy (stpcpy (stpcpy (workbuf, realname), ".debug/"),
636 debuginfo_fname);
637 fd2 = open (workbuf, O_RDONLY);
638 if (fd2 == -1)
640 strcpy (stpcpy (stpcpy (workbuf, usrlibdebug), realname),
641 debuginfo_fname);
642 fd2 = open (workbuf, O_RDONLY);
646 if (fd2 != -1)
648 ElfW(Ehdr) ehdr2;
650 /* Read the ELF header. */
651 if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2))
652 error (EXIT_FAILURE, errno,
653 _("reading of ELF header failed"));
655 /* Map the section header. */
656 size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr));
657 ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size);
658 if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size)
659 error (EXIT_FAILURE, errno,
660 _("reading of section headers failed"));
662 /* Get the section header string table. */
663 shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size);
664 if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size,
665 shdr2[ehdr2.e_shstrndx].sh_offset)
666 != shdr2[ehdr2.e_shstrndx].sh_size)
667 error (EXIT_FAILURE, errno,
668 _("reading of section header string table failed"));
670 /* Search for the ".symtab" section. */
671 for (int idx = 0; idx < ehdr2.e_shnum; ++idx)
672 if (shdr2[idx].sh_type == SHT_SYMTAB
673 && strcmp (shstrtab + shdr2[idx].sh_name, ".symtab") == 0)
675 symtab_entry = &shdr2[idx];
676 shdr = shdr2;
677 symfd = fd2;
678 break;
681 if (fd2 != symfd)
682 close (fd2);
686 no_debuginfo:
687 if (symtab_entry == NULL)
689 fprintf (stderr, _("\
690 *** The file `%s' is stripped: no detailed analysis possible\n"),
691 name);
692 result->symtab = NULL;
693 result->strtab = NULL;
695 else
697 ElfW(Off) min_offset, max_offset;
698 ElfW(Shdr) *strtab_entry;
700 strtab_entry = &shdr[symtab_entry->sh_link];
702 /* Find the minimum and maximum offsets that include both the symbol
703 table and the string table. */
704 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
706 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
707 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
709 else
711 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
712 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
715 result->symbol_map = mmap (NULL, max_offset - min_offset,
716 PROT_READ, MAP_SHARED|MAP_FILE, symfd,
717 min_offset);
718 if (result->symbol_map == MAP_FAILED)
719 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
721 result->symtab
722 = (const ElfW(Sym) *) ((const char *) result->symbol_map
723 + (symtab_entry->sh_offset - min_offset));
724 result->symtab_size = symtab_entry->sh_size;
725 result->strtab = ((const char *) result->symbol_map
726 + (strtab_entry->sh_offset - min_offset));
727 result->symbol_mapsize = max_offset - min_offset;
730 /* Free the descriptor for the shared object. */
731 close (fd);
732 if (symfd != fd)
733 close (symfd);
735 return result;
739 static void
740 unload_shobj (struct shobj *shobj)
742 munmap (shobj->symbol_map, shobj->symbol_mapsize);
743 dlclose (shobj->map);
747 static struct profdata *
748 load_profdata (const char *name, struct shobj *shobj)
750 struct profdata *result;
751 int fd;
752 struct stat st;
753 void *addr;
754 uint32_t *narcsp;
755 size_t fromlimit;
756 struct here_cg_arc_record *data;
757 struct here_fromstruct *froms;
758 uint16_t *tos;
759 size_t fromidx;
760 size_t idx;
762 fd = open (name, O_RDONLY);
763 if (fd == -1)
765 char *ext_name;
767 if (errno != ENOENT || strchr (name, '/') != NULL)
768 /* The file exists but we are not allowed to read it or the
769 file does not exist and the name includes a path
770 specification.. */
771 return NULL;
773 /* A file with the given name does not exist in the current
774 directory, try it in the default location where the profiling
775 files are created. */
776 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
777 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
778 name = ext_name;
780 fd = open (ext_name, O_RDONLY);
781 if (fd == -1)
783 /* Even this file does not exist. */
784 error (0, errno, _("cannot load profiling data"));
785 return NULL;
789 /* We have found the file, now make sure it is the right one for the
790 data file. */
791 if (fstat (fd, &st) < 0)
793 error (0, errno, _("while stat'ing profiling data file"));
794 close (fd);
795 return NULL;
798 if ((size_t) st.st_size != shobj->expected_size)
800 error (0, 0,
801 _("profiling data file `%s' does not match shared object `%s'"),
802 name, shobj->name);
803 close (fd);
804 return NULL;
807 /* The data file is most probably the right one for our shared
808 object. Map it now. */
809 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
810 if (addr == MAP_FAILED)
812 error (0, errno, _("failed to mmap the profiling data file"));
813 close (fd);
814 return NULL;
817 /* We don't need the file desriptor anymore. */
818 if (close (fd) < 0)
820 error (0, errno, _("error while closing the profiling data file"));
821 munmap (addr, st.st_size);
822 return NULL;
825 /* Prepare the result. */
826 result = (struct profdata *) calloc (1, sizeof (struct profdata));
827 if (result == NULL)
829 error (0, errno, _("cannot create internal descriptor"));
830 munmap (addr, st.st_size);
831 return NULL;
834 /* Store the address and size so that we can later free the resources. */
835 result->addr = addr;
836 result->size = st.st_size;
838 /* Pointer to data after the header. */
839 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
840 result->hist_hdr = (struct real_gmon_hist_hdr *) ((char *) result->hist
841 + sizeof (uint32_t));
842 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
843 + sizeof (struct real_gmon_hist_hdr));
845 /* Compute pointer to array of the arc information. */
846 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
847 + sizeof (uint32_t));
848 result->narcs = *narcsp;
849 result->data = (struct here_cg_arc_record *) ((char *) narcsp
850 + sizeof (uint32_t));
852 /* Create the gmon_hdr we expect or write. */
853 struct real_gmon_hdr
855 char cookie[4];
856 int32_t version;
857 char spare[3 * 4];
858 } gmon_hdr;
859 if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr)
860 || (offsetof (struct real_gmon_hdr, cookie)
861 != offsetof (struct gmon_hdr, cookie))
862 || (offsetof (struct real_gmon_hdr, version)
863 != offsetof (struct gmon_hdr, version)))
864 abort ();
866 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
867 gmon_hdr.version = GMON_SHOBJ_VERSION;
868 memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare));
870 /* Create the hist_hdr we expect or write. */
871 struct real_gmon_hist_hdr hist_hdr;
872 if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr)
873 || (offsetof (struct real_gmon_hist_hdr, low_pc)
874 != offsetof (struct gmon_hist_hdr, low_pc))
875 || (offsetof (struct real_gmon_hist_hdr, high_pc)
876 != offsetof (struct gmon_hist_hdr, high_pc))
877 || (offsetof (struct real_gmon_hist_hdr, hist_size)
878 != offsetof (struct gmon_hist_hdr, hist_size))
879 || (offsetof (struct real_gmon_hist_hdr, prof_rate)
880 != offsetof (struct gmon_hist_hdr, prof_rate))
881 || (offsetof (struct real_gmon_hist_hdr, dimen)
882 != offsetof (struct gmon_hist_hdr, dimen))
883 || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev)
884 != offsetof (struct gmon_hist_hdr, dimen_abbrev)))
885 abort ();
887 hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
888 hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
889 if (do_test)
890 printf ("low_pc = %p\nhigh_pc = %p\n", hist_hdr.low_pc, hist_hdr.high_pc);
891 hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
892 hist_hdr.prof_rate = __profile_frequency ();
893 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
894 hist_hdr.dimen_abbrev = 's';
896 /* Test whether the header of the profiling data is ok. */
897 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
898 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
899 || memcmp (result->hist_hdr, &hist_hdr,
900 sizeof (struct gmon_hist_hdr)) != 0
901 || narcsp[-1] != GMON_TAG_CG_ARC)
903 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
904 name, shobj->name);
905 if (do_test)
907 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0)
908 puts ("gmon_hdr differs");
909 if (*(uint32_t *) result->hist != GMON_TAG_TIME_HIST)
910 puts ("result->hist differs");
911 if (memcmp (result->hist_hdr, &hist_hdr,
912 sizeof (struct gmon_hist_hdr)) != 0)
913 puts ("hist_hdr differs");
914 if (narcsp[-1] != GMON_TAG_CG_ARC)
915 puts ("narcsp[-1] differs");
917 free (result);
918 munmap (addr, st.st_size);
919 return NULL;
922 /* We are pretty sure now that this is a correct input file. Set up
923 the remaining information in the result structure and return. */
924 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
925 if (result->tos == NULL)
927 error (0, errno, _("cannot create internal descriptor"));
928 munmap (addr, st.st_size);
929 free (result);
930 return NULL;
933 result->froms = (struct here_fromstruct *) ((char *) result->tos
934 + shobj->tossize);
935 fromidx = 0;
937 /* Now we have to process all the arc count entries. */
938 fromlimit = shobj->fromlimit;
939 data = result->data;
940 froms = result->froms;
941 tos = result->tos;
942 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
944 size_t to_index;
945 size_t newfromidx;
946 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
947 newfromidx = fromidx++;
948 froms[newfromidx].here = &data[idx];
949 froms[newfromidx].link = tos[to_index];
950 tos[to_index] = newfromidx;
953 return result;
957 static void
958 unload_profdata (struct profdata *profdata)
960 free (profdata->tos);
961 munmap (profdata->addr, profdata->size);
962 free (profdata);
966 static void
967 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
969 volatile uint16_t *kcount = profdata->kcount;
970 size_t maxkidx = shobj->kcountsize;
971 size_t factor = 2 * (65536 / shobj->s_scale);
972 size_t kidx = 0;
973 size_t sidx = 0;
975 while (sidx < symidx)
977 uintptr_t start = sortsym[sidx]->addr;
978 uintptr_t end = start + sortsym[sidx]->size;
980 while (kidx < maxkidx && factor * kidx < start)
981 ++kidx;
982 if (kidx == maxkidx)
983 break;
985 while (kidx < maxkidx && factor * kidx < end)
986 sortsym[sidx]->ticks += kcount[kidx++];
987 if (kidx == maxkidx)
988 break;
990 total_ticks += sortsym[sidx++]->ticks;
995 static size_t
996 find_symbol (uintptr_t addr)
998 size_t sidx = 0;
1000 while (sidx < symidx)
1002 uintptr_t start = sortsym[sidx]->addr;
1003 uintptr_t end = start + sortsym[sidx]->size;
1005 if (addr >= start && addr < end)
1006 return sidx;
1008 if (addr < start)
1009 break;
1011 ++sidx;
1014 return (size_t) -1l;
1018 static void
1019 count_calls (struct shobj *shobj, struct profdata *profdata)
1021 struct here_cg_arc_record *data = profdata->data;
1022 uint32_t narcs = profdata->narcs;
1023 uint32_t cnt;
1025 for (cnt = 0; cnt < narcs; ++cnt)
1027 uintptr_t here = data[cnt].self_pc;
1028 size_t symbol_idx;
1030 /* Find the symbol for this address. */
1031 symbol_idx = find_symbol (here);
1032 if (symbol_idx != (size_t) -1l)
1033 sortsym[symbol_idx]->calls += data[cnt].count;
1038 static int
1039 symorder (const void *o1, const void *o2)
1041 const struct known_symbol *p1 = (const struct known_symbol *) o1;
1042 const struct known_symbol *p2 = (const struct known_symbol *) o2;
1044 return p1->addr - p2->addr;
1048 static void
1049 printsym (const void *node, VISIT value, int level)
1051 if (value == leaf || value == postorder)
1052 sortsym[symidx++] = *(struct known_symbol **) node;
1056 static void
1057 read_symbols (struct shobj *shobj)
1059 int n = 0;
1061 /* Initialize the obstacks. */
1062 #define obstack_chunk_alloc malloc
1063 #define obstack_chunk_free free
1064 obstack_init (&shobj->ob_str);
1065 obstack_init (&shobj->ob_sym);
1066 obstack_init (&ob_list);
1068 /* Process the symbols. */
1069 if (shobj->symtab != NULL)
1071 const ElfW(Sym) *sym = shobj->symtab;
1072 const ElfW(Sym) *sym_end
1073 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
1074 for (; sym < sym_end; sym++)
1075 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
1076 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
1077 && sym->st_size != 0)
1079 struct known_symbol **existp;
1080 struct known_symbol *newsym
1081 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
1082 sizeof (*newsym));
1083 if (newsym == NULL)
1084 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
1086 newsym->name = &shobj->strtab[sym->st_name];
1087 newsym->addr = sym->st_value;
1088 newsym->size = sym->st_size;
1089 newsym->weak = ELFW(ST_BIND) (sym->st_info) == STB_WEAK;
1090 newsym->hidden = (ELFW(ST_VISIBILITY) (sym->st_other)
1091 != STV_DEFAULT);
1092 newsym->ticks = 0;
1093 newsym->calls = 0;
1095 existp = tfind (newsym, &symroot, symorder);
1096 if (existp == NULL)
1098 /* New function. */
1099 tsearch (newsym, &symroot, symorder);
1100 ++n;
1102 else
1104 /* The function is already defined. See whether we have
1105 a better name here. */
1106 if (((*existp)->hidden && !newsym->hidden)
1107 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1108 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1109 && ((*existp)->weak && !newsym->weak)))
1110 *existp = newsym;
1111 else
1112 /* We don't need the allocated memory. */
1113 obstack_free (&shobj->ob_sym, newsym);
1117 else
1119 /* Blarg, the binary is stripped. We have to rely on the
1120 information contained in the dynamic section of the object. */
1121 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
1122 l_info[DT_SYMTAB]);
1123 const char *strtab = (const char *) D_PTR (shobj->map,
1124 l_info[DT_STRTAB]);
1126 /* We assume that the string table follows the symbol table,
1127 because there is no way in ELF to know the size of the
1128 dynamic symbol table without looking at the section headers. */
1129 while ((void *) symtab < (void *) strtab)
1131 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
1132 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
1133 && symtab->st_size != 0)
1135 struct known_symbol *newsym;
1136 struct known_symbol **existp;
1138 newsym =
1139 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
1140 sizeof (*newsym));
1141 if (newsym == NULL)
1142 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
1144 newsym->name = &strtab[symtab->st_name];
1145 newsym->addr = symtab->st_value;
1146 newsym->size = symtab->st_size;
1147 newsym->weak = ELFW(ST_BIND) (symtab->st_info) == STB_WEAK;
1148 newsym->hidden = (ELFW(ST_VISIBILITY) (symtab->st_other)
1149 != STV_DEFAULT);
1150 newsym->ticks = 0;
1151 newsym->froms = NULL;
1152 newsym->tos = NULL;
1154 existp = tfind (newsym, &symroot, symorder);
1155 if (existp == NULL)
1157 /* New function. */
1158 tsearch (newsym, &symroot, symorder);
1159 ++n;
1161 else
1163 /* The function is already defined. See whether we have
1164 a better name here. */
1165 if (((*existp)->hidden && !newsym->hidden)
1166 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1167 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1168 && ((*existp)->weak && !newsym->weak)))
1169 *existp = newsym;
1170 else
1171 /* We don't need the allocated memory. */
1172 obstack_free (&shobj->ob_sym, newsym);
1176 ++symtab;
1180 sortsym = malloc (n * sizeof (struct known_symbol *));
1181 if (sortsym == NULL)
1182 abort ();
1184 twalk (symroot, printsym);
1188 static void
1189 add_arcs (struct profdata *profdata)
1191 uint32_t narcs = profdata->narcs;
1192 struct here_cg_arc_record *data = profdata->data;
1193 uint32_t cnt;
1195 for (cnt = 0; cnt < narcs; ++cnt)
1197 /* First add the incoming arc. */
1198 size_t sym_idx = find_symbol (data[cnt].self_pc);
1200 if (sym_idx != (size_t) -1l)
1202 struct known_symbol *sym = sortsym[sym_idx];
1203 struct arc_list *runp = sym->froms;
1205 while (runp != NULL
1206 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1207 || (data[cnt].from_pc != 0
1208 && (runp->idx == (size_t) -1l
1209 || data[cnt].from_pc < sortsym[runp->idx]->addr
1210 || (data[cnt].from_pc
1211 >= (sortsym[runp->idx]->addr
1212 + sortsym[runp->idx]->size))))))
1213 runp = runp->next;
1215 if (runp == NULL)
1217 /* We need a new entry. */
1218 struct arc_list *newp = (struct arc_list *)
1219 obstack_alloc (&ob_list, sizeof (struct arc_list));
1221 if (data[cnt].from_pc == 0)
1222 newp->idx = (size_t) -1l;
1223 else
1224 newp->idx = find_symbol (data[cnt].from_pc);
1225 newp->count = data[cnt].count;
1226 newp->next = sym->froms;
1227 sym->froms = newp;
1229 else
1230 /* Increment the counter for the found entry. */
1231 runp->count += data[cnt].count;
1234 /* Now add it to the appropriate outgoing list. */
1235 sym_idx = find_symbol (data[cnt].from_pc);
1236 if (sym_idx != (size_t) -1l)
1238 struct known_symbol *sym = sortsym[sym_idx];
1239 struct arc_list *runp = sym->tos;
1241 while (runp != NULL
1242 && (runp->idx == (size_t) -1l
1243 || data[cnt].self_pc < sortsym[runp->idx]->addr
1244 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1245 + sortsym[runp->idx]->size)))
1246 runp = runp->next;
1248 if (runp == NULL)
1250 /* We need a new entry. */
1251 struct arc_list *newp = (struct arc_list *)
1252 obstack_alloc (&ob_list, sizeof (struct arc_list));
1254 newp->idx = find_symbol (data[cnt].self_pc);
1255 newp->count = data[cnt].count;
1256 newp->next = sym->tos;
1257 sym->tos = newp;
1259 else
1260 /* Increment the counter for the found entry. */
1261 runp->count += data[cnt].count;
1267 static int
1268 countorder (const void *p1, const void *p2)
1270 struct known_symbol *s1 = (struct known_symbol *) p1;
1271 struct known_symbol *s2 = (struct known_symbol *) p2;
1273 if (s1->ticks != s2->ticks)
1274 return (int) (s2->ticks - s1->ticks);
1276 if (s1->calls != s2->calls)
1277 return (int) (s2->calls - s1->calls);
1279 return strcmp (s1->name, s2->name);
/* Value of one tick; set by generate_flat_profile to the reciprocal
   of the profiling rate.  */
static double tick_unit = 0.0;

/* Sum of the ticks of all symbols printed so far by printflat.  */
static uintmax_t cumu_ticks = 0;
1286 static void
1287 printflat (const void *node, VISIT value, int level)
1289 if (value == leaf || value == postorder)
1291 struct known_symbol *s = *(struct known_symbol **) node;
1293 cumu_ticks += s->ticks;
1295 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1296 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1297 tick_unit * cumu_ticks,
1298 tick_unit * s->ticks,
1299 s->calls,
1300 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1301 /* FIXME: don't know about called functions. */
1302 s->name);
/* Node destructor which intentionally does nothing.  It is handed to
   tdestroy for trees whose nodes merely reference records which must
   not be freed together with the tree.  */
/* ARGSUSED */
static void
freenoop (void *p)
{
  (void) p;
}
1314 static void
1315 generate_flat_profile (struct profdata *profdata)
1317 size_t n;
1318 void *data = NULL;
1320 tick_unit = 1.0 / profdata->hist_hdr->prof_rate;
1322 printf ("Flat profile:\n\n"
1323 "Each sample counts as %g %s.\n",
1324 tick_unit, profdata->hist_hdr->dimen);
1325 fputs (" % cumulative self self total\n"
1326 " time seconds seconds calls us/call us/call name\n",
1327 stdout);
1329 for (n = 0; n < symidx; ++n)
1330 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1331 tsearch (sortsym[n], &data, countorder);
1333 twalk (data, printflat);
1335 tdestroy (data, freenoop);
1339 static void
1340 generate_call_graph (struct profdata *profdata)
1342 size_t cnt;
1344 puts ("\nindex % time self children called name\n");
1346 for (cnt = 0; cnt < symidx; ++cnt)
1347 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1349 struct arc_list *runp;
1350 size_t n;
1352 /* First print the from-information. */
1353 runp = sortsym[cnt]->froms;
1354 while (runp != NULL)
1356 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1357 (runp->idx != (size_t) -1l
1358 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1359 0.0, /* FIXME: what's time for the children, recursive */
1360 runp->count, sortsym[cnt]->calls,
1361 (runp->idx != (size_t) -1l ?
1362 sortsym[runp->idx]->name : "<UNKNOWN>"));
1364 if (runp->idx != (size_t) -1l)
1365 printf (" [%Zd]", runp->idx);
1366 putchar_unlocked ('\n');
1368 runp = runp->next;
1371 /* Info abount the function itself. */
1372 n = printf ("[%Zu]", cnt);
1373 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1374 (int) (7 - n), " ",
1375 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1376 sortsym[cnt]->ticks * tick_unit,
1377 0.0, /* FIXME: what's time for the children, recursive */
1378 sortsym[cnt]->calls,
1379 sortsym[cnt]->name, cnt);
1381 /* Info about the functions this function calls. */
1382 runp = sortsym[cnt]->tos;
1383 while (runp != NULL)
1385 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1386 (runp->idx != (size_t) -1l
1387 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1388 0.0, /* FIXME: what's time for the children, recursive */
1389 runp->count);
1391 if (runp->idx != (size_t) -1l)
1392 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1393 sortsym[runp->idx]->calls,
1394 sortsym[runp->idx]->name,
1395 runp->idx);
1396 else
1397 fputs ("??? <UNKNOWN>\n\n", stdout);
1399 runp = runp->next;
1402 fputs ("-----------------------------------------------\n", stdout);
1407 static void
1408 generate_call_pair_list (struct profdata *profdata)
1410 size_t cnt;
1412 for (cnt = 0; cnt < symidx; ++cnt)
1413 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1415 struct arc_list *runp;
1417 /* First print the incoming arcs. */
1418 runp = sortsym[cnt]->froms;
1419 while (runp != NULL)
1421 if (runp->idx == (size_t) -1l)
1422 printf ("\
1423 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1424 sortsym[cnt]->name, runp->count);
1425 runp = runp->next;
1428 /* Next the outgoing arcs. */
1429 runp = sortsym[cnt]->tos;
1430 while (runp != NULL)
1432 printf ("%-34s %-34s %9" PRIdMAX "\n",
1433 sortsym[cnt]->name,
1434 (runp->idx != (size_t) -1l
1435 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1436 runp->count);
1437 runp = runp->next;