Make SH ucontext always match current kernels.
[glibc.git] / elf / sprof.c
blobacf2c205d6810ccf9af2ac5a5bfa45da4a0cdd5a
1 /* Read and display shared object profiling data.
2 Copyright (C) 1997-2016 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
20 #include <argp.h>
21 #include <dlfcn.h>
22 #include <elf.h>
23 #include <error.h>
24 #include <fcntl.h>
25 #include <inttypes.h>
26 #include <libintl.h>
27 #include <locale.h>
28 #include <obstack.h>
29 #include <search.h>
30 #include <stdbool.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 #include <stdint.h>
36 #include <ldsodefs.h>
37 #include <sys/gmon.h>
38 #include <sys/gmon_out.h>
39 #include <sys/mman.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
43 /* Get libc version number. */
44 #include "../version.h"
46 #define PACKAGE _libc_intl_domainname
49 #include <endian.h>
/* Map the byte order of the host onto the matching ELF data-encoding
   constant (`byteorder') and a printable name (`byteorder_name').  */
#if BYTE_ORDER == BIG_ENDIAN
# define byteorder ELFDATA2MSB
# define byteorder_name "big-endian"
#elif BYTE_ORDER == LITTLE_ENDIAN
# define byteorder ELFDATA2LSB
# define byteorder_name "little-endian"
#else
# error "Unknown BYTE_ORDER " BYTE_ORDER
# define byteorder ELFDATANONE
#endif

/* Fallback for systems whose headers do not provide PATH_MAX.  */
#ifndef PATH_MAX
# define PATH_MAX 1024
#endif
/* Provided outside this file.  Its result is stored in the `prof_rate'
   field of the expected histogram header.
   NOTE(review): presumably the frequency of the profiling clock, judging
   by the name -- confirm against the definition in libc.  */
extern int __profile_frequency (void);

/* Name and version of program.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;

/* Key of the hidden `--test' option.  */
#define OPT_TEST 1
74 /* Definitions of arguments for argp functions. */
75 static const struct argp_option options[] =
77 { NULL, 0, NULL, 0, N_("Output selection:") },
78 { "call-pairs", 'c', NULL, 0,
79 N_("print list of count paths and their number of use") },
80 { "flat-profile", 'p', NULL, 0,
81 N_("generate flat profile with counts and ticks") },
82 { "graph", 'q', NULL, 0, N_("generate call graph") },
84 { "test", OPT_TEST, NULL, OPTION_HIDDEN, NULL },
85 { NULL, 0, NULL, 0, NULL }
88 /* Short description of program. */
89 static const char doc[] = N_("Read and display shared object profiling data.");
90 //For bug reporting instructions, please see:\n
91 //<http://www.gnu.org/software/libc/bugs.html>.\n");
93 /* Strings for arguments in help texts. */
94 static const char args_doc[] = N_("SHOBJ [PROFDATA]");
96 /* Prototype for option handler. */
97 static error_t parse_opt (int key, char *arg, struct argp_state *state);
99 /* Function to print some extra text in the help message. */
100 static char *more_help (int key, const char *text, void *input);
102 /* Data structure to communicate with argp functions. */
103 static struct argp argp =
105 options, parse_opt, args_doc, doc, NULL, more_help
/* Operation modes.  The values are bit flags so that several reports can
   be requested at once; NONE means nothing was selected on the command
   line, in which case main falls back to DEFAULT_MODE.  */
static enum
{
  NONE = 0,
  FLAT_MODE = 1 << 0,
  CALL_GRAPH_MODE = 1 << 1,
  CALL_PAIRS = 1 << 2,

  DEFAULT_MODE = FLAT_MODE | CALL_GRAPH_MODE
} mode;

/* Nonzero for testing.  Set by the hidden --test option; enables extra
   diagnostic output while loading.  */
static int do_test;
/* Structure describing calls.  Each element refers to one arc record
   (`here') and chains to another element through the index `link'.  */
struct here_fromstruct
{
  struct here_cg_arc_record volatile *here;
  uint16_t link;
};
/* We define a special type to address the elements of the arc table.
   This is basically the `gmon_cg_arc_record' format but it includes
   the room for the tag and it uses real types.  The structure is packed
   so that no padding is inserted between or after the members.  */
struct here_cg_arc_record
{
  uintptr_t from_pc;
  uintptr_t self_pc;
  uint32_t count;
} __attribute__ ((packed));
struct known_symbol;

/* Singly linked list of arcs attached to a symbol: IDX is an index of a
   symbol (or (size_t) -1 when no symbol is known), COUNT the accumulated
   count for that arc.  */
struct arc_list
{
  size_t idx;
  uintmax_t count;

  struct arc_list *next;
};

/* Obstack initialized by read_symbols.
   NOTE(review): presumably backs the arc_list elements -- the allocating
   code is outside this part of the file.  */
static struct obstack ob_list;
/* One function-like symbol of the shared object together with the
   profiling results accumulated for it.  */
struct known_symbol
{
  const char *name;		/* Points into a string table.  */
  uintptr_t addr;		/* st_value of the symbol.  */
  size_t size;			/* st_size of the symbol.  */
  bool weak;			/* Binding is STB_WEAK.  */
  bool hidden;			/* Visibility is not STV_DEFAULT.  */

  uintmax_t ticks;		/* Histogram ticks inside [addr, addr+size).  */
  uintmax_t calls;		/* Sum of arc counts targeting the symbol.  */

  struct arc_list *froms;	/* Incoming arcs.  */
  struct arc_list *tos;		/* Outgoing arcs.  */
};
/* Everything known about the shared object under inspection.  Filled in
   by load_shobj; the obstacks are initialized later by read_symbols.  */
struct shobj
{
  const char *name;		/* User-provided name.  */

  struct link_map *map;		/* Handle returned by dlopen.  */
  const char *dynstrtab;	/* Dynamic string table of shared object.  */
  const char *soname;		/* Soname of shared object.  */

  uintptr_t lowpc;		/* Rounded start of the executable segments.  */
  uintptr_t highpc;		/* Rounded end of the executable segments.  */
  unsigned long int kcountsize;	/* Size of the histogram in bytes.  */
  size_t expected_size;		/* Expected size of profiling file.  */
  size_t tossize;
  size_t fromssize;
  size_t fromlimit;
  unsigned int hashfraction;
  int s_scale;

  void *symbol_map;		/* mmap'ed symbol and string tables, or NULL.  */
  size_t symbol_mapsize;
  const ElfW(Sym) *symtab;	/* NULL if the object is stripped.  */
  size_t symtab_size;		/* Size of SYMTAB in bytes.  */
  const char *strtab;

  struct obstack ob_str;
  struct obstack ob_sym;
};
/* Typed view of the histogram header found in the profiling data file.
   load_profdata verifies at run time that the size and member offsets
   agree with `struct gmon_hist_hdr'.  */
struct real_gmon_hist_hdr
{
  char *low_pc;
  char *high_pc;
  int32_t hist_size;
  int32_t prof_rate;
  char dimen[15];
  char dimen_abbrev;
};
/* Description of a mapped profiling data file.  Filled in by
   load_profdata, released by unload_profdata.  */
struct profdata
{
  void *addr;			/* Start of the mapping of the file.  */
  off_t size;			/* Size of the mapping in bytes.  */

  char *hist;			/* First byte after the gmon header.  */
  struct real_gmon_hist_hdr *hist_hdr;
  uint16_t *kcount;		/* Histogram counters.  */
  uint32_t narcs;		/* Number of arcs in toset.  */
  struct here_cg_arc_record *data;	/* Arc records in the file.  */
  uint16_t *tos;		/* Heap block holding TOS and FROMS.  */
  struct here_fromstruct *froms;	/* Points into the TOS block.  */
};
/* Search tree for symbols (used with tfind/tsearch/twalk).  */
static void *symroot;
/* Symbols in the order in which twalk visits the tree.  */
static struct known_symbol **sortsym;
/* Number of valid entries in SORTSYM.  */
static size_t symidx;
/* Sum of the ticks attributed to symbols.  */
static uintmax_t total_ticks;

/* Prototypes for local functions.  */
static struct shobj *load_shobj (const char *name);
static void unload_shobj (struct shobj *shobj);
static struct profdata *load_profdata (const char *name, struct shobj *shobj);
static void unload_profdata (struct profdata *profdata);
static void count_total_ticks (struct shobj *shobj, struct profdata *profdata);
static void count_calls (struct shobj *shobj, struct profdata *profdata);
static void read_symbols (struct shobj *shobj);
static void add_arcs (struct profdata *profdata);
static void generate_flat_profile (struct profdata *profdata);
static void generate_call_graph (struct profdata *profdata);
static void generate_call_pair_list (struct profdata *profdata);
244 main (int argc, char *argv[])
246 const char *shobj;
247 const char *profdata;
248 struct shobj *shobj_handle;
249 struct profdata *profdata_handle;
250 int remaining;
252 setlocale (LC_ALL, "");
254 /* Initialize the message catalog. */
255 textdomain (_libc_intl_domainname);
257 /* Parse and process arguments. */
258 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
260 if (argc - remaining == 0 || argc - remaining > 2)
262 /* We need exactly two non-option parameter. */
263 argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR,
264 program_invocation_short_name);
265 exit (1);
268 /* Get parameters. */
269 shobj = argv[remaining];
270 if (argc - remaining == 2)
271 profdata = argv[remaining + 1];
272 else
273 /* No filename for the profiling data given. We will determine it
274 from the soname of the shobj, later. */
275 profdata = NULL;
277 /* First see whether we can load the shared object. */
278 shobj_handle = load_shobj (shobj);
279 if (shobj_handle == NULL)
280 exit (1);
282 /* We can now determine the filename for the profiling data, if
283 nececessary. */
284 if (profdata == NULL)
286 char *newp;
287 const char *soname;
288 size_t soname_len;
290 soname = shobj_handle->soname ?: basename (shobj);
291 soname_len = strlen (soname);
292 newp = (char *) alloca (soname_len + sizeof ".profile");
293 stpcpy (mempcpy (newp, soname, soname_len), ".profile");
294 profdata = newp;
297 /* Now see whether the profiling data file matches the given object. */
298 profdata_handle = load_profdata (profdata, shobj_handle);
299 if (profdata_handle == NULL)
301 unload_shobj (shobj_handle);
303 exit (1);
306 read_symbols (shobj_handle);
308 /* Count the ticks. */
309 count_total_ticks (shobj_handle, profdata_handle);
311 /* Count the calls. */
312 count_calls (shobj_handle, profdata_handle);
314 /* Add the arc information. */
315 add_arcs (profdata_handle);
317 /* If no mode is specified fall back to the default mode. */
318 if (mode == NONE)
319 mode = DEFAULT_MODE;
321 /* Do some work. */
322 if (mode & FLAT_MODE)
323 generate_flat_profile (profdata_handle);
325 if (mode & CALL_GRAPH_MODE)
326 generate_call_graph (profdata_handle);
328 if (mode & CALL_PAIRS)
329 generate_call_pair_list (profdata_handle);
331 /* Free the resources. */
332 unload_shobj (shobj_handle);
333 unload_profdata (profdata_handle);
335 return 0;
339 /* Handle program arguments. */
340 static error_t
341 parse_opt (int key, char *arg, struct argp_state *state)
343 switch (key)
345 case 'c':
346 mode |= CALL_PAIRS;
347 break;
348 case 'p':
349 mode |= FLAT_MODE;
350 break;
351 case 'q':
352 mode |= CALL_GRAPH_MODE;
353 break;
354 case OPT_TEST:
355 do_test = 1;
356 break;
357 default:
358 return ARGP_ERR_UNKNOWN;
360 return 0;
364 static char *
365 more_help (int key, const char *text, void *input)
367 char *tp = NULL;
368 switch (key)
370 case ARGP_KEY_HELP_EXTRA:
371 /* We print some extra information. */
372 if (asprintf (&tp, gettext ("\
373 For bug reporting instructions, please see:\n\
374 %s.\n"), REPORT_BUGS_TO) < 0)
375 return NULL;
376 return tp;
377 default:
378 break;
380 return (char *) text;
384 /* Print the version information. */
385 static void
386 print_version (FILE *stream, struct argp_state *state)
388 fprintf (stream, "sprof %s%s\n", PKGVERSION, VERSION);
389 fprintf (stream, gettext ("\
390 Copyright (C) %s Free Software Foundation, Inc.\n\
391 This is free software; see the source for copying conditions. There is NO\n\
392 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
394 "2016");
395 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
399 /* Note that we must not use `dlopen' etc. The shobj object must not
400 be loaded for use. */
401 static struct shobj *
402 load_shobj (const char *name)
404 struct link_map *map = NULL;
405 struct shobj *result;
406 ElfW(Addr) mapstart = ~((ElfW(Addr)) 0);
407 ElfW(Addr) mapend = 0;
408 const ElfW(Phdr) *ph;
409 size_t textsize;
410 ElfW(Ehdr) *ehdr;
411 int fd;
412 ElfW(Shdr) *shdr;
413 size_t pagesize = getpagesize ();
415 /* Since we use dlopen() we must be prepared to work around the sometimes
416 strange lookup rules for the shared objects. If we have a file foo.so
417 in the current directory and the user specfies foo.so on the command
418 line (without specifying a directory) we should load the file in the
419 current directory even if a normal dlopen() call would read the other
420 file. We do this by adding a directory portion to the name. */
421 if (strchr (name, '/') == NULL)
423 char *load_name = (char *) alloca (strlen (name) + 3);
424 stpcpy (stpcpy (load_name, "./"), name);
426 map = (struct link_map *) dlopen (load_name, RTLD_LAZY | __RTLD_SPROF);
428 if (map == NULL)
430 map = (struct link_map *) dlopen (name, RTLD_LAZY | __RTLD_SPROF);
431 if (map == NULL)
433 error (0, errno, _("failed to load shared object `%s'"), name);
434 return NULL;
438 /* Prepare the result. */
439 result = (struct shobj *) calloc (1, sizeof (struct shobj));
440 if (result == NULL)
442 error (0, errno, _("cannot create internal descriptor"));
443 dlclose (map);
444 return NULL;
446 result->name = name;
447 result->map = map;
449 /* Compute the size of the sections which contain program code.
450 This must match the code in dl-profile.c (_dl_start_profile). */
451 for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
452 if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
454 ElfW(Addr) start = (ph->p_vaddr & ~(pagesize - 1));
455 ElfW(Addr) end = ((ph->p_vaddr + ph->p_memsz + pagesize - 1)
456 & ~(pagesize - 1));
458 if (start < mapstart)
459 mapstart = start;
460 if (end > mapend)
461 mapend = end;
464 result->lowpc = ROUNDDOWN ((uintptr_t) (mapstart + map->l_addr),
465 HISTFRACTION * sizeof (HISTCOUNTER));
466 result->highpc = ROUNDUP ((uintptr_t) (mapend + map->l_addr),
467 HISTFRACTION * sizeof (HISTCOUNTER));
468 if (do_test)
469 printf ("load addr: %0#*" PRIxPTR "\n"
470 "lower bound PC: %0#*" PRIxPTR "\n"
471 "upper bound PC: %0#*" PRIxPTR "\n",
472 __ELF_NATIVE_CLASS == 32 ? 10 : 18, map->l_addr,
473 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->lowpc,
474 __ELF_NATIVE_CLASS == 32 ? 10 : 18, result->highpc);
476 textsize = result->highpc - result->lowpc;
477 result->kcountsize = textsize / HISTFRACTION;
478 result->hashfraction = HASHFRACTION;
479 if (do_test)
480 printf ("hashfraction = %d\ndivider = %Zu\n",
481 result->hashfraction,
482 result->hashfraction * sizeof (struct here_fromstruct));
483 result->tossize = textsize / HASHFRACTION;
484 result->fromlimit = textsize * ARCDENSITY / 100;
485 if (result->fromlimit < MINARCS)
486 result->fromlimit = MINARCS;
487 if (result->fromlimit > MAXARCS)
488 result->fromlimit = MAXARCS;
489 result->fromssize = result->fromlimit * sizeof (struct here_fromstruct);
491 result->expected_size = (sizeof (struct gmon_hdr)
492 + 4 + sizeof (struct gmon_hist_hdr)
493 + result->kcountsize
494 + 4 + 4
495 + (result->fromssize
496 * sizeof (struct here_cg_arc_record)));
498 if (do_test)
499 printf ("expected size: %Zd\n", result->expected_size);
501 #define SCALE_1_TO_1 0x10000L
503 if (result->kcountsize < result->highpc - result->lowpc)
505 size_t range = result->highpc - result->lowpc;
506 size_t quot = range / result->kcountsize;
508 if (quot >= SCALE_1_TO_1)
509 result->s_scale = 1;
510 else if (quot >= SCALE_1_TO_1 / 256)
511 result->s_scale = SCALE_1_TO_1 / quot;
512 else if (range > ULONG_MAX / 256)
513 result->s_scale = ((SCALE_1_TO_1 * 256)
514 / (range / (result->kcountsize / 256)));
515 else
516 result->s_scale = ((SCALE_1_TO_1 * 256)
517 / ((range * 256) / result->kcountsize));
519 else
520 result->s_scale = SCALE_1_TO_1;
522 if (do_test)
523 printf ("s_scale: %d\n", result->s_scale);
525 /* Determine the dynamic string table. */
526 if (map->l_info[DT_STRTAB] == NULL)
527 result->dynstrtab = NULL;
528 else
529 result->dynstrtab = (const char *) D_PTR (map, l_info[DT_STRTAB]);
530 if (do_test)
531 printf ("string table: %p\n", result->dynstrtab);
533 /* Determine the soname. */
534 if (map->l_info[DT_SONAME] == NULL)
535 result->soname = NULL;
536 else
537 result->soname = result->dynstrtab + map->l_info[DT_SONAME]->d_un.d_val;
538 if (do_test && result->soname != NULL)
539 printf ("soname: %s\n", result->soname);
541 /* Now we have to load the symbol table.
543 First load the section header table. */
544 ehdr = (ElfW(Ehdr) *) map->l_map_start;
546 /* Make sure we are on the right party. */
547 if (ehdr->e_shentsize != sizeof (ElfW(Shdr)))
548 abort ();
550 /* And we need the shared object file descriptor again. */
551 fd = open (map->l_name, O_RDONLY);
552 if (fd == -1)
553 /* Dooh, this really shouldn't happen. We know the file is available. */
554 error (EXIT_FAILURE, errno, _("Reopening shared object `%s' failed"),
555 map->l_name);
557 /* Map the section header. */
558 size_t size = ehdr->e_shnum * sizeof (ElfW(Shdr));
559 shdr = (ElfW(Shdr) *) alloca (size);
560 if (pread (fd, shdr, size, ehdr->e_shoff) != size)
561 error (EXIT_FAILURE, errno, _("reading of section headers failed"));
563 /* Get the section header string table. */
564 char *shstrtab = (char *) alloca (shdr[ehdr->e_shstrndx].sh_size);
565 if (pread (fd, shstrtab, shdr[ehdr->e_shstrndx].sh_size,
566 shdr[ehdr->e_shstrndx].sh_offset)
567 != shdr[ehdr->e_shstrndx].sh_size)
568 error (EXIT_FAILURE, errno,
569 _("reading of section header string table failed"));
571 /* Search for the ".symtab" section. */
572 ElfW(Shdr) *symtab_entry = NULL;
573 ElfW(Shdr) *debuglink_entry = NULL;
574 for (int idx = 0; idx < ehdr->e_shnum; ++idx)
575 if (shdr[idx].sh_type == SHT_SYMTAB
576 && strcmp (shstrtab + shdr[idx].sh_name, ".symtab") == 0)
578 symtab_entry = &shdr[idx];
579 break;
581 else if (shdr[idx].sh_type == SHT_PROGBITS
582 && strcmp (shstrtab + shdr[idx].sh_name, ".gnu_debuglink") == 0)
583 debuglink_entry = &shdr[idx];
585 /* Get the file name of the debuginfo file if necessary. */
586 int symfd = fd;
587 if (symtab_entry == NULL && debuglink_entry != NULL)
589 size_t size = debuglink_entry->sh_size;
590 char *debuginfo_fname = (char *) alloca (size + 1);
591 debuginfo_fname[size] = '\0';
592 if (pread (fd, debuginfo_fname, size, debuglink_entry->sh_offset)
593 != size)
595 fprintf (stderr, _("*** Cannot read debuginfo file name: %m\n"));
596 goto no_debuginfo;
599 static const char procpath[] = "/proc/self/fd/%d";
600 char origprocname[sizeof (procpath) + sizeof (int) * 3];
601 snprintf (origprocname, sizeof (origprocname), procpath, fd);
602 char *origlink = (char *) alloca (PATH_MAX);
603 ssize_t n = readlink (origprocname, origlink, PATH_MAX - 1);
604 if (n == -1)
605 goto no_debuginfo;
606 origlink[n] = '\0';
608 /* Try to find the actual file. There are three places:
609 1. the same directory the DSO is in
610 2. in a subdir named .debug of the directory the DSO is in
611 3. in /usr/lib/debug/PATH-OF-DSO
613 char *realname = canonicalize_file_name (origlink);
614 char *cp = NULL;
615 if (realname == NULL || (cp = strrchr (realname, '/')) == NULL)
616 error (EXIT_FAILURE, errno, _("cannot determine file name"));
618 /* Leave the last slash in place. */
619 *++cp = '\0';
621 /* First add the debuginfo file name only. */
622 static const char usrlibdebug[]= "/usr/lib/debug/";
623 char *workbuf = (char *) alloca (sizeof (usrlibdebug)
624 + (cp - realname)
625 + strlen (debuginfo_fname));
626 strcpy (stpcpy (workbuf, realname), debuginfo_fname);
628 int fd2 = open (workbuf, O_RDONLY);
629 if (fd2 == -1)
631 strcpy (stpcpy (stpcpy (workbuf, realname), ".debug/"),
632 debuginfo_fname);
633 fd2 = open (workbuf, O_RDONLY);
634 if (fd2 == -1)
636 strcpy (stpcpy (stpcpy (workbuf, usrlibdebug), realname),
637 debuginfo_fname);
638 fd2 = open (workbuf, O_RDONLY);
642 if (fd2 != -1)
644 ElfW(Ehdr) ehdr2;
646 /* Read the ELF header. */
647 if (pread (fd2, &ehdr2, sizeof (ehdr2), 0) != sizeof (ehdr2))
648 error (EXIT_FAILURE, errno,
649 _("reading of ELF header failed"));
651 /* Map the section header. */
652 size_t size = ehdr2.e_shnum * sizeof (ElfW(Shdr));
653 ElfW(Shdr) *shdr2 = (ElfW(Shdr) *) alloca (size);
654 if (pread (fd2, shdr2, size, ehdr2.e_shoff) != size)
655 error (EXIT_FAILURE, errno,
656 _("reading of section headers failed"));
658 /* Get the section header string table. */
659 shstrtab = (char *) alloca (shdr2[ehdr2.e_shstrndx].sh_size);
660 if (pread (fd2, shstrtab, shdr2[ehdr2.e_shstrndx].sh_size,
661 shdr2[ehdr2.e_shstrndx].sh_offset)
662 != shdr2[ehdr2.e_shstrndx].sh_size)
663 error (EXIT_FAILURE, errno,
664 _("reading of section header string table failed"));
666 /* Search for the ".symtab" section. */
667 for (int idx = 0; idx < ehdr2.e_shnum; ++idx)
668 if (shdr2[idx].sh_type == SHT_SYMTAB
669 && strcmp (shstrtab + shdr2[idx].sh_name, ".symtab") == 0)
671 symtab_entry = &shdr2[idx];
672 shdr = shdr2;
673 symfd = fd2;
674 break;
677 if (fd2 != symfd)
678 close (fd2);
682 no_debuginfo:
683 if (symtab_entry == NULL)
685 fprintf (stderr, _("\
686 *** The file `%s' is stripped: no detailed analysis possible\n"),
687 name);
688 result->symtab = NULL;
689 result->strtab = NULL;
691 else
693 ElfW(Off) min_offset, max_offset;
694 ElfW(Shdr) *strtab_entry;
696 strtab_entry = &shdr[symtab_entry->sh_link];
698 /* Find the minimum and maximum offsets that include both the symbol
699 table and the string table. */
700 if (symtab_entry->sh_offset < strtab_entry->sh_offset)
702 min_offset = symtab_entry->sh_offset & ~(pagesize - 1);
703 max_offset = strtab_entry->sh_offset + strtab_entry->sh_size;
705 else
707 min_offset = strtab_entry->sh_offset & ~(pagesize - 1);
708 max_offset = symtab_entry->sh_offset + symtab_entry->sh_size;
711 result->symbol_map = mmap (NULL, max_offset - min_offset,
712 PROT_READ, MAP_SHARED|MAP_FILE, symfd,
713 min_offset);
714 if (result->symbol_map == MAP_FAILED)
715 error (EXIT_FAILURE, errno, _("failed to load symbol data"));
717 result->symtab
718 = (const ElfW(Sym) *) ((const char *) result->symbol_map
719 + (symtab_entry->sh_offset - min_offset));
720 result->symtab_size = symtab_entry->sh_size;
721 result->strtab = ((const char *) result->symbol_map
722 + (strtab_entry->sh_offset - min_offset));
723 result->symbol_mapsize = max_offset - min_offset;
726 /* Free the descriptor for the shared object. */
727 close (fd);
728 if (symfd != fd)
729 close (symfd);
731 return result;
735 static void
736 unload_shobj (struct shobj *shobj)
738 munmap (shobj->symbol_map, shobj->symbol_mapsize);
739 dlclose (shobj->map);
743 static struct profdata *
744 load_profdata (const char *name, struct shobj *shobj)
746 struct profdata *result;
747 int fd;
748 struct stat64 st;
749 void *addr;
750 uint32_t *narcsp;
751 size_t fromlimit;
752 struct here_cg_arc_record *data;
753 struct here_fromstruct *froms;
754 uint16_t *tos;
755 size_t fromidx;
756 size_t idx;
758 fd = open (name, O_RDONLY);
759 if (fd == -1)
761 char *ext_name;
763 if (errno != ENOENT || strchr (name, '/') != NULL)
764 /* The file exists but we are not allowed to read it or the
765 file does not exist and the name includes a path
766 specification.. */
767 return NULL;
769 /* A file with the given name does not exist in the current
770 directory, try it in the default location where the profiling
771 files are created. */
772 ext_name = (char *) alloca (strlen (name) + sizeof "/var/tmp/");
773 stpcpy (stpcpy (ext_name, "/var/tmp/"), name);
774 name = ext_name;
776 fd = open (ext_name, O_RDONLY);
777 if (fd == -1)
779 /* Even this file does not exist. */
780 error (0, errno, _("cannot load profiling data"));
781 return NULL;
785 /* We have found the file, now make sure it is the right one for the
786 data file. */
787 if (fstat64 (fd, &st) < 0)
789 error (0, errno, _("while stat'ing profiling data file"));
790 close (fd);
791 return NULL;
794 if ((size_t) st.st_size != shobj->expected_size)
796 error (0, 0,
797 _("profiling data file `%s' does not match shared object `%s'"),
798 name, shobj->name);
799 close (fd);
800 return NULL;
803 /* The data file is most probably the right one for our shared
804 object. Map it now. */
805 addr = mmap (NULL, st.st_size, PROT_READ, MAP_SHARED|MAP_FILE, fd, 0);
806 if (addr == MAP_FAILED)
808 error (0, errno, _("failed to mmap the profiling data file"));
809 close (fd);
810 return NULL;
813 /* We don't need the file desriptor anymore. */
814 if (close (fd) < 0)
816 error (0, errno, _("error while closing the profiling data file"));
817 munmap (addr, st.st_size);
818 return NULL;
821 /* Prepare the result. */
822 result = (struct profdata *) calloc (1, sizeof (struct profdata));
823 if (result == NULL)
825 error (0, errno, _("cannot create internal descriptor"));
826 munmap (addr, st.st_size);
827 return NULL;
830 /* Store the address and size so that we can later free the resources. */
831 result->addr = addr;
832 result->size = st.st_size;
834 /* Pointer to data after the header. */
835 result->hist = (char *) ((struct gmon_hdr *) addr + 1);
836 result->hist_hdr = (struct real_gmon_hist_hdr *) ((char *) result->hist
837 + sizeof (uint32_t));
838 result->kcount = (uint16_t *) ((char *) result->hist + sizeof (uint32_t)
839 + sizeof (struct real_gmon_hist_hdr));
841 /* Compute pointer to array of the arc information. */
842 narcsp = (uint32_t *) ((char *) result->kcount + shobj->kcountsize
843 + sizeof (uint32_t));
844 result->narcs = *narcsp;
845 result->data = (struct here_cg_arc_record *) ((char *) narcsp
846 + sizeof (uint32_t));
848 /* Create the gmon_hdr we expect or write. */
849 struct real_gmon_hdr
851 char cookie[4];
852 int32_t version;
853 char spare[3 * 4];
854 } gmon_hdr;
855 if (sizeof (gmon_hdr) != sizeof (struct gmon_hdr)
856 || (offsetof (struct real_gmon_hdr, cookie)
857 != offsetof (struct gmon_hdr, cookie))
858 || (offsetof (struct real_gmon_hdr, version)
859 != offsetof (struct gmon_hdr, version)))
860 abort ();
862 memcpy (&gmon_hdr.cookie[0], GMON_MAGIC, sizeof (gmon_hdr.cookie));
863 gmon_hdr.version = GMON_SHOBJ_VERSION;
864 memset (gmon_hdr.spare, '\0', sizeof (gmon_hdr.spare));
866 /* Create the hist_hdr we expect or write. */
867 struct real_gmon_hist_hdr hist_hdr;
868 if (sizeof (hist_hdr) != sizeof (struct gmon_hist_hdr)
869 || (offsetof (struct real_gmon_hist_hdr, low_pc)
870 != offsetof (struct gmon_hist_hdr, low_pc))
871 || (offsetof (struct real_gmon_hist_hdr, high_pc)
872 != offsetof (struct gmon_hist_hdr, high_pc))
873 || (offsetof (struct real_gmon_hist_hdr, hist_size)
874 != offsetof (struct gmon_hist_hdr, hist_size))
875 || (offsetof (struct real_gmon_hist_hdr, prof_rate)
876 != offsetof (struct gmon_hist_hdr, prof_rate))
877 || (offsetof (struct real_gmon_hist_hdr, dimen)
878 != offsetof (struct gmon_hist_hdr, dimen))
879 || (offsetof (struct real_gmon_hist_hdr, dimen_abbrev)
880 != offsetof (struct gmon_hist_hdr, dimen_abbrev)))
881 abort ();
883 hist_hdr.low_pc = (char *) shobj->lowpc - shobj->map->l_addr;
884 hist_hdr.high_pc = (char *) shobj->highpc - shobj->map->l_addr;
885 if (do_test)
886 printf ("low_pc = %p\nhigh_pc = %p\n", hist_hdr.low_pc, hist_hdr.high_pc);
887 hist_hdr.hist_size = shobj->kcountsize / sizeof (HISTCOUNTER);
888 hist_hdr.prof_rate = __profile_frequency ();
889 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
890 hist_hdr.dimen_abbrev = 's';
892 /* Test whether the header of the profiling data is ok. */
893 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0
894 || *(uint32_t *) result->hist != GMON_TAG_TIME_HIST
895 || memcmp (result->hist_hdr, &hist_hdr,
896 sizeof (struct gmon_hist_hdr)) != 0
897 || narcsp[-1] != GMON_TAG_CG_ARC)
899 error (0, 0, _("`%s' is no correct profile data file for `%s'"),
900 name, shobj->name);
901 if (do_test)
903 if (memcmp (addr, &gmon_hdr, sizeof (struct gmon_hdr)) != 0)
904 puts ("gmon_hdr differs");
905 if (*(uint32_t *) result->hist != GMON_TAG_TIME_HIST)
906 puts ("result->hist differs");
907 if (memcmp (result->hist_hdr, &hist_hdr,
908 sizeof (struct gmon_hist_hdr)) != 0)
909 puts ("hist_hdr differs");
910 if (narcsp[-1] != GMON_TAG_CG_ARC)
911 puts ("narcsp[-1] differs");
913 free (result);
914 munmap (addr, st.st_size);
915 return NULL;
918 /* We are pretty sure now that this is a correct input file. Set up
919 the remaining information in the result structure and return. */
920 result->tos = (uint16_t *) calloc (shobj->tossize + shobj->fromssize, 1);
921 if (result->tos == NULL)
923 error (0, errno, _("cannot create internal descriptor"));
924 munmap (addr, st.st_size);
925 free (result);
926 return NULL;
929 result->froms = (struct here_fromstruct *) ((char *) result->tos
930 + shobj->tossize);
931 fromidx = 0;
933 /* Now we have to process all the arc count entries. */
934 fromlimit = shobj->fromlimit;
935 data = result->data;
936 froms = result->froms;
937 tos = result->tos;
938 for (idx = 0; idx < MIN (*narcsp, fromlimit); ++idx)
940 size_t to_index;
941 size_t newfromidx;
942 to_index = (data[idx].self_pc / (shobj->hashfraction * sizeof (*tos)));
943 newfromidx = fromidx++;
944 froms[newfromidx].here = &data[idx];
945 froms[newfromidx].link = tos[to_index];
946 tos[to_index] = newfromidx;
949 return result;
953 static void
954 unload_profdata (struct profdata *profdata)
956 free (profdata->tos);
957 munmap (profdata->addr, profdata->size);
958 free (profdata);
962 static void
963 count_total_ticks (struct shobj *shobj, struct profdata *profdata)
965 volatile uint16_t *kcount = profdata->kcount;
966 size_t maxkidx = shobj->kcountsize;
967 size_t factor = 2 * (65536 / shobj->s_scale);
968 size_t kidx = 0;
969 size_t sidx = 0;
971 while (sidx < symidx)
973 uintptr_t start = sortsym[sidx]->addr;
974 uintptr_t end = start + sortsym[sidx]->size;
976 while (kidx < maxkidx && factor * kidx < start)
977 ++kidx;
978 if (kidx == maxkidx)
979 break;
981 while (kidx < maxkidx && factor * kidx < end)
982 sortsym[sidx]->ticks += kcount[kidx++];
983 if (kidx == maxkidx)
984 break;
986 total_ticks += sortsym[sidx++]->ticks;
991 static size_t
992 find_symbol (uintptr_t addr)
994 size_t sidx = 0;
996 while (sidx < symidx)
998 uintptr_t start = sortsym[sidx]->addr;
999 uintptr_t end = start + sortsym[sidx]->size;
1001 if (addr >= start && addr < end)
1002 return sidx;
1004 if (addr < start)
1005 break;
1007 ++sidx;
1010 return (size_t) -1l;
1014 static void
1015 count_calls (struct shobj *shobj, struct profdata *profdata)
1017 struct here_cg_arc_record *data = profdata->data;
1018 uint32_t narcs = profdata->narcs;
1019 uint32_t cnt;
1021 for (cnt = 0; cnt < narcs; ++cnt)
1023 uintptr_t here = data[cnt].self_pc;
1024 size_t symbol_idx;
1026 /* Find the symbol for this address. */
1027 symbol_idx = find_symbol (here);
1028 if (symbol_idx != (size_t) -1l)
1029 sortsym[symbol_idx]->calls += data[cnt].count;
1034 static int
1035 symorder (const void *o1, const void *o2)
1037 const struct known_symbol *p1 = (const struct known_symbol *) o1;
1038 const struct known_symbol *p2 = (const struct known_symbol *) o2;
1040 return p1->addr - p2->addr;
1044 static void
1045 printsym (const void *node, VISIT value, int level)
1047 if (value == leaf || value == postorder)
1048 sortsym[symidx++] = *(struct known_symbol **) node;
1052 static void
1053 read_symbols (struct shobj *shobj)
1055 int n = 0;
1057 /* Initialize the obstacks. */
1058 #define obstack_chunk_alloc malloc
1059 #define obstack_chunk_free free
1060 obstack_init (&shobj->ob_str);
1061 obstack_init (&shobj->ob_sym);
1062 obstack_init (&ob_list);
1064 /* Process the symbols. */
1065 if (shobj->symtab != NULL)
1067 const ElfW(Sym) *sym = shobj->symtab;
1068 const ElfW(Sym) *sym_end
1069 = (const ElfW(Sym) *) ((const char *) sym + shobj->symtab_size);
1070 for (; sym < sym_end; sym++)
1071 if ((ELFW(ST_TYPE) (sym->st_info) == STT_FUNC
1072 || ELFW(ST_TYPE) (sym->st_info) == STT_NOTYPE)
1073 && sym->st_size != 0)
1075 struct known_symbol **existp;
1076 struct known_symbol *newsym
1077 = (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
1078 sizeof (*newsym));
1079 if (newsym == NULL)
1080 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
1082 newsym->name = &shobj->strtab[sym->st_name];
1083 newsym->addr = sym->st_value;
1084 newsym->size = sym->st_size;
1085 newsym->weak = ELFW(ST_BIND) (sym->st_info) == STB_WEAK;
1086 newsym->hidden = (ELFW(ST_VISIBILITY) (sym->st_other)
1087 != STV_DEFAULT);
1088 newsym->ticks = 0;
1089 newsym->calls = 0;
1091 existp = tfind (newsym, &symroot, symorder);
1092 if (existp == NULL)
1094 /* New function. */
1095 tsearch (newsym, &symroot, symorder);
1096 ++n;
1098 else
1100 /* The function is already defined. See whether we have
1101 a better name here. */
1102 if (((*existp)->hidden && !newsym->hidden)
1103 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1104 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1105 && ((*existp)->weak && !newsym->weak)))
1106 *existp = newsym;
1107 else
1108 /* We don't need the allocated memory. */
1109 obstack_free (&shobj->ob_sym, newsym);
1113 else
1115 /* Blarg, the binary is stripped. We have to rely on the
1116 information contained in the dynamic section of the object. */
1117 const ElfW(Sym) *symtab = (ElfW(Sym) *) D_PTR (shobj->map,
1118 l_info[DT_SYMTAB]);
1119 const char *strtab = (const char *) D_PTR (shobj->map,
1120 l_info[DT_STRTAB]);
1122 /* We assume that the string table follows the symbol table,
1123 because there is no way in ELF to know the size of the
1124 dynamic symbol table without looking at the section headers. */
1125 while ((void *) symtab < (void *) strtab)
1127 if ((ELFW(ST_TYPE)(symtab->st_info) == STT_FUNC
1128 || ELFW(ST_TYPE)(symtab->st_info) == STT_NOTYPE)
1129 && symtab->st_size != 0)
1131 struct known_symbol *newsym;
1132 struct known_symbol **existp;
1134 newsym =
1135 (struct known_symbol *) obstack_alloc (&shobj->ob_sym,
1136 sizeof (*newsym));
1137 if (newsym == NULL)
1138 error (EXIT_FAILURE, errno, _("cannot allocate symbol data"));
1140 newsym->name = &strtab[symtab->st_name];
1141 newsym->addr = symtab->st_value;
1142 newsym->size = symtab->st_size;
1143 newsym->weak = ELFW(ST_BIND) (symtab->st_info) == STB_WEAK;
1144 newsym->hidden = (ELFW(ST_VISIBILITY) (symtab->st_other)
1145 != STV_DEFAULT);
1146 newsym->ticks = 0;
1147 newsym->froms = NULL;
1148 newsym->tos = NULL;
1150 existp = tfind (newsym, &symroot, symorder);
1151 if (existp == NULL)
1153 /* New function. */
1154 tsearch (newsym, &symroot, symorder);
1155 ++n;
1157 else
1159 /* The function is already defined. See whether we have
1160 a better name here. */
1161 if (((*existp)->hidden && !newsym->hidden)
1162 || ((*existp)->name[0] == '_' && newsym->name[0] != '_')
1163 || ((*existp)->name[0] != '_' && newsym->name[0] != '_'
1164 && ((*existp)->weak && !newsym->weak)))
1165 *existp = newsym;
1166 else
1167 /* We don't need the allocated memory. */
1168 obstack_free (&shobj->ob_sym, newsym);
1172 ++symtab;
1176 sortsym = malloc (n * sizeof (struct known_symbol *));
1177 if (sortsym == NULL)
1178 abort ();
1180 twalk (symroot, printsym);
1184 static void
1185 add_arcs (struct profdata *profdata)
1187 uint32_t narcs = profdata->narcs;
1188 struct here_cg_arc_record *data = profdata->data;
1189 uint32_t cnt;
1191 for (cnt = 0; cnt < narcs; ++cnt)
1193 /* First add the incoming arc. */
1194 size_t sym_idx = find_symbol (data[cnt].self_pc);
1196 if (sym_idx != (size_t) -1l)
1198 struct known_symbol *sym = sortsym[sym_idx];
1199 struct arc_list *runp = sym->froms;
1201 while (runp != NULL
1202 && ((data[cnt].from_pc == 0 && runp->idx != (size_t) -1l)
1203 || (data[cnt].from_pc != 0
1204 && (runp->idx == (size_t) -1l
1205 || data[cnt].from_pc < sortsym[runp->idx]->addr
1206 || (data[cnt].from_pc
1207 >= (sortsym[runp->idx]->addr
1208 + sortsym[runp->idx]->size))))))
1209 runp = runp->next;
1211 if (runp == NULL)
1213 /* We need a new entry. */
1214 struct arc_list *newp = (struct arc_list *)
1215 obstack_alloc (&ob_list, sizeof (struct arc_list));
1217 if (data[cnt].from_pc == 0)
1218 newp->idx = (size_t) -1l;
1219 else
1220 newp->idx = find_symbol (data[cnt].from_pc);
1221 newp->count = data[cnt].count;
1222 newp->next = sym->froms;
1223 sym->froms = newp;
1225 else
1226 /* Increment the counter for the found entry. */
1227 runp->count += data[cnt].count;
1230 /* Now add it to the appropriate outgoing list. */
1231 sym_idx = find_symbol (data[cnt].from_pc);
1232 if (sym_idx != (size_t) -1l)
1234 struct known_symbol *sym = sortsym[sym_idx];
1235 struct arc_list *runp = sym->tos;
1237 while (runp != NULL
1238 && (runp->idx == (size_t) -1l
1239 || data[cnt].self_pc < sortsym[runp->idx]->addr
1240 || data[cnt].self_pc >= (sortsym[runp->idx]->addr
1241 + sortsym[runp->idx]->size)))
1242 runp = runp->next;
1244 if (runp == NULL)
1246 /* We need a new entry. */
1247 struct arc_list *newp = (struct arc_list *)
1248 obstack_alloc (&ob_list, sizeof (struct arc_list));
1250 newp->idx = find_symbol (data[cnt].self_pc);
1251 newp->count = data[cnt].count;
1252 newp->next = sym->tos;
1253 sym->tos = newp;
1255 else
1256 /* Increment the counter for the found entry. */
1257 runp->count += data[cnt].count;
1263 static int
1264 countorder (const void *p1, const void *p2)
1266 struct known_symbol *s1 = (struct known_symbol *) p1;
1267 struct known_symbol *s2 = (struct known_symbol *) p2;
1269 if (s1->ticks != s2->ticks)
1270 return (int) (s2->ticks - s1->ticks);
1272 if (s1->calls != s2->calls)
1273 return (int) (s2->calls - s1->calls);
1275 return strcmp (s1->name, s2->name);
1279 static double tick_unit;
1280 static uintmax_t cumu_ticks;
1282 static void
1283 printflat (const void *node, VISIT value, int level)
1285 if (value == leaf || value == postorder)
1287 struct known_symbol *s = *(struct known_symbol **) node;
1289 cumu_ticks += s->ticks;
1291 printf ("%6.2f%10.2f%9.2f%9" PRIdMAX "%9.2f %s\n",
1292 total_ticks ? (100.0 * s->ticks) / total_ticks : 0.0,
1293 tick_unit * cumu_ticks,
1294 tick_unit * s->ticks,
1295 s->calls,
1296 s->calls ? (s->ticks * 1000000) * tick_unit / s->calls : 0,
1297 /* FIXME: don't know about called functions. */
1298 s->name);
/* ARGSUSED */
/* Node destructor passed to tdestroy by generate_flat_profile.  It
   deliberately does nothing: the tree's keys are the entries of
   sortsym, which other code still reads after the tree is gone.  */
static void
freenoop (void *p)
{
  (void) p;
}
1310 static void
1311 generate_flat_profile (struct profdata *profdata)
1313 size_t n;
1314 void *data = NULL;
1316 tick_unit = 1.0 / profdata->hist_hdr->prof_rate;
1318 printf ("Flat profile:\n\n"
1319 "Each sample counts as %g %s.\n",
1320 tick_unit, profdata->hist_hdr->dimen);
1321 fputs (" % cumulative self self total\n"
1322 " time seconds seconds calls us/call us/call name\n",
1323 stdout);
1325 for (n = 0; n < symidx; ++n)
1326 if (sortsym[n]->calls != 0 || sortsym[n]->ticks != 0)
1327 tsearch (sortsym[n], &data, countorder);
1329 twalk (data, printflat);
1331 tdestroy (data, freenoop);
1335 static void
1336 generate_call_graph (struct profdata *profdata)
1338 size_t cnt;
1340 puts ("\nindex % time self children called name\n");
1342 for (cnt = 0; cnt < symidx; ++cnt)
1343 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1345 struct arc_list *runp;
1346 size_t n;
1348 /* First print the from-information. */
1349 runp = sortsym[cnt]->froms;
1350 while (runp != NULL)
1352 printf (" %8.2f%8.2f%9" PRIdMAX "/%-9" PRIdMAX " %s",
1353 (runp->idx != (size_t) -1l
1354 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1355 0.0, /* FIXME: what's time for the children, recursive */
1356 runp->count, sortsym[cnt]->calls,
1357 (runp->idx != (size_t) -1l ?
1358 sortsym[runp->idx]->name : "<UNKNOWN>"));
1360 if (runp->idx != (size_t) -1l)
1361 printf (" [%Zd]", runp->idx);
1362 putchar_unlocked ('\n');
1364 runp = runp->next;
1367 /* Info about the function itself. */
1368 n = printf ("[%Zu]", cnt);
1369 printf ("%*s%5.1f%8.2f%8.2f%9" PRIdMAX " %s [%Zd]\n",
1370 (int) (7 - n), " ",
1371 total_ticks ? (100.0 * sortsym[cnt]->ticks) / total_ticks : 0,
1372 sortsym[cnt]->ticks * tick_unit,
1373 0.0, /* FIXME: what's time for the children, recursive */
1374 sortsym[cnt]->calls,
1375 sortsym[cnt]->name, cnt);
1377 /* Info about the functions this function calls. */
1378 runp = sortsym[cnt]->tos;
1379 while (runp != NULL)
1381 printf (" %8.2f%8.2f%9" PRIdMAX "/",
1382 (runp->idx != (size_t) -1l
1383 ? sortsym[runp->idx]->ticks * tick_unit : 0.0),
1384 0.0, /* FIXME: what's time for the children, recursive */
1385 runp->count);
1387 if (runp->idx != (size_t) -1l)
1388 printf ("%-9" PRIdMAX " %s [%Zd]\n",
1389 sortsym[runp->idx]->calls,
1390 sortsym[runp->idx]->name,
1391 runp->idx);
1392 else
1393 fputs ("??? <UNKNOWN>\n\n", stdout);
1395 runp = runp->next;
1398 fputs ("-----------------------------------------------\n", stdout);
1403 static void
1404 generate_call_pair_list (struct profdata *profdata)
1406 size_t cnt;
1408 for (cnt = 0; cnt < symidx; ++cnt)
1409 if (sortsym[cnt]->froms != NULL || sortsym[cnt]->tos != NULL)
1411 struct arc_list *runp;
1413 /* First print the incoming arcs. */
1414 runp = sortsym[cnt]->froms;
1415 while (runp != NULL)
1417 if (runp->idx == (size_t) -1l)
1418 printf ("\
1419 <UNKNOWN> %-34s %9" PRIdMAX "\n",
1420 sortsym[cnt]->name, runp->count);
1421 runp = runp->next;
1424 /* Next the outgoing arcs. */
1425 runp = sortsym[cnt]->tos;
1426 while (runp != NULL)
1428 printf ("%-34s %-34s %9" PRIdMAX "\n",
1429 sortsym[cnt]->name,
1430 (runp->idx != (size_t) -1l
1431 ? sortsym[runp->idx]->name : "<UNKNOWN>"),
1432 runp->count);
1433 runp = runp->next;