* acx.m4: Name cache variables properly.
[binutils.git] / gprof / hist.c
blobbfa34eebf0839e9e06efb4ab87cab33609afdf34
1 /* hist.c - Histogram related operations.
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "corefile.h"
28 #include "gmon_io.h"
29 #include "gmon_out.h"
30 #include "hist.h"
31 #include "sym_ids.h"
32 #include "utils.h"
34 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
36 static void scale_and_align_entries PARAMS ((void));
37 static void print_header PARAMS ((int));
38 static void print_line PARAMS ((Sym *, double));
39 static int cmp_time PARAMS ((const PTR, const PTR));
41 /* Declarations of automatically generated functions to output blurbs. */
42 extern void flat_blurb PARAMS ((FILE * fp));
44 bfd_vma s_lowpc; /* Lowest address in .text. */
45 bfd_vma s_highpc = 0; /* Highest address in .text. */
46 bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */
47 int hist_num_bins = 0; /* Number of histogram samples. */
48 int *hist_sample = 0; /* Histogram samples (shorts in the file!). */
49 double hist_scale;
50 char hist_dimension[16] = "seconds";
51 char hist_dimension_abbrev = 's';
53 static double accum_time; /* Accumulated time so far for print_line(). */
54 static double total_time; /* Total time for all routines. */
56 /* Table of SI prefixes for powers of 10 (used to automatically
57 scale some of the values in the flat profile). */
58 const struct
60 char prefix;
61 double scale;
63 SItab[] =
65 { 'T', 1e-12 }, /* tera */
66 { 'G', 1e-09 }, /* giga */
67 { 'M', 1e-06 }, /* mega */
68 { 'K', 1e-03 }, /* kilo */
69 { ' ', 1e-00 },
70 { 'm', 1e+03 }, /* milli */
71 { 'u', 1e+06 }, /* micro */
72 { 'n', 1e+09 }, /* nano */
73 { 'p', 1e+12 }, /* pico */
74 { 'f', 1e+15 }, /* femto */
75 { 'a', 1e+18 } /* ato */
79 /* Read the histogram from file IFP. FILENAME is the name of IFP and
80 is provided for formatting error messages only. */
82 void
83 hist_read_rec (ifp, filename)
84 FILE * ifp;
85 const char *filename;
87 bfd_vma n_lowpc, n_highpc;
88 int i, ncnt, profrate;
89 UNIT count;
91 if (gmon_io_read_vma (ifp, &n_lowpc)
92 || gmon_io_read_vma (ifp, &n_highpc)
93 || gmon_io_read_32 (ifp, &ncnt)
94 || gmon_io_read_32 (ifp, &profrate)
95 || gmon_io_read (ifp, hist_dimension, 15)
96 || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
98 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
99 whoami, filename);
101 done (1);
104 if (!s_highpc)
106 /* This is the first histogram record. */
107 s_lowpc = n_lowpc;
108 s_highpc = n_highpc;
109 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
110 highpc = (bfd_vma) n_highpc / sizeof (UNIT);
111 hist_num_bins = ncnt;
112 hz = profrate;
115 DBG (SAMPLEDEBUG,
116 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %d\n",
117 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
118 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %d\n",
119 (unsigned long) s_lowpc, (unsigned long) s_highpc,
120 hist_num_bins);
121 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
122 (unsigned long) lowpc, (unsigned long) highpc));
124 if (n_lowpc != s_lowpc || n_highpc != s_highpc
125 || ncnt != hist_num_bins || hz != profrate)
127 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
128 whoami, filename);
129 done (1);
132 if (!hist_sample)
134 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
135 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
138 for (i = 0; i < hist_num_bins; ++i)
140 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
142 fprintf (stderr,
143 _("%s: %s: unexpected EOF after reading %d of %d samples\n"),
144 whoami, filename, i, hist_num_bins);
145 done (1);
147 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
148 DBG (SAMPLEDEBUG,
149 printf ("[hist_read_rec] 0x%lx: %u\n",
150 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
151 hist_sample[i]));
156 /* Write execution histogram to file OFP. FILENAME is the name
157 of OFP and is provided for formatting error-messages only. */
159 void
160 hist_write_hist (ofp, filename)
161 FILE * ofp;
162 const char *filename;
164 UNIT count;
165 int i;
167 /* Write header. */
169 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
170 || gmon_io_write_vma (ofp, s_lowpc)
171 || gmon_io_write_vma (ofp, s_highpc)
172 || gmon_io_write_32 (ofp, hist_num_bins)
173 || gmon_io_write_32 (ofp, hz)
174 || gmon_io_write (ofp, hist_dimension, 15)
175 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
177 perror (filename);
178 done (1);
181 for (i = 0; i < hist_num_bins; ++i)
183 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
185 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
187 perror (filename);
188 done (1);
194 /* Calculate scaled entry point addresses (to save time in
195 hist_assign_samples), and, on architectures that have procedure
196 entry masks at the start of a function, possibly push the scaled
197 entry points over the procedure entry mask, if it turns out that
198 the entry point is in one bin and the code for a routine is in the
199 next bin. */
201 static void
202 scale_and_align_entries ()
204 Sym *sym;
205 bfd_vma bin_of_entry;
206 bfd_vma bin_of_code;
208 for (sym = symtab.base; sym < symtab.limit; sym++)
210 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
211 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
212 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
213 / hist_scale);
214 if (bin_of_entry < bin_of_code)
216 DBG (SAMPLEDEBUG,
217 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
218 (unsigned long) sym->hist.scaled_addr,
219 (unsigned long) (sym->hist.scaled_addr
220 + UNITS_TO_CODE)));
221 sym->hist.scaled_addr += UNITS_TO_CODE;
227 /* Assign samples to the symbol to which they belong.
229 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
230 which may overlap one more symbol address ranges. If a symbol
231 overlaps with the bin's address range by O percent, then O percent
232 of the bin's count is credited to that symbol.
234 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
235 with respect to the symbol's address range [SYM_LOW_PC,
236 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
237 the distance (in UNITs) between the arrows, the fraction of the
238 sample that is to be credited to the symbol which starts at
239 SYM_LOW_PC.
241 sym_low_pc sym_high_pc
245 +-----------------------------------------------+
247 | ->| |<- ->| |<- ->| |<- |
248 | | | | | |
249 +---------+ +---------+ +---------+
251 ^ ^ ^ ^ ^ ^
252 | | | | | |
253 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
255 For the VAX we assert that samples will never fall in the first two
256 bytes of any routine, since that is the entry mask, thus we call
257 scale_and_align_entries() to adjust the entry points if the entry
258 mask falls in one bin but the code for the routine doesn't start
259 until the next bin. In conjunction with the alignment of routine
260 addresses, this should allow us to have only one sample for every
261 four bytes of text space and never have any overlap (the two end
262 cases, above). */
264 void
265 hist_assign_samples ()
267 bfd_vma bin_low_pc, bin_high_pc;
268 bfd_vma sym_low_pc, sym_high_pc;
269 bfd_vma overlap, addr;
270 int bin_count, i;
271 unsigned int j;
272 double time, credit;
274 /* Read samples and assign to symbols. */
275 hist_scale = highpc - lowpc;
276 hist_scale /= hist_num_bins;
277 scale_and_align_entries ();
279 /* Iterate over all sample bins. */
280 for (i = 0, j = 1; i < hist_num_bins; ++i)
282 bin_count = hist_sample[i];
283 if (! bin_count)
284 continue;
286 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
287 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
288 time = bin_count;
290 DBG (SAMPLEDEBUG,
291 printf (
292 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%d\n",
293 (unsigned long) (sizeof (UNIT) * bin_low_pc),
294 (unsigned long) (sizeof (UNIT) * bin_high_pc),
295 bin_count));
296 total_time += time;
298 /* Credit all symbols that are covered by bin I. */
299 for (j = j - 1; j < symtab.len; ++j)
301 sym_low_pc = symtab.base[j].hist.scaled_addr;
302 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
304 /* If high end of bin is below entry address,
305 go for next bin. */
306 if (bin_high_pc < sym_low_pc)
307 break;
309 /* If low end of bin is above high end of symbol,
310 go for next symbol. */
311 if (bin_low_pc >= sym_high_pc)
312 continue;
314 overlap =
315 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
316 if (overlap > 0)
318 DBG (SAMPLEDEBUG,
319 printf (
320 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
321 (unsigned long) symtab.base[j].addr,
322 (unsigned long) (sizeof (UNIT) * sym_high_pc),
323 symtab.base[j].name, overlap * time / hist_scale,
324 (long) overlap));
326 addr = symtab.base[j].addr;
327 credit = overlap * time / hist_scale;
329 /* Credit symbol if it appears in INCL_FLAT or that
330 table is empty and it does not appear it in
331 EXCL_FLAT. */
332 if (sym_lookup (&syms[INCL_FLAT], addr)
333 || (syms[INCL_FLAT].len == 0
334 && !sym_lookup (&syms[EXCL_FLAT], addr)))
336 symtab.base[j].hist.time += credit;
338 else
340 total_time -= credit;
346 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
347 total_time));
351 /* Print header for flag histogram profile. */
353 static void
354 print_header (prefix)
355 int prefix;
357 char unit[64];
359 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
361 if (bsd_style_output)
363 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
364 (long) hist_scale * sizeof (UNIT));
365 if (total_time > 0.0)
367 printf (_(" for %.2f%% of %.2f %s\n\n"),
368 100.0 / total_time, total_time / hz, hist_dimension);
371 else
373 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
376 if (total_time <= 0.0)
378 printf (_(" no time accumulated\n\n"));
380 /* This doesn't hurt since all the numerators will be zero. */
381 total_time = 1.0;
384 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
385 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
386 "");
387 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
388 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
389 _("name"));
393 static void
394 print_line (sym, scale)
395 Sym *sym;
396 double scale;
398 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
399 return;
401 accum_time += sym->hist.time;
403 if (bsd_style_output)
404 printf ("%5.1f %10.2f %8.2f",
405 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
406 accum_time / hz, sym->hist.time / hz);
407 else
408 printf ("%6.2f %9.2f %8.2f",
409 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
410 accum_time / hz, sym->hist.time / hz);
412 if (sym->ncalls != 0)
413 printf (" %8lu %8.2f %8.2f ",
414 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
415 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
416 else
417 printf (" %8.8s %8.8s %8.8s ", "", "", "");
419 if (bsd_style_output)
420 print_name (sym);
421 else
422 print_name_only (sym);
424 printf ("\n");
428 /* Compare LP and RP. The primary comparison key is execution time,
429 the secondary is number of invocation, and the tertiary is the
430 lexicographic order of the function names. */
432 static int
433 cmp_time (lp, rp)
434 const PTR lp;
435 const PTR rp;
437 const Sym *left = *(const Sym **) lp;
438 const Sym *right = *(const Sym **) rp;
439 double time_diff;
441 time_diff = right->hist.time - left->hist.time;
443 if (time_diff > 0.0)
444 return 1;
446 if (time_diff < 0.0)
447 return -1;
449 if (right->ncalls > left->ncalls)
450 return 1;
452 if (right->ncalls < left->ncalls)
453 return -1;
455 return strcmp (left->name, right->name);
459 /* Print the flat histogram profile. */
461 void
462 hist_print ()
464 Sym **time_sorted_syms, *top_dog, *sym;
465 unsigned int index;
466 unsigned log_scale;
467 double top_time, time;
468 bfd_vma addr;
470 if (first_output)
471 first_output = FALSE;
472 else
473 printf ("\f\n");
475 accum_time = 0.0;
477 if (bsd_style_output)
479 if (print_descriptions)
481 printf (_("\n\n\nflat profile:\n"));
482 flat_blurb (stdout);
485 else
487 printf (_("Flat profile:\n"));
490 /* Sort the symbol table by time (call-count and name as secondary
491 and tertiary keys). */
492 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
494 for (index = 0; index < symtab.len; ++index)
495 time_sorted_syms[index] = &symtab.base[index];
497 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
499 if (bsd_style_output)
501 log_scale = 5; /* Milli-seconds is BSD-default. */
503 else
505 /* Search for symbol with highest per-call
506 execution time and scale accordingly. */
507 log_scale = 0;
508 top_dog = 0;
509 top_time = 0.0;
511 for (index = 0; index < symtab.len; ++index)
513 sym = time_sorted_syms[index];
515 if (sym->ncalls != 0)
517 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
519 if (time > top_time)
521 top_dog = sym;
522 top_time = time;
527 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
529 top_time /= hz;
531 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
533 double scaled_value = SItab[log_scale].scale * top_time;
535 if (scaled_value >= 1.0 && scaled_value < 1000.0)
536 break;
541 /* For now, the dimension is always seconds. In the future, we
542 may also want to support other (pseudo-)dimensions (such as
543 I-cache misses etc.). */
544 print_header (SItab[log_scale].prefix);
546 for (index = 0; index < symtab.len; ++index)
548 addr = time_sorted_syms[index]->addr;
550 /* Print symbol if its in INCL_FLAT table or that table
551 is empty and the symbol is not in EXCL_FLAT. */
552 if (sym_lookup (&syms[INCL_FLAT], addr)
553 || (syms[INCL_FLAT].len == 0
554 && !sym_lookup (&syms[EXCL_FLAT], addr)))
555 print_line (time_sorted_syms[index], SItab[log_scale].scale);
558 free (time_sorted_syms);
560 if (print_descriptions && !bsd_style_output)
561 flat_blurb (stdout);