2005-02-28 Paolo Bonzini <bonzini@gnu.org>
[binutils.git] / gprof / hist.c
blob1ee40c50257b8b3115d5f9c27cc47117f2e761a0
1 /* hist.c - Histogram related operations.
3 Copyright 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "corefile.h"
28 #include "gmon_io.h"
29 #include "gmon_out.h"
30 #include "hist.h"
31 #include "sym_ids.h"
32 #include "utils.h"
34 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
36 static void scale_and_align_entries (void);
37 static void print_header (int);
38 static void print_line (Sym *, double);
39 static int cmp_time (const PTR, const PTR);
41 /* Declarations of automatically generated functions to output blurbs. */
42 extern void flat_blurb (FILE * fp);
44 bfd_vma s_lowpc; /* Lowest address in .text. */
45 bfd_vma s_highpc = 0; /* Highest address in .text. */
46 bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */
47 unsigned int hist_num_bins = 0; /* Number of histogram samples. */
48 int *hist_sample = 0; /* Histogram samples (shorts in the file!). */
49 double hist_scale;
50 char hist_dimension[16] = "seconds";
51 char hist_dimension_abbrev = 's';
53 static double accum_time; /* Accumulated time so far for print_line(). */
54 static double total_time; /* Total time for all routines. */
56 /* Table of SI prefixes for powers of 10 (used to automatically
57 scale some of the values in the flat profile). */
58 const struct
60 char prefix;
61 double scale;
63 SItab[] =
65 { 'T', 1e-12 }, /* tera */
66 { 'G', 1e-09 }, /* giga */
67 { 'M', 1e-06 }, /* mega */
68 { 'K', 1e-03 }, /* kilo */
69 { ' ', 1e-00 },
70 { 'm', 1e+03 }, /* milli */
71 { 'u', 1e+06 }, /* micro */
72 { 'n', 1e+09 }, /* nano */
73 { 'p', 1e+12 }, /* pico */
74 { 'f', 1e+15 }, /* femto */
75 { 'a', 1e+18 } /* ato */
79 /* Read the histogram from file IFP. FILENAME is the name of IFP and
80 is provided for formatting error messages only. */
82 void
83 hist_read_rec (FILE * ifp, const char *filename)
85 bfd_vma n_lowpc, n_highpc;
86 unsigned int i, ncnt, profrate;
87 UNIT count;
89 if (gmon_io_read_vma (ifp, &n_lowpc)
90 || gmon_io_read_vma (ifp, &n_highpc)
91 || gmon_io_read_32 (ifp, &ncnt)
92 || gmon_io_read_32 (ifp, &profrate)
93 || gmon_io_read (ifp, hist_dimension, 15)
94 || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
96 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
97 whoami, filename);
99 done (1);
102 if (!s_highpc)
104 /* This is the first histogram record. */
105 s_lowpc = n_lowpc;
106 s_highpc = n_highpc;
107 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
108 highpc = (bfd_vma) n_highpc / sizeof (UNIT);
109 hist_num_bins = ncnt;
110 hz = profrate;
113 DBG (SAMPLEDEBUG,
114 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
115 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
116 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %u\n",
117 (unsigned long) s_lowpc, (unsigned long) s_highpc,
118 hist_num_bins);
119 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
120 (unsigned long) lowpc, (unsigned long) highpc));
122 if (n_lowpc != s_lowpc || n_highpc != s_highpc
123 || ncnt != hist_num_bins || hz != (int) profrate)
125 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
126 whoami, filename);
127 done (1);
130 if (!hist_sample)
132 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
133 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
136 for (i = 0; i < hist_num_bins; ++i)
138 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
140 fprintf (stderr,
141 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
142 whoami, filename, i, hist_num_bins);
143 done (1);
145 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
146 DBG (SAMPLEDEBUG,
147 printf ("[hist_read_rec] 0x%lx: %u\n",
148 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
149 hist_sample[i]));
154 /* Write execution histogram to file OFP. FILENAME is the name
155 of OFP and is provided for formatting error-messages only. */
157 void
158 hist_write_hist (FILE * ofp, const char *filename)
160 UNIT count;
161 unsigned int i;
163 /* Write header. */
165 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
166 || gmon_io_write_vma (ofp, s_lowpc)
167 || gmon_io_write_vma (ofp, s_highpc)
168 || gmon_io_write_32 (ofp, hist_num_bins)
169 || gmon_io_write_32 (ofp, hz)
170 || gmon_io_write (ofp, hist_dimension, 15)
171 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
173 perror (filename);
174 done (1);
177 for (i = 0; i < hist_num_bins; ++i)
179 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
181 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
183 perror (filename);
184 done (1);
190 /* Calculate scaled entry point addresses (to save time in
191 hist_assign_samples), and, on architectures that have procedure
192 entry masks at the start of a function, possibly push the scaled
193 entry points over the procedure entry mask, if it turns out that
194 the entry point is in one bin and the code for a routine is in the
195 next bin. */
197 static void
198 scale_and_align_entries ()
200 Sym *sym;
201 bfd_vma bin_of_entry;
202 bfd_vma bin_of_code;
204 for (sym = symtab.base; sym < symtab.limit; sym++)
206 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
207 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
208 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
209 / hist_scale);
210 if (bin_of_entry < bin_of_code)
212 DBG (SAMPLEDEBUG,
213 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
214 (unsigned long) sym->hist.scaled_addr,
215 (unsigned long) (sym->hist.scaled_addr
216 + UNITS_TO_CODE)));
217 sym->hist.scaled_addr += UNITS_TO_CODE;
223 /* Assign samples to the symbol to which they belong.
225 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
226 which may overlap one more symbol address ranges. If a symbol
227 overlaps with the bin's address range by O percent, then O percent
228 of the bin's count is credited to that symbol.
230 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
231 with respect to the symbol's address range [SYM_LOW_PC,
232 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
233 the distance (in UNITs) between the arrows, the fraction of the
234 sample that is to be credited to the symbol which starts at
235 SYM_LOW_PC.
237 sym_low_pc sym_high_pc
241 +-----------------------------------------------+
243 | ->| |<- ->| |<- ->| |<- |
244 | | | | | |
245 +---------+ +---------+ +---------+
247 ^ ^ ^ ^ ^ ^
248 | | | | | |
249 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
251 For the VAX we assert that samples will never fall in the first two
252 bytes of any routine, since that is the entry mask, thus we call
253 scale_and_align_entries() to adjust the entry points if the entry
254 mask falls in one bin but the code for the routine doesn't start
255 until the next bin. In conjunction with the alignment of routine
256 addresses, this should allow us to have only one sample for every
257 four bytes of text space and never have any overlap (the two end
258 cases, above). */
260 void
261 hist_assign_samples ()
263 bfd_vma bin_low_pc, bin_high_pc;
264 bfd_vma sym_low_pc, sym_high_pc;
265 bfd_vma overlap, addr;
266 unsigned int bin_count;
267 unsigned int i, j;
268 double time, credit;
270 /* Read samples and assign to symbols. */
271 hist_scale = highpc - lowpc;
272 hist_scale /= hist_num_bins;
273 scale_and_align_entries ();
275 /* Iterate over all sample bins. */
276 for (i = 0, j = 1; i < hist_num_bins; ++i)
278 bin_count = hist_sample[i];
279 if (! bin_count)
280 continue;
282 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
283 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
284 time = bin_count;
286 DBG (SAMPLEDEBUG,
287 printf (
288 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
289 (unsigned long) (sizeof (UNIT) * bin_low_pc),
290 (unsigned long) (sizeof (UNIT) * bin_high_pc),
291 bin_count));
292 total_time += time;
294 /* Credit all symbols that are covered by bin I. */
295 for (j = j - 1; j < symtab.len; ++j)
297 sym_low_pc = symtab.base[j].hist.scaled_addr;
298 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
300 /* If high end of bin is below entry address,
301 go for next bin. */
302 if (bin_high_pc < sym_low_pc)
303 break;
305 /* If low end of bin is above high end of symbol,
306 go for next symbol. */
307 if (bin_low_pc >= sym_high_pc)
308 continue;
310 overlap =
311 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
312 if (overlap > 0)
314 DBG (SAMPLEDEBUG,
315 printf (
316 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
317 (unsigned long) symtab.base[j].addr,
318 (unsigned long) (sizeof (UNIT) * sym_high_pc),
319 symtab.base[j].name, overlap * time / hist_scale,
320 (long) overlap));
322 addr = symtab.base[j].addr;
323 credit = overlap * time / hist_scale;
325 /* Credit symbol if it appears in INCL_FLAT or that
326 table is empty and it does not appear it in
327 EXCL_FLAT. */
328 if (sym_lookup (&syms[INCL_FLAT], addr)
329 || (syms[INCL_FLAT].len == 0
330 && !sym_lookup (&syms[EXCL_FLAT], addr)))
332 symtab.base[j].hist.time += credit;
334 else
336 total_time -= credit;
342 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
343 total_time));
347 /* Print header for flag histogram profile. */
349 static void
350 print_header (int prefix)
352 char unit[64];
354 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
356 if (bsd_style_output)
358 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
359 (long) hist_scale * sizeof (UNIT));
360 if (total_time > 0.0)
362 printf (_(" for %.2f%% of %.2f %s\n\n"),
363 100.0 / total_time, total_time / hz, hist_dimension);
366 else
368 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
371 if (total_time <= 0.0)
373 printf (_(" no time accumulated\n\n"));
375 /* This doesn't hurt since all the numerators will be zero. */
376 total_time = 1.0;
379 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
380 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
381 "");
382 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
383 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
384 _("name"));
388 static void
389 print_line (Sym *sym, double scale)
391 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
392 return;
394 accum_time += sym->hist.time;
396 if (bsd_style_output)
397 printf ("%5.1f %10.2f %8.2f",
398 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
399 accum_time / hz, sym->hist.time / hz);
400 else
401 printf ("%6.2f %9.2f %8.2f",
402 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
403 accum_time / hz, sym->hist.time / hz);
405 if (sym->ncalls != 0)
406 printf (" %8lu %8.2f %8.2f ",
407 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
408 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
409 else
410 printf (" %8.8s %8.8s %8.8s ", "", "", "");
412 if (bsd_style_output)
413 print_name (sym);
414 else
415 print_name_only (sym);
417 printf ("\n");
421 /* Compare LP and RP. The primary comparison key is execution time,
422 the secondary is number of invocation, and the tertiary is the
423 lexicographic order of the function names. */
425 static int
426 cmp_time (const PTR lp, const PTR rp)
428 const Sym *left = *(const Sym **) lp;
429 const Sym *right = *(const Sym **) rp;
430 double time_diff;
432 time_diff = right->hist.time - left->hist.time;
434 if (time_diff > 0.0)
435 return 1;
437 if (time_diff < 0.0)
438 return -1;
440 if (right->ncalls > left->ncalls)
441 return 1;
443 if (right->ncalls < left->ncalls)
444 return -1;
446 return strcmp (left->name, right->name);
450 /* Print the flat histogram profile. */
452 void
453 hist_print ()
455 Sym **time_sorted_syms, *top_dog, *sym;
456 unsigned int index;
457 unsigned log_scale;
458 double top_time, time;
459 bfd_vma addr;
461 if (first_output)
462 first_output = FALSE;
463 else
464 printf ("\f\n");
466 accum_time = 0.0;
468 if (bsd_style_output)
470 if (print_descriptions)
472 printf (_("\n\n\nflat profile:\n"));
473 flat_blurb (stdout);
476 else
478 printf (_("Flat profile:\n"));
481 /* Sort the symbol table by time (call-count and name as secondary
482 and tertiary keys). */
483 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
485 for (index = 0; index < symtab.len; ++index)
486 time_sorted_syms[index] = &symtab.base[index];
488 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
490 if (bsd_style_output)
492 log_scale = 5; /* Milli-seconds is BSD-default. */
494 else
496 /* Search for symbol with highest per-call
497 execution time and scale accordingly. */
498 log_scale = 0;
499 top_dog = 0;
500 top_time = 0.0;
502 for (index = 0; index < symtab.len; ++index)
504 sym = time_sorted_syms[index];
506 if (sym->ncalls != 0)
508 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
510 if (time > top_time)
512 top_dog = sym;
513 top_time = time;
518 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
520 top_time /= hz;
522 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
524 double scaled_value = SItab[log_scale].scale * top_time;
526 if (scaled_value >= 1.0 && scaled_value < 1000.0)
527 break;
532 /* For now, the dimension is always seconds. In the future, we
533 may also want to support other (pseudo-)dimensions (such as
534 I-cache misses etc.). */
535 print_header (SItab[log_scale].prefix);
537 for (index = 0; index < symtab.len; ++index)
539 addr = time_sorted_syms[index]->addr;
541 /* Print symbol if its in INCL_FLAT table or that table
542 is empty and the symbol is not in EXCL_FLAT. */
543 if (sym_lookup (&syms[INCL_FLAT], addr)
544 || (syms[INCL_FLAT].len == 0
545 && !sym_lookup (&syms[EXCL_FLAT], addr)))
546 print_line (time_sorted_syms[index], SItab[log_scale].scale);
549 free (time_sorted_syms);
551 if (print_descriptions && !bsd_style_output)
552 flat_blurb (stdout);