2010-01-10 Doug Kwan <dougkwan@google.com>
[binutils.git] / gprof / hist.c
blob25364b215e823fe633a7823f0d7a895db547a3ee
1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "corefile.h"
29 #include "gmon_io.h"
30 #include "gmon_out.h"
31 #include "hist.h"
32 #include "sym_ids.h"
33 #include "utils.h"
34 #include "math.h"
35 #include "stdio.h"
36 #include "stdlib.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym *, double);
43 static int cmp_time (const PTR, const PTR);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp);
48 static histogram *find_histogram (bfd_vma lowpc, bfd_vma highpc);
49 static histogram *find_histogram_for_pc (bfd_vma pc);
51 histogram * histograms;
52 unsigned num_histograms;
53 double hist_scale;
54 static char hist_dimension[16] = "seconds";
55 static char hist_dimension_abbrev = 's';
57 static double accum_time; /* Accumulated time so far for print_line(). */
58 static double total_time; /* Total time for all routines. */
/* SI prefixes for powers of 10, used to scale the per-call columns of
   the flat profile automatically.  A value multiplied by SCALE is
   expressed in units carrying PREFIX.  Entries run from the largest
   unit to the smallest; hist_print selects entry 5 (milli) directly
   for BSD-style output, so keep the order stable.  */
const struct
  {
    char prefix;
    double scale;
  }
SItab[] =
  {
    { 'T', 1e-12 },		/* tera */
    { 'G', 1e-09 },		/* giga */
    { 'M', 1e-06 },		/* mega */
    { 'K', 1e-03 },		/* kilo */
    { ' ', 1e-00 },		/* unscaled */
    { 'm', 1e+03 },		/* milli */
    { 'u', 1e+06 },		/* micro */
    { 'n', 1e+09 },		/* nano */
    { 'p', 1e+12 },		/* pico */
    { 'f', 1e+15 },		/* femto */
    { 'a', 1e+18 }		/* atto */
  };
82 /* Reads just the header part of histogram record into
83 *RECORD from IFP. FILENAME is the name of IFP and
84 is provided for formatting error messages only.
86 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
87 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
88 that the new histogram is compatible with already-set values
89 of those variables and emits an error if that's not so. */
90 static void
91 read_histogram_header (histogram *record,
92 FILE *ifp, const char *filename,
93 int first)
95 unsigned int profrate;
96 char n_hist_dimension[15];
97 char n_hist_dimension_abbrev;
98 double n_hist_scale;
100 if (gmon_io_read_vma (ifp, &record->lowpc)
101 || gmon_io_read_vma (ifp, &record->highpc)
102 || gmon_io_read_32 (ifp, &record->num_bins)
103 || gmon_io_read_32 (ifp, &profrate)
104 || gmon_io_read (ifp, n_hist_dimension, 15)
105 || gmon_io_read (ifp, &n_hist_dimension_abbrev, 1))
107 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
108 whoami, filename);
110 done (1);
113 n_hist_scale = (double)((record->highpc - record->lowpc) / sizeof (UNIT))
114 / record->num_bins;
116 if (first)
118 /* We don't try to veryfy profrate is the same for all histogram
119 records. If we have two histogram records for the same
120 address range and profiling samples is done as often
121 as possible as opposed on timer, then the actual profrate will
122 be slightly different. Most of the time the difference does not
123 matter and insisting that profiling rate is exactly the same
124 will only create inconvenient. */
125 hz = profrate;
126 memcpy (hist_dimension, n_hist_dimension, 15);
127 hist_dimension_abbrev = n_hist_dimension_abbrev;
128 hist_scale = n_hist_scale;
130 else
132 if (strncmp (n_hist_dimension, hist_dimension, 15) != 0)
134 fprintf (stderr,
135 _("%s: dimension unit changed between histogram records\n"
136 "%s: from '%s'\n"
137 "%s: to '%s'\n"),
138 whoami, whoami, hist_dimension, whoami, n_hist_dimension);
139 done (1);
142 if (n_hist_dimension_abbrev != hist_dimension_abbrev)
144 fprintf (stderr,
145 _("%s: dimension abbreviation changed between histogram records\n"
146 "%s: from '%c'\n"
147 "%s: to '%c'\n"),
148 whoami, whoami, hist_dimension_abbrev, whoami, n_hist_dimension_abbrev);
149 done (1);
152 /* The only reason we require the same scale for histograms is that
153 there's code (notably printing code), that prints units,
154 and it would be very confusing to have one unit mean different
155 things for different functions. */
156 if (fabs (hist_scale - n_hist_scale) > 0.000001)
158 fprintf (stderr,
159 _("%s: different scales in histogram records"),
160 whoami);
161 done (1);
166 /* Read the histogram from file IFP. FILENAME is the name of IFP and
167 is provided for formatting error messages only. */
169 void
170 hist_read_rec (FILE * ifp, const char *filename)
172 bfd_vma lowpc, highpc;
173 histogram n_record;
174 histogram *record, *existing_record;
175 unsigned i;
177 /* 1. Read the header and see if there's existing record for the
178 same address range and that there are no overlapping records. */
179 read_histogram_header (&n_record, ifp, filename, num_histograms == 0);
181 existing_record = find_histogram (n_record.lowpc, n_record.highpc);
182 if (existing_record)
184 record = existing_record;
186 else
188 /* If this record overlaps, but does not completely match an existing
189 record, it's an error. */
190 lowpc = n_record.lowpc;
191 highpc = n_record.highpc;
192 hist_clip_symbol_address (&lowpc, &highpc);
193 if (lowpc != highpc)
195 fprintf (stderr,
196 _("%s: overlapping histogram records\n"),
197 whoami);
198 done (1);
201 /* This is new record. Add it to global array and allocate space for
202 the samples. */
203 histograms = (struct histogram *)
204 xrealloc (histograms, sizeof (histogram) * (num_histograms + 1));
205 memcpy (histograms + num_histograms,
206 &n_record, sizeof (histogram));
207 record = &histograms[num_histograms];
208 ++num_histograms;
210 record->sample = (int *) xmalloc (record->num_bins
211 * sizeof (record->sample[0]));
212 memset (record->sample, 0, record->num_bins * sizeof (record->sample[0]));
215 /* 2. We have either a new record (with zeroed histogram data), or an existing
216 record with some data in the histogram already. Read new data into the
217 record, adding hit counts. */
219 DBG (SAMPLEDEBUG,
220 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
221 (unsigned long) record->lowpc, (unsigned long) record->highpc,
222 record->num_bins));
224 for (i = 0; i < record->num_bins; ++i)
226 UNIT count;
227 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
229 fprintf (stderr,
230 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
231 whoami, filename, i, record->num_bins);
232 done (1);
234 record->sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
235 DBG (SAMPLEDEBUG,
236 printf ("[hist_read_rec] 0x%lx: %u\n",
237 (unsigned long) (record->lowpc
238 + i * (record->highpc - record->lowpc)
239 / record->num_bins),
240 record->sample[i]));
245 /* Write all execution histograms file OFP. FILENAME is the name
246 of OFP and is provided for formatting error-messages only. */
248 void
249 hist_write_hist (FILE * ofp, const char *filename)
251 UNIT count;
252 unsigned int i, r;
254 for (r = 0; r < num_histograms; ++r)
256 histogram *record = &histograms[r];
258 /* Write header. */
260 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
261 || gmon_io_write_vma (ofp, record->lowpc)
262 || gmon_io_write_vma (ofp, record->highpc)
263 || gmon_io_write_32 (ofp, record->num_bins)
264 || gmon_io_write_32 (ofp, hz)
265 || gmon_io_write (ofp, hist_dimension, 15)
266 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
268 perror (filename);
269 done (1);
272 for (i = 0; i < record->num_bins; ++i)
274 bfd_put_16 (core_bfd, (bfd_vma) record->sample[i], (bfd_byte *) &count[0]);
276 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
278 perror (filename);
279 done (1);
285 /* Calculate scaled entry point addresses (to save time in
286 hist_assign_samples), and, on architectures that have procedure
287 entry masks at the start of a function, possibly push the scaled
288 entry points over the procedure entry mask, if it turns out that
289 the entry point is in one bin and the code for a routine is in the
290 next bin. */
292 static void
293 scale_and_align_entries ()
295 Sym *sym;
296 bfd_vma bin_of_entry;
297 bfd_vma bin_of_code;
299 for (sym = symtab.base; sym < symtab.limit; sym++)
301 histogram *r = find_histogram_for_pc (sym->addr);
303 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
305 if (r)
307 bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
308 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
309 / hist_scale);
310 if (bin_of_entry < bin_of_code)
312 DBG (SAMPLEDEBUG,
313 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
314 (unsigned long) sym->hist.scaled_addr,
315 (unsigned long) (sym->hist.scaled_addr
316 + UNITS_TO_CODE)));
317 sym->hist.scaled_addr += UNITS_TO_CODE;
324 /* Assign samples to the symbol to which they belong.
326 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
327 which may overlap one more symbol address ranges. If a symbol
328 overlaps with the bin's address range by O percent, then O percent
329 of the bin's count is credited to that symbol.
331 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
332 with respect to the symbol's address range [SYM_LOW_PC,
333 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
334 the distance (in UNITs) between the arrows, the fraction of the
335 sample that is to be credited to the symbol which starts at
336 SYM_LOW_PC.
338 sym_low_pc sym_high_pc
342 +-----------------------------------------------+
344 | ->| |<- ->| |<- ->| |<- |
345 | | | | | |
346 +---------+ +---------+ +---------+
348 ^ ^ ^ ^ ^ ^
349 | | | | | |
350 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
352 For the VAX we assert that samples will never fall in the first two
353 bytes of any routine, since that is the entry mask, thus we call
354 scale_and_align_entries() to adjust the entry points if the entry
355 mask falls in one bin but the code for the routine doesn't start
356 until the next bin. In conjunction with the alignment of routine
357 addresses, this should allow us to have only one sample for every
358 four bytes of text space and never have any overlap (the two end
359 cases, above). */
361 static void
362 hist_assign_samples_1 (histogram *r)
364 bfd_vma bin_low_pc, bin_high_pc;
365 bfd_vma sym_low_pc, sym_high_pc;
366 bfd_vma overlap, addr;
367 unsigned int bin_count;
368 unsigned int i, j;
369 double count_time, credit;
371 bfd_vma lowpc = r->lowpc / sizeof (UNIT);
373 /* Iterate over all sample bins. */
374 for (i = 0, j = 1; i < r->num_bins; ++i)
376 bin_count = r->sample[i];
377 if (! bin_count)
378 continue;
380 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
381 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
382 count_time = bin_count;
384 DBG (SAMPLEDEBUG,
385 printf (
386 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
387 (unsigned long) (sizeof (UNIT) * bin_low_pc),
388 (unsigned long) (sizeof (UNIT) * bin_high_pc),
389 bin_count));
390 total_time += count_time;
392 /* Credit all symbols that are covered by bin I. */
393 for (j = j - 1; j < symtab.len; ++j)
395 sym_low_pc = symtab.base[j].hist.scaled_addr;
396 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
398 /* If high end of bin is below entry address,
399 go for next bin. */
400 if (bin_high_pc < sym_low_pc)
401 break;
403 /* If low end of bin is above high end of symbol,
404 go for next symbol. */
405 if (bin_low_pc >= sym_high_pc)
406 continue;
408 overlap =
409 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
410 if (overlap > 0)
412 DBG (SAMPLEDEBUG,
413 printf (
414 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
415 (unsigned long) symtab.base[j].addr,
416 (unsigned long) (sizeof (UNIT) * sym_high_pc),
417 symtab.base[j].name, overlap * count_time / hist_scale,
418 (long) overlap));
420 addr = symtab.base[j].addr;
421 credit = overlap * count_time / hist_scale;
423 /* Credit symbol if it appears in INCL_FLAT or that
424 table is empty and it does not appear it in
425 EXCL_FLAT. */
426 if (sym_lookup (&syms[INCL_FLAT], addr)
427 || (syms[INCL_FLAT].len == 0
428 && !sym_lookup (&syms[EXCL_FLAT], addr)))
430 symtab.base[j].hist.time += credit;
432 else
434 total_time -= credit;
440 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
441 total_time));
444 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
445 void
446 hist_assign_samples ()
448 unsigned i;
450 scale_and_align_entries ();
452 for (i = 0; i < num_histograms; ++i)
453 hist_assign_samples_1 (&histograms[i]);
457 /* Print header for flag histogram profile. */
459 static void
460 print_header (int prefix)
462 char unit[64];
464 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
466 if (bsd_style_output)
468 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
469 (long) hist_scale * (long) sizeof (UNIT));
470 if (total_time > 0.0)
472 printf (_(" for %.2f%% of %.2f %s\n\n"),
473 100.0 / total_time, total_time / hz, hist_dimension);
476 else
478 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
481 if (total_time <= 0.0)
483 printf (_(" no time accumulated\n\n"));
485 /* This doesn't hurt since all the numerators will be zero. */
486 total_time = 1.0;
489 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
490 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
491 "");
492 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
493 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
494 _("name"));
498 static void
499 print_line (Sym *sym, double scale)
501 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
502 return;
504 accum_time += sym->hist.time;
506 if (bsd_style_output)
507 printf ("%5.1f %10.2f %8.2f",
508 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
509 accum_time / hz, sym->hist.time / hz);
510 else
511 printf ("%6.2f %9.2f %8.2f",
512 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
513 accum_time / hz, sym->hist.time / hz);
515 if (sym->ncalls != 0)
516 printf (" %8lu %8.2f %8.2f ",
517 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
518 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
519 else
520 printf (" %8.8s %8.8s %8.8s ", "", "", "");
522 if (bsd_style_output)
523 print_name (sym);
524 else
525 print_name_only (sym);
527 printf ("\n");
531 /* Compare LP and RP. The primary comparison key is execution time,
532 the secondary is number of invocation, and the tertiary is the
533 lexicographic order of the function names. */
535 static int
536 cmp_time (const PTR lp, const PTR rp)
538 const Sym *left = *(const Sym **) lp;
539 const Sym *right = *(const Sym **) rp;
540 double time_diff;
542 time_diff = right->hist.time - left->hist.time;
544 if (time_diff > 0.0)
545 return 1;
547 if (time_diff < 0.0)
548 return -1;
550 if (right->ncalls > left->ncalls)
551 return 1;
553 if (right->ncalls < left->ncalls)
554 return -1;
556 return strcmp (left->name, right->name);
560 /* Print the flat histogram profile. */
562 void
563 hist_print ()
565 Sym **time_sorted_syms, *top_dog, *sym;
566 unsigned int sym_index;
567 unsigned log_scale;
568 double top_time;
569 bfd_vma addr;
571 if (first_output)
572 first_output = FALSE;
573 else
574 printf ("\f\n");
576 accum_time = 0.0;
578 if (bsd_style_output)
580 if (print_descriptions)
582 printf (_("\n\n\nflat profile:\n"));
583 flat_blurb (stdout);
586 else
588 printf (_("Flat profile:\n"));
591 /* Sort the symbol table by time (call-count and name as secondary
592 and tertiary keys). */
593 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
595 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
596 time_sorted_syms[sym_index] = &symtab.base[sym_index];
598 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
600 if (bsd_style_output)
602 log_scale = 5; /* Milli-seconds is BSD-default. */
604 else
606 /* Search for symbol with highest per-call
607 execution time and scale accordingly. */
608 log_scale = 0;
609 top_dog = 0;
610 top_time = 0.0;
612 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
614 sym = time_sorted_syms[sym_index];
616 if (sym->ncalls != 0)
618 double call_time;
620 call_time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
622 if (call_time > top_time)
624 top_dog = sym;
625 top_time = call_time;
630 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
632 top_time /= hz;
634 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
636 double scaled_value = SItab[log_scale].scale * top_time;
638 if (scaled_value >= 1.0 && scaled_value < 1000.0)
639 break;
644 /* For now, the dimension is always seconds. In the future, we
645 may also want to support other (pseudo-)dimensions (such as
646 I-cache misses etc.). */
647 print_header (SItab[log_scale].prefix);
649 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
651 addr = time_sorted_syms[sym_index]->addr;
653 /* Print symbol if its in INCL_FLAT table or that table
654 is empty and the symbol is not in EXCL_FLAT. */
655 if (sym_lookup (&syms[INCL_FLAT], addr)
656 || (syms[INCL_FLAT].len == 0
657 && !sym_lookup (&syms[EXCL_FLAT], addr)))
658 print_line (time_sorted_syms[sym_index], SItab[log_scale].scale);
661 free (time_sorted_syms);
663 if (print_descriptions && !bsd_style_output)
664 flat_blurb (stdout);
668 hist_check_address (unsigned address)
670 unsigned i;
672 for (i = 0; i < num_histograms; ++i)
673 if (histograms[i].lowpc <= address && address < histograms[i].highpc)
674 return 1;
676 return 0;
/* Two-argument minimum and maximum, defined only if nothing included
   earlier has already provided them.  Both are plain macros, so an
   argument may be evaluated more than once.  */
#ifndef min
#define min(a,b) (((a)<(b)) ? (a) : (b))
#endif

#ifndef max
#define max(a,b) (((a)>(b)) ? (a) : (b))
#endif
686 void
687 hist_clip_symbol_address (bfd_vma *p_lowpc, bfd_vma *p_highpc)
689 unsigned i;
690 int found = 0;
692 if (num_histograms == 0)
694 *p_highpc = *p_lowpc;
695 return;
698 for (i = 0; i < num_histograms; ++i)
700 bfd_vma common_low, common_high;
701 common_low = max (histograms[i].lowpc, *p_lowpc);
702 common_high = min (histograms[i].highpc, *p_highpc);
704 if (common_low < common_high)
706 if (found)
708 fprintf (stderr,
709 _("%s: found a symbol that covers "
710 "several histogram records"),
711 whoami);
712 done (1);
715 found = 1;
716 *p_lowpc = common_low;
717 *p_highpc = common_high;
721 if (!found)
722 *p_highpc = *p_lowpc;
725 /* Find and return exising histogram record having the same lowpc and
726 highpc as passed via the parameters. Return NULL if nothing is found.
727 The return value is valid until any new histogram is read. */
728 static histogram *
729 find_histogram (bfd_vma lowpc, bfd_vma highpc)
731 unsigned i;
732 for (i = 0; i < num_histograms; ++i)
734 if (histograms[i].lowpc == lowpc && histograms[i].highpc == highpc)
735 return &histograms[i];
737 return 0;
740 /* Given a PC, return histogram record which address range include this PC.
741 Return NULL if there's no such record. */
742 static histogram *
743 find_histogram_for_pc (bfd_vma pc)
745 unsigned i;
746 for (i = 0; i < num_histograms; ++i)
748 if (histograms[i].lowpc <= pc && pc < histograms[i].highpc)
749 return &histograms[i];
751 return 0;