* elf32-spu.c (build_stub): Fix malloc under-allocation.
[binutils.git] / gprof / hist.c
blob91b0000168fa8553ea2f7e4b8d398bff4afec71d
1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "corefile.h"
29 #include "gmon_io.h"
30 #include "gmon_out.h"
31 #include "hist.h"
32 #include "sym_ids.h"
33 #include "utils.h"
34 #include "math.h"
35 #include "stdio.h"
36 #include "stdlib.h"
38 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
40 static void scale_and_align_entries (void);
41 static void print_header (int);
42 static void print_line (Sym *, double);
43 static int cmp_time (const PTR, const PTR);
45 /* Declarations of automatically generated functions to output blurbs. */
46 extern void flat_blurb (FILE * fp);
48 static histogram *find_histogram (bfd_vma lowpc, bfd_vma highpc);
49 static histogram *find_histogram_for_pc (bfd_vma pc);
51 histogram * histograms;
52 unsigned num_histograms;
53 double hist_scale;
54 static char hist_dimension[16] = "seconds";
55 static char hist_dimension_abbrev = 's';
57 static double accum_time; /* Accumulated time so far for print_line(). */
58 static double total_time; /* Total time for all routines. */
60 /* Table of SI prefixes for powers of 10 (used to automatically
61 scale some of the values in the flat profile). */
62 const struct
64 char prefix;
65 double scale;
67 SItab[] =
69 { 'T', 1e-12 }, /* tera */
70 { 'G', 1e-09 }, /* giga */
71 { 'M', 1e-06 }, /* mega */
72 { 'K', 1e-03 }, /* kilo */
73 { ' ', 1e-00 },
74 { 'm', 1e+03 }, /* milli */
75 { 'u', 1e+06 }, /* micro */
76 { 'n', 1e+09 }, /* nano */
77 { 'p', 1e+12 }, /* pico */
78 { 'f', 1e+15 }, /* femto */
79 { 'a', 1e+18 } /* ato */
82 /* Reads just the header part of histogram record into
83 *RECORD from IFP. FILENAME is the name of IFP and
84 is provided for formatting error messages only.
86 If FIRST is non-zero, sets global variables HZ, HIST_DIMENSION,
87 HIST_DIMENSION_ABBREV, HIST_SCALE. If FIRST is zero, checks
88 that the new histogram is compatible with already-set values
89 of those variables and emits an error if that's not so. */
90 static void
91 read_histogram_header (histogram *record,
92 FILE *ifp, const char *filename,
93 int first)
95 unsigned int profrate;
96 char n_hist_dimension[15];
97 char n_hist_dimension_abbrev;
98 double n_hist_scale;
100 if (gmon_io_read_vma (ifp, &record->lowpc)
101 || gmon_io_read_vma (ifp, &record->highpc)
102 || gmon_io_read_32 (ifp, &record->num_bins)
103 || gmon_io_read_32 (ifp, &profrate)
104 || gmon_io_read (ifp, n_hist_dimension, 15)
105 || gmon_io_read (ifp, &n_hist_dimension_abbrev, 1))
107 fprintf (stderr, _("%s: %s: unexpected end of file\n"),
108 whoami, filename);
110 done (1);
113 n_hist_scale = (double)((record->highpc - record->lowpc) / sizeof (UNIT))
114 / record->num_bins;
116 if (first)
118 /* We don't try to veryfy profrate is the same for all histogram
119 records. If we have two histogram records for the same
120 address range and profiling samples is done as often
121 as possible as opposed on timer, then the actual profrate will
122 be slightly different. Most of the time the difference does not
123 matter and insisting that profiling rate is exactly the same
124 will only create inconvenient. */
125 hz = profrate;
126 memcpy (hist_dimension, n_hist_dimension, 15);
127 hist_dimension_abbrev = n_hist_dimension_abbrev;
128 hist_scale = n_hist_scale;
130 else
132 if (strncmp (n_hist_dimension, hist_dimension, 15) != 0)
134 fprintf (stderr,
135 _("%s: dimension unit changed between histogram records\n"
136 "%s: from '%s'\n"
137 "%s: to '%s'\n"),
138 whoami, whoami, hist_dimension, whoami, n_hist_dimension);
139 done (1);
142 if (n_hist_dimension_abbrev != hist_dimension_abbrev)
144 fprintf (stderr,
145 _("%s: dimension abbreviation changed between histogram records\n"
146 "%s: from '%c'\n"
147 "%s: to '%c'\n"),
148 whoami, whoami, hist_dimension_abbrev, whoami, n_hist_dimension_abbrev);
149 done (1);
152 /* The only reason we require the same scale for histograms is that
153 there's code (notably printing code), that prints units,
154 and it would be very confusing to have one unit mean different
155 things for different functions. */
156 if (fabs (hist_scale - n_hist_scale) > 0.000001)
158 fprintf (stderr,
159 _("%s: different scales in histogram records"),
160 whoami);
161 done (1);
166 /* Read the histogram from file IFP. FILENAME is the name of IFP and
167 is provided for formatting error messages only. */
169 void
170 hist_read_rec (FILE * ifp, const char *filename)
172 bfd_vma lowpc, highpc;
173 histogram n_record;
174 histogram *record, *existing_record;
175 unsigned i;
177 /* 1. Read the header and see if there's existing record for the
178 same address range and that there are no overlapping records. */
179 read_histogram_header (&n_record, ifp, filename, num_histograms == 0);
181 existing_record = find_histogram (n_record.lowpc, n_record.highpc);
182 if (existing_record)
184 record = existing_record;
186 else
188 /* If this record overlaps, but does not completely match an existing
189 record, it's an error. */
190 lowpc = n_record.lowpc;
191 highpc = n_record.highpc;
192 hist_clip_symbol_address (&lowpc, &highpc);
193 if (lowpc != highpc)
195 fprintf (stderr,
196 _("%s: overlapping histogram records\n"),
197 whoami);
198 done (1);
201 /* This is new record. Add it to global array and allocate space for
202 the samples. */
203 histograms = (struct histogram *)
204 xrealloc (histograms, sizeof (histogram) * (num_histograms + 1));
205 memcpy (histograms + num_histograms,
206 &n_record, sizeof (histogram));
207 record = &histograms[num_histograms];
208 ++num_histograms;
210 record->sample = (int *) xmalloc (record->num_bins
211 * sizeof (record->sample[0]));
212 memset (record->sample, 0, record->num_bins * sizeof (record->sample[0]));
215 /* 2. We have either a new record (with zeroed histogram data), or an existing
216 record with some data in the histogram already. Read new data into the
217 record, adding hit counts. */
219 DBG (SAMPLEDEBUG,
220 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
221 (unsigned long) record->lowpc, (unsigned long) record->highpc,
222 record->num_bins));
224 for (i = 0; i < record->num_bins; ++i)
226 UNIT count;
227 if (fread (&count[0], sizeof (count), 1, ifp) != 1)
229 fprintf (stderr,
230 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
231 whoami, filename, i, record->num_bins);
232 done (1);
234 record->sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
235 DBG (SAMPLEDEBUG,
236 printf ("[hist_read_rec] 0x%lx: %u\n",
237 (unsigned long) (record->lowpc
238 + i * (record->highpc - record->lowpc)
239 / record->num_bins),
240 record->sample[i]));
245 /* Write all execution histograms file OFP. FILENAME is the name
246 of OFP and is provided for formatting error-messages only. */
248 void
249 hist_write_hist (FILE * ofp, const char *filename)
251 UNIT count;
252 unsigned int i, r;
254 for (r = 0; r < num_histograms; ++r)
256 histogram *record = &histograms[r];
258 /* Write header. */
260 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
261 || gmon_io_write_vma (ofp, record->lowpc)
262 || gmon_io_write_vma (ofp, record->highpc)
263 || gmon_io_write_32 (ofp, record->num_bins)
264 || gmon_io_write_32 (ofp, hz)
265 || gmon_io_write (ofp, hist_dimension, 15)
266 || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
268 perror (filename);
269 done (1);
272 for (i = 0; i < record->num_bins; ++i)
274 bfd_put_16 (core_bfd, (bfd_vma) record->sample[i], (bfd_byte *) &count[0]);
276 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
278 perror (filename);
279 done (1);
285 /* Calculate scaled entry point addresses (to save time in
286 hist_assign_samples), and, on architectures that have procedure
287 entry masks at the start of a function, possibly push the scaled
288 entry points over the procedure entry mask, if it turns out that
289 the entry point is in one bin and the code for a routine is in the
290 next bin. */
292 static void
293 scale_and_align_entries ()
295 Sym *sym;
296 bfd_vma bin_of_entry;
297 bfd_vma bin_of_code;
299 for (sym = symtab.base; sym < symtab.limit; sym++)
301 histogram *r = find_histogram_for_pc (sym->addr);
303 sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
305 if (r)
307 bin_of_entry = (sym->hist.scaled_addr - r->lowpc) / hist_scale;
308 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - r->lowpc)
309 / hist_scale);
310 if (bin_of_entry < bin_of_code)
312 DBG (SAMPLEDEBUG,
313 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
314 (unsigned long) sym->hist.scaled_addr,
315 (unsigned long) (sym->hist.scaled_addr
316 + UNITS_TO_CODE)));
317 sym->hist.scaled_addr += UNITS_TO_CODE;
324 /* Assign samples to the symbol to which they belong.
326 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
327 which may overlap one more symbol address ranges. If a symbol
328 overlaps with the bin's address range by O percent, then O percent
329 of the bin's count is credited to that symbol.
331 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
332 with respect to the symbol's address range [SYM_LOW_PC,
333 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
334 the distance (in UNITs) between the arrows, the fraction of the
335 sample that is to be credited to the symbol which starts at
336 SYM_LOW_PC.
338 sym_low_pc sym_high_pc
342 +-----------------------------------------------+
344 | ->| |<- ->| |<- ->| |<- |
345 | | | | | |
346 +---------+ +---------+ +---------+
348 ^ ^ ^ ^ ^ ^
349 | | | | | |
350 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
352 For the VAX we assert that samples will never fall in the first two
353 bytes of any routine, since that is the entry mask, thus we call
354 scale_and_align_entries() to adjust the entry points if the entry
355 mask falls in one bin but the code for the routine doesn't start
356 until the next bin. In conjunction with the alignment of routine
357 addresses, this should allow us to have only one sample for every
358 four bytes of text space and never have any overlap (the two end
359 cases, above). */
361 static void
362 hist_assign_samples_1 (histogram *r)
364 bfd_vma bin_low_pc, bin_high_pc;
365 bfd_vma sym_low_pc, sym_high_pc;
366 bfd_vma overlap, addr;
367 unsigned int bin_count;
368 unsigned int i, j, k;
369 double count_time, credit;
371 bfd_vma lowpc = r->lowpc / sizeof (UNIT);
373 /* Iterate over all sample bins. */
374 for (i = 0, k = 1; i < r->num_bins; ++i)
376 bin_count = r->sample[i];
377 if (! bin_count)
378 continue;
380 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
381 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
382 count_time = bin_count;
384 DBG (SAMPLEDEBUG,
385 printf (
386 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
387 (unsigned long) (sizeof (UNIT) * bin_low_pc),
388 (unsigned long) (sizeof (UNIT) * bin_high_pc),
389 bin_count));
390 total_time += count_time;
392 /* Credit all symbols that are covered by bin I.
394 PR gprof/13325: Make sure that K does not get decremented
395 and J will never be less than 0. */
396 for (j = k - 1; j < symtab.len; k = ++j)
398 sym_low_pc = symtab.base[j].hist.scaled_addr;
399 sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
401 /* If high end of bin is below entry address,
402 go for next bin. */
403 if (bin_high_pc < sym_low_pc)
404 break;
406 /* If low end of bin is above high end of symbol,
407 go for next symbol. */
408 if (bin_low_pc >= sym_high_pc)
409 continue;
411 overlap =
412 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
413 if (overlap > 0)
415 DBG (SAMPLEDEBUG,
416 printf (
417 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
418 (unsigned long) symtab.base[j].addr,
419 (unsigned long) (sizeof (UNIT) * sym_high_pc),
420 symtab.base[j].name, overlap * count_time / hist_scale,
421 (long) overlap));
423 addr = symtab.base[j].addr;
424 credit = overlap * count_time / hist_scale;
426 /* Credit symbol if it appears in INCL_FLAT or that
427 table is empty and it does not appear it in
428 EXCL_FLAT. */
429 if (sym_lookup (&syms[INCL_FLAT], addr)
430 || (syms[INCL_FLAT].len == 0
431 && !sym_lookup (&syms[EXCL_FLAT], addr)))
433 symtab.base[j].hist.time += credit;
435 else
437 total_time -= credit;
443 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
444 total_time));
447 /* Calls 'hist_assign_sampes_1' for all histogram records read so far. */
448 void
449 hist_assign_samples ()
451 unsigned i;
453 scale_and_align_entries ();
455 for (i = 0; i < num_histograms; ++i)
456 hist_assign_samples_1 (&histograms[i]);
460 /* Print header for flag histogram profile. */
462 static void
463 print_header (int prefix)
465 char unit[64];
467 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
469 if (bsd_style_output)
471 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
472 (long) hist_scale * (long) sizeof (UNIT));
473 if (total_time > 0.0)
475 printf (_(" for %.2f%% of %.2f %s\n\n"),
476 100.0 / total_time, total_time / hz, hist_dimension);
479 else
481 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
484 if (total_time <= 0.0)
486 printf (_(" no time accumulated\n\n"));
488 /* This doesn't hurt since all the numerators will be zero. */
489 total_time = 1.0;
492 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
493 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
494 "");
495 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
496 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
497 _("name"));
501 static void
502 print_line (Sym *sym, double scale)
504 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
505 return;
507 accum_time += sym->hist.time;
509 if (bsd_style_output)
510 printf ("%5.1f %10.2f %8.2f",
511 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
512 accum_time / hz, sym->hist.time / hz);
513 else
514 printf ("%6.2f %9.2f %8.2f",
515 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
516 accum_time / hz, sym->hist.time / hz);
518 if (sym->ncalls != 0)
519 printf (" %8lu %8.2f %8.2f ",
520 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
521 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
522 else
523 printf (" %8.8s %8.8s %8.8s ", "", "", "");
525 if (bsd_style_output)
526 print_name (sym);
527 else
528 print_name_only (sym);
530 printf ("\n");
534 /* Compare LP and RP. The primary comparison key is execution time,
535 the secondary is number of invocation, and the tertiary is the
536 lexicographic order of the function names. */
538 static int
539 cmp_time (const PTR lp, const PTR rp)
541 const Sym *left = *(const Sym **) lp;
542 const Sym *right = *(const Sym **) rp;
543 double time_diff;
545 time_diff = right->hist.time - left->hist.time;
547 if (time_diff > 0.0)
548 return 1;
550 if (time_diff < 0.0)
551 return -1;
553 if (right->ncalls > left->ncalls)
554 return 1;
556 if (right->ncalls < left->ncalls)
557 return -1;
559 return strcmp (left->name, right->name);
563 /* Print the flat histogram profile. */
565 void
566 hist_print ()
568 Sym **time_sorted_syms, *top_dog, *sym;
569 unsigned int sym_index;
570 unsigned log_scale;
571 double top_time;
572 bfd_vma addr;
574 if (first_output)
575 first_output = FALSE;
576 else
577 printf ("\f\n");
579 accum_time = 0.0;
581 if (bsd_style_output)
583 if (print_descriptions)
585 printf (_("\n\n\nflat profile:\n"));
586 flat_blurb (stdout);
589 else
591 printf (_("Flat profile:\n"));
594 /* Sort the symbol table by time (call-count and name as secondary
595 and tertiary keys). */
596 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
598 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
599 time_sorted_syms[sym_index] = &symtab.base[sym_index];
601 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
603 if (bsd_style_output)
605 log_scale = 5; /* Milli-seconds is BSD-default. */
607 else
609 /* Search for symbol with highest per-call
610 execution time and scale accordingly. */
611 log_scale = 0;
612 top_dog = 0;
613 top_time = 0.0;
615 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
617 sym = time_sorted_syms[sym_index];
619 if (sym->ncalls != 0)
621 double call_time;
623 call_time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
625 if (call_time > top_time)
627 top_dog = sym;
628 top_time = call_time;
633 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
635 top_time /= hz;
637 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
639 double scaled_value = SItab[log_scale].scale * top_time;
641 if (scaled_value >= 1.0 && scaled_value < 1000.0)
642 break;
647 /* For now, the dimension is always seconds. In the future, we
648 may also want to support other (pseudo-)dimensions (such as
649 I-cache misses etc.). */
650 print_header (SItab[log_scale].prefix);
652 for (sym_index = 0; sym_index < symtab.len; ++sym_index)
654 addr = time_sorted_syms[sym_index]->addr;
656 /* Print symbol if its in INCL_FLAT table or that table
657 is empty and the symbol is not in EXCL_FLAT. */
658 if (sym_lookup (&syms[INCL_FLAT], addr)
659 || (syms[INCL_FLAT].len == 0
660 && !sym_lookup (&syms[EXCL_FLAT], addr)))
661 print_line (time_sorted_syms[sym_index], SItab[log_scale].scale);
664 free (time_sorted_syms);
666 if (print_descriptions && !bsd_style_output)
667 flat_blurb (stdout);
671 hist_check_address (unsigned address)
673 unsigned i;
675 for (i = 0; i < num_histograms; ++i)
676 if (histograms[i].lowpc <= address && address < histograms[i].highpc)
677 return 1;
679 return 0;
682 #if ! defined(min)
683 #define min(a,b) (((a)<(b)) ? (a) : (b))
684 #endif
685 #if ! defined(max)
686 #define max(a,b) (((a)>(b)) ? (a) : (b))
687 #endif
689 void
690 hist_clip_symbol_address (bfd_vma *p_lowpc, bfd_vma *p_highpc)
692 unsigned i;
693 int found = 0;
695 if (num_histograms == 0)
697 *p_highpc = *p_lowpc;
698 return;
701 for (i = 0; i < num_histograms; ++i)
703 bfd_vma common_low, common_high;
704 common_low = max (histograms[i].lowpc, *p_lowpc);
705 common_high = min (histograms[i].highpc, *p_highpc);
707 if (common_low < common_high)
709 if (found)
711 fprintf (stderr,
712 _("%s: found a symbol that covers "
713 "several histogram records"),
714 whoami);
715 done (1);
718 found = 1;
719 *p_lowpc = common_low;
720 *p_highpc = common_high;
724 if (!found)
725 *p_highpc = *p_lowpc;
728 /* Find and return exising histogram record having the same lowpc and
729 highpc as passed via the parameters. Return NULL if nothing is found.
730 The return value is valid until any new histogram is read. */
731 static histogram *
732 find_histogram (bfd_vma lowpc, bfd_vma highpc)
734 unsigned i;
735 for (i = 0; i < num_histograms; ++i)
737 if (histograms[i].lowpc == lowpc && histograms[i].highpc == highpc)
738 return &histograms[i];
740 return 0;
743 /* Given a PC, return histogram record which address range include this PC.
744 Return NULL if there's no such record. */
745 static histogram *
746 find_histogram_for_pc (bfd_vma pc)
748 unsigned i;
749 for (i = 0; i < num_histograms; ++i)
751 if (histograms[i].lowpc <= pc && pc < histograms[i].highpc)
752 return &histograms[i];
754 return 0;