1 /* hist.c - Histogram related operations.
3 Copyright 1999, 2000, 2001, 2002, 2004, 2005
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
23 #include "libiberty.h"
25 #include "search_list.h"
35 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
37 static void scale_and_align_entries (void);
38 static void print_header (int);
39 static void print_line (Sym
*, double);
40 static int cmp_time (const PTR
, const PTR
);
42 /* Declarations of automatically generated functions to output blurbs. */
43 extern void flat_blurb (FILE * fp
);
45 bfd_vma s_lowpc
; /* Lowest address in .text. */
46 bfd_vma s_highpc
= 0; /* Highest address in .text. */
47 bfd_vma lowpc
, highpc
; /* Same, but expressed in UNITs. */
48 unsigned int hist_num_bins
= 0; /* Number of histogram samples. */
49 int *hist_sample
= 0; /* Histogram samples (shorts in the file!). */
51 static char hist_dimension
[16] = "seconds";
52 static char hist_dimension_abbrev
= 's';
54 static double accum_time
; /* Accumulated time so far for print_line(). */
55 static double total_time
; /* Total time for all routines. */
57 /* Table of SI prefixes for powers of 10 (used to automatically
58 scale some of the values in the flat profile). */
66 { 'T', 1e-12 }, /* tera */
67 { 'G', 1e-09 }, /* giga */
68 { 'M', 1e-06 }, /* mega */
69 { 'K', 1e-03 }, /* kilo */
71 { 'm', 1e+03 }, /* milli */
72 { 'u', 1e+06 }, /* micro */
73 { 'n', 1e+09 }, /* nano */
74 { 'p', 1e+12 }, /* pico */
75 { 'f', 1e+15 }, /* femto */
76 { 'a', 1e+18 } /* ato */
80 /* Read the histogram from file IFP. FILENAME is the name of IFP and
81 is provided for formatting error messages only. */
84 hist_read_rec (FILE * ifp
, const char *filename
)
86 bfd_vma n_lowpc
, n_highpc
;
87 unsigned int i
, ncnt
, profrate
;
90 if (gmon_io_read_vma (ifp
, &n_lowpc
)
91 || gmon_io_read_vma (ifp
, &n_highpc
)
92 || gmon_io_read_32 (ifp
, &ncnt
)
93 || gmon_io_read_32 (ifp
, &profrate
)
94 || gmon_io_read (ifp
, hist_dimension
, 15)
95 || gmon_io_read (ifp
, &hist_dimension_abbrev
, 1))
97 fprintf (stderr
, _("%s: %s: unexpected end of file\n"),
105 /* This is the first histogram record. */
108 lowpc
= (bfd_vma
) n_lowpc
/ sizeof (UNIT
);
109 highpc
= (bfd_vma
) n_highpc
/ sizeof (UNIT
);
110 hist_num_bins
= ncnt
;
115 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
116 (unsigned long) n_lowpc
, (unsigned long) n_highpc
, ncnt
);
117 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %u\n",
118 (unsigned long) s_lowpc
, (unsigned long) s_highpc
,
120 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n",
121 (unsigned long) lowpc
, (unsigned long) highpc
));
123 if (n_lowpc
!= s_lowpc
|| n_highpc
!= s_highpc
124 || ncnt
!= hist_num_bins
|| hz
!= (int) profrate
)
126 fprintf (stderr
, _("%s: `%s' is incompatible with first gmon file\n"),
133 hist_sample
= (int *) xmalloc (hist_num_bins
* sizeof (hist_sample
[0]));
134 memset (hist_sample
, 0, hist_num_bins
* sizeof (hist_sample
[0]));
137 for (i
= 0; i
< hist_num_bins
; ++i
)
139 if (fread (&count
[0], sizeof (count
), 1, ifp
) != 1)
142 _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
143 whoami
, filename
, i
, hist_num_bins
);
146 hist_sample
[i
] += bfd_get_16 (core_bfd
, (bfd_byte
*) & count
[0]);
148 printf ("[hist_read_rec] 0x%lx: %u\n",
149 (unsigned long) (n_lowpc
+ i
* (n_highpc
- n_lowpc
) / ncnt
),
155 /* Write execution histogram to file OFP. FILENAME is the name
156 of OFP and is provided for formatting error-messages only. */
159 hist_write_hist (FILE * ofp
, const char *filename
)
166 if (gmon_io_write_8 (ofp
, GMON_TAG_TIME_HIST
)
167 || gmon_io_write_vma (ofp
, s_lowpc
)
168 || gmon_io_write_vma (ofp
, s_highpc
)
169 || gmon_io_write_32 (ofp
, hist_num_bins
)
170 || gmon_io_write_32 (ofp
, hz
)
171 || gmon_io_write (ofp
, hist_dimension
, 15)
172 || gmon_io_write (ofp
, &hist_dimension_abbrev
, 1))
178 for (i
= 0; i
< hist_num_bins
; ++i
)
180 bfd_put_16 (core_bfd
, (bfd_vma
) hist_sample
[i
], (bfd_byte
*) &count
[0]);
182 if (fwrite (&count
[0], sizeof (count
), 1, ofp
) != 1)
191 /* Calculate scaled entry point addresses (to save time in
192 hist_assign_samples), and, on architectures that have procedure
193 entry masks at the start of a function, possibly push the scaled
194 entry points over the procedure entry mask, if it turns out that
195 the entry point is in one bin and the code for a routine is in the
199 scale_and_align_entries ()
202 bfd_vma bin_of_entry
;
205 for (sym
= symtab
.base
; sym
< symtab
.limit
; sym
++)
207 sym
->hist
.scaled_addr
= sym
->addr
/ sizeof (UNIT
);
208 bin_of_entry
= (sym
->hist
.scaled_addr
- lowpc
) / hist_scale
;
209 bin_of_code
= ((sym
->hist
.scaled_addr
+ UNITS_TO_CODE
- lowpc
)
211 if (bin_of_entry
< bin_of_code
)
214 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
215 (unsigned long) sym
->hist
.scaled_addr
,
216 (unsigned long) (sym
->hist
.scaled_addr
218 sym
->hist
.scaled_addr
+= UNITS_TO_CODE
;
224 /* Assign samples to the symbol to which they belong.
226 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
227 which may overlap one more symbol address ranges. If a symbol
228 overlaps with the bin's address range by O percent, then O percent
229 of the bin's count is credited to that symbol.
231 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
232 with respect to the symbol's address range [SYM_LOW_PC,
233 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes
234 the distance (in UNITs) between the arrows, the fraction of the
235 sample that is to be credited to the symbol which starts at
238 sym_low_pc sym_high_pc
242 +-----------------------------------------------+
244 | ->| |<- ->| |<- ->| |<- |
246 +---------+ +---------+ +---------+
250 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc
252 For the VAX we assert that samples will never fall in the first two
253 bytes of any routine, since that is the entry mask, thus we call
254 scale_and_align_entries() to adjust the entry points if the entry
255 mask falls in one bin but the code for the routine doesn't start
256 until the next bin. In conjunction with the alignment of routine
257 addresses, this should allow us to have only one sample for every
258 four bytes of text space and never have any overlap (the two end
262 hist_assign_samples ()
264 bfd_vma bin_low_pc
, bin_high_pc
;
265 bfd_vma sym_low_pc
, sym_high_pc
;
266 bfd_vma overlap
, addr
;
267 unsigned int bin_count
;
271 /* Read samples and assign to symbols. */
272 hist_scale
= highpc
- lowpc
;
273 hist_scale
/= hist_num_bins
;
274 scale_and_align_entries ();
276 /* Iterate over all sample bins. */
277 for (i
= 0, j
= 1; i
< hist_num_bins
; ++i
)
279 bin_count
= hist_sample
[i
];
283 bin_low_pc
= lowpc
+ (bfd_vma
) (hist_scale
* i
);
284 bin_high_pc
= lowpc
+ (bfd_vma
) (hist_scale
* (i
+ 1));
289 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
290 (unsigned long) (sizeof (UNIT
) * bin_low_pc
),
291 (unsigned long) (sizeof (UNIT
) * bin_high_pc
),
295 /* Credit all symbols that are covered by bin I. */
296 for (j
= j
- 1; j
< symtab
.len
; ++j
)
298 sym_low_pc
= symtab
.base
[j
].hist
.scaled_addr
;
299 sym_high_pc
= symtab
.base
[j
+ 1].hist
.scaled_addr
;
301 /* If high end of bin is below entry address,
303 if (bin_high_pc
< sym_low_pc
)
306 /* If low end of bin is above high end of symbol,
307 go for next symbol. */
308 if (bin_low_pc
>= sym_high_pc
)
312 MIN (bin_high_pc
, sym_high_pc
) - MAX (bin_low_pc
, sym_low_pc
);
317 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
318 (unsigned long) symtab
.base
[j
].addr
,
319 (unsigned long) (sizeof (UNIT
) * sym_high_pc
),
320 symtab
.base
[j
].name
, overlap
* time
/ hist_scale
,
323 addr
= symtab
.base
[j
].addr
;
324 credit
= overlap
* time
/ hist_scale
;
326 /* Credit symbol if it appears in INCL_FLAT or that
327 table is empty and it does not appear it in
329 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
330 || (syms
[INCL_FLAT
].len
== 0
331 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
333 symtab
.base
[j
].hist
.time
+= credit
;
337 total_time
-= credit
;
343 DBG (SAMPLEDEBUG
, printf ("[assign_samples] total_time %f\n",
348 /* Print header for flag histogram profile. */
351 print_header (int prefix
)
355 sprintf (unit
, _("%c%c/call"), prefix
, hist_dimension_abbrev
);
357 if (bsd_style_output
)
359 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
360 (long) hist_scale
* sizeof (UNIT
));
361 if (total_time
> 0.0)
363 printf (_(" for %.2f%% of %.2f %s\n\n"),
364 100.0 / total_time
, total_time
/ hz
, hist_dimension
);
369 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz
, hist_dimension
);
372 if (total_time
<= 0.0)
374 printf (_(" no time accumulated\n\n"));
376 /* This doesn't hurt since all the numerators will be zero. */
380 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
381 "% ", _("cumulative"), _("self "), "", _("self "), _("total "),
383 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n",
384 _("time"), hist_dimension
, hist_dimension
, _("calls"), unit
, unit
,
390 print_line (Sym
*sym
, double scale
)
392 if (ignore_zeros
&& sym
->ncalls
== 0 && sym
->hist
.time
== 0)
395 accum_time
+= sym
->hist
.time
;
397 if (bsd_style_output
)
398 printf ("%5.1f %10.2f %8.2f",
399 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
400 accum_time
/ hz
, sym
->hist
.time
/ hz
);
402 printf ("%6.2f %9.2f %8.2f",
403 total_time
> 0.0 ? 100 * sym
->hist
.time
/ total_time
: 0.0,
404 accum_time
/ hz
, sym
->hist
.time
/ hz
);
406 if (sym
->ncalls
!= 0)
407 printf (" %8lu %8.2f %8.2f ",
408 sym
->ncalls
, scale
* sym
->hist
.time
/ hz
/ sym
->ncalls
,
409 scale
* (sym
->hist
.time
+ sym
->cg
.child_time
) / hz
/ sym
->ncalls
);
411 printf (" %8.8s %8.8s %8.8s ", "", "", "");
413 if (bsd_style_output
)
416 print_name_only (sym
);
422 /* Compare LP and RP. The primary comparison key is execution time,
423 the secondary is number of invocation, and the tertiary is the
424 lexicographic order of the function names. */
427 cmp_time (const PTR lp
, const PTR rp
)
429 const Sym
*left
= *(const Sym
**) lp
;
430 const Sym
*right
= *(const Sym
**) rp
;
433 time_diff
= right
->hist
.time
- left
->hist
.time
;
441 if (right
->ncalls
> left
->ncalls
)
444 if (right
->ncalls
< left
->ncalls
)
447 return strcmp (left
->name
, right
->name
);
451 /* Print the flat histogram profile. */
456 Sym
**time_sorted_syms
, *top_dog
, *sym
;
459 double top_time
, time
;
463 first_output
= FALSE
;
469 if (bsd_style_output
)
471 if (print_descriptions
)
473 printf (_("\n\n\nflat profile:\n"));
479 printf (_("Flat profile:\n"));
482 /* Sort the symbol table by time (call-count and name as secondary
483 and tertiary keys). */
484 time_sorted_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
486 for (index
= 0; index
< symtab
.len
; ++index
)
487 time_sorted_syms
[index
] = &symtab
.base
[index
];
489 qsort (time_sorted_syms
, symtab
.len
, sizeof (Sym
*), cmp_time
);
491 if (bsd_style_output
)
493 log_scale
= 5; /* Milli-seconds is BSD-default. */
497 /* Search for symbol with highest per-call
498 execution time and scale accordingly. */
503 for (index
= 0; index
< symtab
.len
; ++index
)
505 sym
= time_sorted_syms
[index
];
507 if (sym
->ncalls
!= 0)
509 time
= (sym
->hist
.time
+ sym
->cg
.child_time
) / sym
->ncalls
;
519 if (top_dog
&& top_dog
->ncalls
!= 0 && top_time
> 0.0)
523 for (log_scale
= 0; log_scale
< ARRAY_SIZE (SItab
); log_scale
++)
525 double scaled_value
= SItab
[log_scale
].scale
* top_time
;
527 if (scaled_value
>= 1.0 && scaled_value
< 1000.0)
533 /* For now, the dimension is always seconds. In the future, we
534 may also want to support other (pseudo-)dimensions (such as
535 I-cache misses etc.). */
536 print_header (SItab
[log_scale
].prefix
);
538 for (index
= 0; index
< symtab
.len
; ++index
)
540 addr
= time_sorted_syms
[index
]->addr
;
542 /* Print symbol if its in INCL_FLAT table or that table
543 is empty and the symbol is not in EXCL_FLAT. */
544 if (sym_lookup (&syms
[INCL_FLAT
], addr
)
545 || (syms
[INCL_FLAT
].len
== 0
546 && !sym_lookup (&syms
[EXCL_FLAT
], addr
)))
547 print_line (time_sorted_syms
[index
], SItab
[log_scale
].scale
);
550 free (time_sorted_syms
);
552 if (print_descriptions
&& !bsd_style_output
)