2010-03-23 Rafael Ávila de Espíndola <respindola@mozilla.com>
[binutils.git] / gprof / cg_print.c
blobc1a2a31df0067be3421eb33c38721fac376c60a7
1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "filenames.h"
26 #include "search_list.h"
27 #include "source.h"
28 #include "symtab.h"
29 #include "cg_arcs.h"
30 #include "cg_print.h"
31 #include "hist.h"
32 #include "utils.h"
33 #include "corefile.h"
35 /* Return value of comparison functions used to sort tables. */
36 #define LESSTHAN -1
37 #define EQUALTO 0
38 #define GREATERTHAN 1
40 static void print_header (void);
41 static void print_cycle (Sym *);
42 static int cmp_member (Sym *, Sym *);
43 static void sort_members (Sym *);
44 static void print_members (Sym *);
45 static int cmp_arc (Arc *, Arc *);
46 static void sort_parents (Sym *);
47 static void print_parents (Sym *);
48 static void sort_children (Sym *);
49 static void print_children (Sym *);
50 static void print_line (Sym *);
51 static int cmp_name (const PTR, const PTR);
52 static int cmp_arc_count (const PTR, const PTR);
53 static int cmp_fun_nuses (const PTR, const PTR);
54 static void order_and_dump_functions_by_arcs
55 (Arc **, unsigned long, int, Arc **, unsigned long *);
57 /* Declarations of automatically generated functions to output blurbs. */
58 extern void bsd_callg_blurb (FILE * fp);
59 extern void fsf_callg_blurb (FILE * fp);
61 double print_time = 0.0;
64 static void
65 print_header ()
67 if (first_output)
68 first_output = FALSE;
69 else
70 printf ("\f\n");
72 if (!bsd_style_output)
74 if (print_descriptions)
75 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 else
77 printf (_("\t\t\tCall graph\n\n"));
80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 (long) hist_scale * (long) sizeof (UNIT));
83 if (print_time > 0.0)
84 printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 100.0 / print_time, print_time / hz);
86 else
88 printf (_(" no time propagated\n\n"));
90 /* This doesn't hurt, since all the numerators will be 0.0. */
91 print_time = 1.0;
94 if (bsd_style_output)
96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
97 "", "", "", "", _("called"), _("total"), _("parents"));
98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 _("index"), _("%time"), _("self"), _("descendants"),
100 _("called"), _("self"), _("name"), _("index"));
101 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
102 "", "", "", "", _("called"), _("total"), _("children"));
103 printf ("\n");
105 else
107 printf (_("index %% time self children called name\n"));
111 /* Print a cycle header. */
113 static void
114 print_cycle (Sym *cyc)
116 char buf[BUFSIZ];
118 sprintf (buf, "[%d]", cyc->cg.index);
119 printf (bsd_style_output
120 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
121 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
122 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
123 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
125 if (cyc->cg.self_calls != 0)
126 printf ("+%-7lu", cyc->cg.self_calls);
127 else
128 printf (" %7.7s", "");
130 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
133 /* Compare LEFT and RIGHT membmer. Major comparison key is
134 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
136 static int
137 cmp_member (Sym *left, Sym *right)
139 double left_time = left->cg.prop.self + left->cg.prop.child;
140 double right_time = right->cg.prop.self + right->cg.prop.child;
141 unsigned long left_calls = left->ncalls + left->cg.self_calls;
142 unsigned long right_calls = right->ncalls + right->cg.self_calls;
144 if (left_time > right_time)
145 return GREATERTHAN;
147 if (left_time < right_time)
148 return LESSTHAN;
150 if (left_calls > right_calls)
151 return GREATERTHAN;
153 if (left_calls < right_calls)
154 return LESSTHAN;
156 return EQUALTO;
159 /* Sort members of a cycle. */
161 static void
162 sort_members (Sym *cyc)
164 Sym *todo, *doing, *prev;
166 /* Detach cycle members from cyclehead,
167 and insertion sort them back on. */
168 todo = cyc->cg.cyc.next;
169 cyc->cg.cyc.next = 0;
171 for (doing = todo; doing != NULL; doing = todo)
173 todo = doing->cg.cyc.next;
175 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
177 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
178 break;
181 doing->cg.cyc.next = prev->cg.cyc.next;
182 prev->cg.cyc.next = doing;
186 /* Print the members of a cycle. */
188 static void
189 print_members (Sym *cyc)
191 Sym *member;
193 sort_members (cyc);
195 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
197 printf (bsd_style_output
198 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
199 : "%6.6s %5.5s %7.2f %7.2f %7lu",
200 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
201 member->ncalls);
203 if (member->cg.self_calls != 0)
204 printf ("+%-7lu", member->cg.self_calls);
205 else
206 printf (" %7.7s", "");
208 printf (" ");
209 print_name (member);
210 printf ("\n");
214 /* Compare two arcs to/from the same child/parent.
215 - if one arc is a self arc, it's least.
216 - if one arc is within a cycle, it's less than.
217 - if both arcs are within a cycle, compare arc counts.
218 - if neither arc is within a cycle, compare with
219 time + child_time as major key
220 arc count as minor key. */
222 static int
223 cmp_arc (Arc *left, Arc *right)
225 Sym *left_parent = left->parent;
226 Sym *left_child = left->child;
227 Sym *right_parent = right->parent;
228 Sym *right_child = right->child;
229 double left_time, right_time;
231 DBG (TIMEDEBUG,
232 printf ("[cmp_arc] ");
233 print_name (left_parent);
234 printf (" calls ");
235 print_name (left_child);
236 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
237 left->count, left_child->ncalls);
238 printf ("[cmp_arc] ");
239 print_name (right_parent);
240 printf (" calls ");
241 print_name (right_child);
242 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
243 right->count, right_child->ncalls);
244 printf ("\n");
247 if (left_parent == left_child)
248 return LESSTHAN; /* Left is a self call. */
250 if (right_parent == right_child)
251 return GREATERTHAN; /* Right is a self call. */
253 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
254 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
256 /* Left is a call within a cycle. */
257 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
258 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
260 /* Right is a call within the cycle, too. */
261 if (left->count < right->count)
262 return LESSTHAN;
264 if (left->count > right->count)
265 return GREATERTHAN;
267 return EQUALTO;
269 else
271 /* Right isn't a call within the cycle. */
272 return LESSTHAN;
275 else
277 /* Left isn't a call within a cycle. */
278 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
279 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
281 /* Right is a call within a cycle. */
282 return GREATERTHAN;
284 else
286 /* Neither is a call within a cycle. */
287 left_time = left->time + left->child_time;
288 right_time = right->time + right->child_time;
290 if (left_time < right_time)
291 return LESSTHAN;
293 if (left_time > right_time)
294 return GREATERTHAN;
296 if (left->count < right->count)
297 return LESSTHAN;
299 if (left->count > right->count)
300 return GREATERTHAN;
302 return EQUALTO;
308 static void
309 sort_parents (Sym * child)
311 Arc *arc, *detached, sorted, *prev;
313 /* Unlink parents from child, then insertion sort back on to
314 sorted's parents.
315 *arc the arc you have detached and are inserting.
316 *detached the rest of the arcs to be sorted.
317 sorted arc list onto which you insertion sort.
318 *prev arc before the arc you are comparing. */
319 sorted.next_parent = 0;
321 for (arc = child->cg.parents; arc; arc = detached)
323 detached = arc->next_parent;
325 /* Consider *arc as disconnected; insert it into sorted. */
326 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
328 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
329 break;
332 arc->next_parent = prev->next_parent;
333 prev->next_parent = arc;
336 /* Reattach sorted arcs to child. */
337 child->cg.parents = sorted.next_parent;
341 static void
342 print_parents (Sym *child)
344 Sym *parent;
345 Arc *arc;
346 Sym *cycle_head;
348 if (child->cg.cyc.head != 0)
349 cycle_head = child->cg.cyc.head;
350 else
351 cycle_head = child;
353 if (!child->cg.parents)
355 printf (bsd_style_output
356 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
357 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
358 "", "", "", "", "", "");
359 return;
362 sort_parents (child);
364 for (arc = child->cg.parents; arc; arc = arc->next_parent)
366 parent = arc->parent;
367 if (child == parent || (child->cg.cyc.num != 0
368 && parent->cg.cyc.num == child->cg.cyc.num))
370 /* Selfcall or call among siblings. */
371 printf (bsd_style_output
372 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
373 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
374 "", "", "", "",
375 arc->count, "");
376 print_name (parent);
377 printf ("\n");
379 else
381 /* Regular parent of child. */
382 printf (bsd_style_output
383 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
384 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
385 "", "",
386 arc->time / hz, arc->child_time / hz,
387 arc->count, cycle_head->ncalls);
388 print_name (parent);
389 printf ("\n");
395 static void
396 sort_children (Sym *parent)
398 Arc *arc, *detached, sorted, *prev;
400 /* Unlink children from parent, then insertion sort back on to
401 sorted's children.
402 *arc the arc you have detached and are inserting.
403 *detached the rest of the arcs to be sorted.
404 sorted arc list onto which you insertion sort.
405 *prev arc before the arc you are comparing. */
406 sorted.next_child = 0;
408 for (arc = parent->cg.children; arc; arc = detached)
410 detached = arc->next_child;
412 /* Consider *arc as disconnected; insert it into sorted. */
413 for (prev = &sorted; prev->next_child; prev = prev->next_child)
415 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
416 break;
419 arc->next_child = prev->next_child;
420 prev->next_child = arc;
423 /* Reattach sorted children to parent. */
424 parent->cg.children = sorted.next_child;
428 static void
429 print_children (Sym *parent)
431 Sym *child;
432 Arc *arc;
434 sort_children (parent);
435 arc = parent->cg.children;
437 for (arc = parent->cg.children; arc; arc = arc->next_child)
439 child = arc->child;
440 if (child == parent || (child->cg.cyc.num != 0
441 && child->cg.cyc.num == parent->cg.cyc.num))
443 /* Self call or call to sibling. */
444 printf (bsd_style_output
445 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
446 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
447 "", "", "", "", arc->count, "");
448 print_name (child);
449 printf ("\n");
451 else
453 /* Regular child of parent. */
454 printf (bsd_style_output
455 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
456 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
457 "", "",
458 arc->time / hz, arc->child_time / hz,
459 arc->count, child->cg.cyc.head->ncalls);
460 print_name (child);
461 printf ("\n");
467 static void
468 print_line (Sym *np)
470 char buf[BUFSIZ];
472 sprintf (buf, "[%d]", np->cg.index);
473 printf (bsd_style_output
474 ? "%-6.6s %5.1f %7.2f %11.2f"
475 : "%-6.6s %5.1f %7.2f %7.2f", buf,
476 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
477 np->cg.prop.self / hz, np->cg.prop.child / hz);
479 if ((np->ncalls + np->cg.self_calls) != 0)
481 printf (" %7lu", np->ncalls);
483 if (np->cg.self_calls != 0)
484 printf ("+%-7lu ", np->cg.self_calls);
485 else
486 printf (" %7.7s ", "");
488 else
490 printf (" %7.7s %7.7s ", "", "");
493 print_name (np);
494 printf ("\n");
498 /* Print dynamic call graph. */
500 void
501 cg_print (Sym ** timesortsym)
503 unsigned int sym_index;
504 Sym *parent;
506 if (print_descriptions && bsd_style_output)
507 bsd_callg_blurb (stdout);
509 print_header ();
511 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
513 parent = timesortsym[sym_index];
515 if ((ignore_zeros && parent->ncalls == 0
516 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
517 && parent->cg.prop.child == 0)
518 || !parent->cg.print_flag
519 || (line_granularity && ! parent->is_func))
520 continue;
522 if (!parent->name && parent->cg.cyc.num != 0)
524 /* Cycle header. */
525 print_cycle (parent);
526 print_members (parent);
528 else
530 print_parents (parent);
531 print_line (parent);
532 print_children (parent);
535 if (bsd_style_output)
536 printf ("\n");
538 printf ("-----------------------------------------------\n");
540 if (bsd_style_output)
541 printf ("\n");
544 free (timesortsym);
546 if (print_descriptions && !bsd_style_output)
547 fsf_callg_blurb (stdout);
551 static int
552 cmp_name (const PTR left, const PTR right)
554 const Sym **npp1 = (const Sym **) left;
555 const Sym **npp2 = (const Sym **) right;
557 return strcmp ((*npp1)->name, (*npp2)->name);
561 void
562 cg_print_index ()
564 unsigned int sym_index;
565 unsigned int nnames, todo, i, j;
566 int col, starting_col;
567 Sym **name_sorted_syms, *sym;
568 const char *filename;
569 char buf[20];
570 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
572 /* Now, sort regular function name
573 alphabetically to create an index. */
574 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
576 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
578 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
579 && symtab.base[sym_index].hist.time == 0)
580 continue;
582 name_sorted_syms[nnames++] = &symtab.base[sym_index];
585 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
587 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
588 name_sorted_syms[todo++] = &cycle_header[sym_index];
590 printf ("\f\n");
591 printf (_("Index by function name\n\n"));
592 sym_index = (todo + 2) / 3;
594 for (i = 0; i < sym_index; i++)
596 col = 0;
597 starting_col = 0;
599 for (j = i; j < todo; j += sym_index)
601 sym = name_sorted_syms[j];
603 if (sym->cg.print_flag)
604 sprintf (buf, "[%d]", sym->cg.index);
605 else
606 sprintf (buf, "(%d)", sym->cg.index);
608 if (j < nnames)
610 if (bsd_style_output)
612 printf ("%6.6s %-19.19s", buf, sym->name);
614 else
616 col += strlen (buf);
618 for (; col < starting_col + 5; ++col)
619 putchar (' ');
621 printf (" %s ", buf);
622 col += print_name_only (sym);
624 if (!line_granularity && sym->is_static && sym->file)
626 filename = sym->file->name;
628 if (!print_path)
630 filename = strrchr (filename, '/');
632 if (filename)
633 ++filename;
634 else
635 filename = sym->file->name;
638 printf (" (%s)", filename);
639 col += strlen (filename) + 3;
643 else
645 if (bsd_style_output)
647 printf ("%6.6s ", buf);
648 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
649 printf ("%-19.19s", buf);
651 else
653 col += strlen (buf);
654 for (; col < starting_col + 5; ++col)
655 putchar (' ');
656 printf (" %s ", buf);
657 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
658 printf ("%s", buf);
659 col += strlen (buf);
663 starting_col += column_width;
666 printf ("\n");
669 free (name_sorted_syms);
672 /* Compare two arcs based on their usage counts.
673 We want to sort in descending order. */
675 static int
676 cmp_arc_count (const PTR left, const PTR right)
678 const Arc **npp1 = (const Arc **) left;
679 const Arc **npp2 = (const Arc **) right;
681 if ((*npp1)->count > (*npp2)->count)
682 return -1;
683 else if ((*npp1)->count < (*npp2)->count)
684 return 1;
685 else
686 return 0;
689 /* Compare two funtions based on their usage counts.
690 We want to sort in descending order. */
692 static int
693 cmp_fun_nuses (const PTR left, const PTR right)
695 const Sym **npp1 = (const Sym **) left;
696 const Sym **npp2 = (const Sym **) right;
698 if ((*npp1)->nuses > (*npp2)->nuses)
699 return -1;
700 else if ((*npp1)->nuses < (*npp2)->nuses)
701 return 1;
702 else
703 return 0;
706 /* Print a suggested function ordering based on the profiling data.
708 We perform 4 major steps when ordering functions:
710 * Group unused functions together and place them at the
711 end of the function order.
713 * Search the highest use arcs (those which account for 90% of
714 the total arc count) for functions which have several parents.
716 Group those with the most call sites together (currently the
717 top 1.25% which have at least five different call sites).
719 These are emitted at the start of the function order.
721 * Use a greedy placement algorithm to place functions which
722 occur in the top 99% of the arcs in the profile. Some provisions
723 are made to handle high usage arcs where the parent and/or
724 child has already been placed.
726 * Run the same greedy placement algorithm on the remaining
727 arcs to place the leftover functions.
730 The various "magic numbers" should (one day) be tuneable by command
731 line options. They were arrived at by benchmarking a few applications
732 with various values to see which values produced better overall function
733 orderings.
735 Of course, profiling errors, machine limitations (PA long calls), and
736 poor cutoff values for the placement algorithm may limit the usefullness
737 of the resulting function order. Improvements would be greatly appreciated.
739 Suggestions:
741 * Place the functions with many callers near the middle of the
742 list to reduce long calls.
744 * Propagate arc usage changes as functions are placed. Ie if
745 func1 and func2 are placed together, arcs to/from those arcs
746 to the same parent/child should be combined, then resort the
747 arcs to choose the next one.
749 * Implement some global positioning algorithm to place the
750 chains made by the greedy local positioning algorithm. Probably
751 by examining arcs which haven't been placed yet to tie two
752 chains together.
754 * Take a function's size and time into account in the algorithm;
755 size in particular is important on the PA (long calls). Placing
756 many small functions onto their own page may be wise.
758 * Use better profiling information; many published algorithms
759 are based on call sequences through time, rather than just
760 arc counts.
762 * Prodecure cloning could improve performance when a small number
763 of arcs account for most of the calls to a particular function.
765 * Use relocation information to avoid moving unused functions
766 completely out of the code stream; this would avoid severe lossage
767 when the profile data bears little resemblance to actual runs.
769 * Propagation of arc usages should also improve .o link line
770 ordering which shares the same arc placement algorithm with
771 the function ordering code (in fact it is a degenerate case
772 of function ordering). */
774 void
775 cg_print_function_ordering (void)
777 unsigned long sym_index;
778 unsigned long arc_index;
779 unsigned long used, unused, scratch_index;
780 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
781 #ifdef __GNUC__
782 unsigned long long total_arcs, tmp_arcs_count;
783 #else
784 unsigned long total_arcs, tmp_arcs_count;
785 #endif
786 Sym **unused_syms, **used_syms, **scratch_syms;
787 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
789 sym_index = 0;
790 used = 0;
791 unused = 0;
792 scratch_index = 0;
793 unplaced_arc_count = 0;
794 high_arc_count = 0;
795 scratch_arc_count = 0;
797 /* First group all the unused functions together. */
798 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
799 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
800 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
801 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
802 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
803 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805 /* Walk through all the functions; mark those which are never
806 called as placed (we'll emit them as a group later). */
807 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
809 if (symtab.base[sym_index].ncalls == 0)
811 unused_syms[unused++] = &symtab.base[sym_index];
812 symtab.base[sym_index].has_been_placed = 1;
814 else
816 used_syms[used++] = &symtab.base[sym_index];
817 symtab.base[sym_index].has_been_placed = 0;
818 symtab.base[sym_index].next = 0;
819 symtab.base[sym_index].prev = 0;
820 symtab.base[sym_index].nuses = 0;
824 /* Sort the arcs from most used to least used. */
825 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
827 /* Compute the total arc count. Also mark arcs as unplaced.
829 Note we don't compensate for overflow if that happens!
830 Overflow is much less likely when this file is compiled
831 with GCC as it can double-wide integers via long long. */
832 total_arcs = 0;
833 for (arc_index = 0; arc_index < numarcs; arc_index++)
835 total_arcs += arcs[arc_index]->count;
836 arcs[arc_index]->has_been_placed = 0;
839 /* We want to pull out those functions which are referenced
840 by many highly used arcs and emit them as a group. This
841 could probably use some tuning. */
842 tmp_arcs_count = 0;
843 for (arc_index = 0; arc_index < numarcs; arc_index++)
845 tmp_arcs_count += arcs[arc_index]->count;
847 /* Count how many times each parent and child are used up
848 to our threshhold of arcs (90%). */
849 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
850 break;
852 arcs[arc_index]->child->nuses++;
855 /* Now sort a temporary symbol table based on the number of
856 times each function was used in the highest used arcs. */
857 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
858 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
860 /* Now pick out those symbols we're going to emit as
861 a group. We take up to 1.25% of the used symbols. */
862 for (sym_index = 0; sym_index < used / 80; sym_index++)
864 Sym *sym = scratch_syms[sym_index];
865 Arc *arc;
867 /* If we hit symbols that aren't used from many call sites,
868 then we can quit. We choose five as the low limit for
869 no particular reason. */
870 if (sym->nuses == 5)
871 break;
873 /* We're going to need the arcs between these functions.
874 Unfortunately, we don't know all these functions
875 until we're done. So we keep track of all the arcs
876 to the functions we care about, then prune out those
877 which are uninteresting.
879 An interesting variation would be to quit when we found
880 multi-call site functions which account for some percentage
881 of the arcs. */
882 arc = sym->cg.children;
884 while (arc)
886 if (arc->parent != arc->child)
887 scratch_arcs[scratch_arc_count++] = arc;
888 arc->has_been_placed = 1;
889 arc = arc->next_child;
892 arc = sym->cg.parents;
894 while (arc)
896 if (arc->parent != arc->child)
897 scratch_arcs[scratch_arc_count++] = arc;
898 arc->has_been_placed = 1;
899 arc = arc->next_parent;
902 /* Keep track of how many symbols we're going to place. */
903 scratch_index = sym_index;
905 /* A lie, but it makes identifying
906 these functions easier later. */
907 sym->has_been_placed = 1;
910 /* Now walk through the temporary arcs and copy
911 those we care about into the high arcs array. */
912 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
914 Arc *arc = scratch_arcs[arc_index];
916 /* If this arc refers to highly used functions, then
917 then we want to keep it. */
918 if (arc->child->has_been_placed
919 && arc->parent->has_been_placed)
921 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
923 /* We need to turn of has_been_placed since we're going to
924 use the main arc placement algorithm on these arcs. */
925 arc->child->has_been_placed = 0;
926 arc->parent->has_been_placed = 0;
930 /* Dump the multi-site high usage functions which are not
931 going to be ordered by the main ordering algorithm. */
932 for (sym_index = 0; sym_index < scratch_index; sym_index++)
934 if (scratch_syms[sym_index]->has_been_placed)
935 printf ("%s\n", scratch_syms[sym_index]->name);
938 /* Now we can order the multi-site high use
939 functions based on the arcs between them. */
940 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
941 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
942 unplaced_arcs, &unplaced_arc_count);
944 /* Order and dump the high use functions left,
945 these typically have only a few call sites. */
946 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
947 unplaced_arcs, &unplaced_arc_count);
949 /* Now place the rarely used functions. */
950 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
951 scratch_arcs, &scratch_arc_count);
953 /* Output any functions not emitted by the order_and_dump calls. */
954 for (sym_index = 0; sym_index < used; sym_index++)
955 if (used_syms[sym_index]->has_been_placed == 0)
956 printf("%s\n", used_syms[sym_index]->name);
958 /* Output the unused functions. */
959 for (sym_index = 0; sym_index < unused; sym_index++)
960 printf("%s\n", unused_syms[sym_index]->name);
962 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
963 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
964 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
966 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
967 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 free (unused_syms);
970 free (used_syms);
971 free (scratch_syms);
972 free (high_arcs);
973 free (scratch_arcs);
974 free (unplaced_arcs);
977 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
978 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
980 If ALL is nonzero, then place all functions referenced by THE_ARCS,
981 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
983 #define MOST 0.99
984 static void
985 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
986 unplaced_arcs, unplaced_arc_count)
987 Arc **the_arcs;
988 unsigned long arc_count;
989 int all;
990 Arc **unplaced_arcs;
991 unsigned long *unplaced_arc_count;
993 #ifdef __GNUC__
994 unsigned long long tmp_arcs, total_arcs;
995 #else
996 unsigned long tmp_arcs, total_arcs;
997 #endif
998 unsigned int arc_index;
1000 /* If needed, compute the total arc count.
1002 Note we don't compensate for overflow if that happens! */
1003 if (! all)
1005 total_arcs = 0;
1006 for (arc_index = 0; arc_index < arc_count; arc_index++)
1007 total_arcs += the_arcs[arc_index]->count;
1009 else
1010 total_arcs = 0;
1012 tmp_arcs = 0;
1014 for (arc_index = 0; arc_index < arc_count; arc_index++)
1016 Sym *sym1, *sym2;
1017 Sym *child, *parent;
1019 tmp_arcs += the_arcs[arc_index]->count;
1021 /* Ignore this arc if it's already been placed. */
1022 if (the_arcs[arc_index]->has_been_placed)
1023 continue;
1025 child = the_arcs[arc_index]->child;
1026 parent = the_arcs[arc_index]->parent;
1028 /* If we're not using all arcs, and this is a rarely used
1029 arc, then put it on the unplaced_arc list. Similarly
1030 if both the parent and child of this arc have been placed. */
1031 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1032 || child->has_been_placed || parent->has_been_placed)
1034 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1035 continue;
1038 /* If all slots in the parent and child are full, then there isn't
1039 anything we can do right now. We'll place this arc on the
1040 unplaced arc list in the hope that a global positioning
1041 algorithm can use it to place function chains. */
1042 if (parent->next && parent->prev && child->next && child->prev)
1044 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1045 continue;
1048 /* If the parent is unattached, then find the closest
1049 place to attach it onto child's chain. Similarly
1050 for the opposite case. */
1051 if (!parent->next && !parent->prev)
1053 int next_count = 0;
1054 int prev_count = 0;
1055 Sym *prev = child;
1056 Sym *next = child;
1058 /* Walk to the beginning and end of the child's chain. */
1059 while (next->next)
1061 next = next->next;
1062 next_count++;
1065 while (prev->prev)
1067 prev = prev->prev;
1068 prev_count++;
1071 /* Choose the closest. */
1072 child = next_count < prev_count ? next : prev;
1074 else if (! child->next && !child->prev)
1076 int next_count = 0;
1077 int prev_count = 0;
1078 Sym *prev = parent;
1079 Sym *next = parent;
1081 while (next->next)
1083 next = next->next;
1084 next_count++;
1087 while (prev->prev)
1089 prev = prev->prev;
1090 prev_count++;
1093 parent = prev_count < next_count ? prev : next;
1095 else
1097 /* Couldn't find anywhere to attach the functions,
1098 put the arc on the unplaced arc list. */
1099 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1100 continue;
1103 /* Make sure we don't tie two ends together. */
1104 sym1 = parent;
1105 if (sym1->next)
1106 while (sym1->next)
1107 sym1 = sym1->next;
1108 else
1109 while (sym1->prev)
1110 sym1 = sym1->prev;
1112 sym2 = child;
1113 if (sym2->next)
1114 while (sym2->next)
1115 sym2 = sym2->next;
1116 else
1117 while (sym2->prev)
1118 sym2 = sym2->prev;
1120 if (sym1 == child
1121 && sym2 == parent)
1123 /* This would tie two ends together. */
1124 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1125 continue;
1128 if (parent->next)
1130 /* Must attach to the parent's prev field. */
1131 if (! child->next)
1133 /* parent-prev and child-next */
1134 parent->prev = child;
1135 child->next = parent;
1136 the_arcs[arc_index]->has_been_placed = 1;
1139 else if (parent->prev)
1141 /* Must attach to the parent's next field. */
1142 if (! child->prev)
1144 /* parent-next and child-prev */
1145 parent->next = child;
1146 child->prev = parent;
1147 the_arcs[arc_index]->has_been_placed = 1;
1150 else
1152 /* Can attach to either field in the parent, depends
1153 on where we've got space in the child. */
1154 if (child->prev)
1156 /* parent-prev and child-next. */
1157 parent->prev = child;
1158 child->next = parent;
1159 the_arcs[arc_index]->has_been_placed = 1;
1161 else
1163 /* parent-next and child-prev. */
1164 parent->next = child;
1165 child->prev = parent;
1166 the_arcs[arc_index]->has_been_placed = 1;
1171 /* Dump the chains of functions we've made. */
1172 for (arc_index = 0; arc_index < arc_count; arc_index++)
1174 Sym *sym;
1175 if (the_arcs[arc_index]->parent->has_been_placed
1176 || the_arcs[arc_index]->child->has_been_placed)
1177 continue;
1179 sym = the_arcs[arc_index]->parent;
1181 /* If this symbol isn't attached to any other
1182 symbols, then we've got a rarely used arc.
1184 Skip it for now, we'll deal with them later. */
1185 if (sym->next == NULL
1186 && sym->prev == NULL)
1187 continue;
1189 /* Get to the start of this chain. */
1190 while (sym->prev)
1191 sym = sym->prev;
1193 while (sym)
1195 /* Mark it as placed. */
1196 sym->has_been_placed = 1;
1197 printf ("%s\n", sym->name);
1198 sym = sym->next;
1202 /* If we want to place all the arcs, then output
1203 those which weren't placed by the main algorithm. */
1204 if (all)
1205 for (arc_index = 0; arc_index < arc_count; arc_index++)
1207 Sym *sym;
1208 if (the_arcs[arc_index]->parent->has_been_placed
1209 || the_arcs[arc_index]->child->has_been_placed)
1210 continue;
1212 sym = the_arcs[arc_index]->parent;
1214 sym->has_been_placed = 1;
1215 printf ("%s\n", sym->name);
1219 /* Compare two function_map structs based on file name.
1220 We want to sort in ascending order. */
1222 static int
1223 cmp_symbol_map (const void * l, const void * r)
1225 return filename_cmp (((struct function_map *) l)->file_name,
1226 ((struct function_map *) r)->file_name);
1229 /* Print a suggested .o ordering for files on a link line based
1230 on profiling information. This uses the function placement
1231 code for the bulk of its work. */
1233 void
1234 cg_print_file_ordering (void)
1236 unsigned long scratch_arc_count;
1237 unsigned long arc_index;
1238 unsigned long sym_index;
1239 Arc **scratch_arcs;
1240 char *last;
1242 scratch_arc_count = 0;
1244 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1245 for (arc_index = 0; arc_index < numarcs; arc_index++)
1247 if (! arcs[arc_index]->parent->mapped
1248 || ! arcs[arc_index]->child->mapped)
1249 arcs[arc_index]->has_been_placed = 1;
1252 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1253 scratch_arcs, &scratch_arc_count);
1255 /* Output .o's not handled by the main placement algorithm. */
1256 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1258 if (symtab.base[sym_index].mapped
1259 && ! symtab.base[sym_index].has_been_placed)
1260 printf ("%s\n", symtab.base[sym_index].name);
1263 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1265 /* Now output any .o's that didn't have any text symbols. */
1266 last = NULL;
1267 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1269 unsigned int index2;
1271 /* Don't bother searching if this symbol
1272 is the same as the previous one. */
1273 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1274 continue;
1276 for (index2 = 0; index2 < symtab.len; index2++)
1278 if (! symtab.base[index2].mapped)
1279 continue;
1281 if (!filename_cmp (symtab.base[index2].name,
1282 symbol_map[sym_index].file_name))
1283 break;
1286 /* If we didn't find it in the symbol table, then it must
1287 be a .o with no text symbols. Output it last. */
1288 if (index2 == symtab.len)
1289 printf ("%s\n", symbol_map[sym_index].file_name);
1290 last = symbol_map[sym_index].file_name;