Fix "pushw imm16" for x86-64 disassembler.
[binutils.git] / gprof / cg_print.c
blobc1cb400681bc184d684a4b49dfb5f8a96b4b71d4
1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "cg_arcs.h"
29 #include "cg_print.h"
30 #include "hist.h"
31 #include "utils.h"
32 #include "corefile.h"
34 /* Return value of comparison functions used to sort tables. */
35 #define LESSTHAN -1
36 #define EQUALTO 0
37 #define GREATERTHAN 1
39 static void print_header (void);
40 static void print_cycle (Sym *);
41 static int cmp_member (Sym *, Sym *);
42 static void sort_members (Sym *);
43 static void print_members (Sym *);
44 static int cmp_arc (Arc *, Arc *);
45 static void sort_parents (Sym *);
46 static void print_parents (Sym *);
47 static void sort_children (Sym *);
48 static void print_children (Sym *);
49 static void print_line (Sym *);
50 static int cmp_name (const PTR, const PTR);
51 static int cmp_arc_count (const PTR, const PTR);
52 static int cmp_fun_nuses (const PTR, const PTR);
53 static void order_and_dump_functions_by_arcs
54 (Arc **, unsigned long, int, Arc **, unsigned long *);
56 /* Declarations of automatically generated functions to output blurbs. */
57 extern void bsd_callg_blurb (FILE * fp);
58 extern void fsf_callg_blurb (FILE * fp);
60 double print_time = 0.0;
63 static void
64 print_header ()
66 if (first_output)
67 first_output = FALSE;
68 else
69 printf ("\f\n");
71 if (!bsd_style_output)
73 if (print_descriptions)
74 printf (_("\t\t Call graph (explanation follows)\n\n"));
75 else
76 printf (_("\t\t\tCall graph\n\n"));
79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 (long) hist_scale * (long) sizeof (UNIT));
82 if (print_time > 0.0)
83 printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 100.0 / print_time, print_time / hz);
85 else
87 printf (_(" no time propagated\n\n"));
89 /* This doesn't hurt, since all the numerators will be 0.0. */
90 print_time = 1.0;
93 if (bsd_style_output)
95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96 "", "", "", "", _("called"), _("total"), _("parents"));
97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98 _("index"), _("%time"), _("self"), _("descendants"),
99 _("called"), _("self"), _("name"), _("index"));
100 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
101 "", "", "", "", _("called"), _("total"), _("children"));
102 printf ("\n");
104 else
106 printf (_("index %% time self children called name\n"));
110 /* Print a cycle header. */
112 static void
113 print_cycle (Sym *cyc)
115 char buf[BUFSIZ];
117 sprintf (buf, "[%d]", cyc->cg.index);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
121 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
122 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
124 if (cyc->cg.self_calls != 0)
125 printf ("+%-7lu", cyc->cg.self_calls);
126 else
127 printf (" %7.7s", "");
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
132 /* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
135 static int
136 cmp_member (Sym *left, Sym *right)
138 double left_time = left->cg.prop.self + left->cg.prop.child;
139 double right_time = right->cg.prop.self + right->cg.prop.child;
140 unsigned long left_calls = left->ncalls + left->cg.self_calls;
141 unsigned long right_calls = right->ncalls + right->cg.self_calls;
143 if (left_time > right_time)
144 return GREATERTHAN;
146 if (left_time < right_time)
147 return LESSTHAN;
149 if (left_calls > right_calls)
150 return GREATERTHAN;
152 if (left_calls < right_calls)
153 return LESSTHAN;
155 return EQUALTO;
158 /* Sort members of a cycle. */
160 static void
161 sort_members (Sym *cyc)
163 Sym *todo, *doing, *prev;
165 /* Detach cycle members from cyclehead,
166 and insertion sort them back on. */
167 todo = cyc->cg.cyc.next;
168 cyc->cg.cyc.next = 0;
170 for (doing = todo; doing != NULL; doing = todo)
172 todo = doing->cg.cyc.next;
174 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
176 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
177 break;
180 doing->cg.cyc.next = prev->cg.cyc.next;
181 prev->cg.cyc.next = doing;
185 /* Print the members of a cycle. */
187 static void
188 print_members (Sym *cyc)
190 Sym *member;
192 sort_members (cyc);
194 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
196 printf (bsd_style_output
197 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
198 : "%6.6s %5.5s %7.2f %7.2f %7lu",
199 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
200 member->ncalls);
202 if (member->cg.self_calls != 0)
203 printf ("+%-7lu", member->cg.self_calls);
204 else
205 printf (" %7.7s", "");
207 printf (" ");
208 print_name (member);
209 printf ("\n");
213 /* Compare two arcs to/from the same child/parent.
214 - if one arc is a self arc, it's least.
215 - if one arc is within a cycle, it's less than.
216 - if both arcs are within a cycle, compare arc counts.
217 - if neither arc is within a cycle, compare with
218 time + child_time as major key
219 arc count as minor key. */
221 static int
222 cmp_arc (Arc *left, Arc *right)
224 Sym *left_parent = left->parent;
225 Sym *left_child = left->child;
226 Sym *right_parent = right->parent;
227 Sym *right_child = right->child;
228 double left_time, right_time;
230 DBG (TIMEDEBUG,
231 printf ("[cmp_arc] ");
232 print_name (left_parent);
233 printf (" calls ");
234 print_name (left_child);
235 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
236 left->count, left_child->ncalls);
237 printf ("[cmp_arc] ");
238 print_name (right_parent);
239 printf (" calls ");
240 print_name (right_child);
241 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
242 right->count, right_child->ncalls);
243 printf ("\n");
246 if (left_parent == left_child)
247 return LESSTHAN; /* Left is a self call. */
249 if (right_parent == right_child)
250 return GREATERTHAN; /* Right is a self call. */
252 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
253 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
255 /* Left is a call within a cycle. */
256 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
257 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
259 /* Right is a call within the cycle, too. */
260 if (left->count < right->count)
261 return LESSTHAN;
263 if (left->count > right->count)
264 return GREATERTHAN;
266 return EQUALTO;
268 else
270 /* Right isn't a call within the cycle. */
271 return LESSTHAN;
274 else
276 /* Left isn't a call within a cycle. */
277 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
278 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
280 /* Right is a call within a cycle. */
281 return GREATERTHAN;
283 else
285 /* Neither is a call within a cycle. */
286 left_time = left->time + left->child_time;
287 right_time = right->time + right->child_time;
289 if (left_time < right_time)
290 return LESSTHAN;
292 if (left_time > right_time)
293 return GREATERTHAN;
295 if (left->count < right->count)
296 return LESSTHAN;
298 if (left->count > right->count)
299 return GREATERTHAN;
301 return EQUALTO;
307 static void
308 sort_parents (Sym * child)
310 Arc *arc, *detached, sorted, *prev;
312 /* Unlink parents from child, then insertion sort back on to
313 sorted's parents.
314 *arc the arc you have detached and are inserting.
315 *detached the rest of the arcs to be sorted.
316 sorted arc list onto which you insertion sort.
317 *prev arc before the arc you are comparing. */
318 sorted.next_parent = 0;
320 for (arc = child->cg.parents; arc; arc = detached)
322 detached = arc->next_parent;
324 /* Consider *arc as disconnected; insert it into sorted. */
325 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
327 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
328 break;
331 arc->next_parent = prev->next_parent;
332 prev->next_parent = arc;
335 /* Reattach sorted arcs to child. */
336 child->cg.parents = sorted.next_parent;
340 static void
341 print_parents (Sym *child)
343 Sym *parent;
344 Arc *arc;
345 Sym *cycle_head;
347 if (child->cg.cyc.head != 0)
348 cycle_head = child->cg.cyc.head;
349 else
350 cycle_head = child;
352 if (!child->cg.parents)
354 printf (bsd_style_output
355 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
356 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
357 "", "", "", "", "", "");
358 return;
361 sort_parents (child);
363 for (arc = child->cg.parents; arc; arc = arc->next_parent)
365 parent = arc->parent;
366 if (child == parent || (child->cg.cyc.num != 0
367 && parent->cg.cyc.num == child->cg.cyc.num))
369 /* Selfcall or call among siblings. */
370 printf (bsd_style_output
371 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
372 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
373 "", "", "", "",
374 arc->count, "");
375 print_name (parent);
376 printf ("\n");
378 else
380 /* Regular parent of child. */
381 printf (bsd_style_output
382 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
383 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
384 "", "",
385 arc->time / hz, arc->child_time / hz,
386 arc->count, cycle_head->ncalls);
387 print_name (parent);
388 printf ("\n");
394 static void
395 sort_children (Sym *parent)
397 Arc *arc, *detached, sorted, *prev;
399 /* Unlink children from parent, then insertion sort back on to
400 sorted's children.
401 *arc the arc you have detached and are inserting.
402 *detached the rest of the arcs to be sorted.
403 sorted arc list onto which you insertion sort.
404 *prev arc before the arc you are comparing. */
405 sorted.next_child = 0;
407 for (arc = parent->cg.children; arc; arc = detached)
409 detached = arc->next_child;
411 /* Consider *arc as disconnected; insert it into sorted. */
412 for (prev = &sorted; prev->next_child; prev = prev->next_child)
414 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
415 break;
418 arc->next_child = prev->next_child;
419 prev->next_child = arc;
422 /* Reattach sorted children to parent. */
423 parent->cg.children = sorted.next_child;
427 static void
428 print_children (Sym *parent)
430 Sym *child;
431 Arc *arc;
433 sort_children (parent);
434 arc = parent->cg.children;
436 for (arc = parent->cg.children; arc; arc = arc->next_child)
438 child = arc->child;
439 if (child == parent || (child->cg.cyc.num != 0
440 && child->cg.cyc.num == parent->cg.cyc.num))
442 /* Self call or call to sibling. */
443 printf (bsd_style_output
444 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
445 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
446 "", "", "", "", arc->count, "");
447 print_name (child);
448 printf ("\n");
450 else
452 /* Regular child of parent. */
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
455 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
456 "", "",
457 arc->time / hz, arc->child_time / hz,
458 arc->count, child->cg.cyc.head->ncalls);
459 print_name (child);
460 printf ("\n");
466 static void
467 print_line (Sym *np)
469 char buf[BUFSIZ];
471 sprintf (buf, "[%d]", np->cg.index);
472 printf (bsd_style_output
473 ? "%-6.6s %5.1f %7.2f %11.2f"
474 : "%-6.6s %5.1f %7.2f %7.2f", buf,
475 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
476 np->cg.prop.self / hz, np->cg.prop.child / hz);
478 if ((np->ncalls + np->cg.self_calls) != 0)
480 printf (" %7lu", np->ncalls);
482 if (np->cg.self_calls != 0)
483 printf ("+%-7lu ", np->cg.self_calls);
484 else
485 printf (" %7.7s ", "");
487 else
489 printf (" %7.7s %7.7s ", "", "");
492 print_name (np);
493 printf ("\n");
497 /* Print dynamic call graph. */
499 void
500 cg_print (Sym ** timesortsym)
502 unsigned int sym_index;
503 Sym *parent;
505 if (print_descriptions && bsd_style_output)
506 bsd_callg_blurb (stdout);
508 print_header ();
510 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
512 parent = timesortsym[sym_index];
514 if ((ignore_zeros && parent->ncalls == 0
515 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
516 && parent->cg.prop.child == 0)
517 || !parent->cg.print_flag
518 || (line_granularity && ! parent->is_func))
519 continue;
521 if (!parent->name && parent->cg.cyc.num != 0)
523 /* Cycle header. */
524 print_cycle (parent);
525 print_members (parent);
527 else
529 print_parents (parent);
530 print_line (parent);
531 print_children (parent);
534 if (bsd_style_output)
535 printf ("\n");
537 printf ("-----------------------------------------------\n");
539 if (bsd_style_output)
540 printf ("\n");
543 free (timesortsym);
545 if (print_descriptions && !bsd_style_output)
546 fsf_callg_blurb (stdout);
550 static int
551 cmp_name (const PTR left, const PTR right)
553 const Sym **npp1 = (const Sym **) left;
554 const Sym **npp2 = (const Sym **) right;
556 return strcmp ((*npp1)->name, (*npp2)->name);
560 void
561 cg_print_index ()
563 unsigned int sym_index;
564 unsigned int nnames, todo, i, j;
565 int col, starting_col;
566 Sym **name_sorted_syms, *sym;
567 const char *filename;
568 char buf[20];
569 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
571 /* Now, sort regular function name
572 alphabetically to create an index. */
573 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
575 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
577 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
578 && symtab.base[sym_index].hist.time == 0)
579 continue;
581 name_sorted_syms[nnames++] = &symtab.base[sym_index];
584 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
586 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
587 name_sorted_syms[todo++] = &cycle_header[sym_index];
589 printf ("\f\n");
590 printf (_("Index by function name\n\n"));
591 sym_index = (todo + 2) / 3;
593 for (i = 0; i < sym_index; i++)
595 col = 0;
596 starting_col = 0;
598 for (j = i; j < todo; j += sym_index)
600 sym = name_sorted_syms[j];
602 if (sym->cg.print_flag)
603 sprintf (buf, "[%d]", sym->cg.index);
604 else
605 sprintf (buf, "(%d)", sym->cg.index);
607 if (j < nnames)
609 if (bsd_style_output)
611 printf ("%6.6s %-19.19s", buf, sym->name);
613 else
615 col += strlen (buf);
617 for (; col < starting_col + 5; ++col)
618 putchar (' ');
620 printf (" %s ", buf);
621 col += print_name_only (sym);
623 if (!line_granularity && sym->is_static && sym->file)
625 filename = sym->file->name;
627 if (!print_path)
629 filename = strrchr (filename, '/');
631 if (filename)
632 ++filename;
633 else
634 filename = sym->file->name;
637 printf (" (%s)", filename);
638 col += strlen (filename) + 3;
642 else
644 if (bsd_style_output)
646 printf ("%6.6s ", buf);
647 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
648 printf ("%-19.19s", buf);
650 else
652 col += strlen (buf);
653 for (; col < starting_col + 5; ++col)
654 putchar (' ');
655 printf (" %s ", buf);
656 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
657 printf ("%s", buf);
658 col += strlen (buf);
662 starting_col += column_width;
665 printf ("\n");
668 free (name_sorted_syms);
671 /* Compare two arcs based on their usage counts.
672 We want to sort in descending order. */
674 static int
675 cmp_arc_count (const PTR left, const PTR right)
677 const Arc **npp1 = (const Arc **) left;
678 const Arc **npp2 = (const Arc **) right;
680 if ((*npp1)->count > (*npp2)->count)
681 return -1;
682 else if ((*npp1)->count < (*npp2)->count)
683 return 1;
684 else
685 return 0;
688 /* Compare two funtions based on their usage counts.
689 We want to sort in descending order. */
691 static int
692 cmp_fun_nuses (const PTR left, const PTR right)
694 const Sym **npp1 = (const Sym **) left;
695 const Sym **npp2 = (const Sym **) right;
697 if ((*npp1)->nuses > (*npp2)->nuses)
698 return -1;
699 else if ((*npp1)->nuses < (*npp2)->nuses)
700 return 1;
701 else
702 return 0;
705 /* Print a suggested function ordering based on the profiling data.
707 We perform 4 major steps when ordering functions:
709 * Group unused functions together and place them at the
710 end of the function order.
712 * Search the highest use arcs (those which account for 90% of
713 the total arc count) for functions which have several parents.
715 Group those with the most call sites together (currently the
716 top 1.25% which have at least five different call sites).
718 These are emitted at the start of the function order.
720 * Use a greedy placement algorithm to place functions which
721 occur in the top 99% of the arcs in the profile. Some provisions
722 are made to handle high usage arcs where the parent and/or
723 child has already been placed.
725 * Run the same greedy placement algorithm on the remaining
726 arcs to place the leftover functions.
729 The various "magic numbers" should (one day) be tuneable by command
730 line options. They were arrived at by benchmarking a few applications
731 with various values to see which values produced better overall function
732 orderings.
734 Of course, profiling errors, machine limitations (PA long calls), and
735 poor cutoff values for the placement algorithm may limit the usefullness
736 of the resulting function order. Improvements would be greatly appreciated.
738 Suggestions:
740 * Place the functions with many callers near the middle of the
741 list to reduce long calls.
743 * Propagate arc usage changes as functions are placed. Ie if
744 func1 and func2 are placed together, arcs to/from those arcs
745 to the same parent/child should be combined, then resort the
746 arcs to choose the next one.
748 * Implement some global positioning algorithm to place the
749 chains made by the greedy local positioning algorithm. Probably
750 by examining arcs which haven't been placed yet to tie two
751 chains together.
753 * Take a function's size and time into account in the algorithm;
754 size in particular is important on the PA (long calls). Placing
755 many small functions onto their own page may be wise.
757 * Use better profiling information; many published algorithms
758 are based on call sequences through time, rather than just
759 arc counts.
761 * Prodecure cloning could improve performance when a small number
762 of arcs account for most of the calls to a particular function.
764 * Use relocation information to avoid moving unused functions
765 completely out of the code stream; this would avoid severe lossage
766 when the profile data bears little resemblance to actual runs.
768 * Propagation of arc usages should also improve .o link line
769 ordering which shares the same arc placement algorithm with
770 the function ordering code (in fact it is a degenerate case
771 of function ordering). */
773 void
774 cg_print_function_ordering (void)
776 unsigned long sym_index;
777 unsigned long arc_index;
778 unsigned long used, unused, scratch_index;
779 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
780 #ifdef __GNUC__
781 unsigned long long total_arcs, tmp_arcs_count;
782 #else
783 unsigned long total_arcs, tmp_arcs_count;
784 #endif
785 Sym **unused_syms, **used_syms, **scratch_syms;
786 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
788 sym_index = 0;
789 used = 0;
790 unused = 0;
791 scratch_index = 0;
792 unplaced_arc_count = 0;
793 high_arc_count = 0;
794 scratch_arc_count = 0;
796 /* First group all the unused functions together. */
797 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
798 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
799 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
800 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
801 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
802 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
804 /* Walk through all the functions; mark those which are never
805 called as placed (we'll emit them as a group later). */
806 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
808 if (symtab.base[sym_index].ncalls == 0)
810 unused_syms[unused++] = &symtab.base[sym_index];
811 symtab.base[sym_index].has_been_placed = 1;
813 else
815 used_syms[used++] = &symtab.base[sym_index];
816 symtab.base[sym_index].has_been_placed = 0;
817 symtab.base[sym_index].next = 0;
818 symtab.base[sym_index].prev = 0;
819 symtab.base[sym_index].nuses = 0;
823 /* Sort the arcs from most used to least used. */
824 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
826 /* Compute the total arc count. Also mark arcs as unplaced.
828 Note we don't compensate for overflow if that happens!
829 Overflow is much less likely when this file is compiled
830 with GCC as it can double-wide integers via long long. */
831 total_arcs = 0;
832 for (arc_index = 0; arc_index < numarcs; arc_index++)
834 total_arcs += arcs[arc_index]->count;
835 arcs[arc_index]->has_been_placed = 0;
838 /* We want to pull out those functions which are referenced
839 by many highly used arcs and emit them as a group. This
840 could probably use some tuning. */
841 tmp_arcs_count = 0;
842 for (arc_index = 0; arc_index < numarcs; arc_index++)
844 tmp_arcs_count += arcs[arc_index]->count;
846 /* Count how many times each parent and child are used up
847 to our threshhold of arcs (90%). */
848 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
849 break;
851 arcs[arc_index]->child->nuses++;
854 /* Now sort a temporary symbol table based on the number of
855 times each function was used in the highest used arcs. */
856 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
857 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
859 /* Now pick out those symbols we're going to emit as
860 a group. We take up to 1.25% of the used symbols. */
861 for (sym_index = 0; sym_index < used / 80; sym_index++)
863 Sym *sym = scratch_syms[sym_index];
864 Arc *arc;
866 /* If we hit symbols that aren't used from many call sites,
867 then we can quit. We choose five as the low limit for
868 no particular reason. */
869 if (sym->nuses == 5)
870 break;
872 /* We're going to need the arcs between these functions.
873 Unfortunately, we don't know all these functions
874 until we're done. So we keep track of all the arcs
875 to the functions we care about, then prune out those
876 which are uninteresting.
878 An interesting variation would be to quit when we found
879 multi-call site functions which account for some percentage
880 of the arcs. */
881 arc = sym->cg.children;
883 while (arc)
885 if (arc->parent != arc->child)
886 scratch_arcs[scratch_arc_count++] = arc;
887 arc->has_been_placed = 1;
888 arc = arc->next_child;
891 arc = sym->cg.parents;
893 while (arc)
895 if (arc->parent != arc->child)
896 scratch_arcs[scratch_arc_count++] = arc;
897 arc->has_been_placed = 1;
898 arc = arc->next_parent;
901 /* Keep track of how many symbols we're going to place. */
902 scratch_index = sym_index;
904 /* A lie, but it makes identifying
905 these functions easier later. */
906 sym->has_been_placed = 1;
909 /* Now walk through the temporary arcs and copy
910 those we care about into the high arcs array. */
911 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
913 Arc *arc = scratch_arcs[arc_index];
915 /* If this arc refers to highly used functions, then
916 then we want to keep it. */
917 if (arc->child->has_been_placed
918 && arc->parent->has_been_placed)
920 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
922 /* We need to turn of has_been_placed since we're going to
923 use the main arc placement algorithm on these arcs. */
924 arc->child->has_been_placed = 0;
925 arc->parent->has_been_placed = 0;
929 /* Dump the multi-site high usage functions which are not
930 going to be ordered by the main ordering algorithm. */
931 for (sym_index = 0; sym_index < scratch_index; sym_index++)
933 if (scratch_syms[sym_index]->has_been_placed)
934 printf ("%s\n", scratch_syms[sym_index]->name);
937 /* Now we can order the multi-site high use
938 functions based on the arcs between them. */
939 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
940 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
941 unplaced_arcs, &unplaced_arc_count);
943 /* Order and dump the high use functions left,
944 these typically have only a few call sites. */
945 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
946 unplaced_arcs, &unplaced_arc_count);
948 /* Now place the rarely used functions. */
949 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
950 scratch_arcs, &scratch_arc_count);
952 /* Output any functions not emitted by the order_and_dump calls. */
953 for (sym_index = 0; sym_index < used; sym_index++)
954 if (used_syms[sym_index]->has_been_placed == 0)
955 printf("%s\n", used_syms[sym_index]->name);
957 /* Output the unused functions. */
958 for (sym_index = 0; sym_index < unused; sym_index++)
959 printf("%s\n", unused_syms[sym_index]->name);
961 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
962 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
963 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
964 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
965 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
966 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968 free (unused_syms);
969 free (used_syms);
970 free (scratch_syms);
971 free (high_arcs);
972 free (scratch_arcs);
973 free (unplaced_arcs);
976 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
977 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
979 If ALL is nonzero, then place all functions referenced by THE_ARCS,
980 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
982 #define MOST 0.99
983 static void
984 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
985 unplaced_arcs, unplaced_arc_count)
986 Arc **the_arcs;
987 unsigned long arc_count;
988 int all;
989 Arc **unplaced_arcs;
990 unsigned long *unplaced_arc_count;
992 #ifdef __GNUC__
993 unsigned long long tmp_arcs, total_arcs;
994 #else
995 unsigned long tmp_arcs, total_arcs;
996 #endif
997 unsigned int arc_index;
999 /* If needed, compute the total arc count.
1001 Note we don't compensate for overflow if that happens! */
1002 if (! all)
1004 total_arcs = 0;
1005 for (arc_index = 0; arc_index < arc_count; arc_index++)
1006 total_arcs += the_arcs[arc_index]->count;
1008 else
1009 total_arcs = 0;
1011 tmp_arcs = 0;
1013 for (arc_index = 0; arc_index < arc_count; arc_index++)
1015 Sym *sym1, *sym2;
1016 Sym *child, *parent;
1018 tmp_arcs += the_arcs[arc_index]->count;
1020 /* Ignore this arc if it's already been placed. */
1021 if (the_arcs[arc_index]->has_been_placed)
1022 continue;
1024 child = the_arcs[arc_index]->child;
1025 parent = the_arcs[arc_index]->parent;
1027 /* If we're not using all arcs, and this is a rarely used
1028 arc, then put it on the unplaced_arc list. Similarly
1029 if both the parent and child of this arc have been placed. */
1030 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1031 || child->has_been_placed || parent->has_been_placed)
1033 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1034 continue;
1037 /* If all slots in the parent and child are full, then there isn't
1038 anything we can do right now. We'll place this arc on the
1039 unplaced arc list in the hope that a global positioning
1040 algorithm can use it to place function chains. */
1041 if (parent->next && parent->prev && child->next && child->prev)
1043 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1044 continue;
1047 /* If the parent is unattached, then find the closest
1048 place to attach it onto child's chain. Similarly
1049 for the opposite case. */
1050 if (!parent->next && !parent->prev)
1052 int next_count = 0;
1053 int prev_count = 0;
1054 Sym *prev = child;
1055 Sym *next = child;
1057 /* Walk to the beginning and end of the child's chain. */
1058 while (next->next)
1060 next = next->next;
1061 next_count++;
1064 while (prev->prev)
1066 prev = prev->prev;
1067 prev_count++;
1070 /* Choose the closest. */
1071 child = next_count < prev_count ? next : prev;
1073 else if (! child->next && !child->prev)
1075 int next_count = 0;
1076 int prev_count = 0;
1077 Sym *prev = parent;
1078 Sym *next = parent;
1080 while (next->next)
1082 next = next->next;
1083 next_count++;
1086 while (prev->prev)
1088 prev = prev->prev;
1089 prev_count++;
1092 parent = prev_count < next_count ? prev : next;
1094 else
1096 /* Couldn't find anywhere to attach the functions,
1097 put the arc on the unplaced arc list. */
1098 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1099 continue;
1102 /* Make sure we don't tie two ends together. */
1103 sym1 = parent;
1104 if (sym1->next)
1105 while (sym1->next)
1106 sym1 = sym1->next;
1107 else
1108 while (sym1->prev)
1109 sym1 = sym1->prev;
1111 sym2 = child;
1112 if (sym2->next)
1113 while (sym2->next)
1114 sym2 = sym2->next;
1115 else
1116 while (sym2->prev)
1117 sym2 = sym2->prev;
1119 if (sym1 == child
1120 && sym2 == parent)
1122 /* This would tie two ends together. */
1123 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1124 continue;
1127 if (parent->next)
1129 /* Must attach to the parent's prev field. */
1130 if (! child->next)
1132 /* parent-prev and child-next */
1133 parent->prev = child;
1134 child->next = parent;
1135 the_arcs[arc_index]->has_been_placed = 1;
1138 else if (parent->prev)
1140 /* Must attach to the parent's next field. */
1141 if (! child->prev)
1143 /* parent-next and child-prev */
1144 parent->next = child;
1145 child->prev = parent;
1146 the_arcs[arc_index]->has_been_placed = 1;
1149 else
1151 /* Can attach to either field in the parent, depends
1152 on where we've got space in the child. */
1153 if (child->prev)
1155 /* parent-prev and child-next. */
1156 parent->prev = child;
1157 child->next = parent;
1158 the_arcs[arc_index]->has_been_placed = 1;
1160 else
1162 /* parent-next and child-prev. */
1163 parent->next = child;
1164 child->prev = parent;
1165 the_arcs[arc_index]->has_been_placed = 1;
1170 /* Dump the chains of functions we've made. */
1171 for (arc_index = 0; arc_index < arc_count; arc_index++)
1173 Sym *sym;
1174 if (the_arcs[arc_index]->parent->has_been_placed
1175 || the_arcs[arc_index]->child->has_been_placed)
1176 continue;
1178 sym = the_arcs[arc_index]->parent;
1180 /* If this symbol isn't attached to any other
1181 symbols, then we've got a rarely used arc.
1183 Skip it for now, we'll deal with them later. */
1184 if (sym->next == NULL
1185 && sym->prev == NULL)
1186 continue;
1188 /* Get to the start of this chain. */
1189 while (sym->prev)
1190 sym = sym->prev;
1192 while (sym)
1194 /* Mark it as placed. */
1195 sym->has_been_placed = 1;
1196 printf ("%s\n", sym->name);
1197 sym = sym->next;
1201 /* If we want to place all the arcs, then output
1202 those which weren't placed by the main algorithm. */
1203 if (all)
1204 for (arc_index = 0; arc_index < arc_count; arc_index++)
1206 Sym *sym;
1207 if (the_arcs[arc_index]->parent->has_been_placed
1208 || the_arcs[arc_index]->child->has_been_placed)
1209 continue;
1211 sym = the_arcs[arc_index]->parent;
1213 sym->has_been_placed = 1;
1214 printf ("%s\n", sym->name);
1218 /* Compare two function_map structs based on file name.
1219 We want to sort in ascending order. */
1221 static int
1222 cmp_symbol_map (const void * l, const void * r)
1224 return strcmp (((struct function_map *) l)->file_name,
1225 ((struct function_map *) r)->file_name);
1228 /* Print a suggested .o ordering for files on a link line based
1229 on profiling information. This uses the function placement
1230 code for the bulk of its work. */
1232 void
1233 cg_print_file_ordering (void)
1235 unsigned long scratch_arc_count;
1236 unsigned long arc_index;
1237 unsigned long sym_index;
1238 Arc **scratch_arcs;
1239 char *last;
1241 scratch_arc_count = 0;
1243 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1244 for (arc_index = 0; arc_index < numarcs; arc_index++)
1246 if (! arcs[arc_index]->parent->mapped
1247 || ! arcs[arc_index]->child->mapped)
1248 arcs[arc_index]->has_been_placed = 1;
1251 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1252 scratch_arcs, &scratch_arc_count);
1254 /* Output .o's not handled by the main placement algorithm. */
1255 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1257 if (symtab.base[sym_index].mapped
1258 && ! symtab.base[sym_index].has_been_placed)
1259 printf ("%s\n", symtab.base[sym_index].name);
1262 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1264 /* Now output any .o's that didn't have any text symbols. */
1265 last = NULL;
1266 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1268 unsigned int index2;
1270 /* Don't bother searching if this symbol
1271 is the same as the previous one. */
1272 if (last && !strcmp (last, symbol_map[sym_index].file_name))
1273 continue;
1275 for (index2 = 0; index2 < symtab.len; index2++)
1277 if (! symtab.base[index2].mapped)
1278 continue;
1280 if (!strcmp (symtab.base[index2].name, symbol_map[sym_index].file_name))
1281 break;
1284 /* If we didn't find it in the symbol table, then it must
1285 be a .o with no text symbols. Output it last. */
1286 if (index2 == symtab.len)
1287 printf ("%s\n", symbol_map[sym_index].file_name);
1288 last = symbol_map[sym_index].file_name;