Improve TLS support on TILE-Gx/TILEPro:
[binutils.git] / gprof / cg_print.c
blob8ff14714171323ab41dcac78dd6ec59c9d7bc9b6
1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
4 Free Software Foundation, Inc.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "filenames.h"
26 #include "search_list.h"
27 #include "source.h"
28 #include "symtab.h"
29 #include "cg_arcs.h"
30 #include "cg_print.h"
31 #include "hist.h"
32 #include "utils.h"
33 #include "corefile.h"
35 /* Return value of comparison functions used to sort tables. */
36 #define LESSTHAN -1
37 #define EQUALTO 0
38 #define GREATERTHAN 1
40 static void print_header (void);
41 static void print_cycle (Sym *);
42 static int cmp_member (Sym *, Sym *);
43 static void sort_members (Sym *);
44 static void print_members (Sym *);
45 static int cmp_arc (Arc *, Arc *);
46 static void sort_parents (Sym *);
47 static void print_parents (Sym *);
48 static void sort_children (Sym *);
49 static void print_children (Sym *);
50 static void print_line (Sym *);
51 static int cmp_name (const PTR, const PTR);
52 static int cmp_arc_count (const PTR, const PTR);
53 static int cmp_fun_nuses (const PTR, const PTR);
54 static void order_and_dump_functions_by_arcs
55 (Arc **, unsigned long, int, Arc **, unsigned long *);
57 /* Declarations of automatically generated functions to output blurbs. */
58 extern void bsd_callg_blurb (FILE * fp);
59 extern void fsf_callg_blurb (FILE * fp);
61 double print_time = 0.0;
64 static void
65 print_header ()
67 if (first_output)
68 first_output = FALSE;
69 else
70 printf ("\f\n");
72 if (!bsd_style_output)
74 if (print_descriptions)
75 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 else
77 printf (_("\t\t\tCall graph\n\n"));
80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 (long) hist_scale * (long) sizeof (UNIT));
83 if (print_time > 0.0)
84 printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 100.0 / print_time, print_time / hz);
86 else
88 printf (_(" no time propagated\n\n"));
90 /* This doesn't hurt, since all the numerators will be 0.0. */
91 print_time = 1.0;
94 if (bsd_style_output)
96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
97 "", "", "", "", _("called"), _("total"), _("parents"));
98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 _("index"),
100 /* xgettext:no-c-format */
101 _("%time"),
102 _("self"), _("descendants"), _("called"), _("self"),
103 _("name"), _("index"));
104 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
105 "", "", "", "", _("called"), _("total"), _("children"));
106 printf ("\n");
108 else
110 printf (_("index %% time self children called name\n"));
114 /* Print a cycle header. */
116 static void
117 print_cycle (Sym *cyc)
119 char buf[BUFSIZ];
121 sprintf (buf, "[%d]", cyc->cg.index);
122 printf (bsd_style_output
123 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
124 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
125 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
126 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
128 if (cyc->cg.self_calls != 0)
129 printf ("+%-7lu", cyc->cg.self_calls);
130 else
131 printf (" %7.7s", "");
133 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
136 /* Compare LEFT and RIGHT membmer. Major comparison key is
137 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
139 static int
140 cmp_member (Sym *left, Sym *right)
142 double left_time = left->cg.prop.self + left->cg.prop.child;
143 double right_time = right->cg.prop.self + right->cg.prop.child;
144 unsigned long left_calls = left->ncalls + left->cg.self_calls;
145 unsigned long right_calls = right->ncalls + right->cg.self_calls;
147 if (left_time > right_time)
148 return GREATERTHAN;
150 if (left_time < right_time)
151 return LESSTHAN;
153 if (left_calls > right_calls)
154 return GREATERTHAN;
156 if (left_calls < right_calls)
157 return LESSTHAN;
159 return EQUALTO;
162 /* Sort members of a cycle. */
164 static void
165 sort_members (Sym *cyc)
167 Sym *todo, *doing, *prev;
169 /* Detach cycle members from cyclehead,
170 and insertion sort them back on. */
171 todo = cyc->cg.cyc.next;
172 cyc->cg.cyc.next = 0;
174 for (doing = todo; doing != NULL; doing = todo)
176 todo = doing->cg.cyc.next;
178 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
180 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
181 break;
184 doing->cg.cyc.next = prev->cg.cyc.next;
185 prev->cg.cyc.next = doing;
189 /* Print the members of a cycle. */
191 static void
192 print_members (Sym *cyc)
194 Sym *member;
196 sort_members (cyc);
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 member->ncalls);
206 if (member->cg.self_calls != 0)
207 printf ("+%-7lu", member->cg.self_calls);
208 else
209 printf (" %7.7s", "");
211 printf (" ");
212 print_name (member);
213 printf ("\n");
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
225 static int
226 cmp_arc (Arc *left, Arc *right)
228 Sym *left_parent = left->parent;
229 Sym *left_child = left->child;
230 Sym *right_parent = right->parent;
231 Sym *right_child = right->child;
232 double left_time, right_time;
234 DBG (TIMEDEBUG,
235 printf ("[cmp_arc] ");
236 print_name (left_parent);
237 printf (" calls ");
238 print_name (left_child);
239 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
240 left->count, left_child->ncalls);
241 printf ("[cmp_arc] ");
242 print_name (right_parent);
243 printf (" calls ");
244 print_name (right_child);
245 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
246 right->count, right_child->ncalls);
247 printf ("\n");
250 if (left_parent == left_child)
251 return LESSTHAN; /* Left is a self call. */
253 if (right_parent == right_child)
254 return GREATERTHAN; /* Right is a self call. */
256 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
257 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
259 /* Left is a call within a cycle. */
260 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
261 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
263 /* Right is a call within the cycle, too. */
264 if (left->count < right->count)
265 return LESSTHAN;
267 if (left->count > right->count)
268 return GREATERTHAN;
270 return EQUALTO;
272 else
274 /* Right isn't a call within the cycle. */
275 return LESSTHAN;
278 else
280 /* Left isn't a call within a cycle. */
281 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
282 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
284 /* Right is a call within a cycle. */
285 return GREATERTHAN;
287 else
289 /* Neither is a call within a cycle. */
290 left_time = left->time + left->child_time;
291 right_time = right->time + right->child_time;
293 if (left_time < right_time)
294 return LESSTHAN;
296 if (left_time > right_time)
297 return GREATERTHAN;
299 if (left->count < right->count)
300 return LESSTHAN;
302 if (left->count > right->count)
303 return GREATERTHAN;
305 return EQUALTO;
311 static void
312 sort_parents (Sym * child)
314 Arc *arc, *detached, sorted, *prev;
316 /* Unlink parents from child, then insertion sort back on to
317 sorted's parents.
318 *arc the arc you have detached and are inserting.
319 *detached the rest of the arcs to be sorted.
320 sorted arc list onto which you insertion sort.
321 *prev arc before the arc you are comparing. */
322 sorted.next_parent = 0;
324 for (arc = child->cg.parents; arc; arc = detached)
326 detached = arc->next_parent;
328 /* Consider *arc as disconnected; insert it into sorted. */
329 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
331 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
332 break;
335 arc->next_parent = prev->next_parent;
336 prev->next_parent = arc;
339 /* Reattach sorted arcs to child. */
340 child->cg.parents = sorted.next_parent;
344 static void
345 print_parents (Sym *child)
347 Sym *parent;
348 Arc *arc;
349 Sym *cycle_head;
351 if (child->cg.cyc.head != 0)
352 cycle_head = child->cg.cyc.head;
353 else
354 cycle_head = child;
356 if (!child->cg.parents)
358 printf (bsd_style_output
359 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
360 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
361 "", "", "", "", "", "");
362 return;
365 sort_parents (child);
367 for (arc = child->cg.parents; arc; arc = arc->next_parent)
369 parent = arc->parent;
370 if (child == parent || (child->cg.cyc.num != 0
371 && parent->cg.cyc.num == child->cg.cyc.num))
373 /* Selfcall or call among siblings. */
374 printf (bsd_style_output
375 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
376 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
377 "", "", "", "",
378 arc->count, "");
379 print_name (parent);
380 printf ("\n");
382 else
384 /* Regular parent of child. */
385 printf (bsd_style_output
386 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
387 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
388 "", "",
389 arc->time / hz, arc->child_time / hz,
390 arc->count, cycle_head->ncalls);
391 print_name (parent);
392 printf ("\n");
398 static void
399 sort_children (Sym *parent)
401 Arc *arc, *detached, sorted, *prev;
403 /* Unlink children from parent, then insertion sort back on to
404 sorted's children.
405 *arc the arc you have detached and are inserting.
406 *detached the rest of the arcs to be sorted.
407 sorted arc list onto which you insertion sort.
408 *prev arc before the arc you are comparing. */
409 sorted.next_child = 0;
411 for (arc = parent->cg.children; arc; arc = detached)
413 detached = arc->next_child;
415 /* Consider *arc as disconnected; insert it into sorted. */
416 for (prev = &sorted; prev->next_child; prev = prev->next_child)
418 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
419 break;
422 arc->next_child = prev->next_child;
423 prev->next_child = arc;
426 /* Reattach sorted children to parent. */
427 parent->cg.children = sorted.next_child;
431 static void
432 print_children (Sym *parent)
434 Sym *child;
435 Arc *arc;
437 sort_children (parent);
438 arc = parent->cg.children;
440 for (arc = parent->cg.children; arc; arc = arc->next_child)
442 child = arc->child;
443 if (child == parent || (child->cg.cyc.num != 0
444 && child->cg.cyc.num == parent->cg.cyc.num))
446 /* Self call or call to sibling. */
447 printf (bsd_style_output
448 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
449 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
450 "", "", "", "", arc->count, "");
451 print_name (child);
452 printf ("\n");
454 else
456 /* Regular child of parent. */
457 printf (bsd_style_output
458 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
459 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
460 "", "",
461 arc->time / hz, arc->child_time / hz,
462 arc->count, child->cg.cyc.head->ncalls);
463 print_name (child);
464 printf ("\n");
470 static void
471 print_line (Sym *np)
473 char buf[BUFSIZ];
475 sprintf (buf, "[%d]", np->cg.index);
476 printf (bsd_style_output
477 ? "%-6.6s %5.1f %7.2f %11.2f"
478 : "%-6.6s %5.1f %7.2f %7.2f", buf,
479 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
480 np->cg.prop.self / hz, np->cg.prop.child / hz);
482 if ((np->ncalls + np->cg.self_calls) != 0)
484 printf (" %7lu", np->ncalls);
486 if (np->cg.self_calls != 0)
487 printf ("+%-7lu ", np->cg.self_calls);
488 else
489 printf (" %7.7s ", "");
491 else
493 printf (" %7.7s %7.7s ", "", "");
496 print_name (np);
497 printf ("\n");
501 /* Print dynamic call graph. */
503 void
504 cg_print (Sym ** timesortsym)
506 unsigned int sym_index;
507 Sym *parent;
509 if (print_descriptions && bsd_style_output)
510 bsd_callg_blurb (stdout);
512 print_header ();
514 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
516 parent = timesortsym[sym_index];
518 if ((ignore_zeros && parent->ncalls == 0
519 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
520 && parent->cg.prop.child == 0)
521 || !parent->cg.print_flag
522 || (line_granularity && ! parent->is_func))
523 continue;
525 if (!parent->name && parent->cg.cyc.num != 0)
527 /* Cycle header. */
528 print_cycle (parent);
529 print_members (parent);
531 else
533 print_parents (parent);
534 print_line (parent);
535 print_children (parent);
538 if (bsd_style_output)
539 printf ("\n");
541 printf ("-----------------------------------------------\n");
543 if (bsd_style_output)
544 printf ("\n");
547 free (timesortsym);
549 if (print_descriptions && !bsd_style_output)
550 fsf_callg_blurb (stdout);
554 static int
555 cmp_name (const PTR left, const PTR right)
557 const Sym **npp1 = (const Sym **) left;
558 const Sym **npp2 = (const Sym **) right;
560 return strcmp ((*npp1)->name, (*npp2)->name);
564 void
565 cg_print_index ()
567 unsigned int sym_index;
568 unsigned int nnames, todo, i, j;
569 int col, starting_col;
570 Sym **name_sorted_syms, *sym;
571 const char *filename;
572 char buf[20];
573 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
575 /* Now, sort regular function name
576 alphabetically to create an index. */
577 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
579 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
581 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
582 && symtab.base[sym_index].hist.time == 0)
583 continue;
585 name_sorted_syms[nnames++] = &symtab.base[sym_index];
588 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
590 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
591 name_sorted_syms[todo++] = &cycle_header[sym_index];
593 printf ("\f\n");
594 printf (_("Index by function name\n\n"));
595 sym_index = (todo + 2) / 3;
597 for (i = 0; i < sym_index; i++)
599 col = 0;
600 starting_col = 0;
602 for (j = i; j < todo; j += sym_index)
604 sym = name_sorted_syms[j];
606 if (sym->cg.print_flag)
607 sprintf (buf, "[%d]", sym->cg.index);
608 else
609 sprintf (buf, "(%d)", sym->cg.index);
611 if (j < nnames)
613 if (bsd_style_output)
615 printf ("%6.6s %-19.19s", buf, sym->name);
617 else
619 col += strlen (buf);
621 for (; col < starting_col + 5; ++col)
622 putchar (' ');
624 printf (" %s ", buf);
625 col += print_name_only (sym);
627 if (!line_granularity && sym->is_static && sym->file)
629 filename = sym->file->name;
631 if (!print_path)
633 filename = strrchr (filename, '/');
635 if (filename)
636 ++filename;
637 else
638 filename = sym->file->name;
641 printf (" (%s)", filename);
642 col += strlen (filename) + 3;
646 else
648 if (bsd_style_output)
650 printf ("%6.6s ", buf);
651 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
652 printf ("%-19.19s", buf);
654 else
656 col += strlen (buf);
657 for (; col < starting_col + 5; ++col)
658 putchar (' ');
659 printf (" %s ", buf);
660 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
661 printf ("%s", buf);
662 col += strlen (buf);
666 starting_col += column_width;
669 printf ("\n");
672 free (name_sorted_syms);
675 /* Compare two arcs based on their usage counts.
676 We want to sort in descending order. */
678 static int
679 cmp_arc_count (const PTR left, const PTR right)
681 const Arc **npp1 = (const Arc **) left;
682 const Arc **npp2 = (const Arc **) right;
684 if ((*npp1)->count > (*npp2)->count)
685 return -1;
686 else if ((*npp1)->count < (*npp2)->count)
687 return 1;
688 else
689 return 0;
692 /* Compare two funtions based on their usage counts.
693 We want to sort in descending order. */
695 static int
696 cmp_fun_nuses (const PTR left, const PTR right)
698 const Sym **npp1 = (const Sym **) left;
699 const Sym **npp2 = (const Sym **) right;
701 if ((*npp1)->nuses > (*npp2)->nuses)
702 return -1;
703 else if ((*npp1)->nuses < (*npp2)->nuses)
704 return 1;
705 else
706 return 0;
709 /* Print a suggested function ordering based on the profiling data.
711 We perform 4 major steps when ordering functions:
713 * Group unused functions together and place them at the
714 end of the function order.
716 * Search the highest use arcs (those which account for 90% of
717 the total arc count) for functions which have several parents.
719 Group those with the most call sites together (currently the
720 top 1.25% which have at least five different call sites).
722 These are emitted at the start of the function order.
724 * Use a greedy placement algorithm to place functions which
725 occur in the top 99% of the arcs in the profile. Some provisions
726 are made to handle high usage arcs where the parent and/or
727 child has already been placed.
729 * Run the same greedy placement algorithm on the remaining
730 arcs to place the leftover functions.
733 The various "magic numbers" should (one day) be tuneable by command
734 line options. They were arrived at by benchmarking a few applications
735 with various values to see which values produced better overall function
736 orderings.
738 Of course, profiling errors, machine limitations (PA long calls), and
739 poor cutoff values for the placement algorithm may limit the usefullness
740 of the resulting function order. Improvements would be greatly appreciated.
742 Suggestions:
744 * Place the functions with many callers near the middle of the
745 list to reduce long calls.
747 * Propagate arc usage changes as functions are placed. Ie if
748 func1 and func2 are placed together, arcs to/from those arcs
749 to the same parent/child should be combined, then resort the
750 arcs to choose the next one.
752 * Implement some global positioning algorithm to place the
753 chains made by the greedy local positioning algorithm. Probably
754 by examining arcs which haven't been placed yet to tie two
755 chains together.
757 * Take a function's size and time into account in the algorithm;
758 size in particular is important on the PA (long calls). Placing
759 many small functions onto their own page may be wise.
761 * Use better profiling information; many published algorithms
762 are based on call sequences through time, rather than just
763 arc counts.
765 * Prodecure cloning could improve performance when a small number
766 of arcs account for most of the calls to a particular function.
768 * Use relocation information to avoid moving unused functions
769 completely out of the code stream; this would avoid severe lossage
770 when the profile data bears little resemblance to actual runs.
772 * Propagation of arc usages should also improve .o link line
773 ordering which shares the same arc placement algorithm with
774 the function ordering code (in fact it is a degenerate case
775 of function ordering). */
777 void
778 cg_print_function_ordering (void)
780 unsigned long sym_index;
781 unsigned long arc_index;
782 unsigned long used, unused, scratch_index;
783 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
784 #ifdef __GNUC__
785 unsigned long long total_arcs, tmp_arcs_count;
786 #else
787 unsigned long total_arcs, tmp_arcs_count;
788 #endif
789 Sym **unused_syms, **used_syms, **scratch_syms;
790 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
792 sym_index = 0;
793 used = 0;
794 unused = 0;
795 scratch_index = 0;
796 unplaced_arc_count = 0;
797 high_arc_count = 0;
798 scratch_arc_count = 0;
800 /* First group all the unused functions together. */
801 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
808 /* Walk through all the functions; mark those which are never
809 called as placed (we'll emit them as a group later). */
810 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
812 if (symtab.base[sym_index].ncalls == 0)
814 unused_syms[unused++] = &symtab.base[sym_index];
815 symtab.base[sym_index].has_been_placed = 1;
817 else
819 used_syms[used++] = &symtab.base[sym_index];
820 symtab.base[sym_index].has_been_placed = 0;
821 symtab.base[sym_index].next = 0;
822 symtab.base[sym_index].prev = 0;
823 symtab.base[sym_index].nuses = 0;
827 /* Sort the arcs from most used to least used. */
828 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
830 /* Compute the total arc count. Also mark arcs as unplaced.
832 Note we don't compensate for overflow if that happens!
833 Overflow is much less likely when this file is compiled
834 with GCC as it can double-wide integers via long long. */
835 total_arcs = 0;
836 for (arc_index = 0; arc_index < numarcs; arc_index++)
838 total_arcs += arcs[arc_index]->count;
839 arcs[arc_index]->has_been_placed = 0;
842 /* We want to pull out those functions which are referenced
843 by many highly used arcs and emit them as a group. This
844 could probably use some tuning. */
845 tmp_arcs_count = 0;
846 for (arc_index = 0; arc_index < numarcs; arc_index++)
848 tmp_arcs_count += arcs[arc_index]->count;
850 /* Count how many times each parent and child are used up
851 to our threshhold of arcs (90%). */
852 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
853 break;
855 arcs[arc_index]->child->nuses++;
858 /* Now sort a temporary symbol table based on the number of
859 times each function was used in the highest used arcs. */
860 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
861 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
863 /* Now pick out those symbols we're going to emit as
864 a group. We take up to 1.25% of the used symbols. */
865 for (sym_index = 0; sym_index < used / 80; sym_index++)
867 Sym *sym = scratch_syms[sym_index];
868 Arc *arc;
870 /* If we hit symbols that aren't used from many call sites,
871 then we can quit. We choose five as the low limit for
872 no particular reason. */
873 if (sym->nuses == 5)
874 break;
876 /* We're going to need the arcs between these functions.
877 Unfortunately, we don't know all these functions
878 until we're done. So we keep track of all the arcs
879 to the functions we care about, then prune out those
880 which are uninteresting.
882 An interesting variation would be to quit when we found
883 multi-call site functions which account for some percentage
884 of the arcs. */
885 arc = sym->cg.children;
887 while (arc)
889 if (arc->parent != arc->child)
890 scratch_arcs[scratch_arc_count++] = arc;
891 arc->has_been_placed = 1;
892 arc = arc->next_child;
895 arc = sym->cg.parents;
897 while (arc)
899 if (arc->parent != arc->child)
900 scratch_arcs[scratch_arc_count++] = arc;
901 arc->has_been_placed = 1;
902 arc = arc->next_parent;
905 /* Keep track of how many symbols we're going to place. */
906 scratch_index = sym_index;
908 /* A lie, but it makes identifying
909 these functions easier later. */
910 sym->has_been_placed = 1;
913 /* Now walk through the temporary arcs and copy
914 those we care about into the high arcs array. */
915 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
917 Arc *arc = scratch_arcs[arc_index];
919 /* If this arc refers to highly used functions, then
920 then we want to keep it. */
921 if (arc->child->has_been_placed
922 && arc->parent->has_been_placed)
924 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
926 /* We need to turn of has_been_placed since we're going to
927 use the main arc placement algorithm on these arcs. */
928 arc->child->has_been_placed = 0;
929 arc->parent->has_been_placed = 0;
933 /* Dump the multi-site high usage functions which are not
934 going to be ordered by the main ordering algorithm. */
935 for (sym_index = 0; sym_index < scratch_index; sym_index++)
937 if (scratch_syms[sym_index]->has_been_placed)
938 printf ("%s\n", scratch_syms[sym_index]->name);
941 /* Now we can order the multi-site high use
942 functions based on the arcs between them. */
943 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
944 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
945 unplaced_arcs, &unplaced_arc_count);
947 /* Order and dump the high use functions left,
948 these typically have only a few call sites. */
949 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
950 unplaced_arcs, &unplaced_arc_count);
952 /* Now place the rarely used functions. */
953 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
954 scratch_arcs, &scratch_arc_count);
956 /* Output any functions not emitted by the order_and_dump calls. */
957 for (sym_index = 0; sym_index < used; sym_index++)
958 if (used_syms[sym_index]->has_been_placed == 0)
959 printf("%s\n", used_syms[sym_index]->name);
961 /* Output the unused functions. */
962 for (sym_index = 0; sym_index < unused; sym_index++)
963 printf("%s\n", unused_syms[sym_index]->name);
965 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
972 free (unused_syms);
973 free (used_syms);
974 free (scratch_syms);
975 free (high_arcs);
976 free (scratch_arcs);
977 free (unplaced_arcs);
980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
981 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
983 If ALL is nonzero, then place all functions referenced by THE_ARCS,
984 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
986 #define MOST 0.99
987 static void
988 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
989 unplaced_arcs, unplaced_arc_count)
990 Arc **the_arcs;
991 unsigned long arc_count;
992 int all;
993 Arc **unplaced_arcs;
994 unsigned long *unplaced_arc_count;
996 #ifdef __GNUC__
997 unsigned long long tmp_arcs, total_arcs;
998 #else
999 unsigned long tmp_arcs, total_arcs;
1000 #endif
1001 unsigned int arc_index;
1003 /* If needed, compute the total arc count.
1005 Note we don't compensate for overflow if that happens! */
1006 if (! all)
1008 total_arcs = 0;
1009 for (arc_index = 0; arc_index < arc_count; arc_index++)
1010 total_arcs += the_arcs[arc_index]->count;
1012 else
1013 total_arcs = 0;
1015 tmp_arcs = 0;
1017 for (arc_index = 0; arc_index < arc_count; arc_index++)
1019 Sym *sym1, *sym2;
1020 Sym *child, *parent;
1022 tmp_arcs += the_arcs[arc_index]->count;
1024 /* Ignore this arc if it's already been placed. */
1025 if (the_arcs[arc_index]->has_been_placed)
1026 continue;
1028 child = the_arcs[arc_index]->child;
1029 parent = the_arcs[arc_index]->parent;
1031 /* If we're not using all arcs, and this is a rarely used
1032 arc, then put it on the unplaced_arc list. Similarly
1033 if both the parent and child of this arc have been placed. */
1034 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1035 || child->has_been_placed || parent->has_been_placed)
1037 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1038 continue;
1041 /* If all slots in the parent and child are full, then there isn't
1042 anything we can do right now. We'll place this arc on the
1043 unplaced arc list in the hope that a global positioning
1044 algorithm can use it to place function chains. */
1045 if (parent->next && parent->prev && child->next && child->prev)
1047 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1048 continue;
1051 /* If the parent is unattached, then find the closest
1052 place to attach it onto child's chain. Similarly
1053 for the opposite case. */
1054 if (!parent->next && !parent->prev)
1056 int next_count = 0;
1057 int prev_count = 0;
1058 Sym *prev = child;
1059 Sym *next = child;
1061 /* Walk to the beginning and end of the child's chain. */
1062 while (next->next)
1064 next = next->next;
1065 next_count++;
1068 while (prev->prev)
1070 prev = prev->prev;
1071 prev_count++;
1074 /* Choose the closest. */
1075 child = next_count < prev_count ? next : prev;
1077 else if (! child->next && !child->prev)
1079 int next_count = 0;
1080 int prev_count = 0;
1081 Sym *prev = parent;
1082 Sym *next = parent;
1084 while (next->next)
1086 next = next->next;
1087 next_count++;
1090 while (prev->prev)
1092 prev = prev->prev;
1093 prev_count++;
1096 parent = prev_count < next_count ? prev : next;
1098 else
1100 /* Couldn't find anywhere to attach the functions,
1101 put the arc on the unplaced arc list. */
1102 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1103 continue;
1106 /* Make sure we don't tie two ends together. */
1107 sym1 = parent;
1108 if (sym1->next)
1109 while (sym1->next)
1110 sym1 = sym1->next;
1111 else
1112 while (sym1->prev)
1113 sym1 = sym1->prev;
1115 sym2 = child;
1116 if (sym2->next)
1117 while (sym2->next)
1118 sym2 = sym2->next;
1119 else
1120 while (sym2->prev)
1121 sym2 = sym2->prev;
1123 if (sym1 == child
1124 && sym2 == parent)
1126 /* This would tie two ends together. */
1127 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1128 continue;
1131 if (parent->next)
1133 /* Must attach to the parent's prev field. */
1134 if (! child->next)
1136 /* parent-prev and child-next */
1137 parent->prev = child;
1138 child->next = parent;
1139 the_arcs[arc_index]->has_been_placed = 1;
1142 else if (parent->prev)
1144 /* Must attach to the parent's next field. */
1145 if (! child->prev)
1147 /* parent-next and child-prev */
1148 parent->next = child;
1149 child->prev = parent;
1150 the_arcs[arc_index]->has_been_placed = 1;
1153 else
1155 /* Can attach to either field in the parent, depends
1156 on where we've got space in the child. */
1157 if (child->prev)
1159 /* parent-prev and child-next. */
1160 parent->prev = child;
1161 child->next = parent;
1162 the_arcs[arc_index]->has_been_placed = 1;
1164 else
1166 /* parent-next and child-prev. */
1167 parent->next = child;
1168 child->prev = parent;
1169 the_arcs[arc_index]->has_been_placed = 1;
1174 /* Dump the chains of functions we've made. */
1175 for (arc_index = 0; arc_index < arc_count; arc_index++)
1177 Sym *sym;
1178 if (the_arcs[arc_index]->parent->has_been_placed
1179 || the_arcs[arc_index]->child->has_been_placed)
1180 continue;
1182 sym = the_arcs[arc_index]->parent;
1184 /* If this symbol isn't attached to any other
1185 symbols, then we've got a rarely used arc.
1187 Skip it for now, we'll deal with them later. */
1188 if (sym->next == NULL
1189 && sym->prev == NULL)
1190 continue;
1192 /* Get to the start of this chain. */
1193 while (sym->prev)
1194 sym = sym->prev;
1196 while (sym)
1198 /* Mark it as placed. */
1199 sym->has_been_placed = 1;
1200 printf ("%s\n", sym->name);
1201 sym = sym->next;
1205 /* If we want to place all the arcs, then output
1206 those which weren't placed by the main algorithm. */
1207 if (all)
1208 for (arc_index = 0; arc_index < arc_count; arc_index++)
1210 Sym *sym;
1211 if (the_arcs[arc_index]->parent->has_been_placed
1212 || the_arcs[arc_index]->child->has_been_placed)
1213 continue;
1215 sym = the_arcs[arc_index]->parent;
1217 sym->has_been_placed = 1;
1218 printf ("%s\n", sym->name);
1222 /* Compare two function_map structs based on file name.
1223 We want to sort in ascending order. */
1225 static int
1226 cmp_symbol_map (const void * l, const void * r)
1228 return filename_cmp (((struct function_map *) l)->file_name,
1229 ((struct function_map *) r)->file_name);
1232 /* Print a suggested .o ordering for files on a link line based
1233 on profiling information. This uses the function placement
1234 code for the bulk of its work. */
1236 void
1237 cg_print_file_ordering (void)
1239 unsigned long scratch_arc_count;
1240 unsigned long arc_index;
1241 unsigned long sym_index;
1242 Arc **scratch_arcs;
1243 char *last;
1245 scratch_arc_count = 0;
1247 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1248 for (arc_index = 0; arc_index < numarcs; arc_index++)
1250 if (! arcs[arc_index]->parent->mapped
1251 || ! arcs[arc_index]->child->mapped)
1252 arcs[arc_index]->has_been_placed = 1;
1255 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1256 scratch_arcs, &scratch_arc_count);
1258 /* Output .o's not handled by the main placement algorithm. */
1259 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1261 if (symtab.base[sym_index].mapped
1262 && ! symtab.base[sym_index].has_been_placed)
1263 printf ("%s\n", symtab.base[sym_index].name);
1266 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1268 /* Now output any .o's that didn't have any text symbols. */
1269 last = NULL;
1270 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1272 unsigned int index2;
1274 /* Don't bother searching if this symbol
1275 is the same as the previous one. */
1276 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1277 continue;
1279 for (index2 = 0; index2 < symtab.len; index2++)
1281 if (! symtab.base[index2].mapped)
1282 continue;
1284 if (!filename_cmp (symtab.base[index2].name,
1285 symbol_map[sym_index].file_name))
1286 break;
1289 /* If we didn't find it in the symbol table, then it must
1290 be a .o with no text symbols. Output it last. */
1291 if (index2 == symtab.len)
1292 printf ("%s\n", symbol_map[sym_index].file_name);
1293 last = symbol_map[sym_index].file_name;