1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright (C) 2000-2024 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
23 #include "libiberty.h"
24 #include "filenames.h"
25 #include "search_list.h"
34 /* Return value of comparison functions used to sort tables. */
39 static void print_header (void);
40 static void print_cycle (Sym
*);
41 static int cmp_member (Sym
*, Sym
*);
42 static void sort_members (Sym
*);
43 static void print_members (Sym
*);
44 static int cmp_arc (Arc
*, Arc
*);
45 static void sort_parents (Sym
*);
46 static void print_parents (Sym
*);
47 static void sort_children (Sym
*);
48 static void print_children (Sym
*);
49 static void print_line (Sym
*);
50 static int cmp_name (const void *, const void *);
51 static int cmp_arc_count (const void *, const void *);
52 static int cmp_fun_nuses (const void *, const void *);
53 static void order_and_dump_functions_by_arcs
54 (Arc
**, unsigned long, int, Arc
**, unsigned long *);
56 /* Declarations of automatically generated functions to output blurbs. */
57 extern void bsd_callg_blurb (FILE * fp
);
58 extern void fsf_callg_blurb (FILE * fp
);
60 double print_time
= 0.0;
71 if (!bsd_style_output
)
73 if (print_descriptions
)
74 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 printf (_("\t\t\tCall graph\n\n"));
79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 (long) hist_scale
* (long) sizeof (UNIT
));
83 printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 100.0 / print_time
, print_time
/ hz
);
87 printf (_(" no time propagated\n\n"));
89 /* This doesn't hurt, since all the numerators will be 0.0. */
95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96 "", "", "", "", _("called"), _("total"), _("parents"));
97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 /* xgettext:no-c-format */
101 _("self"), _("descendants"), _("called"), _("self"),
102 _("name"), _("index"));
103 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
104 "", "", "", "", _("called"), _("total"), _("children"));
109 printf (_("index %% time self children called name\n"));
113 /* Print a cycle header. */
116 print_cycle (Sym
*cyc
)
120 sprintf (buf
, "[%d]", cyc
->cg
.index
);
121 printf (bsd_style_output
122 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
123 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf
,
124 100 * (cyc
->cg
.prop
.self
+ cyc
->cg
.prop
.child
) / print_time
,
125 cyc
->cg
.prop
.self
/ hz
, cyc
->cg
.prop
.child
/ hz
, cyc
->ncalls
);
127 if (cyc
->cg
.self_calls
!= 0)
128 printf ("+%-7lu", cyc
->cg
.self_calls
);
130 printf (" %7.7s", "");
132 printf (_(" <cycle %d as a whole> [%d]\n"), cyc
->cg
.cyc
.num
, cyc
->cg
.index
);
135 /* Compare LEFT and RIGHT membmer. Major comparison key is
136 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
139 cmp_member (Sym
*left
, Sym
*right
)
141 double left_time
= left
->cg
.prop
.self
+ left
->cg
.prop
.child
;
142 double right_time
= right
->cg
.prop
.self
+ right
->cg
.prop
.child
;
143 unsigned long left_calls
= left
->ncalls
+ left
->cg
.self_calls
;
144 unsigned long right_calls
= right
->ncalls
+ right
->cg
.self_calls
;
146 if (left_time
> right_time
)
149 if (left_time
< right_time
)
152 if (left_calls
> right_calls
)
155 if (left_calls
< right_calls
)
161 /* Sort members of a cycle. */
164 sort_members (Sym
*cyc
)
166 Sym
*todo
, *doing
, *prev
;
168 /* Detach cycle members from cyclehead,
169 and insertion sort them back on. */
170 todo
= cyc
->cg
.cyc
.next
;
171 cyc
->cg
.cyc
.next
= 0;
173 for (doing
= todo
; doing
!= NULL
; doing
= todo
)
175 todo
= doing
->cg
.cyc
.next
;
177 for (prev
= cyc
; prev
->cg
.cyc
.next
; prev
= prev
->cg
.cyc
.next
)
179 if (cmp_member (doing
, prev
->cg
.cyc
.next
) == GREATERTHAN
)
183 doing
->cg
.cyc
.next
= prev
->cg
.cyc
.next
;
184 prev
->cg
.cyc
.next
= doing
;
188 /* Print the members of a cycle. */
191 print_members (Sym
*cyc
)
197 for (member
= cyc
->cg
.cyc
.next
; member
; member
= member
->cg
.cyc
.next
)
199 printf (bsd_style_output
200 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
201 : "%6.6s %5.5s %7.2f %7.2f %7lu",
202 "", "", member
->cg
.prop
.self
/ hz
, member
->cg
.prop
.child
/ hz
,
205 if (member
->cg
.self_calls
!= 0)
206 printf ("+%-7lu", member
->cg
.self_calls
);
208 printf (" %7.7s", "");
216 /* Compare two arcs to/from the same child/parent.
217 - if one arc is a self arc, it's least.
218 - if one arc is within a cycle, it's less than.
219 - if both arcs are within a cycle, compare arc counts.
220 - if neither arc is within a cycle, compare with
221 time + child_time as major key
222 arc count as minor key. */
225 cmp_arc (Arc
*left
, Arc
*right
)
227 Sym
*left_parent
= left
->parent
;
228 Sym
*left_child
= left
->child
;
229 Sym
*right_parent
= right
->parent
;
230 Sym
*right_child
= right
->child
;
231 double left_time
, right_time
;
234 printf ("[cmp_arc] ");
235 print_name (left_parent
);
237 print_name (left_child
);
238 printf (" %f + %f %lu/%lu\n", left
->time
, left
->child_time
,
239 left
->count
, left_child
->ncalls
);
240 printf ("[cmp_arc] ");
241 print_name (right_parent
);
243 print_name (right_child
);
244 printf (" %f + %f %lu/%lu\n", right
->time
, right
->child_time
,
245 right
->count
, right_child
->ncalls
);
249 if (left_parent
== left_child
)
250 return LESSTHAN
; /* Left is a self call. */
252 if (right_parent
== right_child
)
253 return GREATERTHAN
; /* Right is a self call. */
255 if (left_parent
->cg
.cyc
.num
!= 0 && left_child
->cg
.cyc
.num
!= 0
256 && left_parent
->cg
.cyc
.num
== left_child
->cg
.cyc
.num
)
258 /* Left is a call within a cycle. */
259 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
260 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
262 /* Right is a call within the cycle, too. */
263 if (left
->count
< right
->count
)
266 if (left
->count
> right
->count
)
273 /* Right isn't a call within the cycle. */
279 /* Left isn't a call within a cycle. */
280 if (right_parent
->cg
.cyc
.num
!= 0 && right_child
->cg
.cyc
.num
!= 0
281 && right_parent
->cg
.cyc
.num
== right_child
->cg
.cyc
.num
)
283 /* Right is a call within a cycle. */
288 /* Neither is a call within a cycle. */
289 left_time
= left
->time
+ left
->child_time
;
290 right_time
= right
->time
+ right
->child_time
;
292 if (left_time
< right_time
)
295 if (left_time
> right_time
)
298 if (left
->count
< right
->count
)
301 if (left
->count
> right
->count
)
311 sort_parents (Sym
* child
)
313 Arc
*arc
, *detached
, sorted
, *prev
;
315 /* Unlink parents from child, then insertion sort back on to
317 *arc the arc you have detached and are inserting.
318 *detached the rest of the arcs to be sorted.
319 sorted arc list onto which you insertion sort.
320 *prev arc before the arc you are comparing. */
321 sorted
.next_parent
= 0;
323 for (arc
= child
->cg
.parents
; arc
; arc
= detached
)
325 detached
= arc
->next_parent
;
327 /* Consider *arc as disconnected; insert it into sorted. */
328 for (prev
= &sorted
; prev
->next_parent
; prev
= prev
->next_parent
)
330 if (cmp_arc (arc
, prev
->next_parent
) != GREATERTHAN
)
334 arc
->next_parent
= prev
->next_parent
;
335 prev
->next_parent
= arc
;
338 /* Reattach sorted arcs to child. */
339 child
->cg
.parents
= sorted
.next_parent
;
344 print_parents (Sym
*child
)
350 if (child
->cg
.cyc
.head
!= 0)
351 cycle_head
= child
->cg
.cyc
.head
;
355 if (!child
->cg
.parents
)
357 printf (bsd_style_output
358 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
359 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
360 "", "", "", "", "", "");
364 sort_parents (child
);
366 for (arc
= child
->cg
.parents
; arc
; arc
= arc
->next_parent
)
368 parent
= arc
->parent
;
369 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
370 && parent
->cg
.cyc
.num
== child
->cg
.cyc
.num
))
372 /* Selfcall or call among siblings. */
373 printf (bsd_style_output
374 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
375 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
383 /* Regular parent of child. */
384 printf (bsd_style_output
385 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
386 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
388 arc
->time
/ hz
, arc
->child_time
/ hz
,
389 arc
->count
, cycle_head
->ncalls
);
398 sort_children (Sym
*parent
)
400 Arc
*arc
, *detached
, sorted
, *prev
;
402 /* Unlink children from parent, then insertion sort back on to
404 *arc the arc you have detached and are inserting.
405 *detached the rest of the arcs to be sorted.
406 sorted arc list onto which you insertion sort.
407 *prev arc before the arc you are comparing. */
408 sorted
.next_child
= 0;
410 for (arc
= parent
->cg
.children
; arc
; arc
= detached
)
412 detached
= arc
->next_child
;
414 /* Consider *arc as disconnected; insert it into sorted. */
415 for (prev
= &sorted
; prev
->next_child
; prev
= prev
->next_child
)
417 if (cmp_arc (arc
, prev
->next_child
) != LESSTHAN
)
421 arc
->next_child
= prev
->next_child
;
422 prev
->next_child
= arc
;
425 /* Reattach sorted children to parent. */
426 parent
->cg
.children
= sorted
.next_child
;
431 print_children (Sym
*parent
)
436 sort_children (parent
);
437 arc
= parent
->cg
.children
;
439 for (arc
= parent
->cg
.children
; arc
; arc
= arc
->next_child
)
442 if (child
== parent
|| (child
->cg
.cyc
.num
!= 0
443 && child
->cg
.cyc
.num
== parent
->cg
.cyc
.num
))
445 /* Self call or call to sibling. */
446 printf (bsd_style_output
447 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
448 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
449 "", "", "", "", arc
->count
, "");
455 /* Regular child of parent. */
456 printf (bsd_style_output
457 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
458 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
460 arc
->time
/ hz
, arc
->child_time
/ hz
,
461 arc
->count
, child
->cg
.cyc
.head
->ncalls
);
474 sprintf (buf
, "[%d]", np
->cg
.index
);
475 printf (bsd_style_output
476 ? "%-6.6s %5.1f %7.2f %11.2f"
477 : "%-6.6s %5.1f %7.2f %7.2f", buf
,
478 100 * (np
->cg
.prop
.self
+ np
->cg
.prop
.child
) / print_time
,
479 np
->cg
.prop
.self
/ hz
, np
->cg
.prop
.child
/ hz
);
481 if ((np
->ncalls
+ np
->cg
.self_calls
) != 0)
483 printf (" %7lu", np
->ncalls
);
485 if (np
->cg
.self_calls
!= 0)
486 printf ("+%-7lu ", np
->cg
.self_calls
);
488 printf (" %7.7s ", "");
492 printf (" %7.7s %7.7s ", "", "");
500 /* Print dynamic call graph. */
503 cg_print (Sym
** timesortsym
)
505 unsigned int sym_index
;
508 if (print_descriptions
&& bsd_style_output
)
509 bsd_callg_blurb (stdout
);
513 for (sym_index
= 0; sym_index
< symtab
.len
+ num_cycles
; ++sym_index
)
515 parent
= timesortsym
[sym_index
];
517 if ((ignore_zeros
&& parent
->ncalls
== 0
518 && parent
->cg
.self_calls
== 0 && parent
->cg
.prop
.self
== 0
519 && parent
->cg
.prop
.child
== 0)
520 || !parent
->cg
.print_flag
521 || (line_granularity
&& ! parent
->is_func
))
524 if (!parent
->name
&& parent
->cg
.cyc
.num
!= 0)
527 print_cycle (parent
);
528 print_members (parent
);
532 print_parents (parent
);
534 print_children (parent
);
537 if (bsd_style_output
)
540 printf ("-----------------------------------------------\n");
542 if (bsd_style_output
)
548 if (print_descriptions
&& !bsd_style_output
)
549 fsf_callg_blurb (stdout
);
554 cmp_name (const void *left
, const void *right
)
556 const Sym
**npp1
= (const Sym
**) left
;
557 const Sym
**npp2
= (const Sym
**) right
;
559 return strcmp ((*npp1
)->name
, (*npp2
)->name
);
564 cg_print_index (void)
566 unsigned int sym_index
;
567 unsigned int nnames
, todo
, i
, j
;
568 int col
, starting_col
;
569 Sym
**name_sorted_syms
, *sym
;
570 const char *filename
;
572 int column_width
= (output_width
- 1) / 3; /* Don't write in last col! */
574 /* Now, sort regular function name
575 alphabetically to create an index. */
576 name_sorted_syms
= (Sym
**) xmalloc ((symtab
.len
+ num_cycles
) * sizeof (Sym
*));
578 for (sym_index
= 0, nnames
= 0; sym_index
< symtab
.len
; sym_index
++)
580 if (ignore_zeros
&& symtab
.base
[sym_index
].ncalls
== 0
581 && symtab
.base
[sym_index
].hist
.time
== 0)
584 name_sorted_syms
[nnames
++] = &symtab
.base
[sym_index
];
587 qsort (name_sorted_syms
, nnames
, sizeof (Sym
*), cmp_name
);
589 for (sym_index
= 1, todo
= nnames
; sym_index
<= num_cycles
; sym_index
++)
590 name_sorted_syms
[todo
++] = &cycle_header
[sym_index
];
593 printf (_("Index by function name\n\n"));
594 sym_index
= (todo
+ 2) / 3;
596 for (i
= 0; i
< sym_index
; i
++)
601 for (j
= i
; j
< todo
; j
+= sym_index
)
603 sym
= name_sorted_syms
[j
];
605 if (sym
->cg
.print_flag
)
606 sprintf (buf
, "[%d]", sym
->cg
.index
);
608 sprintf (buf
, "(%d)", sym
->cg
.index
);
612 if (bsd_style_output
)
614 printf ("%6.6s %-19.19s", buf
, sym
->name
);
620 for (; col
< starting_col
+ 5; ++col
)
623 printf (" %s ", buf
);
624 col
+= print_name_only (sym
);
626 if (!line_granularity
&& sym
->is_static
&& sym
->file
)
628 filename
= sym
->file
->name
;
632 filename
= strrchr (filename
, '/');
637 filename
= sym
->file
->name
;
640 printf (" (%s)", filename
);
641 col
+= strlen (filename
) + 3;
647 if (bsd_style_output
)
649 printf ("%6.6s ", buf
);
650 sprintf (buf
, _("<cycle %d>"), sym
->cg
.cyc
.num
);
651 printf ("%-19.19s", buf
);
656 for (; col
< starting_col
+ 5; ++col
)
658 printf (" %s ", buf
);
659 sprintf (buf
, _("<cycle %d>"), sym
->cg
.cyc
.num
);
665 starting_col
+= column_width
;
671 free (name_sorted_syms
);
674 /* Compare two arcs based on their usage counts.
675 We want to sort in descending order. */
678 cmp_arc_count (const void *left
, const void *right
)
680 const Arc
**npp1
= (const Arc
**) left
;
681 const Arc
**npp2
= (const Arc
**) right
;
683 if ((*npp1
)->count
> (*npp2
)->count
)
685 else if ((*npp1
)->count
< (*npp2
)->count
)
691 /* Compare two funtions based on their usage counts.
692 We want to sort in descending order. */
695 cmp_fun_nuses (const void *left
, const void *right
)
697 const Sym
**npp1
= (const Sym
**) left
;
698 const Sym
**npp2
= (const Sym
**) right
;
700 if ((*npp1
)->nuses
> (*npp2
)->nuses
)
702 else if ((*npp1
)->nuses
< (*npp2
)->nuses
)
708 /* Print a suggested function ordering based on the profiling data.
710 We perform 4 major steps when ordering functions:
712 * Group unused functions together and place them at the
713 end of the function order.
715 * Search the highest use arcs (those which account for 90% of
716 the total arc count) for functions which have several parents.
718 Group those with the most call sites together (currently the
719 top 1.25% which have at least five different call sites).
721 These are emitted at the start of the function order.
723 * Use a greedy placement algorithm to place functions which
724 occur in the top 99% of the arcs in the profile. Some provisions
725 are made to handle high usage arcs where the parent and/or
726 child has already been placed.
728 * Run the same greedy placement algorithm on the remaining
729 arcs to place the leftover functions.
732 The various "magic numbers" should (one day) be tuneable by command
733 line options. They were arrived at by benchmarking a few applications
734 with various values to see which values produced better overall function
737 Of course, profiling errors, machine limitations (PA long calls), and
738 poor cutoff values for the placement algorithm may limit the usefullness
739 of the resulting function order. Improvements would be greatly appreciated.
743 * Place the functions with many callers near the middle of the
744 list to reduce long calls.
746 * Propagate arc usage changes as functions are placed. Ie if
747 func1 and func2 are placed together, arcs to/from those arcs
748 to the same parent/child should be combined, then resort the
749 arcs to choose the next one.
751 * Implement some global positioning algorithm to place the
752 chains made by the greedy local positioning algorithm. Probably
753 by examining arcs which haven't been placed yet to tie two
756 * Take a function's size and time into account in the algorithm;
757 size in particular is important on the PA (long calls). Placing
758 many small functions onto their own page may be wise.
760 * Use better profiling information; many published algorithms
761 are based on call sequences through time, rather than just
764 * Prodecure cloning could improve performance when a small number
765 of arcs account for most of the calls to a particular function.
767 * Use relocation information to avoid moving unused functions
768 completely out of the code stream; this would avoid severe lossage
769 when the profile data bears little resemblance to actual runs.
771 * Propagation of arc usages should also improve .o link line
772 ordering which shares the same arc placement algorithm with
773 the function ordering code (in fact it is a degenerate case
774 of function ordering). */
777 cg_print_function_ordering (void)
779 unsigned long sym_index
;
780 unsigned long arc_index
;
781 unsigned long used
, unused
, scratch_index
;
782 unsigned long unplaced_arc_count
, high_arc_count
, scratch_arc_count
;
784 unsigned long long total_arcs
, tmp_arcs_count
;
786 unsigned long total_arcs
, tmp_arcs_count
;
788 Sym
**unused_syms
, **used_syms
, **scratch_syms
;
789 Arc
**unplaced_arcs
, **high_arcs
, **scratch_arcs
;
795 unplaced_arc_count
= 0;
797 scratch_arc_count
= 0;
799 /* First group all the unused functions together. */
800 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
801 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
802 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
803 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
804 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
805 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
807 /* Walk through all the functions; mark those which are never
808 called as placed (we'll emit them as a group later). */
809 for (sym_index
= 0, used
= 0, unused
= 0; sym_index
< symtab
.len
; sym_index
++)
811 if (symtab
.base
[sym_index
].ncalls
== 0)
813 unused_syms
[unused
++] = &symtab
.base
[sym_index
];
814 symtab
.base
[sym_index
].has_been_placed
= 1;
818 used_syms
[used
++] = &symtab
.base
[sym_index
];
819 symtab
.base
[sym_index
].has_been_placed
= 0;
820 symtab
.base
[sym_index
].next
= 0;
821 symtab
.base
[sym_index
].prev
= 0;
822 symtab
.base
[sym_index
].nuses
= 0;
826 /* Sort the arcs from most used to least used. */
827 qsort (arcs
, numarcs
, sizeof (Arc
*), cmp_arc_count
);
829 /* Compute the total arc count. Also mark arcs as unplaced.
831 Note we don't compensate for overflow if that happens!
832 Overflow is much less likely when this file is compiled
833 with GCC as it can double-wide integers via long long. */
835 for (arc_index
= 0; arc_index
< numarcs
; arc_index
++)
837 total_arcs
+= arcs
[arc_index
]->count
;
838 arcs
[arc_index
]->has_been_placed
= 0;
841 /* We want to pull out those functions which are referenced
842 by many highly used arcs and emit them as a group. This
843 could probably use some tuning. */
845 for (arc_index
= 0; arc_index
< numarcs
; arc_index
++)
847 tmp_arcs_count
+= arcs
[arc_index
]->count
;
849 /* Count how many times each parent and child are used up
850 to our threshold of arcs (90%). */
851 if ((double)tmp_arcs_count
/ (double)total_arcs
> 0.90)
854 arcs
[arc_index
]->child
->nuses
++;
857 /* Now sort a temporary symbol table based on the number of
858 times each function was used in the highest used arcs. */
859 memcpy (scratch_syms
, used_syms
, used
* sizeof (Sym
*));
860 qsort (scratch_syms
, used
, sizeof (Sym
*), cmp_fun_nuses
);
862 /* Now pick out those symbols we're going to emit as
863 a group. We take up to 1.25% of the used symbols. */
864 for (sym_index
= 0; sym_index
< used
/ 80; sym_index
++)
866 Sym
*sym
= scratch_syms
[sym_index
];
869 /* If we hit symbols that aren't used from many call sites,
870 then we can quit. We choose five as the low limit for
871 no particular reason. */
875 /* We're going to need the arcs between these functions.
876 Unfortunately, we don't know all these functions
877 until we're done. So we keep track of all the arcs
878 to the functions we care about, then prune out those
879 which are uninteresting.
881 An interesting variation would be to quit when we found
882 multi-call site functions which account for some percentage
884 arc
= sym
->cg
.children
;
888 if (arc
->parent
!= arc
->child
)
889 scratch_arcs
[scratch_arc_count
++] = arc
;
890 arc
->has_been_placed
= 1;
891 arc
= arc
->next_child
;
894 arc
= sym
->cg
.parents
;
898 if (arc
->parent
!= arc
->child
)
899 scratch_arcs
[scratch_arc_count
++] = arc
;
900 arc
->has_been_placed
= 1;
901 arc
= arc
->next_parent
;
904 /* Keep track of how many symbols we're going to place. */
905 scratch_index
= sym_index
;
907 /* A lie, but it makes identifying
908 these functions easier later. */
909 sym
->has_been_placed
= 1;
912 /* Now walk through the temporary arcs and copy
913 those we care about into the high arcs array. */
914 for (arc_index
= 0; arc_index
< scratch_arc_count
; arc_index
++)
916 Arc
*arc
= scratch_arcs
[arc_index
];
918 /* If this arc refers to highly used functions, then
919 then we want to keep it. */
920 if (arc
->child
->has_been_placed
921 && arc
->parent
->has_been_placed
)
923 high_arcs
[high_arc_count
++] = scratch_arcs
[arc_index
];
925 /* We need to turn of has_been_placed since we're going to
926 use the main arc placement algorithm on these arcs. */
927 arc
->child
->has_been_placed
= 0;
928 arc
->parent
->has_been_placed
= 0;
932 /* Dump the multi-site high usage functions which are not
933 going to be ordered by the main ordering algorithm. */
934 for (sym_index
= 0; sym_index
< scratch_index
; sym_index
++)
936 if (scratch_syms
[sym_index
]->has_been_placed
)
937 printf ("%s\n", scratch_syms
[sym_index
]->name
);
940 /* Now we can order the multi-site high use
941 functions based on the arcs between them. */
942 qsort (high_arcs
, high_arc_count
, sizeof (Arc
*), cmp_arc_count
);
943 order_and_dump_functions_by_arcs (high_arcs
, high_arc_count
, 1,
944 unplaced_arcs
, &unplaced_arc_count
);
946 /* Order and dump the high use functions left,
947 these typically have only a few call sites. */
948 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
949 unplaced_arcs
, &unplaced_arc_count
);
951 /* Now place the rarely used functions. */
952 order_and_dump_functions_by_arcs (unplaced_arcs
, unplaced_arc_count
, 1,
953 scratch_arcs
, &scratch_arc_count
);
955 /* Output any functions not emitted by the order_and_dump calls. */
956 for (sym_index
= 0; sym_index
< used
; sym_index
++)
957 if (used_syms
[sym_index
]->has_been_placed
== 0)
958 printf("%s\n", used_syms
[sym_index
]->name
);
960 /* Output the unused functions. */
961 for (sym_index
= 0; sym_index
< unused
; sym_index
++)
962 printf("%s\n", unused_syms
[sym_index
]->name
);
964 unused_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
965 used_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
966 scratch_syms
= (Sym
**) xmalloc (symtab
.len
* sizeof (Sym
*));
967 high_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
968 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
969 unplaced_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
976 free (unplaced_arcs
);
979 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
980 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
982 If ALL is nonzero, then place all functions referenced by THE_ARCS,
983 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
987 order_and_dump_functions_by_arcs (Arc
**the_arcs
, unsigned long arc_count
,
988 int all
, Arc
**unplaced_arcs
,
989 unsigned long *unplaced_arc_count
)
992 unsigned long long tmp_arcs
, total_arcs
;
994 unsigned long tmp_arcs
, total_arcs
;
996 unsigned int arc_index
;
998 /* If needed, compute the total arc count.
1000 Note we don't compensate for overflow if that happens! */
1004 for (arc_index
= 0; arc_index
< arc_count
; arc_index
++)
1005 total_arcs
+= the_arcs
[arc_index
]->count
;
1012 for (arc_index
= 0; arc_index
< arc_count
; arc_index
++)
1015 Sym
*child
, *parent
;
1017 tmp_arcs
+= the_arcs
[arc_index
]->count
;
1019 /* Ignore this arc if it's already been placed. */
1020 if (the_arcs
[arc_index
]->has_been_placed
)
1023 child
= the_arcs
[arc_index
]->child
;
1024 parent
= the_arcs
[arc_index
]->parent
;
1026 /* If we're not using all arcs, and this is a rarely used
1027 arc, then put it on the unplaced_arc list. Similarly
1028 if both the parent and child of this arc have been placed. */
1029 if ((! all
&& (double)tmp_arcs
/ (double)total_arcs
> MOST
)
1030 || child
->has_been_placed
|| parent
->has_been_placed
)
1032 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[arc_index
];
1036 /* If all slots in the parent and child are full, then there isn't
1037 anything we can do right now. We'll place this arc on the
1038 unplaced arc list in the hope that a global positioning
1039 algorithm can use it to place function chains. */
1040 if (parent
->next
&& parent
->prev
&& child
->next
&& child
->prev
)
1042 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[arc_index
];
1046 /* If the parent is unattached, then find the closest
1047 place to attach it onto child's chain. Similarly
1048 for the opposite case. */
1049 if (!parent
->next
&& !parent
->prev
)
1056 /* Walk to the beginning and end of the child's chain. */
1069 /* Choose the closest. */
1070 child
= next_count
< prev_count
? next
: prev
;
1072 else if (! child
->next
&& !child
->prev
)
1091 parent
= prev_count
< next_count
? prev
: next
;
1095 /* Couldn't find anywhere to attach the functions,
1096 put the arc on the unplaced arc list. */
1097 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[arc_index
];
1101 /* Make sure we don't tie two ends together. */
1121 /* This would tie two ends together. */
1122 unplaced_arcs
[(*unplaced_arc_count
)++] = the_arcs
[arc_index
];
1128 /* Must attach to the parent's prev field. */
1131 /* parent-prev and child-next */
1132 parent
->prev
= child
;
1133 child
->next
= parent
;
1134 the_arcs
[arc_index
]->has_been_placed
= 1;
1137 else if (parent
->prev
)
1139 /* Must attach to the parent's next field. */
1142 /* parent-next and child-prev */
1143 parent
->next
= child
;
1144 child
->prev
= parent
;
1145 the_arcs
[arc_index
]->has_been_placed
= 1;
1150 /* Can attach to either field in the parent, depends
1151 on where we've got space in the child. */
1154 /* parent-prev and child-next. */
1155 parent
->prev
= child
;
1156 child
->next
= parent
;
1157 the_arcs
[arc_index
]->has_been_placed
= 1;
1161 /* parent-next and child-prev. */
1162 parent
->next
= child
;
1163 child
->prev
= parent
;
1164 the_arcs
[arc_index
]->has_been_placed
= 1;
1169 /* Dump the chains of functions we've made. */
1170 for (arc_index
= 0; arc_index
< arc_count
; arc_index
++)
1173 if (the_arcs
[arc_index
]->parent
->has_been_placed
1174 || the_arcs
[arc_index
]->child
->has_been_placed
)
1177 sym
= the_arcs
[arc_index
]->parent
;
1179 /* If this symbol isn't attached to any other
1180 symbols, then we've got a rarely used arc.
1182 Skip it for now, we'll deal with them later. */
1183 if (sym
->next
== NULL
1184 && sym
->prev
== NULL
)
1187 /* Get to the start of this chain. */
1193 /* Mark it as placed. */
1194 sym
->has_been_placed
= 1;
1195 printf ("%s\n", sym
->name
);
1200 /* If we want to place all the arcs, then output
1201 those which weren't placed by the main algorithm. */
1203 for (arc_index
= 0; arc_index
< arc_count
; arc_index
++)
1206 if (the_arcs
[arc_index
]->parent
->has_been_placed
1207 || the_arcs
[arc_index
]->child
->has_been_placed
)
1210 sym
= the_arcs
[arc_index
]->parent
;
1212 sym
->has_been_placed
= 1;
1213 printf ("%s\n", sym
->name
);
1217 /* Compare two function_map structs based on file name.
1218 We want to sort in ascending order. */
1221 cmp_symbol_map (const void * l
, const void * r
)
1223 return filename_cmp (((struct function_map
*) l
)->file_name
,
1224 ((struct function_map
*) r
)->file_name
);
1227 /* Print a suggested .o ordering for files on a link line based
1228 on profiling information. This uses the function placement
1229 code for the bulk of its work. */
1232 cg_print_file_ordering (void)
1234 unsigned long scratch_arc_count
;
1235 unsigned long arc_index
;
1236 unsigned long sym_index
;
1240 scratch_arc_count
= 0;
1242 scratch_arcs
= (Arc
**) xmalloc (numarcs
* sizeof (Arc
*));
1243 for (arc_index
= 0; arc_index
< numarcs
; arc_index
++)
1245 if (! arcs
[arc_index
]->parent
->mapped
1246 || ! arcs
[arc_index
]->child
->mapped
)
1247 arcs
[arc_index
]->has_been_placed
= 1;
1250 order_and_dump_functions_by_arcs (arcs
, numarcs
, 0,
1251 scratch_arcs
, &scratch_arc_count
);
1253 /* Output .o's not handled by the main placement algorithm. */
1254 for (sym_index
= 0; sym_index
< symtab
.len
; sym_index
++)
1256 if (symtab
.base
[sym_index
].mapped
1257 && ! symtab
.base
[sym_index
].has_been_placed
)
1258 printf ("%s\n", symtab
.base
[sym_index
].name
);
1261 qsort (symbol_map
, symbol_map_count
, sizeof (struct function_map
), cmp_symbol_map
);
1263 /* Now output any .o's that didn't have any text symbols. */
1265 for (sym_index
= 0; sym_index
< symbol_map_count
; sym_index
++)
1267 unsigned int index2
;
1269 /* Don't bother searching if this symbol
1270 is the same as the previous one. */
1271 if (last
&& !filename_cmp (last
, symbol_map
[sym_index
].file_name
))
1274 for (index2
= 0; index2
< symtab
.len
; index2
++)
1276 if (! symtab
.base
[index2
].mapped
)
1279 if (!filename_cmp (symtab
.base
[index2
].name
,
1280 symbol_map
[sym_index
].file_name
))
1284 /* If we didn't find it in the symbol table, then it must
1285 be a .o with no text symbols. Output it last. */
1286 if (index2
== symtab
.len
)
1287 printf ("%s\n", symbol_map
[sym_index
].file_name
);
1288 last
= symbol_map
[sym_index
].file_name
;