* ld-powerpc/tlsexetoc.d: Correct.
[binutils.git] / gprof / cg_print.c
blob21847027c1c7d46896bb08657136441424677c3f
1 /* cg_print.c - Print routines for displaying call graphs.
3 Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GNU Binutils.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "libiberty.h"
23 #include "gprof.h"
24 #include "search_list.h"
25 #include "source.h"
26 #include "symtab.h"
27 #include "cg_arcs.h"
28 #include "cg_print.h"
29 #include "hist.h"
30 #include "utils.h"
31 #include "corefile.h"
33 /* Return value of comparison functions used to sort tables. */
34 #define LESSTHAN -1
35 #define EQUALTO 0
36 #define GREATERTHAN 1
38 static void print_header PARAMS ((void));
39 static void print_cycle PARAMS ((Sym *));
40 static int cmp_member PARAMS ((Sym *, Sym *));
41 static void sort_members PARAMS ((Sym *));
42 static void print_members PARAMS ((Sym *));
43 static int cmp_arc PARAMS ((Arc *, Arc *));
44 static void sort_parents PARAMS ((Sym *));
45 static void print_parents PARAMS ((Sym *));
46 static void sort_children PARAMS ((Sym *));
47 static void print_children PARAMS ((Sym *));
48 static void print_line PARAMS ((Sym *));
49 static int cmp_name PARAMS ((const PTR, const PTR));
50 static int cmp_arc_count PARAMS ((const PTR, const PTR));
51 static int cmp_fun_nuses PARAMS ((const PTR, const PTR));
52 static void order_and_dump_functions_by_arcs
53 PARAMS ((Arc **, unsigned long, int, Arc **, unsigned long *));
55 /* Declarations of automatically generated functions to output blurbs. */
56 extern void bsd_callg_blurb PARAMS ((FILE * fp));
57 extern void fsf_callg_blurb PARAMS ((FILE * fp));
59 double print_time = 0.0;
62 static void
63 print_header ()
65 if (first_output)
66 first_output = FALSE;
67 else
68 printf ("\f\n");
70 if (!bsd_style_output)
72 if (print_descriptions)
73 printf (_("\t\t Call graph (explanation follows)\n\n"));
74 else
75 printf (_("\t\t\tCall graph\n\n"));
78 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
79 (long) hist_scale * sizeof (UNIT));
81 if (print_time > 0.0)
82 printf (_(" for %.2f%% of %.2f seconds\n\n"),
83 100.0 / print_time, print_time / hz);
84 else
86 printf (_(" no time propagated\n\n"));
88 /* This doesn't hurt, since all the numerators will be 0.0. */
89 print_time = 1.0;
92 if (bsd_style_output)
94 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
95 "", "", "", "", _("called"), _("total"), _("parents"));
96 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
97 _("index"), _("%time"), _("self"), _("descendants"),
98 _("called"), _("self"), _("name"), _("index"));
99 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
100 "", "", "", "", _("called"), _("total"), _("children"));
101 printf ("\n");
103 else
105 printf (_("index %% time self children called name\n"));
109 /* Print a cycle header. */
111 static void
112 print_cycle (cyc)
113 Sym *cyc;
115 char buf[BUFSIZ];
117 sprintf (buf, "[%d]", cyc->cg.index);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
121 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
122 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
124 if (cyc->cg.self_calls != 0)
125 printf ("+%-7lu", cyc->cg.self_calls);
126 else
127 printf (" %7.7s", "");
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
132 /* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
135 static int
136 cmp_member (left, right)
137 Sym *left;
138 Sym *right;
140 double left_time = left->cg.prop.self + left->cg.prop.child;
141 double right_time = right->cg.prop.self + right->cg.prop.child;
142 unsigned long left_calls = left->ncalls + left->cg.self_calls;
143 unsigned long right_calls = right->ncalls + right->cg.self_calls;
145 if (left_time > right_time)
146 return GREATERTHAN;
148 if (left_time < right_time)
149 return LESSTHAN;
151 if (left_calls > right_calls)
152 return GREATERTHAN;
154 if (left_calls < right_calls)
155 return LESSTHAN;
157 return EQUALTO;
160 /* Sort members of a cycle. */
162 static void
163 sort_members (cyc)
164 Sym *cyc;
166 Sym *todo, *doing, *prev;
168 /* Detach cycle members from cyclehead,
169 and insertion sort them back on. */
170 todo = cyc->cg.cyc.next;
171 cyc->cg.cyc.next = 0;
173 for (doing = todo; doing && doing->cg.cyc.next; doing = todo)
175 todo = doing->cg.cyc.next;
177 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
179 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180 break;
183 doing->cg.cyc.next = prev->cg.cyc.next;
184 prev->cg.cyc.next = doing;
188 /* Print the members of a cycle. */
190 static void
191 print_members (cyc)
192 Sym *cyc;
194 Sym *member;
196 sort_members (cyc);
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 member->ncalls);
206 if (member->cg.self_calls != 0)
207 printf ("+%-7lu", member->cg.self_calls);
208 else
209 printf (" %7.7s", "");
211 printf (" ");
212 print_name (member);
213 printf ("\n");
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
225 static int
226 cmp_arc (left, right)
227 Arc *left;
228 Arc *right;
230 Sym *left_parent = left->parent;
231 Sym *left_child = left->child;
232 Sym *right_parent = right->parent;
233 Sym *right_child = right->child;
234 double left_time, right_time;
236 DBG (TIMEDEBUG,
237 printf ("[cmp_arc] ");
238 print_name (left_parent);
239 printf (" calls ");
240 print_name (left_child);
241 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
242 left->count, left_child->ncalls);
243 printf ("[cmp_arc] ");
244 print_name (right_parent);
245 printf (" calls ");
246 print_name (right_child);
247 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
248 right->count, right_child->ncalls);
249 printf ("\n");
252 if (left_parent == left_child)
253 return LESSTHAN; /* Left is a self call. */
255 if (right_parent == right_child)
256 return GREATERTHAN; /* Right is a self call. */
258 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
259 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
261 /* Left is a call within a cycle. */
262 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
263 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
265 /* Right is a call within the cycle, too. */
266 if (left->count < right->count)
267 return LESSTHAN;
269 if (left->count > right->count)
270 return GREATERTHAN;
272 return EQUALTO;
274 else
276 /* Right isn't a call within the cycle. */
277 return LESSTHAN;
280 else
282 /* Left isn't a call within a cycle. */
283 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
284 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
286 /* Right is a call within a cycle. */
287 return GREATERTHAN;
289 else
291 /* Neither is a call within a cycle. */
292 left_time = left->time + left->child_time;
293 right_time = right->time + right->child_time;
295 if (left_time < right_time)
296 return LESSTHAN;
298 if (left_time > right_time)
299 return GREATERTHAN;
301 if (left->count < right->count)
302 return LESSTHAN;
304 if (left->count > right->count)
305 return GREATERTHAN;
307 return EQUALTO;
313 static void
314 sort_parents (child)
315 Sym * child;
317 Arc *arc, *detached, sorted, *prev;
319 /* Unlink parents from child, then insertion sort back on to
320 sorted's parents.
321 *arc the arc you have detached and are inserting.
322 *detached the rest of the arcs to be sorted.
323 sorted arc list onto which you insertion sort.
324 *prev arc before the arc you are comparing. */
325 sorted.next_parent = 0;
327 for (arc = child->cg.parents; arc; arc = detached)
329 detached = arc->next_parent;
331 /* Consider *arc as disconnected; insert it into sorted. */
332 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
334 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
335 break;
338 arc->next_parent = prev->next_parent;
339 prev->next_parent = arc;
342 /* Reattach sorted arcs to child. */
343 child->cg.parents = sorted.next_parent;
347 static void
348 print_parents (child)
349 Sym *child;
351 Sym *parent;
352 Arc *arc;
353 Sym *cycle_head;
355 if (child->cg.cyc.head != 0)
356 cycle_head = child->cg.cyc.head;
357 else
358 cycle_head = child;
360 if (!child->cg.parents)
362 printf (bsd_style_output
363 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
364 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
365 "", "", "", "", "", "");
366 return;
369 sort_parents (child);
371 for (arc = child->cg.parents; arc; arc = arc->next_parent)
373 parent = arc->parent;
374 if (child == parent || (child->cg.cyc.num != 0
375 && parent->cg.cyc.num == child->cg.cyc.num))
377 /* Selfcall or call among siblings. */
378 printf (bsd_style_output
379 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
380 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
381 "", "", "", "",
382 arc->count, "");
383 print_name (parent);
384 printf ("\n");
386 else
388 /* Regular parent of child. */
389 printf (bsd_style_output
390 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
391 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
392 "", "",
393 arc->time / hz, arc->child_time / hz,
394 arc->count, cycle_head->ncalls);
395 print_name (parent);
396 printf ("\n");
402 static void
403 sort_children (parent)
404 Sym *parent;
406 Arc *arc, *detached, sorted, *prev;
408 /* Unlink children from parent, then insertion sort back on to
409 sorted's children.
410 *arc the arc you have detached and are inserting.
411 *detached the rest of the arcs to be sorted.
412 sorted arc list onto which you insertion sort.
413 *prev arc before the arc you are comparing. */
414 sorted.next_child = 0;
416 for (arc = parent->cg.children; arc; arc = detached)
418 detached = arc->next_child;
420 /* Consider *arc as disconnected; insert it into sorted. */
421 for (prev = &sorted; prev->next_child; prev = prev->next_child)
423 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
424 break;
427 arc->next_child = prev->next_child;
428 prev->next_child = arc;
431 /* Reattach sorted children to parent. */
432 parent->cg.children = sorted.next_child;
436 static void
437 print_children (parent)
438 Sym *parent;
440 Sym *child;
441 Arc *arc;
443 sort_children (parent);
444 arc = parent->cg.children;
446 for (arc = parent->cg.children; arc; arc = arc->next_child)
448 child = arc->child;
449 if (child == parent || (child->cg.cyc.num != 0
450 && child->cg.cyc.num == parent->cg.cyc.num))
452 /* Self call or call to sibling. */
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
455 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
456 "", "", "", "", arc->count, "");
457 print_name (child);
458 printf ("\n");
460 else
462 /* Regular child of parent. */
463 printf (bsd_style_output
464 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
465 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
466 "", "",
467 arc->time / hz, arc->child_time / hz,
468 arc->count, child->cg.cyc.head->ncalls);
469 print_name (child);
470 printf ("\n");
476 static void
477 print_line (np)
478 Sym *np;
480 char buf[BUFSIZ];
482 sprintf (buf, "[%d]", np->cg.index);
483 printf (bsd_style_output
484 ? "%-6.6s %5.1f %7.2f %11.2f"
485 : "%-6.6s %5.1f %7.2f %7.2f", buf,
486 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
487 np->cg.prop.self / hz, np->cg.prop.child / hz);
489 if ((np->ncalls + np->cg.self_calls) != 0)
491 printf (" %7lu", np->ncalls);
493 if (np->cg.self_calls != 0)
494 printf ("+%-7lu ", np->cg.self_calls);
495 else
496 printf (" %7.7s ", "");
498 else
500 printf (" %7.7s %7.7s ", "", "");
503 print_name (np);
504 printf ("\n");
508 /* Print dynamic call graph. */
510 void
511 cg_print (timesortsym)
512 Sym ** timesortsym;
514 unsigned int index;
515 Sym *parent;
517 if (print_descriptions && bsd_style_output)
518 bsd_callg_blurb (stdout);
520 print_header ();
522 for (index = 0; index < symtab.len + num_cycles; ++index)
524 parent = timesortsym[index];
526 if ((ignore_zeros && parent->ncalls == 0
527 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
528 && parent->cg.prop.child == 0)
529 || !parent->cg.print_flag
530 || (line_granularity && ! parent->is_func))
531 continue;
533 if (!parent->name && parent->cg.cyc.num != 0)
535 /* Cycle header. */
536 print_cycle (parent);
537 print_members (parent);
539 else
541 print_parents (parent);
542 print_line (parent);
543 print_children (parent);
546 if (bsd_style_output)
547 printf ("\n");
549 printf ("-----------------------------------------------\n");
551 if (bsd_style_output)
552 printf ("\n");
555 free (timesortsym);
557 if (print_descriptions && !bsd_style_output)
558 fsf_callg_blurb (stdout);
562 static int
563 cmp_name (left, right)
564 const PTR left;
565 const PTR right;
567 const Sym **npp1 = (const Sym **) left;
568 const Sym **npp2 = (const Sym **) right;
570 return strcmp ((*npp1)->name, (*npp2)->name);
574 void
575 cg_print_index ()
577 unsigned int index;
578 unsigned int nnames, todo, i, j;
579 int col, starting_col;
580 Sym **name_sorted_syms, *sym;
581 const char *filename;
582 char buf[20];
583 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
585 /* Now, sort regular function name
586 alphabetically to create an index. */
587 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
589 for (index = 0, nnames = 0; index < symtab.len; index++)
591 if (ignore_zeros && symtab.base[index].ncalls == 0
592 && symtab.base[index].hist.time == 0)
593 continue;
595 name_sorted_syms[nnames++] = &symtab.base[index];
598 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
600 for (index = 1, todo = nnames; index <= num_cycles; index++)
601 name_sorted_syms[todo++] = &cycle_header[index];
603 printf ("\f\n");
604 printf (_("Index by function name\n\n"));
605 index = (todo + 2) / 3;
607 for (i = 0; i < index; i++)
609 col = 0;
610 starting_col = 0;
612 for (j = i; j < todo; j += index)
614 sym = name_sorted_syms[j];
616 if (sym->cg.print_flag)
617 sprintf (buf, "[%d]", sym->cg.index);
618 else
619 sprintf (buf, "(%d)", sym->cg.index);
621 if (j < nnames)
623 if (bsd_style_output)
625 printf ("%6.6s %-19.19s", buf, sym->name);
627 else
629 col += strlen (buf);
631 for (; col < starting_col + 5; ++col)
632 putchar (' ');
634 printf (" %s ", buf);
635 col += print_name_only (sym);
637 if (!line_granularity && sym->is_static && sym->file)
639 filename = sym->file->name;
641 if (!print_path)
643 filename = strrchr (filename, '/');
645 if (filename)
646 ++filename;
647 else
648 filename = sym->file->name;
651 printf (" (%s)", filename);
652 col += strlen (filename) + 3;
656 else
658 if (bsd_style_output)
660 printf ("%6.6s ", buf);
661 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
662 printf ("%-19.19s", buf);
664 else
666 col += strlen (buf);
667 for (; col < starting_col + 5; ++col)
668 putchar (' ');
669 printf (" %s ", buf);
670 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
671 printf ("%s", buf);
672 col += strlen (buf);
676 starting_col += column_width;
679 printf ("\n");
682 free (name_sorted_syms);
685 /* Compare two arcs based on their usage counts.
686 We want to sort in descending order. */
688 static int
689 cmp_arc_count (left, right)
690 const PTR left;
691 const PTR right;
693 const Arc **npp1 = (const Arc **) left;
694 const Arc **npp2 = (const Arc **) right;
696 if ((*npp1)->count > (*npp2)->count)
697 return -1;
698 else if ((*npp1)->count < (*npp2)->count)
699 return 1;
700 else
701 return 0;
704 /* Compare two funtions based on their usage counts.
705 We want to sort in descending order. */
707 static int
708 cmp_fun_nuses (left, right)
709 const PTR left;
710 const PTR right;
712 const Sym **npp1 = (const Sym **) left;
713 const Sym **npp2 = (const Sym **) right;
715 if ((*npp1)->nuses > (*npp2)->nuses)
716 return -1;
717 else if ((*npp1)->nuses < (*npp2)->nuses)
718 return 1;
719 else
720 return 0;
723 /* Print a suggested function ordering based on the profiling data.
725 We perform 4 major steps when ordering functions:
727 * Group unused functions together and place them at the
728 end of the function order.
730 * Search the highest use arcs (those which account for 90% of
731 the total arc count) for functions which have several parents.
733 Group those with the most call sites together (currently the
734 top 1.25% which have at least five different call sites).
736 These are emitted at the start of the function order.
738 * Use a greedy placement algorithm to place functions which
739 occur in the top 99% of the arcs in the profile. Some provisions
740 are made to handle high usage arcs where the parent and/or
741 child has already been placed.
743 * Run the same greedy placement algorithm on the remaining
744 arcs to place the leftover functions.
747 The various "magic numbers" should (one day) be tuneable by command
748 line options. They were arrived at by benchmarking a few applications
749 with various values to see which values produced better overall function
750 orderings.
752 Of course, profiling errors, machine limitations (PA long calls), and
753 poor cutoff values for the placement algorithm may limit the usefullness
754 of the resulting function order. Improvements would be greatly appreciated.
756 Suggestions:
758 * Place the functions with many callers near the middle of the
759 list to reduce long calls.
761 * Propagate arc usage changes as functions are placed. Ie if
762 func1 and func2 are placed together, arcs to/from those arcs
763 to the same parent/child should be combined, then resort the
764 arcs to choose the next one.
766 * Implement some global positioning algorithm to place the
767 chains made by the greedy local positioning algorithm. Probably
768 by examining arcs which haven't been placed yet to tie two
769 chains together.
771 * Take a function's size and time into account in the algorithm;
772 size in particular is important on the PA (long calls). Placing
773 many small functions onto their own page may be wise.
775 * Use better profiling information; many published algorithms
776 are based on call sequences through time, rather than just
777 arc counts.
779 * Prodecure cloning could improve performance when a small number
780 of arcs account for most of the calls to a particular function.
782 * Use relocation information to avoid moving unused functions
783 completely out of the code stream; this would avoid severe lossage
784 when the profile data bears little resemblance to actual runs.
786 * Propagation of arc usages should also improve .o link line
787 ordering which shares the same arc placement algorithm with
788 the function ordering code (in fact it is a degenerate case
789 of function ordering). */
791 void
792 cg_print_function_ordering ()
794 unsigned long index, used, unused, scratch_index;
795 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
796 #ifdef __GNUC__
797 unsigned long long total_arcs, tmp_arcs_count;
798 #else
799 unsigned long total_arcs, tmp_arcs_count;
800 #endif
801 Sym **unused_syms, **used_syms, **scratch_syms;
802 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
804 index = 0;
805 used = 0;
806 unused = 0;
807 scratch_index = 0;
808 unplaced_arc_count = 0;
809 high_arc_count = 0;
810 scratch_arc_count = 0;
812 /* First group all the unused functions together. */
813 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
814 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
815 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
816 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
817 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
818 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
820 /* Walk through all the functions; mark those which are never
821 called as placed (we'll emit them as a group later). */
822 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
824 if (symtab.base[index].ncalls == 0)
826 /* Filter out gprof generated names. */
827 if (strcmp (symtab.base[index].name, "<locore>")
828 && strcmp (symtab.base[index].name, "<hicore>"))
830 unused_syms[unused++] = &symtab.base[index];
831 symtab.base[index].has_been_placed = 1;
834 else
836 used_syms[used++] = &symtab.base[index];
837 symtab.base[index].has_been_placed = 0;
838 symtab.base[index].next = 0;
839 symtab.base[index].prev = 0;
840 symtab.base[index].nuses = 0;
844 /* Sort the arcs from most used to least used. */
845 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
847 /* Compute the total arc count. Also mark arcs as unplaced.
849 Note we don't compensate for overflow if that happens!
850 Overflow is much less likely when this file is compiled
851 with GCC as it can double-wide integers via long long. */
852 total_arcs = 0;
853 for (index = 0; index < numarcs; index++)
855 total_arcs += arcs[index]->count;
856 arcs[index]->has_been_placed = 0;
859 /* We want to pull out those functions which are referenced
860 by many highly used arcs and emit them as a group. This
861 could probably use some tuning. */
862 tmp_arcs_count = 0;
863 for (index = 0; index < numarcs; index++)
865 tmp_arcs_count += arcs[index]->count;
867 /* Count how many times each parent and child are used up
868 to our threshhold of arcs (90%). */
869 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
870 break;
872 arcs[index]->child->nuses++;
875 /* Now sort a temporary symbol table based on the number of
876 times each function was used in the highest used arcs. */
877 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
878 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
880 /* Now pick out those symbols we're going to emit as
881 a group. We take up to 1.25% of the used symbols. */
882 for (index = 0; index < used / 80; index++)
884 Sym *sym = scratch_syms[index];
885 Arc *arc;
887 /* If we hit symbols that aren't used from many call sites,
888 then we can quit. We choose five as the low limit for
889 no particular reason. */
890 if (sym->nuses == 5)
891 break;
893 /* We're going to need the arcs between these functions.
894 Unfortunately, we don't know all these functions
895 until we're done. So we keep track of all the arcs
896 to the functions we care about, then prune out those
897 which are uninteresting.
899 An interesting variation would be to quit when we found
900 multi-call site functions which account for some percentage
901 of the arcs. */
902 arc = sym->cg.children;
904 while (arc)
906 if (arc->parent != arc->child)
907 scratch_arcs[scratch_arc_count++] = arc;
908 arc->has_been_placed = 1;
909 arc = arc->next_child;
912 arc = sym->cg.parents;
914 while (arc)
916 if (arc->parent != arc->child)
917 scratch_arcs[scratch_arc_count++] = arc;
918 arc->has_been_placed = 1;
919 arc = arc->next_parent;
922 /* Keep track of how many symbols we're going to place. */
923 scratch_index = index;
925 /* A lie, but it makes identifying
926 these functions easier later. */
927 sym->has_been_placed = 1;
930 /* Now walk through the temporary arcs and copy
931 those we care about into the high arcs array. */
932 for (index = 0; index < scratch_arc_count; index++)
934 Arc *arc = scratch_arcs[index];
936 /* If this arc refers to highly used functions, then
937 then we want to keep it. */
938 if (arc->child->has_been_placed
939 && arc->parent->has_been_placed)
941 high_arcs[high_arc_count++] = scratch_arcs[index];
943 /* We need to turn of has_been_placed since we're going to
944 use the main arc placement algorithm on these arcs. */
945 arc->child->has_been_placed = 0;
946 arc->parent->has_been_placed = 0;
950 /* Dump the multi-site high usage functions which are not
951 going to be ordered by the main ordering algorithm. */
952 for (index = 0; index < scratch_index; index++)
954 if (scratch_syms[index]->has_been_placed)
955 printf ("%s\n", scratch_syms[index]->name);
958 /* Now we can order the multi-site high use
959 functions based on the arcs between them. */
960 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
961 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
962 unplaced_arcs, &unplaced_arc_count);
964 /* Order and dump the high use functions left,
965 these typically have only a few call sites. */
966 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
967 unplaced_arcs, &unplaced_arc_count);
969 /* Now place the rarely used functions. */
970 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
971 scratch_arcs, &scratch_arc_count);
973 /* Output any functions not emitted by the order_and_dump calls. */
974 for (index = 0; index < used; index++)
975 if (used_syms[index]->has_been_placed == 0)
976 printf("%s\n", used_syms[index]->name);
978 /* Output the unused functions. */
979 for (index = 0; index < unused; index++)
980 printf("%s\n", unused_syms[index]->name);
982 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
983 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
984 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
985 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
986 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
987 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
989 free (unused_syms);
990 free (used_syms);
991 free (scratch_syms);
992 free (high_arcs);
993 free (scratch_arcs);
994 free (unplaced_arcs);
997 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
998 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
1000 If ALL is nonzero, then place all functions referenced by THE_ARCS,
1001 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
1003 #define MOST 0.99
1004 static void
1005 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
1006 unplaced_arcs, unplaced_arc_count)
1007 Arc **the_arcs;
1008 unsigned long arc_count;
1009 int all;
1010 Arc **unplaced_arcs;
1011 unsigned long *unplaced_arc_count;
1013 #ifdef __GNUC__
1014 unsigned long long tmp_arcs, total_arcs;
1015 #else
1016 unsigned long tmp_arcs, total_arcs;
1017 #endif
1018 unsigned int index;
1020 /* If needed, compute the total arc count.
1022 Note we don't compensate for overflow if that happens! */
1023 if (! all)
1025 total_arcs = 0;
1026 for (index = 0; index < arc_count; index++)
1027 total_arcs += the_arcs[index]->count;
1029 else
1030 total_arcs = 0;
1032 tmp_arcs = 0;
1034 for (index = 0; index < arc_count; index++)
1036 Sym *sym1, *sym2;
1037 Sym *child, *parent;
1039 tmp_arcs += the_arcs[index]->count;
1041 /* Ignore this arc if it's already been placed. */
1042 if (the_arcs[index]->has_been_placed)
1043 continue;
1045 child = the_arcs[index]->child;
1046 parent = the_arcs[index]->parent;
1048 /* If we're not using all arcs, and this is a rarely used
1049 arc, then put it on the unplaced_arc list. Similarly
1050 if both the parent and child of this arc have been placed. */
1051 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1052 || child->has_been_placed || parent->has_been_placed)
1054 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1055 continue;
1058 /* If all slots in the parent and child are full, then there isn't
1059 anything we can do right now. We'll place this arc on the
1060 unplaced arc list in the hope that a global positioning
1061 algorithm can use it to place function chains. */
1062 if (parent->next && parent->prev && child->next && child->prev)
1064 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1065 continue;
1068 /* If the parent is unattached, then find the closest
1069 place to attach it onto child's chain. Similarly
1070 for the opposite case. */
1071 if (!parent->next && !parent->prev)
1073 int next_count = 0;
1074 int prev_count = 0;
1075 Sym *prev = child;
1076 Sym *next = child;
1078 /* Walk to the beginning and end of the child's chain. */
1079 while (next->next)
1081 next = next->next;
1082 next_count++;
1085 while (prev->prev)
1087 prev = prev->prev;
1088 prev_count++;
1091 /* Choose the closest. */
1092 child = next_count < prev_count ? next : prev;
1094 else if (! child->next && !child->prev)
1096 int next_count = 0;
1097 int prev_count = 0;
1098 Sym *prev = parent;
1099 Sym *next = parent;
1101 while (next->next)
1103 next = next->next;
1104 next_count++;
1107 while (prev->prev)
1109 prev = prev->prev;
1110 prev_count++;
1113 parent = prev_count < next_count ? prev : next;
1115 else
1117 /* Couldn't find anywhere to attach the functions,
1118 put the arc on the unplaced arc list. */
1119 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1120 continue;
1123 /* Make sure we don't tie two ends together. */
1124 sym1 = parent;
1125 if (sym1->next)
1126 while (sym1->next)
1127 sym1 = sym1->next;
1128 else
1129 while (sym1->prev)
1130 sym1 = sym1->prev;
1132 sym2 = child;
1133 if (sym2->next)
1134 while (sym2->next)
1135 sym2 = sym2->next;
1136 else
1137 while (sym2->prev)
1138 sym2 = sym2->prev;
1140 if (sym1 == child
1141 && sym2 == parent)
1143 /* This would tie two ends together. */
1144 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1145 continue;
1148 if (parent->next)
1150 /* Must attach to the parent's prev field. */
1151 if (! child->next)
1153 /* parent-prev and child-next */
1154 parent->prev = child;
1155 child->next = parent;
1156 the_arcs[index]->has_been_placed = 1;
1159 else if (parent->prev)
1161 /* Must attach to the parent's next field. */
1162 if (! child->prev)
1164 /* parent-next and child-prev */
1165 parent->next = child;
1166 child->prev = parent;
1167 the_arcs[index]->has_been_placed = 1;
1170 else
1172 /* Can attach to either field in the parent, depends
1173 on where we've got space in the child. */
1174 if (child->prev)
1176 /* parent-prev and child-next. */
1177 parent->prev = child;
1178 child->next = parent;
1179 the_arcs[index]->has_been_placed = 1;
1181 else
1183 /* parent-next and child-prev. */
1184 parent->next = child;
1185 child->prev = parent;
1186 the_arcs[index]->has_been_placed = 1;
1191 /* Dump the chains of functions we've made. */
1192 for (index = 0; index < arc_count; index++)
1194 Sym *sym;
1195 if (the_arcs[index]->parent->has_been_placed
1196 || the_arcs[index]->child->has_been_placed)
1197 continue;
1199 sym = the_arcs[index]->parent;
1201 /* If this symbol isn't attached to any other
1202 symbols, then we've got a rarely used arc.
1204 Skip it for now, we'll deal with them later. */
1205 if (sym->next == NULL
1206 && sym->prev == NULL)
1207 continue;
1209 /* Get to the start of this chain. */
1210 while (sym->prev)
1211 sym = sym->prev;
1213 while (sym)
1215 /* Mark it as placed. */
1216 sym->has_been_placed = 1;
1217 printf ("%s\n", sym->name);
1218 sym = sym->next;
1222 /* If we want to place all the arcs, then output
1223 those which weren't placed by the main algorithm. */
1224 if (all)
1225 for (index = 0; index < arc_count; index++)
1227 Sym *sym;
1228 if (the_arcs[index]->parent->has_been_placed
1229 || the_arcs[index]->child->has_been_placed)
1230 continue;
1232 sym = the_arcs[index]->parent;
1234 sym->has_been_placed = 1;
1235 printf ("%s\n", sym->name);
1239 /* Print a suggested .o ordering for files on a link line based
1240 on profiling information. This uses the function placement
1241 code for the bulk of its work. */
1243 void
1244 cg_print_file_ordering ()
1246 unsigned long scratch_arc_count, index;
1247 Arc **scratch_arcs;
1248 char *last;
1250 scratch_arc_count = 0;
1252 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1253 for (index = 0; index < numarcs; index++)
1255 if (! arcs[index]->parent->mapped
1256 || ! arcs[index]->child->mapped)
1257 arcs[index]->has_been_placed = 1;
1260 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1261 scratch_arcs, &scratch_arc_count);
1263 /* Output .o's not handled by the main placement algorithm. */
1264 for (index = 0; index < symtab.len; index++)
1266 if (symtab.base[index].mapped
1267 && ! symtab.base[index].has_been_placed)
1268 printf ("%s\n", symtab.base[index].name);
1271 /* Now output any .o's that didn't have any text symbols. */
1272 last = NULL;
1273 for (index = 0; index < symbol_map_count; index++)
1275 unsigned int index2;
1277 /* Don't bother searching if this symbol
1278 is the same as the previous one. */
1279 if (last && !strcmp (last, symbol_map[index].file_name))
1280 continue;
1282 for (index2 = 0; index2 < symtab.len; index2++)
1284 if (! symtab.base[index2].mapped)
1285 continue;
1287 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1288 break;
1291 /* If we didn't find it in the symbol table, then it must
1292 be a .o with no text symbols. Output it last. */
1293 if (index2 == symtab.len)
1294 printf ("%s\n", symbol_map[index].file_name);
1295 last = symbol_map[index].file_name;