Mark ChangeLog
[official-gcc.git] / gcc / ipa-cp.c
blobce5051fe24215ca447168a777fd8718ee7e6e4c1
1 /* Interprocedural constant propagation
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Interprocedural constant propagation. The aim of interprocedural constant
23 propagation (IPCP) is to find which function's argument has the same
24 constant value in each invocation throughout the whole program. For example,
25 consider the following program:
27 int g (int y)
29 printf ("value is %d",y);
32 int f (int x)
34 g (x);
37 int h (int y)
39 g (y);
42 void main (void)
44 f (3);
45 h (3);
49 The IPCP algorithm will find that g's formal argument y is always called
50 with the value 3.
52 The algorithm used is based on "Interprocedural Constant Propagation", by
53 Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
54 152-161
56 The optimization is divided into three stages:
58 First stage - intraprocedural analysis
59 =======================================
60 This phase computes jump_function and modification flags.
62 A jump function for a callsite represents the values passed as an actual
63 arguments of a given callsite. There are three types of values:
64 Pass through - the caller's formal parameter is passed as an actual argument.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 The jump function info, ipa_jump_func, is stored in ipa_edge_args
69 structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
70 modified_flags are defined in ipa_node_params structure
71 (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
73 -ipcp_init_stage() is the first stage driver.
75 Second stage - interprocedural analysis
76 ========================================
77 This phase does the interprocedural constant propagation.
78 It computes lattices for all formal parameters in the program
79 and their value that may be:
80 TOP - unknown.
81 BOTTOM - non constant.
82 CONSTANT - constant value.
84 Lattice describing a formal parameter p will have a constant value if all
85 callsites invoking this function have the same constant value passed to p.
87 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
88 structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
90 -ipcp_iterate_stage() is the second stage driver.
92 Third phase - transformation of function code
93 ============================================
94 Propagates the constant-valued formals into the function.
95 For each function whose parameters are constants, we create its clone.
97 Then we process the clone in two ways:
98 1. We insert an assignment statement 'parameter = const' at the beginning
99 of the cloned function.
100 2. For read-only parameters that do not live in memory, we replace all their
101 uses with the constant.
103 We also need to modify some callsites to call the cloned functions instead
104 of the original ones. For a callsite passing an argument found to be a
105 constant by IPCP, there are two different cases to handle:
106 1. A constant is passed as an argument. In this case the callsite in the
107 should be redirected to call the cloned callee.
108 2. A parameter (of the caller) passed as an argument (pass through
109 argument). In such cases both the caller and the callee have clones and
110 only the callsite in the cloned caller is redirected to call to the
111 cloned callee.
113 This update is done in two steps: First all cloned functions are created
114 during a traversal of the call graph, during which all callsites are
115 redirected to call the cloned function. Then the callsites are traversed
116 and many calls redirected back to fit the description above.
118 -ipcp_insert_stage() is the third phase driver.
122 #include "config.h"
123 #include "system.h"
124 #include "coretypes.h"
125 #include "tree.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "ipa-prop.h"
129 #include "tree-flow.h"
130 #include "tree-pass.h"
131 #include "flags.h"
132 #include "timevar.h"
133 #include "diagnostic.h"
134 #include "tree-dump.h"
135 #include "tree-inline.h"
136 #include "fibheap.h"
137 #include "params.h"
139 /* Number of functions identified as candidates for cloning. When not cloning
140 we can simplify iterate stage not forcing it to go through the decision
141 on what is profitable and what not. */
142 static int n_cloning_candidates;
144 /* Maximal count found in program. */
145 static gcov_type max_count;
147 /* Cgraph nodes that has been completely replaced by cloning during iterate
148 * stage and will be removed after ipcp is finished. */
149 static bitmap dead_nodes;
151 static void ipcp_print_profile_data (FILE *);
152 static void ipcp_function_scale_print (FILE *);
154 /* Get the original node field of ipa_node_params associated with node NODE. */
155 static inline struct cgraph_node *
156 ipcp_get_orig_node (struct cgraph_node *node)
158 return IPA_NODE_REF (node)->ipcp_orig_node;
161 /* Return true if NODE describes a cloned/versioned function. */
162 static inline bool
163 ipcp_node_is_clone (struct cgraph_node *node)
165 return (ipcp_get_orig_node (node) != NULL);
168 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
169 as the ipcp_orig_node field in ipa_node_params. */
170 static void
171 ipcp_init_cloned_node (struct cgraph_node *orig_node,
172 struct cgraph_node *new_node)
174 ipa_check_create_node_params ();
175 ipa_initialize_node_params (new_node);
176 IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
179 /* Perform intraprocedrual analysis needed for ipcp. */
180 static void
181 ipcp_analyze_node (struct cgraph_node *node)
183 /* Unreachable nodes should have been eliminated before ipcp. */
184 gcc_assert (node->needed || node->reachable);
186 ipa_initialize_node_params (node);
187 ipa_detect_param_modifications (node);
190 /* Return scale for NODE. */
191 static inline gcov_type
192 ipcp_get_node_scale (struct cgraph_node *node)
194 return IPA_NODE_REF (node)->count_scale;
197 /* Set COUNT as scale for NODE. */
198 static inline void
199 ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
201 IPA_NODE_REF (node)->count_scale = count;
204 /* Return whether LAT is a constant lattice. */
205 static inline bool
206 ipcp_lat_is_const (struct ipcp_lattice *lat)
208 if (lat->type == IPA_CONST_VALUE)
209 return true;
210 else
211 return false;
214 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
215 into the code (i.e. constants excluding member pointers and pointers). */
216 static inline bool
217 ipcp_lat_is_insertable (struct ipcp_lattice *lat)
219 return lat->type == IPA_CONST_VALUE;
222 /* Return true if LAT1 and LAT2 are equal. */
223 static inline bool
224 ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
226 gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
227 if (lat1->type != lat2->type)
228 return false;
230 if (operand_equal_p (lat1->constant, lat2->constant, 0))
231 return true;
233 return false;
236 /* Compute Meet arithmetics:
237 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
238 Meet (IPA_TOP,x) = x
239 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
240 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
241 static void
242 ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
243 struct ipcp_lattice *lat2)
245 if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
247 res->type = IPA_BOTTOM;
248 return;
250 if (lat1->type == IPA_TOP)
252 res->type = lat2->type;
253 res->constant = lat2->constant;
254 return;
256 if (lat2->type == IPA_TOP)
258 res->type = lat1->type;
259 res->constant = lat1->constant;
260 return;
262 if (!ipcp_lats_are_equal (lat1, lat2))
264 res->type = IPA_BOTTOM;
265 return;
267 res->type = lat1->type;
268 res->constant = lat1->constant;
271 /* Return the lattice corresponding to the Ith formal parameter of the function
272 described by INFO. */
273 static inline struct ipcp_lattice *
274 ipcp_get_lattice (struct ipa_node_params *info, int i)
276 return &(info->params[i].ipcp_lattice);
279 /* Given the jump function JFUNC, compute the lattice LAT that describes the
280 value coming down the callsite. INFO describes the caller node so that
281 pass-through jump functions can be evaluated. */
282 static void
283 ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat,
284 struct ipa_jump_func *jfunc)
286 if (jfunc->type == IPA_JF_CONST)
288 lat->type = IPA_CONST_VALUE;
289 lat->constant = jfunc->value.constant;
291 else if (jfunc->type == IPA_JF_PASS_THROUGH)
293 struct ipcp_lattice *caller_lat;
294 tree cst;
296 caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id);
297 lat->type = caller_lat->type;
298 if (caller_lat->type != IPA_CONST_VALUE)
299 return;
300 cst = caller_lat->constant;
302 if (jfunc->value.pass_through.operation != NOP_EXPR)
304 tree restype;
305 if (TREE_CODE_CLASS (jfunc->value.pass_through.operation)
306 == tcc_comparison)
307 restype = boolean_type_node;
308 else
309 restype = TREE_TYPE (cst);
310 cst = fold_binary (jfunc->value.pass_through.operation,
311 restype, cst, jfunc->value.pass_through.operand);
313 if (!cst || !is_gimple_ip_invariant (cst))
314 lat->type = IPA_BOTTOM;
315 lat->constant = cst;
317 else if (jfunc->type == IPA_JF_ANCESTOR)
319 struct ipcp_lattice *caller_lat;
320 tree t;
321 bool ok;
323 caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id);
324 lat->type = caller_lat->type;
325 if (caller_lat->type != IPA_CONST_VALUE)
326 return;
327 if (TREE_CODE (caller_lat->constant) != ADDR_EXPR)
329 /* This can happen when the constant is a NULL pointer. */
330 lat->type = IPA_BOTTOM;
331 return;
333 t = TREE_OPERAND (caller_lat->constant, 0);
334 ok = build_ref_for_offset (&t, TREE_TYPE (t),
335 jfunc->value.ancestor.offset,
336 jfunc->value.ancestor.type, false);
337 if (!ok)
339 lat->type = IPA_BOTTOM;
340 lat->constant = NULL_TREE;
342 else
343 lat->constant = build_fold_addr_expr (t);
345 else
346 lat->type = IPA_BOTTOM;
349 /* True when OLD_LAT and NEW_LAT values are not the same. */
351 static bool
352 ipcp_lattice_changed (struct ipcp_lattice *old_lat,
353 struct ipcp_lattice *new_lat)
355 if (old_lat->type == new_lat->type)
357 if (!ipcp_lat_is_const (old_lat))
358 return false;
359 if (ipcp_lats_are_equal (old_lat, new_lat))
360 return false;
362 return true;
365 /* Print all ipcp_lattices of all functions to F. */
366 static void
367 ipcp_print_all_lattices (FILE * f)
369 struct cgraph_node *node;
370 int i, count;
372 fprintf (f, "\nLattice:\n");
373 for (node = cgraph_nodes; node; node = node->next)
375 struct ipa_node_params *info;
377 if (!node->analyzed)
378 continue;
379 info = IPA_NODE_REF (node);
380 fprintf (f, " Node: %s:\n", cgraph_node_name (node));
381 count = ipa_get_param_count (info);
382 for (i = 0; i < count; i++)
384 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
386 fprintf (f, " param [%d]: ", i);
387 if (lat->type == IPA_CONST_VALUE)
389 fprintf (f, "type is CONST ");
390 print_generic_expr (f, lat->constant, 0);
391 fprintf (f, "\n");
393 else if (lat->type == IPA_TOP)
394 fprintf (f, "type is TOP\n");
395 else
396 fprintf (f, "type is BOTTOM\n");
401 /* Return true if ipcp algorithms would allow cloning NODE. */
403 static bool
404 ipcp_versionable_function_p (struct cgraph_node *node)
406 tree decl = node->decl;
407 basic_block bb;
409 /* There are a number of generic reasons functions cannot be versioned. */
410 if (!tree_versionable_function_p (decl))
411 return false;
413 /* Removing arguments doesn't work if the function takes varargs. */
414 if (DECL_STRUCT_FUNCTION (decl)->stdarg)
415 return false;
417 /* Removing arguments doesn't work if we use __builtin_apply_args. */
418 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl))
420 gimple_stmt_iterator gsi;
421 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
423 const_gimple stmt = gsi_stmt (gsi);
424 tree t;
426 if (!is_gimple_call (stmt))
427 continue;
428 t = gimple_call_fndecl (stmt);
429 if (t == NULL_TREE)
430 continue;
431 if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
432 && DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS)
433 return false;
437 return true;
440 /* Return true if this NODE is viable candidate for cloning. */
441 static bool
442 ipcp_cloning_candidate_p (struct cgraph_node *node)
444 int n_calls = 0;
445 int n_hot_calls = 0;
446 gcov_type direct_call_sum = 0;
447 struct cgraph_edge *e;
449 /* We never clone functions that are not visible from outside.
450 FIXME: in future we should clone such functions when they are called with
451 different constants, but current ipcp implementation is not good on this.
453 if (cgraph_only_called_directly_p (node) || !node->analyzed)
454 return false;
456 if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
458 if (dump_file)
459 fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n",
460 cgraph_node_name (node));
461 return false;
463 if (!ipcp_versionable_function_p (node))
465 if (dump_file)
466 fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
467 cgraph_node_name (node));
468 return false;
470 for (e = node->callers; e; e = e->next_caller)
472 direct_call_sum += e->count;
473 n_calls ++;
474 if (cgraph_maybe_hot_edge_p (e))
475 n_hot_calls ++;
478 if (!n_calls)
480 if (dump_file)
481 fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
482 cgraph_node_name (node));
483 return false;
485 if (node->local.inline_summary.self_size < n_calls)
487 if (dump_file)
488 fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
489 cgraph_node_name (node));
490 return true;
493 if (!flag_ipa_cp_clone)
495 if (dump_file)
496 fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
497 cgraph_node_name (node));
498 return false;
501 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
503 if (dump_file)
504 fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
505 cgraph_node_name (node));
506 return false;
509 /* When profile is available and function is hot, propagate into it even if
510 calls seems cold; constant propagation can improve function's speed
511 significandly. */
512 if (max_count)
514 if (direct_call_sum > node->count * 90 / 100)
516 if (dump_file)
517 fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
518 cgraph_node_name (node));
519 return true;
522 if (!n_hot_calls)
524 if (dump_file)
525 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
526 cgraph_node_name (node));
527 return false;
529 if (dump_file)
530 fprintf (dump_file, "Considering %s for cloning.\n",
531 cgraph_node_name (node));
532 return true;
535 /* Initialize ipcp_lattices array. The lattices corresponding to supported
536 types (integers, real types and Fortran constants defined as const_decls)
537 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
538 static void
539 ipcp_initialize_node_lattices (struct cgraph_node *node)
541 int i;
542 struct ipa_node_params *info = IPA_NODE_REF (node);
543 enum ipa_lattice_type type;
545 if (ipa_is_called_with_var_arguments (info))
546 type = IPA_BOTTOM;
547 else if (cgraph_only_called_directly_p (node))
548 type = IPA_TOP;
549 /* When cloning is allowed, we can assume that externally visible functions
550 are not called. We will compensate this by cloning later. */
551 else if (ipcp_cloning_candidate_p (node))
552 type = IPA_TOP, n_cloning_candidates ++;
553 else
554 type = IPA_BOTTOM;
556 for (i = 0; i < ipa_get_param_count (info) ; i++)
557 ipcp_get_lattice (info, i)->type = type;
560 /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT.
561 Return the tree. */
562 static tree
563 build_const_val (struct ipcp_lattice *lat, tree tree_type)
565 tree val;
567 gcc_assert (ipcp_lat_is_const (lat));
568 val = lat->constant;
570 if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
572 if (fold_convertible_p (tree_type, val))
573 return fold_build1 (NOP_EXPR, tree_type, val);
574 else
575 return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
577 return val;
580 /* Compute the proper scale for NODE. It is the ratio between the number of
581 direct calls (represented on the incoming cgraph_edges) and sum of all
582 invocations of NODE (represented as count in cgraph_node).
584 FIXME: This code is wrong. Since the callers can be also clones and
585 the clones are not scaled yet, the sums gets unrealistically high.
586 To properly compute the counts, we would need to do propagation across
587 callgraph (as external call to A might imply call to non-clonned B
588 if A's clone calls clonned B). */
589 static void
590 ipcp_compute_node_scale (struct cgraph_node *node)
592 gcov_type sum;
593 struct cgraph_edge *cs;
595 sum = 0;
596 /* Compute sum of all counts of callers. */
597 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
598 sum += cs->count;
599 /* Work around the unrealistically high sum problem. We just don't want
600 the non-cloned body to have negative or very low frequency. Since
601 majority of execution time will be spent in clones anyway, this should
602 give good enough profile. */
603 if (sum > node->count * 9 / 10)
604 sum = node->count * 9 / 10;
605 if (node->count == 0)
606 ipcp_set_node_scale (node, 0);
607 else
608 ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
611 /* Initialization and computation of IPCP data structures. This is the initial
612 intraprocedural analysis of functions, which gathers information to be
613 propagated later on. */
614 static void
615 ipcp_init_stage (void)
617 struct cgraph_node *node;
618 struct cgraph_edge *cs;
620 for (node = cgraph_nodes; node; node = node->next)
621 if (node->analyzed)
622 ipcp_analyze_node (node);
623 for (node = cgraph_nodes; node; node = node->next)
625 if (!node->analyzed)
626 continue;
627 /* building jump functions */
628 for (cs = node->callees; cs; cs = cs->next_callee)
630 /* We do not need to bother analyzing calls to unknown
631 functions unless they may become known during lto/whopr. */
632 if (!cs->callee->analyzed && !flag_lto && !flag_whopr)
633 continue;
634 ipa_count_arguments (cs);
635 if (ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
636 != ipa_get_param_count (IPA_NODE_REF (cs->callee)))
637 ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee));
638 ipa_compute_jump_functions (cs);
643 /* Return true if there are some formal parameters whose value is IPA_TOP (in
644 the whole compilation unit). Change their values to IPA_BOTTOM, since they
645 most probably get their values from outside of this compilation unit. */
646 static bool
647 ipcp_change_tops_to_bottom (void)
649 int i, count;
650 struct cgraph_node *node;
651 bool prop_again;
653 prop_again = false;
654 for (node = cgraph_nodes; node; node = node->next)
656 struct ipa_node_params *info = IPA_NODE_REF (node);
657 count = ipa_get_param_count (info);
658 for (i = 0; i < count; i++)
660 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
661 if (lat->type == IPA_TOP)
663 prop_again = true;
664 if (dump_file)
666 fprintf (dump_file, "Forcing param ");
667 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
668 fprintf (dump_file, " of node %s to bottom.\n",
669 cgraph_node_name (node));
671 lat->type = IPA_BOTTOM;
675 return prop_again;
678 /* Interprocedural analysis. The algorithm propagates constants from the
679 caller's parameters to the callee's arguments. */
680 static void
681 ipcp_propagate_stage (void)
683 int i;
684 struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
685 struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
686 struct ipcp_lattice *dest_lat;
687 struct cgraph_edge *cs;
688 struct ipa_jump_func *jump_func;
689 struct ipa_func_list *wl;
690 int count;
692 ipa_check_create_node_params ();
693 ipa_check_create_edge_args ();
695 /* Initialize worklist to contain all functions. */
696 wl = ipa_init_func_list ();
697 while (wl)
699 struct cgraph_node *node = ipa_pop_func_from_list (&wl);
700 struct ipa_node_params *info = IPA_NODE_REF (node);
702 for (cs = node->callees; cs; cs = cs->next_callee)
704 struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee);
705 struct ipa_edge_args *args = IPA_EDGE_REF (cs);
707 if (ipa_is_called_with_var_arguments (callee_info)
708 || !cs->callee->analyzed
709 || ipa_is_called_with_var_arguments (callee_info))
710 continue;
712 count = ipa_get_cs_argument_count (args);
713 for (i = 0; i < count; i++)
715 jump_func = ipa_get_ith_jump_func (args, i);
716 ipcp_lattice_from_jfunc (info, &inc_lat, jump_func);
717 dest_lat = ipcp_get_lattice (callee_info, i);
718 ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
719 if (ipcp_lattice_changed (&new_lat, dest_lat))
721 dest_lat->type = new_lat.type;
722 dest_lat->constant = new_lat.constant;
723 ipa_push_func_to_list (&wl, cs->callee);
730 /* Call the constant propagation algorithm and re-call it if necessary
731 (if there are undetermined values left). */
732 static void
733 ipcp_iterate_stage (void)
735 struct cgraph_node *node;
736 n_cloning_candidates = 0;
738 if (dump_file)
739 fprintf (dump_file, "\nIPA iterate stage:\n\n");
741 if (in_lto_p)
742 ipa_update_after_lto_read ();
744 for (node = cgraph_nodes; node; node = node->next)
746 ipcp_initialize_node_lattices (node);
747 ipcp_compute_node_scale (node);
749 if (dump_file && (dump_flags & TDF_DETAILS))
751 ipcp_print_all_lattices (dump_file);
752 ipcp_function_scale_print (dump_file);
755 ipcp_propagate_stage ();
756 if (ipcp_change_tops_to_bottom ())
757 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
758 This change should be propagated. */
760 gcc_assert (n_cloning_candidates);
761 ipcp_propagate_stage ();
763 if (dump_file)
765 fprintf (dump_file, "\nIPA lattices after propagation:\n");
766 ipcp_print_all_lattices (dump_file);
767 if (dump_flags & TDF_DETAILS)
768 ipcp_print_profile_data (dump_file);
772 /* Check conditions to forbid constant insertion to function described by
773 NODE. */
774 static inline bool
775 ipcp_node_modifiable_p (struct cgraph_node *node)
777 /* Once we will be able to do in-place replacement, we can be more
778 lax here. */
779 return ipcp_versionable_function_p (node);
782 /* Print count scale data structures. */
783 static void
784 ipcp_function_scale_print (FILE * f)
786 struct cgraph_node *node;
788 for (node = cgraph_nodes; node; node = node->next)
790 if (!node->analyzed)
791 continue;
792 fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
793 fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
794 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
798 /* Print counts of all cgraph nodes. */
799 static void
800 ipcp_print_func_profile_counts (FILE * f)
802 struct cgraph_node *node;
804 for (node = cgraph_nodes; node; node = node->next)
806 fprintf (f, "function %s: ", cgraph_node_name (node));
807 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
808 " \n", (HOST_WIDE_INT) node->count);
812 /* Print counts of all cgraph edges. */
813 static void
814 ipcp_print_call_profile_counts (FILE * f)
816 struct cgraph_node *node;
817 struct cgraph_edge *cs;
819 for (node = cgraph_nodes; node; node = node->next)
821 for (cs = node->callees; cs; cs = cs->next_callee)
823 fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
824 cgraph_node_name (cs->callee));
825 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
826 (HOST_WIDE_INT) cs->count);
831 /* Print profile info for all functions. */
832 static void
833 ipcp_print_profile_data (FILE * f)
835 fprintf (f, "\nNODE COUNTS :\n");
836 ipcp_print_func_profile_counts (f);
837 fprintf (f, "\nCS COUNTS stage:\n");
838 ipcp_print_call_profile_counts (f);
841 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
842 processed by versioning, which operates according to the flags set.
843 PARM_TREE is the formal parameter found to be constant. LAT represents the
844 constant. */
845 static struct ipa_replace_map *
846 ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
848 struct ipa_replace_map *replace_map;
849 tree const_val;
851 replace_map = GGC_NEW (struct ipa_replace_map);
852 const_val = build_const_val (lat, TREE_TYPE (parm_tree));
853 if (dump_file)
855 fprintf (dump_file, " replacing param ");
856 print_generic_expr (dump_file, parm_tree, 0);
857 fprintf (dump_file, " with const ");
858 print_generic_expr (dump_file, const_val, 0);
859 fprintf (dump_file, "\n");
861 replace_map->old_tree = parm_tree;
862 replace_map->new_tree = const_val;
863 replace_map->replace_p = true;
864 replace_map->ref_p = false;
866 return replace_map;
869 /* Return true if this callsite should be redirected to the original callee
870 (instead of the cloned one). */
871 static bool
872 ipcp_need_redirect_p (struct cgraph_edge *cs)
874 struct ipa_node_params *orig_callee_info;
875 int i, count;
876 struct ipa_jump_func *jump_func;
877 struct cgraph_node *node = cs->callee, *orig;
879 if (!n_cloning_candidates)
880 return false;
882 if ((orig = ipcp_get_orig_node (node)) != NULL)
883 node = orig;
884 if (ipcp_get_orig_node (cs->caller))
885 return false;
887 orig_callee_info = IPA_NODE_REF (node);
888 count = ipa_get_param_count (orig_callee_info);
889 for (i = 0; i < count; i++)
891 struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i);
892 if (ipcp_lat_is_const (lat))
894 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
895 if (jump_func->type != IPA_JF_CONST)
896 return true;
900 return false;
903 /* Fix the callsites and the call graph after function cloning was done. */
904 static void
905 ipcp_update_callgraph (void)
907 struct cgraph_node *node;
909 for (node = cgraph_nodes; node; node = node->next)
910 if (node->analyzed && ipcp_node_is_clone (node))
912 bitmap args_to_skip = BITMAP_ALLOC (NULL);
913 struct cgraph_node *orig_node = ipcp_get_orig_node (node);
914 struct ipa_node_params *info = IPA_NODE_REF (orig_node);
915 int i, count = ipa_get_param_count (info);
916 struct cgraph_edge *cs, *next;
918 for (i = 0; i < count; i++)
920 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
921 tree parm_tree = ipa_get_param (info, i);
923 /* We can proactively remove obviously unused arguments. */
924 if (is_gimple_reg (parm_tree)
925 && !gimple_default_def (DECL_STRUCT_FUNCTION (orig_node->decl),
926 parm_tree))
928 bitmap_set_bit (args_to_skip, i);
929 continue;
932 if (lat->type == IPA_CONST_VALUE)
933 bitmap_set_bit (args_to_skip, i);
935 for (cs = node->callers; cs; cs = next)
937 next = cs->next_caller;
938 if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
939 cgraph_redirect_edge_callee (cs, orig_node);
944 /* Update profiling info for versioned functions and the functions they were
945 versioned from. */
946 static void
947 ipcp_update_profiling (void)
949 struct cgraph_node *node, *orig_node;
950 gcov_type scale, scale_complement;
951 struct cgraph_edge *cs;
953 for (node = cgraph_nodes; node; node = node->next)
955 if (ipcp_node_is_clone (node))
957 orig_node = ipcp_get_orig_node (node);
958 scale = ipcp_get_node_scale (orig_node);
959 node->count = orig_node->count * scale / REG_BR_PROB_BASE;
960 scale_complement = REG_BR_PROB_BASE - scale;
961 orig_node->count =
962 orig_node->count * scale_complement / REG_BR_PROB_BASE;
963 for (cs = node->callees; cs; cs = cs->next_callee)
964 cs->count = cs->count * scale / REG_BR_PROB_BASE;
965 for (cs = orig_node->callees; cs; cs = cs->next_callee)
966 cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
971 /* If NODE was cloned, how much would program grow? */
972 static long
973 ipcp_estimate_growth (struct cgraph_node *node)
975 struct cgraph_edge *cs;
976 int redirectable_node_callers = 0;
977 int removable_args = 0;
978 bool need_original = !cgraph_only_called_directly_p (node);
979 struct ipa_node_params *info;
980 int i, count;
981 int growth;
983 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
984 if (cs->caller == node || !ipcp_need_redirect_p (cs))
985 redirectable_node_callers++;
986 else
987 need_original = true;
989 /* If we will be able to fully replace orignal node, we never increase
990 program size. */
991 if (!need_original)
992 return 0;
994 info = IPA_NODE_REF (node);
995 count = ipa_get_param_count (info);
996 for (i = 0; i < count; i++)
998 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
999 tree parm_tree = ipa_get_param (info, i);
1001 /* We can proactively remove obviously unused arguments. */
1002 if (is_gimple_reg (parm_tree)
1003 && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
1004 parm_tree))
1005 removable_args++;
1007 if (lat->type == IPA_CONST_VALUE)
1008 removable_args++;
1011 /* We make just very simple estimate of savings for removal of operand from
1012 call site. Precise cost is dificult to get, as our size metric counts
1013 constants and moves as free. Generally we are looking for cases that
1014 small function is called very many times. */
1015 growth = node->local.inline_summary.self_size
1016 - removable_args * redirectable_node_callers;
1017 if (growth < 0)
1018 return 0;
1019 return growth;
1023 /* Estimate cost of cloning NODE. */
1024 static long
1025 ipcp_estimate_cloning_cost (struct cgraph_node *node)
1027 int freq_sum = 1;
1028 gcov_type count_sum = 1;
1029 struct cgraph_edge *e;
1030 int cost;
1032 cost = ipcp_estimate_growth (node) * 1000;
1033 if (!cost)
1035 if (dump_file)
1036 fprintf (dump_file, "Versioning of %s will save code size\n",
1037 cgraph_node_name (node));
1038 return 0;
1041 for (e = node->callers; e; e = e->next_caller)
1042 if (!bitmap_bit_p (dead_nodes, e->caller->uid)
1043 && !ipcp_need_redirect_p (e))
1045 count_sum += e->count;
1046 freq_sum += e->frequency + 1;
1049 if (max_count)
1050 cost /= count_sum * 1000 / max_count + 1;
1051 else
1052 cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
1053 if (dump_file)
1054 fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
1055 cgraph_node_name (node), cost, node->local.inline_summary.self_size,
1056 freq_sum);
1057 return cost + 1;
1060 /* Return number of live constant parameters. */
1061 static int
1062 ipcp_const_param_count (struct cgraph_node *node)
1064 int const_param = 0;
1065 struct ipa_node_params *info = IPA_NODE_REF (node);
1066 int count = ipa_get_param_count (info);
1067 int i;
1069 for (i = 0; i < count; i++)
1071 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1072 tree parm_tree = ipa_get_param (info, i);
1073 if (ipcp_lat_is_insertable (lat)
1074 /* Do not count obviously unused arguments. */
1075 && (!is_gimple_reg (parm_tree)
1076 || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
1077 parm_tree)))
1078 const_param++;
1080 return const_param;
1083 /* Propagate the constant parameters found by ipcp_iterate_stage()
1084 to the function's code. */
1085 static void
1086 ipcp_insert_stage (void)
1088 struct cgraph_node *node, *node1 = NULL;
1089 int i;
1090 VEC (cgraph_edge_p, heap) * redirect_callers;
1091 VEC (ipa_replace_map_p,gc)* replace_trees;
1092 int node_callers, count;
1093 tree parm_tree;
1094 struct ipa_replace_map *replace_param;
1095 fibheap_t heap;
1096 long overall_size = 0, new_size = 0;
1097 long max_new_size;
1099 ipa_check_create_node_params ();
1100 ipa_check_create_edge_args ();
1101 if (dump_file)
1102 fprintf (dump_file, "\nIPA insert stage:\n\n");
1104 dead_nodes = BITMAP_ALLOC (NULL);
1106 for (node = cgraph_nodes; node; node = node->next)
1107 if (node->analyzed)
1109 if (node->count > max_count)
1110 max_count = node->count;
1111 overall_size += node->local.inline_summary.self_size;
1114 max_new_size = overall_size;
1115 if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1116 max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1117 max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
1119 /* First collect all functions we proved to have constant arguments to heap. */
1120 heap = fibheap_new ();
1121 for (node = cgraph_nodes; node; node = node->next)
1123 struct ipa_node_params *info;
1124 /* Propagation of the constant is forbidden in certain conditions. */
1125 if (!node->analyzed || !ipcp_node_modifiable_p (node))
1126 continue;
1127 info = IPA_NODE_REF (node);
1128 if (ipa_is_called_with_var_arguments (info))
1129 continue;
1130 if (ipcp_const_param_count (node))
1131 node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
1134 /* Now clone in priority order until code size growth limits are met or
1135 heap is emptied. */
1136 while (!fibheap_empty (heap))
1138 struct ipa_node_params *info;
1139 int growth = 0;
1140 bitmap args_to_skip;
1141 struct cgraph_edge *cs;
1143 node = (struct cgraph_node *)fibheap_extract_min (heap);
1144 node->aux = NULL;
1145 if (dump_file)
1146 fprintf (dump_file, "considering function %s\n",
1147 cgraph_node_name (node));
1149 growth = ipcp_estimate_growth (node);
1151 if (new_size + growth > max_new_size)
1152 break;
1153 if (growth
1154 && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
1156 if (dump_file)
1157 fprintf (dump_file, "Not versioning, cold code would grow");
1158 continue;
1161 new_size += growth;
1163 /* Look if original function becomes dead after clonning. */
1164 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1165 if (cs->caller == node || ipcp_need_redirect_p (cs))
1166 break;
1167 if (!cs && cgraph_only_called_directly_p (node))
1168 bitmap_set_bit (dead_nodes, node->uid);
1170 info = IPA_NODE_REF (node);
1171 count = ipa_get_param_count (info);
1173 replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
1174 args_to_skip = BITMAP_GGC_ALLOC ();
1175 for (i = 0; i < count; i++)
1177 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1178 parm_tree = ipa_get_param (info, i);
1180 /* We can proactively remove obviously unused arguments. */
1181 if (is_gimple_reg (parm_tree)
1182 && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
1183 parm_tree))
1185 bitmap_set_bit (args_to_skip, i);
1186 continue;
1189 if (lat->type == IPA_CONST_VALUE)
1191 replace_param =
1192 ipcp_create_replace_map (parm_tree, lat);
1193 VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
1194 bitmap_set_bit (args_to_skip, i);
1198 /* Compute how many callers node has. */
1199 node_callers = 0;
1200 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1201 node_callers++;
1202 redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
1203 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1204 VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
1206 /* Redirecting all the callers of the node to the
1207 new versioned node. */
1208 node1 =
1209 cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
1210 args_to_skip);
1211 args_to_skip = NULL;
1212 VEC_free (cgraph_edge_p, heap, redirect_callers);
1213 replace_trees = NULL;
1215 if (node1 == NULL)
1216 continue;
1217 if (dump_file)
1218 fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
1219 cgraph_node_name (node), (int)growth, (int)new_size);
1220 ipcp_init_cloned_node (node, node1);
1222 /* TODO: We can use indirect inlning info to produce new calls. */
1224 if (dump_file)
1225 dump_function_to_file (node1->decl, dump_file, dump_flags);
1227 for (cs = node->callees; cs; cs = cs->next_callee)
1228 if (cs->callee->aux)
1230 fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
1231 cs->callee->aux = fibheap_insert (heap,
1232 ipcp_estimate_cloning_cost (cs->callee),
1233 cs->callee);
1237 while (!fibheap_empty (heap))
1239 if (dump_file)
1240 fprintf (dump_file, "skipping function %s\n",
1241 cgraph_node_name (node));
1242 node = (struct cgraph_node *) fibheap_extract_min (heap);
1243 node->aux = NULL;
1245 fibheap_delete (heap);
1246 BITMAP_FREE (dead_nodes);
1247 ipcp_update_callgraph ();
1248 ipcp_update_profiling ();
1251 /* The IPCP driver. */
1252 static unsigned int
1253 ipcp_driver (void)
1255 cgraph_remove_unreachable_nodes (true,dump_file);
1256 if (dump_file)
1258 fprintf (dump_file, "\nIPA structures before propagation:\n");
1259 if (dump_flags & TDF_DETAILS)
1260 ipa_print_all_params (dump_file);
1261 ipa_print_all_jump_functions (dump_file);
1263 /* 2. Do the interprocedural propagation. */
1264 ipcp_iterate_stage ();
1265 /* 3. Insert the constants found to the functions. */
1266 ipcp_insert_stage ();
1267 if (dump_file && (dump_flags & TDF_DETAILS))
1269 fprintf (dump_file, "\nProfiling info after insert stage:\n");
1270 ipcp_print_profile_data (dump_file);
1272 /* Free all IPCP structures. */
1273 free_all_ipa_structures_after_ipa_cp ();
1274 if (dump_file)
1275 fprintf (dump_file, "\nIPA constant propagation end\n");
1276 return 0;
1279 /* Note function body size. */
1280 static void
1281 ipcp_generate_summary (void)
1283 if (dump_file)
1284 fprintf (dump_file, "\nIPA constant propagation start:\n");
1285 ipa_check_create_node_params ();
1286 ipa_check_create_edge_args ();
1287 ipa_register_cgraph_hooks ();
1288 /* 1. Call the init stage to initialize
1289 the ipa_node_params and ipa_edge_args structures. */
1290 ipcp_init_stage ();
1293 /* Write ipcp summary for nodes in SET. */
1294 static void
1295 ipcp_write_summary (cgraph_node_set set)
1297 ipa_prop_write_jump_functions (set);
1300 /* Read ipcp summary. */
1301 static void
1302 ipcp_read_summary (void)
1304 ipa_prop_read_jump_functions ();
1307 /* Gate for IPCP optimization. */
1308 static bool
1309 cgraph_gate_cp (void)
1311 return flag_ipa_cp;
1314 struct ipa_opt_pass_d pass_ipa_cp =
1317 IPA_PASS,
1318 "cp", /* name */
1319 cgraph_gate_cp, /* gate */
1320 ipcp_driver, /* execute */
1321 NULL, /* sub */
1322 NULL, /* next */
1323 0, /* static_pass_number */
1324 TV_IPA_CONSTANT_PROP, /* tv_id */
1325 0, /* properties_required */
1326 0, /* properties_provided */
1327 0, /* properties_destroyed */
1328 0, /* todo_flags_start */
1329 TODO_dump_cgraph | TODO_dump_func |
1330 TODO_remove_functions /* todo_flags_finish */
1332 ipcp_generate_summary, /* generate_summary */
1333 ipcp_write_summary, /* write_summary */
1334 ipcp_read_summary, /* read_summary */
1335 NULL, /* function_read_summary */
1336 lto_ipa_fixup_call_notes, /* stmt_fixup */
1337 0, /* TODOs */
1338 NULL, /* function_transform */
1339 NULL, /* variable_transform */