Merged with trunk at revision 162480.
[official-gcc.git] / gcc / ipa-cp.c
blob6918273cba6a5a776687a44d68a1a120e0647804
1 /* Interprocedural constant propagation
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* Interprocedural constant propagation. The aim of interprocedural constant
23 propagation (IPCP) is to find which function's argument has the same
24 constant value in each invocation throughout the whole program. For example,
25 consider the following program:
27 int g (int y)
29 printf ("value is %d",y);
32 int f (int x)
34 g (x);
37 int h (int y)
39 g (y);
42 void main (void)
44 f (3);
45 h (3);
49 The IPCP algorithm will find that g's formal argument y is always called
50 with the value 3.
52 The algorithm used is based on "Interprocedural Constant Propagation", by
53 Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
54 152-161
56 The optimization is divided into three stages:
58 First stage - intraprocedural analysis
59 =======================================
60 This phase computes jump_function and modification flags.
62 A jump function for a callsite represents the values passed as an actual
63 arguments of a given callsite. There are three types of values:
64 Pass through - the caller's formal parameter is passed as an actual argument.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 The jump function info, ipa_jump_func, is stored in ipa_edge_args
69 structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
70 modified_flags are defined in ipa_node_params structure
71 (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
73 -ipcp_init_stage() is the first stage driver.
75 Second stage - interprocedural analysis
76 ========================================
77 This phase does the interprocedural constant propagation.
78 It computes lattices for all formal parameters in the program
79 and their value that may be:
80 TOP - unknown.
81 BOTTOM - non constant.
82 CONSTANT - constant value.
84 Lattice describing a formal parameter p will have a constant value if all
85 callsites invoking this function have the same constant value passed to p.
87 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
88 structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
90 -ipcp_iterate_stage() is the second stage driver.
92 Third phase - transformation of function code
93 ============================================
94 Propagates the constant-valued formals into the function.
95 For each function whose parameters are constants, we create its clone.
97 Then we process the clone in two ways:
98 1. We insert an assignment statement 'parameter = const' at the beginning
99 of the cloned function.
100 2. For read-only parameters that do not live in memory, we replace all their
101 uses with the constant.
103 We also need to modify some callsites to call the cloned functions instead
104 of the original ones. For a callsite passing an argument found to be a
105 constant by IPCP, there are two different cases to handle:
106 1. A constant is passed as an argument. In this case the callsite in the
107 should be redirected to call the cloned callee.
108 2. A parameter (of the caller) passed as an argument (pass through
109 argument). In such cases both the caller and the callee have clones and
110 only the callsite in the cloned caller is redirected to call to the
111 cloned callee.
113 This update is done in two steps: First all cloned functions are created
114 during a traversal of the call graph, during which all callsites are
115 redirected to call the cloned function. Then the callsites are traversed
116 and many calls redirected back to fit the description above.
118 -ipcp_insert_stage() is the third phase driver.
122 #include "config.h"
123 #include "system.h"
124 #include "coretypes.h"
125 #include "tree.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "ipa-prop.h"
129 #include "tree-flow.h"
130 #include "tree-pass.h"
131 #include "flags.h"
132 #include "timevar.h"
133 #include "diagnostic.h"
134 #include "tree-pretty-print.h"
135 #include "tree-dump.h"
136 #include "tree-inline.h"
137 #include "fibheap.h"
138 #include "params.h"
140 /* Number of functions identified as candidates for cloning. When not cloning
141 we can simplify iterate stage not forcing it to go through the decision
142 on what is profitable and what not. */
143 static int n_cloning_candidates;
145 /* Maximal count found in program. */
146 static gcov_type max_count;
148 /* Cgraph nodes that has been completely replaced by cloning during iterate
149 * stage and will be removed after ipcp is finished. */
150 static bitmap dead_nodes;
152 static void ipcp_print_profile_data (FILE *);
153 static void ipcp_function_scale_print (FILE *);
155 /* Get the original node field of ipa_node_params associated with node NODE. */
156 static inline struct cgraph_node *
157 ipcp_get_orig_node (struct cgraph_node *node)
159 return IPA_NODE_REF (node)->ipcp_orig_node;
162 /* Return true if NODE describes a cloned/versioned function. */
163 static inline bool
164 ipcp_node_is_clone (struct cgraph_node *node)
166 return (ipcp_get_orig_node (node) != NULL);
169 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
170 as the ipcp_orig_node field in ipa_node_params. */
171 static void
172 ipcp_init_cloned_node (struct cgraph_node *orig_node,
173 struct cgraph_node *new_node)
175 gcc_checking_assert (ipa_node_params_vector
176 && (VEC_length (ipa_node_params_t,
177 ipa_node_params_vector)
178 > (unsigned) cgraph_max_uid));
179 gcc_checking_assert (IPA_NODE_REF (new_node)->params);
180 IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
183 /* Return scale for NODE. */
184 static inline gcov_type
185 ipcp_get_node_scale (struct cgraph_node *node)
187 return IPA_NODE_REF (node)->count_scale;
190 /* Set COUNT as scale for NODE. */
191 static inline void
192 ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
194 IPA_NODE_REF (node)->count_scale = count;
197 /* Return whether LAT is a constant lattice. */
198 static inline bool
199 ipcp_lat_is_const (struct ipcp_lattice *lat)
201 if (lat->type == IPA_CONST_VALUE)
202 return true;
203 else
204 return false;
207 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
208 into the code (i.e. constants excluding member pointers and pointers). */
209 static inline bool
210 ipcp_lat_is_insertable (struct ipcp_lattice *lat)
212 return lat->type == IPA_CONST_VALUE;
215 /* Return true if LAT1 and LAT2 are equal. */
216 static inline bool
217 ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
219 gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
220 if (lat1->type != lat2->type)
221 return false;
223 if (TREE_CODE (lat1->constant) == ADDR_EXPR
224 && TREE_CODE (lat2->constant) == ADDR_EXPR
225 && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL
226 && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL)
227 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)),
228 DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0);
229 else
230 return operand_equal_p (lat1->constant, lat2->constant, 0);
233 /* Compute Meet arithmetics:
234 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
235 Meet (IPA_TOP,x) = x
236 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
237 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
238 static void
239 ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
240 struct ipcp_lattice *lat2)
242 if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
244 res->type = IPA_BOTTOM;
245 return;
247 if (lat1->type == IPA_TOP)
249 res->type = lat2->type;
250 res->constant = lat2->constant;
251 return;
253 if (lat2->type == IPA_TOP)
255 res->type = lat1->type;
256 res->constant = lat1->constant;
257 return;
259 if (!ipcp_lats_are_equal (lat1, lat2))
261 res->type = IPA_BOTTOM;
262 return;
264 res->type = lat1->type;
265 res->constant = lat1->constant;
268 /* Return the lattice corresponding to the Ith formal parameter of the function
269 described by INFO. */
270 static inline struct ipcp_lattice *
271 ipcp_get_lattice (struct ipa_node_params *info, int i)
273 return &(info->params[i].ipcp_lattice);
276 /* Given the jump function JFUNC, compute the lattice LAT that describes the
277 value coming down the callsite. INFO describes the caller node so that
278 pass-through jump functions can be evaluated. */
279 static void
280 ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat,
281 struct ipa_jump_func *jfunc)
283 if (jfunc->type == IPA_JF_CONST)
285 lat->type = IPA_CONST_VALUE;
286 lat->constant = jfunc->value.constant;
288 else if (jfunc->type == IPA_JF_PASS_THROUGH)
290 struct ipcp_lattice *caller_lat;
291 tree cst;
293 caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id);
294 lat->type = caller_lat->type;
295 if (caller_lat->type != IPA_CONST_VALUE)
296 return;
297 cst = caller_lat->constant;
299 if (jfunc->value.pass_through.operation != NOP_EXPR)
301 tree restype;
302 if (TREE_CODE_CLASS (jfunc->value.pass_through.operation)
303 == tcc_comparison)
304 restype = boolean_type_node;
305 else
306 restype = TREE_TYPE (cst);
307 cst = fold_binary (jfunc->value.pass_through.operation,
308 restype, cst, jfunc->value.pass_through.operand);
310 if (!cst || !is_gimple_ip_invariant (cst))
311 lat->type = IPA_BOTTOM;
312 lat->constant = cst;
314 else if (jfunc->type == IPA_JF_ANCESTOR)
316 struct ipcp_lattice *caller_lat;
317 tree t;
318 bool ok;
320 caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id);
321 lat->type = caller_lat->type;
322 if (caller_lat->type != IPA_CONST_VALUE)
323 return;
324 if (TREE_CODE (caller_lat->constant) != ADDR_EXPR)
326 /* This can happen when the constant is a NULL pointer. */
327 lat->type = IPA_BOTTOM;
328 return;
330 t = TREE_OPERAND (caller_lat->constant, 0);
331 ok = build_ref_for_offset (&t, TREE_TYPE (t),
332 jfunc->value.ancestor.offset,
333 jfunc->value.ancestor.type, false);
334 if (!ok)
336 lat->type = IPA_BOTTOM;
337 lat->constant = NULL_TREE;
339 else
340 lat->constant = build_fold_addr_expr (t);
342 else
343 lat->type = IPA_BOTTOM;
346 /* True when OLD_LAT and NEW_LAT values are not the same. */
348 static bool
349 ipcp_lattice_changed (struct ipcp_lattice *old_lat,
350 struct ipcp_lattice *new_lat)
352 if (old_lat->type == new_lat->type)
354 if (!ipcp_lat_is_const (old_lat))
355 return false;
356 if (ipcp_lats_are_equal (old_lat, new_lat))
357 return false;
359 return true;
362 /* Print all ipcp_lattices of all functions to F. */
363 static void
364 ipcp_print_all_lattices (FILE * f)
366 struct cgraph_node *node;
367 int i, count;
369 fprintf (f, "\nLattice:\n");
370 for (node = cgraph_nodes; node; node = node->next)
372 struct ipa_node_params *info;
374 if (!node->analyzed)
375 continue;
376 info = IPA_NODE_REF (node);
377 fprintf (f, " Node: %s:\n", cgraph_node_name (node));
378 count = ipa_get_param_count (info);
379 for (i = 0; i < count; i++)
381 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
383 fprintf (f, " param [%d]: ", i);
384 if (lat->type == IPA_CONST_VALUE)
386 tree cst = lat->constant;
387 fprintf (f, "type is CONST ");
388 print_generic_expr (f, cst, 0);
389 if (TREE_CODE (cst) == ADDR_EXPR
390 && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL)
392 fprintf (f, " -> ");
393 print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)),
396 fprintf (f, "\n");
398 else if (lat->type == IPA_TOP)
399 fprintf (f, "type is TOP\n");
400 else
401 fprintf (f, "type is BOTTOM\n");
406 /* Return true if ipcp algorithms would allow cloning NODE. */
408 static bool
409 ipcp_versionable_function_p (struct cgraph_node *node)
411 struct cgraph_edge *edge;
413 /* There are a number of generic reasons functions cannot be versioned. */
414 if (!node->local.versionable)
415 return false;
417 /* Removing arguments doesn't work if the function takes varargs
418 or use __builtin_apply_args. */
419 for (edge = node->callees; edge; edge = edge->next_callee)
421 tree t = edge->callee->decl;
422 if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL
423 && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS
424 || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START))
425 return false;
428 return true;
431 /* Return true if this NODE is viable candidate for cloning. */
432 static bool
433 ipcp_cloning_candidate_p (struct cgraph_node *node)
435 int n_calls = 0;
436 int n_hot_calls = 0;
437 gcov_type direct_call_sum = 0;
438 struct cgraph_edge *e;
440 /* We never clone functions that are not visible from outside.
441 FIXME: in future we should clone such functions when they are called with
442 different constants, but current ipcp implementation is not good on this.
444 if (cgraph_only_called_directly_p (node) || !node->analyzed)
445 return false;
447 if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
449 if (dump_file)
450 fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n",
451 cgraph_node_name (node));
452 return false;
454 if (!ipcp_versionable_function_p (node))
456 if (dump_file)
457 fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
458 cgraph_node_name (node));
459 return false;
461 for (e = node->callers; e; e = e->next_caller)
463 direct_call_sum += e->count;
464 n_calls ++;
465 if (cgraph_maybe_hot_edge_p (e))
466 n_hot_calls ++;
469 if (!n_calls)
471 if (dump_file)
472 fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
473 cgraph_node_name (node));
474 return false;
476 if (node->local.inline_summary.self_size < n_calls)
478 if (dump_file)
479 fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
480 cgraph_node_name (node));
481 return true;
484 if (!flag_ipa_cp_clone)
486 if (dump_file)
487 fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
488 cgraph_node_name (node));
489 return false;
492 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
494 if (dump_file)
495 fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
496 cgraph_node_name (node));
497 return false;
500 /* When profile is available and function is hot, propagate into it even if
501 calls seems cold; constant propagation can improve function's speed
502 significandly. */
503 if (max_count)
505 if (direct_call_sum > node->count * 90 / 100)
507 if (dump_file)
508 fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
509 cgraph_node_name (node));
510 return true;
513 if (!n_hot_calls)
515 if (dump_file)
516 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
517 cgraph_node_name (node));
518 return false;
520 if (dump_file)
521 fprintf (dump_file, "Considering %s for cloning.\n",
522 cgraph_node_name (node));
523 return true;
526 /* Initialize ipcp_lattices array. The lattices corresponding to supported
527 types (integers, real types and Fortran constants defined as const_decls)
528 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
529 static void
530 ipcp_initialize_node_lattices (struct cgraph_node *node)
532 int i;
533 struct ipa_node_params *info = IPA_NODE_REF (node);
534 enum ipa_lattice_type type;
536 if (ipa_is_called_with_var_arguments (info))
537 type = IPA_BOTTOM;
538 else if (cgraph_only_called_directly_p (node))
539 type = IPA_TOP;
540 /* When cloning is allowed, we can assume that externally visible functions
541 are not called. We will compensate this by cloning later. */
542 else if (ipcp_cloning_candidate_p (node))
543 type = IPA_TOP, n_cloning_candidates ++;
544 else
545 type = IPA_BOTTOM;
547 for (i = 0; i < ipa_get_param_count (info) ; i++)
548 ipcp_get_lattice (info, i)->type = type;
551 /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT.
552 Return the tree. */
553 static tree
554 build_const_val (struct ipcp_lattice *lat, tree tree_type)
556 tree val;
558 gcc_assert (ipcp_lat_is_const (lat));
559 val = lat->constant;
561 if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
563 if (fold_convertible_p (tree_type, val))
564 return fold_build1 (NOP_EXPR, tree_type, val);
565 else
566 return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
568 return val;
571 /* Compute the proper scale for NODE. It is the ratio between the number of
572 direct calls (represented on the incoming cgraph_edges) and sum of all
573 invocations of NODE (represented as count in cgraph_node).
575 FIXME: This code is wrong. Since the callers can be also clones and
576 the clones are not scaled yet, the sums gets unrealistically high.
577 To properly compute the counts, we would need to do propagation across
578 callgraph (as external call to A might imply call to non-clonned B
579 if A's clone calls clonned B). */
580 static void
581 ipcp_compute_node_scale (struct cgraph_node *node)
583 gcov_type sum;
584 struct cgraph_edge *cs;
586 sum = 0;
587 /* Compute sum of all counts of callers. */
588 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
589 sum += cs->count;
590 /* Work around the unrealistically high sum problem. We just don't want
591 the non-cloned body to have negative or very low frequency. Since
592 majority of execution time will be spent in clones anyway, this should
593 give good enough profile. */
594 if (sum > node->count * 9 / 10)
595 sum = node->count * 9 / 10;
596 if (node->count == 0)
597 ipcp_set_node_scale (node, 0);
598 else
599 ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
602 /* Initialization and computation of IPCP data structures. This is the initial
603 intraprocedural analysis of functions, which gathers information to be
604 propagated later on. */
606 static void
607 ipcp_init_stage (void)
609 struct cgraph_node *node;
611 for (node = cgraph_nodes; node; node = node->next)
612 if (node->analyzed)
614 /* Unreachable nodes should have been eliminated before ipcp. */
615 gcc_assert (node->needed || node->reachable);
617 node->local.versionable = tree_versionable_function_p (node->decl);
618 ipa_analyze_node (node);
622 /* Return true if there are some formal parameters whose value is IPA_TOP (in
623 the whole compilation unit). Change their values to IPA_BOTTOM, since they
624 most probably get their values from outside of this compilation unit. */
625 static bool
626 ipcp_change_tops_to_bottom (void)
628 int i, count;
629 struct cgraph_node *node;
630 bool prop_again;
632 prop_again = false;
633 for (node = cgraph_nodes; node; node = node->next)
635 struct ipa_node_params *info = IPA_NODE_REF (node);
636 count = ipa_get_param_count (info);
637 for (i = 0; i < count; i++)
639 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
640 if (lat->type == IPA_TOP)
642 prop_again = true;
643 if (dump_file)
645 fprintf (dump_file, "Forcing param ");
646 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
647 fprintf (dump_file, " of node %s to bottom.\n",
648 cgraph_node_name (node));
650 lat->type = IPA_BOTTOM;
654 return prop_again;
657 /* Interprocedural analysis. The algorithm propagates constants from the
658 caller's parameters to the callee's arguments. */
659 static void
660 ipcp_propagate_stage (void)
662 int i;
663 struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
664 struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
665 struct ipcp_lattice *dest_lat;
666 struct cgraph_edge *cs;
667 struct ipa_jump_func *jump_func;
668 struct ipa_func_list *wl;
669 int count;
671 ipa_check_create_node_params ();
672 ipa_check_create_edge_args ();
674 /* Initialize worklist to contain all functions. */
675 wl = ipa_init_func_list ();
676 while (wl)
678 struct cgraph_node *node = ipa_pop_func_from_list (&wl);
679 struct ipa_node_params *info = IPA_NODE_REF (node);
681 for (cs = node->callees; cs; cs = cs->next_callee)
683 struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee);
684 struct ipa_edge_args *args = IPA_EDGE_REF (cs);
686 if (ipa_is_called_with_var_arguments (callee_info)
687 || !cs->callee->analyzed
688 || ipa_is_called_with_var_arguments (callee_info))
689 continue;
691 count = ipa_get_cs_argument_count (args);
692 for (i = 0; i < count; i++)
694 jump_func = ipa_get_ith_jump_func (args, i);
695 ipcp_lattice_from_jfunc (info, &inc_lat, jump_func);
696 dest_lat = ipcp_get_lattice (callee_info, i);
697 ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
698 if (ipcp_lattice_changed (&new_lat, dest_lat))
700 dest_lat->type = new_lat.type;
701 dest_lat->constant = new_lat.constant;
702 ipa_push_func_to_list (&wl, cs->callee);
709 /* Call the constant propagation algorithm and re-call it if necessary
710 (if there are undetermined values left). */
711 static void
712 ipcp_iterate_stage (void)
714 struct cgraph_node *node;
715 n_cloning_candidates = 0;
717 if (dump_file)
718 fprintf (dump_file, "\nIPA iterate stage:\n\n");
720 if (in_lto_p)
721 ipa_update_after_lto_read ();
723 for (node = cgraph_nodes; node; node = node->next)
725 ipcp_initialize_node_lattices (node);
726 ipcp_compute_node_scale (node);
728 if (dump_file && (dump_flags & TDF_DETAILS))
730 ipcp_print_all_lattices (dump_file);
731 ipcp_function_scale_print (dump_file);
734 ipcp_propagate_stage ();
735 if (ipcp_change_tops_to_bottom ())
736 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
737 This change should be propagated. */
739 gcc_assert (n_cloning_candidates);
740 ipcp_propagate_stage ();
742 if (dump_file)
744 fprintf (dump_file, "\nIPA lattices after propagation:\n");
745 ipcp_print_all_lattices (dump_file);
746 if (dump_flags & TDF_DETAILS)
747 ipcp_print_profile_data (dump_file);
751 /* Check conditions to forbid constant insertion to function described by
752 NODE. */
753 static inline bool
754 ipcp_node_modifiable_p (struct cgraph_node *node)
756 /* Once we will be able to do in-place replacement, we can be more
757 lax here. */
758 return ipcp_versionable_function_p (node);
761 /* Print count scale data structures. */
762 static void
763 ipcp_function_scale_print (FILE * f)
765 struct cgraph_node *node;
767 for (node = cgraph_nodes; node; node = node->next)
769 if (!node->analyzed)
770 continue;
771 fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
772 fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
773 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
777 /* Print counts of all cgraph nodes. */
778 static void
779 ipcp_print_func_profile_counts (FILE * f)
781 struct cgraph_node *node;
783 for (node = cgraph_nodes; node; node = node->next)
785 fprintf (f, "function %s: ", cgraph_node_name (node));
786 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
787 " \n", (HOST_WIDE_INT) node->count);
791 /* Print counts of all cgraph edges. */
792 static void
793 ipcp_print_call_profile_counts (FILE * f)
795 struct cgraph_node *node;
796 struct cgraph_edge *cs;
798 for (node = cgraph_nodes; node; node = node->next)
800 for (cs = node->callees; cs; cs = cs->next_callee)
802 fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
803 cgraph_node_name (cs->callee));
804 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
805 (HOST_WIDE_INT) cs->count);
810 /* Print profile info for all functions. */
811 static void
812 ipcp_print_profile_data (FILE * f)
814 fprintf (f, "\nNODE COUNTS :\n");
815 ipcp_print_func_profile_counts (f);
816 fprintf (f, "\nCS COUNTS stage:\n");
817 ipcp_print_call_profile_counts (f);
820 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
821 processed by versioning, which operates according to the flags set.
822 PARM_TREE is the formal parameter found to be constant. LAT represents the
823 constant. */
824 static struct ipa_replace_map *
825 ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
827 struct ipa_replace_map *replace_map;
828 tree const_val;
830 replace_map = ggc_alloc_ipa_replace_map ();
831 const_val = build_const_val (lat, TREE_TYPE (parm_tree));
832 if (dump_file)
834 fprintf (dump_file, " replacing param ");
835 print_generic_expr (dump_file, parm_tree, 0);
836 fprintf (dump_file, " with const ");
837 print_generic_expr (dump_file, const_val, 0);
838 fprintf (dump_file, "\n");
840 replace_map->old_tree = parm_tree;
841 replace_map->new_tree = const_val;
842 replace_map->replace_p = true;
843 replace_map->ref_p = false;
845 return replace_map;
848 /* Return true if this callsite should be redirected to the original callee
849 (instead of the cloned one). */
850 static bool
851 ipcp_need_redirect_p (struct cgraph_edge *cs)
853 struct ipa_node_params *orig_callee_info;
854 int i, count;
855 struct ipa_jump_func *jump_func;
856 struct cgraph_node *node = cs->callee, *orig;
858 if (!n_cloning_candidates)
859 return false;
861 if ((orig = ipcp_get_orig_node (node)) != NULL)
862 node = orig;
863 if (ipcp_get_orig_node (cs->caller))
864 return false;
866 orig_callee_info = IPA_NODE_REF (node);
867 count = ipa_get_param_count (orig_callee_info);
868 for (i = 0; i < count; i++)
870 struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i);
871 if (ipcp_lat_is_const (lat))
873 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
874 if (jump_func->type != IPA_JF_CONST)
875 return true;
879 return false;
882 /* Fix the callsites and the call graph after function cloning was done. */
883 static void
884 ipcp_update_callgraph (void)
886 struct cgraph_node *node;
888 for (node = cgraph_nodes; node; node = node->next)
889 if (node->analyzed && ipcp_node_is_clone (node))
891 bitmap args_to_skip = BITMAP_ALLOC (NULL);
892 struct cgraph_node *orig_node = ipcp_get_orig_node (node);
893 struct ipa_node_params *info = IPA_NODE_REF (orig_node);
894 int i, count = ipa_get_param_count (info);
895 struct cgraph_edge *cs, *next;
897 for (i = 0; i < count; i++)
899 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
901 /* We can proactively remove obviously unused arguments. */
902 if (!ipa_is_param_used (info, i))
904 bitmap_set_bit (args_to_skip, i);
905 continue;
908 if (lat->type == IPA_CONST_VALUE)
909 bitmap_set_bit (args_to_skip, i);
911 for (cs = node->callers; cs; cs = next)
913 next = cs->next_caller;
914 if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs))
915 cgraph_redirect_edge_callee (cs, orig_node);
920 /* Update profiling info for versioned functions and the functions they were
921 versioned from. */
922 static void
923 ipcp_update_profiling (void)
925 struct cgraph_node *node, *orig_node;
926 gcov_type scale, scale_complement;
927 struct cgraph_edge *cs;
929 for (node = cgraph_nodes; node; node = node->next)
931 if (ipcp_node_is_clone (node))
933 orig_node = ipcp_get_orig_node (node);
934 scale = ipcp_get_node_scale (orig_node);
935 node->count = orig_node->count * scale / REG_BR_PROB_BASE;
936 scale_complement = REG_BR_PROB_BASE - scale;
937 orig_node->count =
938 orig_node->count * scale_complement / REG_BR_PROB_BASE;
939 for (cs = node->callees; cs; cs = cs->next_callee)
940 cs->count = cs->count * scale / REG_BR_PROB_BASE;
941 for (cs = orig_node->callees; cs; cs = cs->next_callee)
942 cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
947 /* If NODE was cloned, how much would program grow? */
948 static long
949 ipcp_estimate_growth (struct cgraph_node *node)
951 struct cgraph_edge *cs;
952 int redirectable_node_callers = 0;
953 int removable_args = 0;
954 bool need_original
955 = !cgraph_will_be_removed_from_program_if_no_direct_calls (node);
956 struct ipa_node_params *info;
957 int i, count;
958 int growth;
960 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
961 if (cs->caller == node || !ipcp_need_redirect_p (cs))
962 redirectable_node_callers++;
963 else
964 need_original = true;
966 /* If we will be able to fully replace orignal node, we never increase
967 program size. */
968 if (!need_original)
969 return 0;
971 info = IPA_NODE_REF (node);
972 count = ipa_get_param_count (info);
973 for (i = 0; i < count; i++)
975 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
977 /* We can proactively remove obviously unused arguments. */
978 if (!ipa_is_param_used (info, i))
979 removable_args++;
981 if (lat->type == IPA_CONST_VALUE)
982 removable_args++;
985 /* We make just very simple estimate of savings for removal of operand from
986 call site. Precise cost is dificult to get, as our size metric counts
987 constants and moves as free. Generally we are looking for cases that
988 small function is called very many times. */
989 growth = node->local.inline_summary.self_size
990 - removable_args * redirectable_node_callers;
991 if (growth < 0)
992 return 0;
993 return growth;
997 /* Estimate cost of cloning NODE. */
998 static long
999 ipcp_estimate_cloning_cost (struct cgraph_node *node)
1001 int freq_sum = 1;
1002 gcov_type count_sum = 1;
1003 struct cgraph_edge *e;
1004 int cost;
1006 cost = ipcp_estimate_growth (node) * 1000;
1007 if (!cost)
1009 if (dump_file)
1010 fprintf (dump_file, "Versioning of %s will save code size\n",
1011 cgraph_node_name (node));
1012 return 0;
1015 for (e = node->callers; e; e = e->next_caller)
1016 if (!bitmap_bit_p (dead_nodes, e->caller->uid)
1017 && !ipcp_need_redirect_p (e))
1019 count_sum += e->count;
1020 freq_sum += e->frequency + 1;
1023 if (max_count)
1024 cost /= count_sum * 1000 / max_count + 1;
1025 else
1026 cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
1027 if (dump_file)
1028 fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
1029 cgraph_node_name (node), cost, node->local.inline_summary.self_size,
1030 freq_sum);
1031 return cost + 1;
1034 /* Return number of live constant parameters. */
1035 static int
1036 ipcp_const_param_count (struct cgraph_node *node)
1038 int const_param = 0;
1039 struct ipa_node_params *info = IPA_NODE_REF (node);
1040 int count = ipa_get_param_count (info);
1041 int i;
1043 for (i = 0; i < count; i++)
1045 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1046 if (ipcp_lat_is_insertable (lat)
1047 /* Do not count obviously unused arguments. */
1048 && ipa_is_param_used (info, i))
1049 const_param++;
1051 return const_param;
1054 /* Propagate the constant parameters found by ipcp_iterate_stage()
1055 to the function's code. */
1056 static void
1057 ipcp_insert_stage (void)
1059 struct cgraph_node *node, *node1 = NULL;
1060 int i;
1061 VEC (cgraph_edge_p, heap) * redirect_callers;
1062 VEC (ipa_replace_map_p,gc)* replace_trees;
1063 int node_callers, count;
1064 tree parm_tree;
1065 struct ipa_replace_map *replace_param;
1066 fibheap_t heap;
1067 long overall_size = 0, new_size = 0;
1068 long max_new_size;
1070 ipa_check_create_node_params ();
1071 ipa_check_create_edge_args ();
1072 if (dump_file)
1073 fprintf (dump_file, "\nIPA insert stage:\n\n");
1075 dead_nodes = BITMAP_ALLOC (NULL);
1077 for (node = cgraph_nodes; node; node = node->next)
1078 if (node->analyzed)
1080 if (node->count > max_count)
1081 max_count = node->count;
1082 overall_size += node->local.inline_summary.self_size;
1085 max_new_size = overall_size;
1086 if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1087 max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1088 max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
1090 /* First collect all functions we proved to have constant arguments to heap. */
1091 heap = fibheap_new ();
1092 for (node = cgraph_nodes; node; node = node->next)
1094 struct ipa_node_params *info;
1095 /* Propagation of the constant is forbidden in certain conditions. */
1096 if (!node->analyzed || !ipcp_node_modifiable_p (node))
1097 continue;
1098 info = IPA_NODE_REF (node);
1099 if (ipa_is_called_with_var_arguments (info))
1100 continue;
1101 if (ipcp_const_param_count (node))
1102 node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
1105 /* Now clone in priority order until code size growth limits are met or
1106 heap is emptied. */
1107 while (!fibheap_empty (heap))
1109 struct ipa_node_params *info;
1110 int growth = 0;
1111 bitmap args_to_skip;
1112 struct cgraph_edge *cs;
1114 node = (struct cgraph_node *)fibheap_extract_min (heap);
1115 node->aux = NULL;
1116 if (dump_file)
1117 fprintf (dump_file, "considering function %s\n",
1118 cgraph_node_name (node));
1120 growth = ipcp_estimate_growth (node);
1122 if (new_size + growth > max_new_size)
1123 break;
1124 if (growth
1125 && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
1127 if (dump_file)
1128 fprintf (dump_file, "Not versioning, cold code would grow");
1129 continue;
1132 new_size += growth;
1134 /* Look if original function becomes dead after clonning. */
1135 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1136 if (cs->caller == node || ipcp_need_redirect_p (cs))
1137 break;
1138 if (!cs && cgraph_will_be_removed_from_program_if_no_direct_calls (node))
1139 bitmap_set_bit (dead_nodes, node->uid);
1141 info = IPA_NODE_REF (node);
1142 count = ipa_get_param_count (info);
1144 replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1);
1145 args_to_skip = BITMAP_GGC_ALLOC ();
1146 for (i = 0; i < count; i++)
1148 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
1149 parm_tree = ipa_get_param (info, i);
1151 /* We can proactively remove obviously unused arguments. */
1152 if (!ipa_is_param_used (info, i))
1154 bitmap_set_bit (args_to_skip, i);
1155 continue;
1158 if (lat->type == IPA_CONST_VALUE)
1160 replace_param =
1161 ipcp_create_replace_map (parm_tree, lat);
1162 VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param);
1163 bitmap_set_bit (args_to_skip, i);
1167 /* Compute how many callers node has. */
1168 node_callers = 0;
1169 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1170 node_callers++;
1171 redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
1172 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
1173 VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
1175 /* Redirecting all the callers of the node to the
1176 new versioned node. */
1177 node1 =
1178 cgraph_create_virtual_clone (node, redirect_callers, replace_trees,
1179 args_to_skip, "constprop");
1180 args_to_skip = NULL;
1181 VEC_free (cgraph_edge_p, heap, redirect_callers);
1182 replace_trees = NULL;
1184 if (node1 == NULL)
1185 continue;
1186 if (dump_file)
1187 fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
1188 cgraph_node_name (node), (int)growth, (int)new_size);
1189 ipcp_init_cloned_node (node, node1);
1191 /* TODO: We can use indirect inlning info to produce new calls. */
1193 if (dump_file)
1194 dump_function_to_file (node1->decl, dump_file, dump_flags);
1196 for (cs = node->callees; cs; cs = cs->next_callee)
1197 if (cs->callee->aux)
1199 fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
1200 cs->callee->aux = fibheap_insert (heap,
1201 ipcp_estimate_cloning_cost (cs->callee),
1202 cs->callee);
1206 while (!fibheap_empty (heap))
1208 if (dump_file)
1209 fprintf (dump_file, "skipping function %s\n",
1210 cgraph_node_name (node));
1211 node = (struct cgraph_node *) fibheap_extract_min (heap);
1212 node->aux = NULL;
1214 fibheap_delete (heap);
1215 BITMAP_FREE (dead_nodes);
1216 ipcp_update_callgraph ();
1217 ipcp_update_profiling ();
1220 /* The IPCP driver. */
1221 static unsigned int
1222 ipcp_driver (void)
1224 cgraph_remove_unreachable_nodes (true,dump_file);
1225 if (dump_file)
1227 fprintf (dump_file, "\nIPA structures before propagation:\n");
1228 if (dump_flags & TDF_DETAILS)
1229 ipa_print_all_params (dump_file);
1230 ipa_print_all_jump_functions (dump_file);
1232 /* 2. Do the interprocedural propagation. */
1233 ipcp_iterate_stage ();
1234 /* 3. Insert the constants found to the functions. */
1235 ipcp_insert_stage ();
1236 if (dump_file && (dump_flags & TDF_DETAILS))
1238 fprintf (dump_file, "\nProfiling info after insert stage:\n");
1239 ipcp_print_profile_data (dump_file);
1241 /* Free all IPCP structures. */
1242 ipa_free_all_structures_after_ipa_cp ();
1243 if (dump_file)
1244 fprintf (dump_file, "\nIPA constant propagation end\n");
1245 return 0;
1248 /* Note function body size. */
1249 static void
1250 ipcp_generate_summary (void)
1252 if (dump_file)
1253 fprintf (dump_file, "\nIPA constant propagation start:\n");
1254 ipa_check_create_node_params ();
1255 ipa_check_create_edge_args ();
1256 ipa_register_cgraph_hooks ();
1257 /* 1. Call the init stage to initialize
1258 the ipa_node_params and ipa_edge_args structures. */
1259 ipcp_init_stage ();
1262 /* Write ipcp summary for nodes in SET. */
1263 static void
1264 ipcp_write_summary (cgraph_node_set set,
1265 varpool_node_set vset ATTRIBUTE_UNUSED)
1267 ipa_prop_write_jump_functions (set);
1270 /* Read ipcp summary. */
1271 static void
1272 ipcp_read_summary (void)
1274 ipa_prop_read_jump_functions ();
1277 /* Gate for IPCP optimization. */
1278 static bool
1279 cgraph_gate_cp (void)
1281 /* FIXME: We should remove the optimize check after we ensure we never run
1282 IPA passes when not optimizng. */
1283 return flag_ipa_cp && optimize;
1286 struct ipa_opt_pass_d pass_ipa_cp =
1289 IPA_PASS,
1290 "cp", /* name */
1291 cgraph_gate_cp, /* gate */
1292 ipcp_driver, /* execute */
1293 NULL, /* sub */
1294 NULL, /* next */
1295 0, /* static_pass_number */
1296 TV_IPA_CONSTANT_PROP, /* tv_id */
1297 0, /* properties_required */
1298 0, /* properties_provided */
1299 0, /* properties_destroyed */
1300 0, /* todo_flags_start */
1301 TODO_dump_cgraph | TODO_dump_func |
1302 TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */
1304 ipcp_generate_summary, /* generate_summary */
1305 ipcp_write_summary, /* write_summary */
1306 ipcp_read_summary, /* read_summary */
1307 NULL, /* write_optimization_summary */
1308 NULL, /* read_optimization_summary */
1309 NULL, /* stmt_fixup */
1310 0, /* TODOs */
1311 NULL, /* function_transform */
1312 NULL, /* variable_transform */