/* Function splitting pass
   Copyright (C) 2010-2015 Free Software Foundation, Inc.
   Contributed by Jan Hubicka  <jh@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* The purpose of this pass is to split function bodies to improve
   inlining.  I.e., for a function of the form:

   func (...)
     {
       if (cheap_test)
	 something_small
       else
	 something_big
     }

   Produce:

   func.part (...)
     {
       something_big
     }

   func (...)
     {
       if (cheap_test)
	 something_small
       else
	 func.part (...);
     }

   When func becomes inlinable and when cheap_test is often true, inlining
   func, but not func.part, leads to a performance improvement similar to
   inlining the original func while the code size growth is smaller.

   The pass is organized in three stages:
   1) Collect local info about basic blocks into the BB_INFO structure and
      compute the function body's estimated size and time.
   2) Via DFS walk find all possible basic blocks where we can split
      and choose the best one.
   3) If a split point is found, split at the specified BB by creating a
      clone and updating the function to call it.

   The decisions what functions to split are in execute_split_functions
   and consider_split.

   There are several possible future improvements for this pass including:

   1) Splitting to break up large functions
   2) Splitting to reduce stack frame usage
   3) Allow split part of function to use values computed in the header part.
      The values need to be passed to the split function, perhaps via the
      same interface as for nested functions or as an argument.
   4) Support for simple rematerialization.  I.e., when the split part uses
      a value computed in the header from a function parameter in a very
      cheap way, we can just recompute it (see the sketch below).
   5) Support splitting of nested functions.
   6) Support non-SSA arguments.
   7) There is nothing preventing us from producing multiple parts of a
      single function when needed or splitting also the parts.  */
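/* A minimal sketch of improvement (4) above, on a hypothetical function:
   if the header computes

     tmp_1 = x_2(D) + 1;

   and TMP_1 is the only value the split part needs from the header, the
   addition could simply be re-emitted in the split part instead of
   passing TMP_1 as an extra argument.  This is illustrative only and is
   not implemented by the pass.  */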
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "hash-set.h"
81 #include "machmode.h"
82 #include "vec.h"
83 #include "double-int.h"
84 #include "input.h"
85 #include "alias.h"
86 #include "symtab.h"
87 #include "options.h"
88 #include "wide-int.h"
89 #include "inchash.h"
90 #include "tree.h"
91 #include "fold-const.h"
92 #include "predict.h"
93 #include "tm.h"
94 #include "hard-reg-set.h"
95 #include "input.h"
96 #include "function.h"
97 #include "dominance.h"
98 #include "cfg.h"
99 #include "cfganal.h"
100 #include "basic-block.h"
101 #include "tree-ssa-alias.h"
102 #include "internal-fn.h"
103 #include "gimple-expr.h"
104 #include "is-a.h"
105 #include "gimple.h"
106 #include "stringpool.h"
107 #include "expr.h"
108 #include "calls.h"
109 #include "gimplify.h"
110 #include "gimple-iterator.h"
111 #include "gimplify-me.h"
112 #include "gimple-walk.h"
113 #include "target.h"
114 #include "hash-map.h"
115 #include "plugin-api.h"
116 #include "ipa-ref.h"
117 #include "cgraph.h"
118 #include "alloc-pool.h"
119 #include "symbol-summary.h"
120 #include "ipa-prop.h"
121 #include "gimple-ssa.h"
122 #include "tree-cfg.h"
123 #include "tree-phinodes.h"
124 #include "ssa-iterators.h"
125 #include "stringpool.h"
126 #include "tree-ssanames.h"
127 #include "tree-into-ssa.h"
128 #include "tree-dfa.h"
129 #include "tree-pass.h"
130 #include "flags.h"
131 #include "diagnostic.h"
132 #include "tree-dump.h"
133 #include "tree-inline.h"
134 #include "params.h"
135 #include "gimple-pretty-print.h"
136 #include "ipa-inline.h"
137 #include "cfgloop.h"
138 #include "tree-chkp.h"
/* Per basic block info.  */

typedef struct
{
  unsigned int size;
  unsigned int time;
} split_bb_info;

static vec<split_bb_info> bb_info_vec;
/* Description of split point.  */

struct split_point
{
  /* Size of the partitions.  */
  unsigned int header_time, header_size, split_time, split_size;

  /* SSA names that need to be passed into the split function.  */
  bitmap ssa_names_to_pass;

  /* Basic block where we split (that will become the entry point of the
     new function).  */
  basic_block entry_bb;

  /* Basic blocks we are splitting away.  */
  bitmap split_bbs;

  /* True when return value is computed on split part and thus it needs
     to be returned.  */
  bool split_part_set_retval;
};

/* Best split point found.  */

struct split_point best_split_point;

/* Set of basic blocks that are not allowed to dominate a split point.  */

static bitmap forbidden_dominators;
static tree find_retval (basic_block return_bb);
static tree find_retbnd (basic_block return_bb);
/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, check whether it is present in the bitmap passed via DATA.  */

static bool
test_nonssa_use (gimple, tree t, tree, void *data)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  if (TREE_CODE (t) == PARM_DECL
      || (TREE_CODE (t) == VAR_DECL
	  && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      /* Normal labels are part of CFG and will be handled gracefully.
	 Forced labels however can be used directly by statements and
	 need to stay in one partition along with their uses.  */
      || (TREE_CODE (t) == LABEL_DECL
	  && FORCED_LABEL (t)))
    return bitmap_bit_p ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is the actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && SSA_NAME_VAR (TREE_OPERAND (t, 0))
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
		    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}
/* Dump split point CURRENT.  */

static void
dump_split_point (FILE * file, struct split_point *current)
{
  fprintf (file,
	   "Split point at BB %i\n"
	   "  header time: %i header size: %i\n"
	   "  split time: %i split size: %i\n  bbs: ",
	   current->entry_bb->index, current->header_time,
	   current->header_size, current->split_time, current->split_size);
  dump_bitmap (file, current->split_bbs);
  fprintf (file, "  SSA names to pass: ");
  dump_bitmap (file, current->ssa_names_to_pass);
}
/* Look for all BBs in header that might lead to the split part and verify
   that they are not defining any non-SSA var used by the split part.
   Parameters are the same as for consider_split.  */

static bool
verify_non_ssa_vars (struct split_point *current, bitmap non_ssa_vars,
		     basic_block return_bb)
{
  bitmap seen = BITMAP_ALLOC (NULL);
  vec<basic_block> worklist = vNULL;
  edge e;
  edge_iterator ei;
  bool ok = true;
  basic_block bb;

  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	&& !bitmap_bit_p (current->split_bbs, e->src->index))
      {
	worklist.safe_push (e->src);
	bitmap_set_bit (seen, e->src->index);
      }

  while (!worklist.is_empty ())
    {
      bb = worklist.pop ();
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (seen, e->src->index))
	  {
	    gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
						e->src->index));
	    worklist.safe_push (e->src);
	  }
      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple stmt = gsi_stmt (bsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  if (walk_stmt_load_store_addr_ops
	      (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	  if (glabel *label_stmt = dyn_cast <glabel *> (stmt))
	    if (test_nonssa_use (stmt, gimple_label_label (label_stmt),
				 NULL_TREE, non_ssa_vars))
	      {
		ok = false;
		goto done;
	      }
	}
      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  if (walk_stmt_load_store_addr_ops
	      (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	}
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest != return_bb)
	    continue;
	  for (gphi_iterator bsi = gsi_start_phis (return_bb);
	       !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      gphi *stmt = bsi.phi ();
	      tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	      if (virtual_operand_p (gimple_phi_result (stmt)))
		continue;
	      if (TREE_CODE (op) != SSA_NAME
		  && test_nonssa_use (stmt, op, op, non_ssa_vars))
		{
		  ok = false;
		  goto done;
		}
	    }
	}
    }

  /* Verify that the rest of function does not define any label
     used by the split part.  */
  FOR_EACH_BB_FN (bb, cfun)
    if (!bitmap_bit_p (current->split_bbs, bb->index)
	&& !bitmap_bit_p (seen, bb->index))
      {
	gimple_stmt_iterator bsi;
	for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (bsi)))
	    {
	      if (test_nonssa_use (label_stmt,
				   gimple_label_label (label_stmt),
				   NULL_TREE, non_ssa_vars))
		{
		  ok = false;
		  goto done;
		}
	    }
	  else
	    break;
      }

done:
  BITMAP_FREE (seen);
  worklist.release ();
  return ok;
}
/* If STMT is a call, check the callee against a list of forbidden
   predicate functions.  If a match is found, look for uses of the
   call result in condition statements that compare against zero.
   For each such use, find the block targeted by the condition
   statement for the nonzero result, and set the bit for this block
   in the forbidden dominators bitmap.  The purpose of this is to avoid
   selecting a split point where we are likely to lose the chance
   to optimize away an unused function call.  An example is sketched
   below.  */
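/* As an illustrative sketch (the function names are hypothetical), in

     if (__builtin_constant_p (x))
       do_constant_fold (x);
     else
       do_generic (x);

   the target of the nonzero (true) edge is marked forbidden: splitting
   at a point dominated by it could keep __builtin_constant_p from
   folding to a constant, and the then-branch from being optimized away,
   after inlining.  */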
static void
check_forbidden_calls (gimple stmt)
{
  imm_use_iterator use_iter;
  use_operand_p use_p;
  tree lhs;

  /* At the moment, __builtin_constant_p is the only forbidden
     predicate function call (see PR49642).  */
  if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
    return;

  lhs = gimple_call_lhs (stmt);

  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
    {
      tree op1;
      basic_block use_bb, forbidden_bb;
      enum tree_code code;
      edge true_edge, false_edge;
      gcond *use_stmt;

      use_stmt = dyn_cast <gcond *> (USE_STMT (use_p));
      if (!use_stmt)
	continue;

      /* Assuming canonical form for GIMPLE_COND here, with constant
	 in second position.  */
      op1 = gimple_cond_rhs (use_stmt);
      code = gimple_cond_code (use_stmt);
      use_bb = gimple_bb (use_stmt);

      extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);

      /* We're only interested in comparisons that distinguish
	 unambiguously from zero.  */
      if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
	continue;

      if (code == EQ_EXPR)
	forbidden_bb = false_edge->dest;
      else
	forbidden_bb = true_edge->dest;

      bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
    }
}
/* If BB is dominated by any block in the forbidden dominators set,
   return TRUE; else FALSE.  */

static bool
dominated_by_forbidden (basic_block bb)
{
  unsigned dom_bb;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
    {
      if (dominated_by_p (CDI_DOMINATORS, bb,
			  BASIC_BLOCK_FOR_FN (cfun, dom_bb)))
	return true;
    }

  return false;
}
/* For a given split point CURRENT and return block RETURN_BB return 1
   if ssa name VAL is set by split part and 0 otherwise.  */
static bool
split_part_set_ssa_name_p (tree val, struct split_point *current,
			   basic_block return_bb)
{
  if (TREE_CODE (val) != SSA_NAME)
    return false;

  return (!SSA_NAME_IS_DEFAULT_DEF (val)
	  && (bitmap_bit_p (current->split_bbs,
			    gimple_bb (SSA_NAME_DEF_STMT (val))->index)
	      || gimple_bb (SSA_NAME_DEF_STMT (val)) == return_bb));
}
/* We found a split point CURRENT.  NON_SSA_VARS is a bitmap of all non-SSA
   variables used and RETURN_BB is the return basic block.
   See if we can split the function here.  */

static void
consider_split (struct split_point *current, bitmap non_ssa_vars,
		basic_block return_bb)
{
  tree parm;
  unsigned int num_args = 0;
  unsigned int call_overhead;
  edge e;
  edge_iterator ei;
  gphi_iterator bsi;
  unsigned int i;
  int incoming_freq = 0;
  tree retval;
  tree retbnd;
  bool back_edge = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_split_point (dump_file, current);

  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    {
      if (e->flags & EDGE_DFS_BACK)
	back_edge = true;
      if (!bitmap_bit_p (current->split_bbs, e->src->index))
	incoming_freq += EDGE_FREQUENCY (e);
    }

  /* Do not split when we would end up calling function anyway.  */
  if (incoming_freq
      >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency
	  * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
    {
      /* When profile is guessed, we cannot expect it to give us a
	 realistic estimate of the likelihood of the function taking the
	 complex path.  As a special case, when the tail of the function is
	 a loop, enable splitting since inlining code skipping the loop
	 is likely a noticeable win.  */
      if (back_edge
	  && profile_status_for_fn (cfun) != PROFILE_READ
	  && incoming_freq < ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Split before loop, accepting despite low frequencies %i %i.\n",
		     incoming_freq,
		     ENTRY_BLOCK_PTR_FOR_FN (cfun)->frequency);
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Refused: incoming frequency is too large.\n");
	  return;
	}
    }

  if (!current->header_size)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Refused: header empty\n");
      return;
    }

  /* Verify that PHI args on entry are either virtual or all their operands
     incoming from header are the same.  */
  for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi);
       gsi_next (&bsi))
    {
      gphi *stmt = bsi.phi ();
      tree val = NULL;

      if (virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  edge e = gimple_phi_arg_edge (stmt, i);
	  if (!bitmap_bit_p (current->split_bbs, e->src->index))
	    {
	      tree edge_val = gimple_phi_arg_def (stmt, i);
	      if (val && edge_val != val)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "  Refused: entry BB has PHI with multiple variants\n");
		  return;
		}
	      val = edge_val;
	    }
	}
    }

  /* See what argument we will pass to the split function and compute
     call overhead.  */
  call_overhead = eni_size_weights.call_cost;
  for (parm = DECL_ARGUMENTS (current_function_decl); parm;
       parm = DECL_CHAIN (parm))
    {
      if (!is_gimple_reg (parm))
	{
	  if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  Refused: need to pass non-ssa param values\n");
	      return;
	    }
	}
      else
	{
	  tree ddef = ssa_default_def (cfun, parm);
	  if (ddef
	      && bitmap_bit_p (current->ssa_names_to_pass,
			       SSA_NAME_VERSION (ddef)))
	    {
	      if (!VOID_TYPE_P (TREE_TYPE (parm)))
		call_overhead += estimate_move_cost (TREE_TYPE (parm), false);
	      num_args++;
	    }
	}
    }
  if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
    call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl),
					 false);

  if (current->split_size <= call_overhead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split size is smaller than call overhead\n");
      return;
    }
  if (current->header_size + call_overhead
      >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
			? MAX_INLINE_INSNS_SINGLE
			: MAX_INLINE_INSNS_AUTO))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: header size is too large for inline candidate\n");
      return;
    }

  /* FIXME: we currently can pass only SSA function parameters to the split
     arguments.  Once parm_adjustment infrastructure is supported by cloning,
     we can pass more than that.  */
  if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: need to pass non-param values\n");
      return;
    }

  /* When there are non-ssa vars used in the split region, see if they
     are used in the header region.  If so, reject the split.
     FIXME: we can use nested function support to access both.  */
  if (!bitmap_empty_p (non_ssa_vars)
      && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split part has non-ssa uses\n");
      return;
    }

  /* If the split point is dominated by a forbidden block, reject
     the split.  */
  if (!bitmap_empty_p (forbidden_dominators)
      && dominated_by_forbidden (current->entry_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split point dominated by forbidden block\n");
      return;
    }

  /* See if retval used by return bb is computed by header or split part.
     When it is computed by split part, we need to produce return statement
     in the split part and add code to header to pass it around.

     This is a bit tricky to test:
       1) When there is no return_bb or no return value, we always pass
	  the value around.
       2) Invariants are always computed by the caller.
       3) For SSA we need to look whether the defining statement is in
	  the header or the split part.
       4) For non-SSA we need to look where the var is computed.
     An example of case 3 is sketched below.  */
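/* An illustrative GIMPLE sketch of case 3: for

     <bb split>:
       ret_2 = expensive_work ();

     <bb ret>:
       return ret_2;

   RET_2 is defined by a statement in the split part, so the split part
   must compute and return the value, and the header passes it along to
   its own caller.  */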
  retval = find_retval (return_bb);
  if (!retval)
    current->split_part_set_retval = true;
  else if (is_gimple_min_invariant (retval))
    current->split_part_set_retval = false;
  /* Special case is a value returned by reference, which we record as if
     it was a non-SSA write to the result_decl.  */
  else if (TREE_CODE (retval) == SSA_NAME
	   && SSA_NAME_VAR (retval)
	   && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
	   && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
  else if (TREE_CODE (retval) == SSA_NAME)
    current->split_part_set_retval
      = split_part_set_ssa_name_p (retval, current, return_bb);
  else if (TREE_CODE (retval) == PARM_DECL)
    current->split_part_set_retval = false;
  else if (TREE_CODE (retval) == VAR_DECL
	   || TREE_CODE (retval) == RESULT_DECL)
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
  else
    current->split_part_set_retval = true;

  /* See if retbnd used by return bb is computed by header or split part.  */
  retbnd = find_retbnd (return_bb);
  if (retbnd)
    {
      bool split_part_set_retbnd
	= split_part_set_ssa_name_p (retbnd, current, return_bb);

      /* If we have both return value and bounds then keep their definitions
	 in a single function.  We use SSA names to link returned bounds and
	 value and therefore do not handle cases when result is passed by
	 reference (which should not be our case anyway since bounds are
	 returned for pointers only).  */
      if ((DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
	   && current->split_part_set_retval)
	  || split_part_set_retbnd != current->split_part_set_retval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Refused: split point splits return value and bounds\n");
	  return;
	}
    }

  /* split_function fixes up at most one non-virtual PHI node in return_bb,
     for the return value.  If there are other PHIs, give up.  */
  if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
    {
      gphi_iterator psi;

      for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
	if (!virtual_operand_p (gimple_phi_result (psi.phi ()))
	    && !(retval
		 && current->split_part_set_retval
		 && TREE_CODE (retval) == SSA_NAME
		 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
		 && SSA_NAME_DEF_STMT (retval) == psi.phi ()))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "  Refused: return bb has extra PHIs\n");
	    return;
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Accepted!\n");

  /* At the moment we choose the split point with the lowest frequency and
     the one that leaves out the smallest header.
     In the future we might re-consider this heuristic.  */
  if (!best_split_point.split_bbs
      || best_split_point.entry_bb->frequency > current->entry_bb->frequency
      || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
	  && best_split_point.split_size < current->split_size))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  New best split point!\n");
      if (best_split_point.ssa_names_to_pass)
	{
	  BITMAP_FREE (best_split_point.ssa_names_to_pass);
	  BITMAP_FREE (best_split_point.split_bbs);
	}
      best_split_point = *current;
      best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.ssa_names_to_pass,
		   current->ssa_names_to_pass);
      best_split_point.split_bbs = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.split_bbs, current->split_bbs);
    }
}
/* Return basic block containing RETURN statement.  We allow basic blocks
   of the form:
   <retval> = tmp_var;
   return <retval>
   but return_bb cannot be more complex than this (see the sketch below).
   If nothing is found, return the exit block.

   When there are multiple RETURN statements, choose one with a return
   value, since that one is more likely shared by multiple code paths.

   Return BB is special, because for function splitting it is the only
   basic block that is duplicated in between header and split part of the
   function.

   TODO: We might support multiple return blocks.  */
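/* For instance, a block of the following shape (an illustrative GIMPLE
   sketch) is accepted as return_bb:

     <bb ret>:
       retval.0 = tmp_var;
       return retval.0;

   while a block doing any other real work before the RETURN is rejected
   and the exit block is returned instead.  */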
static basic_block
find_return_bb (void)
{
  edge e;
  basic_block return_bb = EXIT_BLOCK_PTR_FOR_FN (cfun);
  gimple_stmt_iterator bsi;
  bool found_return = false;
  tree retval = NULL_TREE;

  if (!single_pred_p (EXIT_BLOCK_PTR_FOR_FN (cfun)))
    return return_bb;

  e = single_pred_edge (EXIT_BLOCK_PTR_FOR_FN (cfun));
  for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      if (gimple_code (stmt) == GIMPLE_LABEL
	  || is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	;
      else if (gimple_code (stmt) == GIMPLE_ASSIGN
	       && found_return
	       && gimple_assign_single_p (stmt)
	       && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
				     current_function_decl)
		   || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
	       && retval == gimple_assign_lhs (stmt))
	;
      else if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	{
	  found_return = true;
	  retval = gimple_return_retval (return_stmt);
	}
      else
	break;
    }
  if (gsi_end_p (bsi) && found_return)
    return_bb = e->src;

  return return_bb;
}
/* Given return basic block RETURN_BB, see where return value is really
   stored.  */
static tree
find_retval (basic_block return_bb)
{
  gimple_stmt_iterator bsi;
  for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    if (greturn *return_stmt = dyn_cast <greturn *> (gsi_stmt (bsi)))
      return gimple_return_retval (return_stmt);
    else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
	     && !gimple_clobber_p (gsi_stmt (bsi)))
      return gimple_assign_rhs1 (gsi_stmt (bsi));
  return NULL;
}
/* Given return basic block RETURN_BB, see where return bounds are really
   stored.  */
static tree
find_retbnd (basic_block return_bb)
{
  gimple_stmt_iterator bsi;
  for (bsi = gsi_last_bb (return_bb); !gsi_end_p (bsi); gsi_prev (&bsi))
    if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
      return gimple_return_retbnd (gsi_stmt (bsi));
  return NULL;
}
/* Callback for walk_stmt_load_store_addr_ops.  If T is a non-SSA automatic
   variable, mark it as used in the bitmap passed via DATA.
   Return true when access to T prevents splitting the function.  */

static bool
mark_nonssa_use (gimple, tree t, tree, void *data)
{
  t = get_base_address (t);

  if (!t || is_gimple_reg (t))
    return false;

  /* At present we can't pass non-SSA arguments to split function.
     FIXME: this can be relaxed by passing references to arguments.  */
  if (TREE_CODE (t) == PARM_DECL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Cannot split: use of non-ssa function parameter.\n");
      return true;
    }

  if ((TREE_CODE (t) == VAR_DECL
       && auto_var_in_fn_p (t, current_function_decl))
      || TREE_CODE (t) == RESULT_DECL
      || (TREE_CODE (t) == LABEL_DECL
	  && FORCED_LABEL (t)))
    bitmap_set_bit ((bitmap)data, DECL_UID (t));

  /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
     to pretend that the value pointed to is the actual result decl.  */
  if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
      && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
      && SSA_NAME_VAR (TREE_OPERAND (t, 0))
      && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
      && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    return
      bitmap_bit_p ((bitmap)data,
		    DECL_UID (DECL_RESULT (current_function_decl)));

  return false;
}
/* Compute local properties of basic block BB we collect when looking for
   split points.  We look for ssa defs and store them in SET_SSA_NAMES,
   for ssa uses and store them in USED_SSA_NAMES and for any non-SSA
   automatic vars stored in NON_SSA_VARS.

   When BB has an edge to RETURN_BB, collect uses in RETURN_BB too.

   Return false when BB contains something that prevents it from being put
   into the split function.  */

static bool
visit_bb (basic_block bb, basic_block return_bb,
	  bitmap set_ssa_names, bitmap used_ssa_names,
	  bitmap non_ssa_vars)
{
  edge e;
  edge_iterator ei;
  bool can_split = true;

  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
       gsi_next (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;
      tree decl;

      if (is_gimple_debug (stmt))
	continue;

      if (gimple_clobber_p (stmt))
	continue;

      /* FIXME: We can split regions containing EH.  We cannot, however,
	 split RESX, EH_DISPATCH and EH_POINTER referring to the same region
	 into different partitions.  This would require tracking of
	 EH regions and checking in consider_split_point if they
	 are not used elsewhere.  */
      if (gimple_code (stmt) == GIMPLE_RESX)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: resx.\n");
	  can_split = false;
	}
      if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: eh dispatch.\n");
	  can_split = false;
	}

      /* Check builtins that prevent splitting.  */
      if (gimple_code (stmt) == GIMPLE_CALL
	  && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
	  && DECL_BUILT_IN (decl)
	  && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
	switch (DECL_FUNCTION_CODE (decl))
	  {
	  /* FIXME: once we allow passing non-parm values to the split part,
	     we need to be sure to handle builtin_stack_save and
	     builtin_stack_restore correctly.  At the moment we are safe;
	     there is no way to store the builtin_stack_save result in a
	     non-SSA variable since all calls to those are compiler
	     generated.  */
	  case BUILT_IN_APPLY:
	  case BUILT_IN_APPLY_ARGS:
	  case BUILT_IN_VA_START:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "Cannot split: builtin_apply and va_start.\n");
	    can_split = false;
	    break;
	  case BUILT_IN_EH_POINTER:
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n");
	    can_split = false;
	    break;
	  default:
	    break;
	  }

      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
       gsi_next (&bsi))
    {
      gphi *stmt = bsi.phi ();
      unsigned int i;

      if (virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      bitmap_set_bit (set_ssa_names,
		      SSA_NAME_VERSION (gimple_phi_result (stmt)));
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  tree op = gimple_phi_arg_def (stmt, i);
	  if (TREE_CODE (op) == SSA_NAME)
	    bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	}
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  /* Record also uses coming from PHI operand in return BB.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == return_bb)
      {
	for (gphi_iterator bsi = gsi_start_phis (return_bb);
	     !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *stmt = bsi.phi ();
	    tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	    if (virtual_operand_p (gimple_phi_result (stmt)))
	      continue;
	    if (TREE_CODE (op) == SSA_NAME)
	      bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	    else
	      can_split &= !mark_nonssa_use (stmt, op, op, non_ssa_vars);
	  }
      }
  return can_split;
}
/* Stack entry for recursive DFS walk in find_split_point.  */

typedef struct
{
  /* Basic block we are examining.  */
  basic_block bb;

  /* SSA names set and used by the BB and all BBs reachable
     from it via DFS walk.  */
  bitmap set_ssa_names, used_ssa_names;
  bitmap non_ssa_vars;

  /* All BBS visited from this BB via DFS walk.  */
  bitmap bbs_visited;

  /* Last examined edge in DFS walk.  Since we walk an unoriented graph,
     the value is up to the sum of incoming and outgoing edges of BB.  */
  unsigned int edge_num;

  /* Stack entry index of earliest BB reachable from current BB
     or any BB visited later in DFS walk.  */
  int earliest;

  /* Overall time and size of all BBs reached from this BB in DFS walk.  */
  int overall_time, overall_size;

  /* When false we cannot split on this BB.  */
  bool can_split;
} stack_entry;
/* Find all articulations and call consider_split on them.
   OVERALL_TIME and OVERALL_SIZE are the time and size of the function.

   We perform the basic algorithm for finding an articulation in a graph
   created from the CFG by considering it to be an unoriented graph.

   The articulation is discovered via DFS walk.  We collect the earliest
   basic block on stack that is reachable via backward edge.  An articulation
   is any basic block such that there is no backward edge bypassing it.
   To reduce stack usage we maintain a heap allocated stack in the STACK
   vector.  The AUX pointer of a BB is set to the index it appears at in
   the stack, or -1 once it is visited and popped off the stack.

   The algorithm finds an articulation after visiting the whole component
   reachable by it.  This makes it convenient to collect information about
   the component used by consider_split.  A small example is sketched
   below.  */
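/* For example, in the unoriented view of a CFG of the shape

	A
       / \
      B   C
       \ /
	D
	|
	E

   block D is an articulation: no back edge bypasses it and every path
   from the entry to E passes through it, so D is considered as a split
   point with {D, E} as the split part.  B and C are bypassed by the
   other diamond arm and are never candidates.  (Illustrative sketch
   only.)  */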
static void
find_split_points (int overall_time, int overall_size)
{
  stack_entry first;
  vec<stack_entry> stack = vNULL;
  basic_block bb;
  basic_block return_bb = find_return_bb ();
  struct split_point current;

  current.header_time = overall_time;
  current.header_size = overall_size;
  current.split_time = 0;
  current.split_size = 0;
  current.ssa_names_to_pass = BITMAP_ALLOC (NULL);

  first.bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
  first.edge_num = 0;
  first.overall_time = 0;
  first.overall_size = 0;
  first.earliest = INT_MAX;
  first.set_ssa_names = 0;
  first.used_ssa_names = 0;
  first.non_ssa_vars = 0;
  first.bbs_visited = 0;
  first.can_split = false;
  stack.safe_push (first);
  ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(intptr_t)-1;

  while (!stack.is_empty ())
    {
      stack_entry *entry = &stack.last ();

      /* We are walking an acyclic graph, so edge_num counts
	 succ and pred edges together.  However when considering
	 articulation, we want to have processed everything reachable
	 from articulation but nothing that reaches into it.  */
      if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
	  && entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  int pos = stack.length ();
	  entry->can_split &= visit_bb (entry->bb, return_bb,
					entry->set_ssa_names,
					entry->used_ssa_names,
					entry->non_ssa_vars);
	  if (pos <= entry->earliest && !entry->can_split
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "found articulation at bb %i but can not split\n",
		     entry->bb->index);
	  if (pos <= entry->earliest && entry->can_split)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "found articulation at bb %i\n",
			 entry->bb->index);
	      current.entry_bb = entry->bb;
	      current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
	      bitmap_and_compl (current.ssa_names_to_pass,
				entry->used_ssa_names, entry->set_ssa_names);
	      current.header_time = overall_time - entry->overall_time;
	      current.header_size = overall_size - entry->overall_size;
	      current.split_time = entry->overall_time;
	      current.split_size = entry->overall_size;
	      current.split_bbs = entry->bbs_visited;
	      consider_split (&current, entry->non_ssa_vars, return_bb);
	      BITMAP_FREE (current.ssa_names_to_pass);
	    }
	}
      /* Do actual DFS walk.  */
      if (entry->edge_num
	  < (EDGE_COUNT (entry->bb->succs)
	     + EDGE_COUNT (entry->bb->preds)))
	{
	  edge e;
	  basic_block dest;
	  if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
	    {
	      e = EDGE_SUCC (entry->bb, entry->edge_num);
	      dest = e->dest;
	    }
	  else
	    {
	      e = EDGE_PRED (entry->bb, entry->edge_num
			     - EDGE_COUNT (entry->bb->succs));
	      dest = e->src;
	    }

	  entry->edge_num++;

	  /* New BB to visit, push it to the stack.  */
	  if (dest != return_bb && dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	      && !dest->aux)
	    {
	      stack_entry new_entry;

	      new_entry.bb = dest;
	      new_entry.edge_num = 0;
	      new_entry.overall_time
		= bb_info_vec[dest->index].time;
	      new_entry.overall_size
		= bb_info_vec[dest->index].size;
	      new_entry.earliest = INT_MAX;
	      new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
	      new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
	      new_entry.bbs_visited = BITMAP_ALLOC (NULL);
	      new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
	      new_entry.can_split = true;
	      bitmap_set_bit (new_entry.bbs_visited, dest->index);
	      stack.safe_push (new_entry);
	      dest->aux = (void *)(intptr_t)stack.length ();
	    }
	  /* Back edge found, record the earliest point.  */
	  else if ((intptr_t)dest->aux > 0
		   && (intptr_t)dest->aux < entry->earliest)
	    entry->earliest = (intptr_t)dest->aux;
	}
      /* We are done with examining the edges.  Pop off the value from stack
	 and merge stuff we accumulate during the walk.  */
      else if (entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  stack_entry *prev = &stack[stack.length () - 2];

	  entry->bb->aux = (void *)(intptr_t)-1;
	  prev->can_split &= entry->can_split;
	  if (prev->set_ssa_names)
	    {
	      bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
	      bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
	      bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
	      bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
	    }
	  if (prev->earliest > entry->earliest)
	    prev->earliest = entry->earliest;
	  prev->overall_time += entry->overall_time;
	  prev->overall_size += entry->overall_size;
	  BITMAP_FREE (entry->set_ssa_names);
	  BITMAP_FREE (entry->used_ssa_names);
	  BITMAP_FREE (entry->bbs_visited);
	  BITMAP_FREE (entry->non_ssa_vars);
	  stack.pop ();
	}
      else
	stack.pop ();
    }
  ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = NULL;
  FOR_EACH_BB_FN (bb, cfun)
    bb->aux = NULL;
  stack.release ();
  BITMAP_FREE (current.ssa_names_to_pass);
}
/* Build and insert initialization of returned bounds RETBND
   for returned value RETVAL.  Statements are inserted after
   a statement pointed to by GSI and GSI is modified to point to
   the last inserted statement.  */

static void
insert_bndret_call_after (tree retbnd, tree retval, gimple_stmt_iterator *gsi)
{
  tree fndecl = targetm.builtin_chkp_function (BUILT_IN_CHKP_BNDRET);
  gimple bndret = gimple_build_call (fndecl, 1, retval);
  gimple_call_set_lhs (bndret, retbnd);
  gsi_insert_after (gsi, bndret, GSI_CONTINUE_LINKING);
}
/* Split function at SPLIT_POINT.  */

static void
split_function (struct split_point *split_point)
{
  vec<tree> args_to_pass = vNULL;
  bitmap args_to_skip;
  tree parm;
  int num = 0;
  cgraph_node *node, *cur_node = cgraph_node::get (current_function_decl);
  basic_block return_bb = find_return_bb ();
  basic_block call_bb;
  gcall *call;
  edge e;
  edge_iterator ei;
  tree retval = NULL, real_retval = NULL, retbnd = NULL;
  bool split_part_return_p = false;
  bool with_bounds = chkp_function_instrumented_p (current_function_decl);
  gimple last_stmt = NULL;
  unsigned int i;
  tree arg, ddef;
  vec<tree, va_gc> **debug_args = NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSplitting function at:\n");
      dump_split_point (dump_file, split_point);
    }

  if (cur_node->local.can_change_signature)
    args_to_skip = BITMAP_ALLOC (NULL);
  else
    args_to_skip = NULL;

  /* Collect the parameters of new function and args_to_skip bitmap.  */
  for (parm = DECL_ARGUMENTS (current_function_decl);
       parm; parm = DECL_CHAIN (parm), num++)
    if (args_to_skip
	&& (!is_gimple_reg (parm)
	    || (ddef = ssa_default_def (cfun, parm)) == NULL_TREE
	    || !bitmap_bit_p (split_point->ssa_names_to_pass,
			      SSA_NAME_VERSION (ddef))))
      bitmap_set_bit (args_to_skip, num);
    else
      {
	/* This parm might not have been used up to now, but is going to be
	   used, hence register it.  */
	if (is_gimple_reg (parm))
	  arg = get_or_create_ssa_default_def (cfun, parm);
	else
	  arg = parm;

	if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
	  arg = fold_convert (DECL_ARG_TYPE (parm), arg);
	args_to_pass.safe_push (arg);
      }

  /* See if the split function will return.  */
  FOR_EACH_EDGE (e, ei, return_bb->preds)
    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
      break;
  if (e)
    split_part_return_p = true;

  /* Add return block to what will become the split function.
     We do not return; no return block is needed.  */
  if (!split_part_return_p)
    ;
  /* We have no return block, so nothing is needed.  */
  else if (return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    ;
  /* When we do not want to return value, we need to construct
     new return block with empty return statement.
     FIXME: Once we are able to change return type, we should change function
     to return void instead of just outputting function with undefined return
     value.  For structures this affects quality of codegen.  */
  else if (!split_point->split_part_set_retval
	   && find_retval (return_bb))
    {
      bool redirected = true;
      basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
      gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
      gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
      while (redirected)
	{
	  redirected = false;
	  FOR_EACH_EDGE (e, ei, return_bb->preds)
	    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
	      {
		new_return_bb->count += e->count;
		new_return_bb->frequency += EDGE_FREQUENCY (e);
		redirect_edge_and_branch (e, new_return_bb);
		redirected = true;
		break;
	      }
	}
      e = make_edge (new_return_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
      e->probability = REG_BR_PROB_BASE;
      e->count = new_return_bb->count;
      add_bb_to_loop (new_return_bb, current_loops->tree_root);
      bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
    }
  /* When we pass around the value, use existing return block.  */
  else
    bitmap_set_bit (split_point->split_bbs, return_bb->index);

  /* If RETURN_BB has virtual operand PHIs, they must be removed and the
     virtual operand marked for renaming as we change the CFG in a way that
     tree-inline is not able to compensate for.

     Note this can happen whether or not we have a return value.  If we have
     a return value, then RETURN_BB may have PHIs for real operands too.  */
  if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
    {
      bool phi_p = false;
      for (gphi_iterator gsi = gsi_start_phis (return_bb);
	   !gsi_end_p (gsi);)
	{
	  gphi *stmt = gsi.phi ();
	  if (!virtual_operand_p (gimple_phi_result (stmt)))
	    {
	      gsi_next (&gsi);
	      continue;
	    }
	  mark_virtual_phi_result_for_renaming (stmt);
	  remove_phi_node (&gsi, true);
	  phi_p = true;
	}
      /* In reality we have to rename the reaching definition of the
	 virtual operand at return_bb as we will eventually release it
	 when we remove the code region we outlined.
	 So we have to rename all immediate virtual uses of that region
	 if we didn't see a PHI definition yet.  */
      /* ??? In real reality we want to set the reaching vdef of the
	 entry of the SESE region as the vuse of the call and the reaching
	 vdef of the exit of the SESE region as the vdef of the call.  */
      if (!phi_p)
	for (gimple_stmt_iterator gsi = gsi_start_bb (return_bb);
	     !gsi_end_p (gsi);
	     gsi_next (&gsi))
	  {
	    gimple stmt = gsi_stmt (gsi);
	    if (gimple_vuse (stmt))
	      {
		gimple_set_vuse (stmt, NULL_TREE);
		update_stmt (stmt);
	      }
	    if (gimple_vdef (stmt))
	      break;
	  }
    }
1357 cgraph_edge::rebuild_edges ();
1358 node = cur_node->create_version_clone_with_body
1359 (vNULL, NULL, args_to_skip, !split_part_return_p, split_point->split_bbs,
1360 split_point->entry_bb, "part");
1362 /* Let's take a time profile for splitted function. */
1363 node->tp_first_run = cur_node->tp_first_run + 1;
1365 /* For usual cloning it is enough to clear builtin only when signature
1366 changes. For partial inlining we however can not expect the part
1367 of builtin implementation to have same semantic as the whole. */
1368 if (DECL_BUILT_IN (node->decl))
1370 DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN;
1371 DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0;
1374 /* If the original function is instrumented then it's
1375 part is also instrumented. */
1376 if (with_bounds)
1377 chkp_function_mark_instrumented (node->decl);
1379 /* If the original function is declared inline, there is no point in issuing
1380 a warning for the non-inlinable part. */
1381 DECL_NO_INLINE_WARNING_P (node->decl) = 1;
1382 cur_node->remove_callees ();
1383 cur_node->remove_all_references ();
1384 if (!split_part_return_p)
1385 TREE_THIS_VOLATILE (node->decl) = 1;
1386 if (dump_file)
1387 dump_function_to_file (node->decl, dump_file, dump_flags);
1389 /* Create the basic block we place call into. It is the entry basic block
1390 split after last label. */
1391 call_bb = split_point->entry_bb;
1392 for (gimple_stmt_iterator gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
1393 if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
1395 last_stmt = gsi_stmt (gsi);
1396 gsi_next (&gsi);
1398 else
1399 break;
1400 e = split_block (split_point->entry_bb, last_stmt);
1401 remove_edge (e);
1403 /* Produce the call statement. */
1404 gimple_stmt_iterator gsi = gsi_last_bb (call_bb);
1405 FOR_EACH_VEC_ELT (args_to_pass, i, arg)
1406 if (!is_gimple_val (arg))
1408 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
1409 false, GSI_CONTINUE_LINKING);
1410 args_to_pass[i] = arg;
1412 call = gimple_build_call_vec (node->decl, args_to_pass);
1413 gimple_call_set_with_bounds (call, with_bounds);
1414 gimple_set_block (call, DECL_INITIAL (current_function_decl));
1415 args_to_pass.release ();
1417 /* For optimized away parameters, add on the caller side
1418 before the call
1419 DEBUG D#X => parm_Y(D)
1420 stmts and associate D#X with parm in decl_debug_args_lookup
1421 vector to say for debug info that if parameter parm had been passed,
1422 it would have value parm_Y(D). */
1423 if (args_to_skip)
1424 for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
1425 parm; parm = DECL_CHAIN (parm), num++)
1426 if (bitmap_bit_p (args_to_skip, num)
1427 && is_gimple_reg (parm))
1429 tree ddecl;
1430 gimple def_temp;
1432 /* This needs to be done even without MAY_HAVE_DEBUG_STMTS,
1433 otherwise if it didn't exist before, we'd end up with
1434 different SSA_NAME_VERSIONs between -g and -g0. */
1435 arg = get_or_create_ssa_default_def (cfun, parm);
1436 if (!MAY_HAVE_DEBUG_STMTS)
1437 continue;
1439 if (debug_args == NULL)
1440 debug_args = decl_debug_args_insert (node->decl);
1441 ddecl = make_node (DEBUG_EXPR_DECL);
1442 DECL_ARTIFICIAL (ddecl) = 1;
1443 TREE_TYPE (ddecl) = TREE_TYPE (parm);
1444 DECL_MODE (ddecl) = DECL_MODE (parm);
1445 vec_safe_push (*debug_args, DECL_ORIGIN (parm));
1446 vec_safe_push (*debug_args, ddecl);
1447 def_temp = gimple_build_debug_bind (ddecl, unshare_expr (arg),
1448 call);
1449 gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
1451 /* And on the callee side, add
1452 DEBUG D#Y s=> parm
1453 DEBUG var => D#Y
1454 stmts to the first bb where var is a VAR_DECL created for the
1455 optimized away parameter in DECL_INITIAL block. This hints
1456 in the debug info that var (whole DECL_ORIGIN is the parm PARM_DECL)
1457 is optimized away, but could be looked up at the call site
1458 as value of D#X there. */
1459 if (debug_args != NULL)
1461 unsigned int i;
1462 tree var, vexpr;
1463 gimple_stmt_iterator cgsi;
1464 gimple def_temp;
1466 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1467 var = BLOCK_VARS (DECL_INITIAL (node->decl));
1468 i = vec_safe_length (*debug_args);
1469 cgsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1472 i -= 2;
1473 while (var != NULL_TREE
1474 && DECL_ABSTRACT_ORIGIN (var) != (**debug_args)[i])
1475 var = TREE_CHAIN (var);
1476 if (var == NULL_TREE)
1477 break;
1478 vexpr = make_node (DEBUG_EXPR_DECL);
1479 parm = (**debug_args)[i];
1480 DECL_ARTIFICIAL (vexpr) = 1;
1481 TREE_TYPE (vexpr) = TREE_TYPE (parm);
1482 DECL_MODE (vexpr) = DECL_MODE (parm);
1483 def_temp = gimple_build_debug_source_bind (vexpr, parm,
1484 NULL);
1485 gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
1486 def_temp = gimple_build_debug_bind (var, vexpr, NULL);
1487 gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
1489 while (i);
1490 pop_cfun ();
  /* We avoid address being taken on any variable used by split part,
     so return slot optimization is always possible.  Moreover this is
     required to make DECL_BY_REFERENCE work.  */
  if (aggregate_value_p (DECL_RESULT (current_function_decl),
			 TREE_TYPE (current_function_decl))
      && (!is_gimple_reg_type (TREE_TYPE (DECL_RESULT (current_function_decl)))
	  || DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))))
    gimple_call_set_return_slot_opt (call, true);

  /* Update return value.  This is a bit tricky.  When we do not return,
     do nothing.  When we return, we might need to update return_bb
     or produce a new return statement.  */
  if (!split_part_return_p)
    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
  else
    {
      e = make_edge (call_bb, return_bb,
		     return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
		     ? 0 : EDGE_FALLTHRU);
      e->count = call_bb->count;
      e->probability = REG_BR_PROB_BASE;

      /* If there is return basic block, see what value we need to store
	 return value into and put call just before it.  */
      if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
	{
	  real_retval = retval = find_retval (return_bb);
	  retbnd = find_retbnd (return_bb);

	  if (real_retval && split_point->split_part_set_retval)
	    {
	      gphi_iterator psi;

	      /* See if we need new SSA_NAME for the result.
		 When DECL_BY_REFERENCE is true, retval is actually pointer to
		 return value and it is constant in whole function.  */
	      if (TREE_CODE (retval) == SSA_NAME
		  && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
		{
		  retval = copy_ssa_name (retval, call);

		  /* See if there is PHI defining return value.  */
		  for (psi = gsi_start_phis (return_bb);
		       !gsi_end_p (psi); gsi_next (&psi))
		    if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
		      break;

		  /* When there is PHI, just update its value.  */
		  if (TREE_CODE (retval) == SSA_NAME
		      && !gsi_end_p (psi))
		    add_phi_arg (psi.phi (), retval, e, UNKNOWN_LOCATION);
		  /* Otherwise update the return BB itself.
		     find_return_bb allows at most one assignment to return
		     value, so update first statement.  */
		  else
		    {
		      gimple_stmt_iterator bsi;
		      for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
			   gsi_next (&bsi))
			if (greturn *return_stmt
			      = dyn_cast <greturn *> (gsi_stmt (bsi)))
			  {
			    gimple_return_set_retval (return_stmt, retval);
			    break;
			  }
			else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
				 && !gimple_clobber_p (gsi_stmt (bsi)))
			  {
			    gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
			    break;
			  }
		      update_stmt (gsi_stmt (bsi));
		    }

		  /* Replace retbnd with new one.  */
		  if (retbnd)
		    {
		      gimple_stmt_iterator bsi;
		      for (bsi = gsi_last_bb (return_bb); !gsi_end_p (bsi);
			   gsi_prev (&bsi))
			if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
			  {
			    retbnd = copy_ssa_name (retbnd, call);
			    gimple_return_set_retbnd (gsi_stmt (bsi), retbnd);
			    update_stmt (gsi_stmt (bsi));
			    break;
			  }
		    }
		}
	      if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
		{
		  gimple_call_set_lhs (call, build_simple_mem_ref (retval));
		  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
		}
	      else
		{
		  tree restype;
		  restype = TREE_TYPE (DECL_RESULT (current_function_decl));
		  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
		  if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
		    {
		      gimple cpy;
		      tree tem = create_tmp_reg (restype);
		      tem = make_ssa_name (tem, call);
		      cpy = gimple_build_assign (retval, NOP_EXPR, tem);
		      gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
		      retval = tem;
		    }
		  /* Build bndret call to obtain returned bounds.  */
		  if (retbnd)
		    insert_bndret_call_after (retbnd, retval, &gsi);
		  gimple_call_set_lhs (call, retval);
		  update_stmt (call);
		}
	    }
	  else
	    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
	}
      /* We don't use return block (there is either no return in function or
	 multiple of them).  So create new basic block with return
	 statement.  */
      else
	{
	  greturn *ret;
	  if (split_point->split_part_set_retval
	      && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
	    {
	      retval = DECL_RESULT (current_function_decl);

	      if (chkp_function_instrumented_p (current_function_decl)
		  && BOUNDED_P (retval))
		retbnd = create_tmp_reg (pointer_bounds_type_node);

	      /* We use temporary register to hold value when aggregate_value_p
		 is false.  Similarly for DECL_BY_REFERENCE we must avoid extra
		 copy.  */
	      if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
		  && !DECL_BY_REFERENCE (retval))
		retval = create_tmp_reg (TREE_TYPE (retval));
	      if (is_gimple_reg (retval))
		{
		  /* When returning by reference, there is only one SSA name
		     assigned to RESULT_DECL (that is pointer to return value).
		     Look it up or create new one if it is missing.  */
		  if (DECL_BY_REFERENCE (retval))
		    retval = get_or_create_ssa_default_def (cfun, retval);
		  /* Otherwise produce new SSA name for return value.  */
		  else
		    retval = make_ssa_name (retval, call);
		}
	      if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
		gimple_call_set_lhs (call, build_simple_mem_ref (retval));
	      else
		gimple_call_set_lhs (call, retval);
	    }
	  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
	  /* Build bndret call to obtain returned bounds.  */
	  if (retbnd)
	    insert_bndret_call_after (retbnd, retval, &gsi);
	  ret = gimple_build_return (retval);
	  gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
	}
    }
  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  compute_inline_parameters (node, true);
}
/* Execute function splitting pass.  */

static unsigned int
execute_split_functions (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb;
  int overall_time = 0, overall_size = 0;
  int todo = 0;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  if (flags_from_decl_or_type (current_function_decl)
      & (ECF_NORETURN|ECF_MALLOC))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: noreturn/malloc function.\n");
      return 0;
    }
  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: main function.\n");
      return 0;
    }
  /* This can be relaxed; function might become inlinable after splitting
     away the uninlinable part.  */
  if (inline_edge_summary_vec.exists ()
      && !inline_summaries->get (node)->inlinable)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not inlinable.\n");
      return 0;
    }
  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
      return 0;
    }
  /* This can be relaxed; most of versioning tests actually prevent
     a duplication.  */
  if (!tree_versionable_function_p (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not versionable.\n");
      return 0;
    }
  /* FIXME: we could support this.  */
  if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: nested function.\n");
      return 0;
    }

  /* See if it makes sense to try to split.
     It makes sense to split if we inline, that is if we have direct calls to
     handle or direct calls are possibly going to appear as result of indirect
     inlining or LTO.  Also handle -fprofile-generate as LTO to allow non-LTO
     training for LTO -fprofile-use build.

     Note that we are not completely conservative about disqualifying
     functions called once.  It is possible that the caller is called more
     than once and then inlining would still benefit.  */
  if ((!node->callers
       /* Local functions called once will be completely inlined most of
	  the time.  */
       || (!node->callers->next_caller && node->local.local))
      && !node->address_taken
      && (!flag_lto || !node->externally_visible))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not called directly "
		 "or called once.\n");
      return 0;
    }

  /* FIXME: We can actually split if splitting reduces call overhead.  */
  if (!flag_inline_small_functions
      && !DECL_DECLARED_INLINE_P (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not autoinlining and function"
		 " is not inline.\n");
      return 0;
    }

  /* We enforce splitting after loop headers when profile info is not
     available.  */
  if (profile_status_for_fn (cfun) != PROFILE_READ)
    mark_dfs_back_edges ();

  /* Initialize bitmap to track forbidden calls.  */
  forbidden_dominators = BITMAP_ALLOC (NULL);
  calculate_dominance_info (CDI_DOMINATORS);

  /* Compute local info about basic blocks and determine function
     size/time.  */
  bb_info_vec.safe_grow_cleared (last_basic_block_for_fn (cfun) + 1);
  memset (&best_split_point, 0, sizeof (best_split_point));
  FOR_EACH_BB_FN (bb, cfun)
    {
      int time = 0;
      int size = 0;
      int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Basic block %i\n", bb->index);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  int this_time, this_size;
	  gimple stmt = gsi_stmt (bsi);

	  this_size = estimate_num_insns (stmt, &eni_size_weights);
	  this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
	  size += this_size;
	  time += this_time;
	  check_forbidden_calls (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  freq:%6i size:%3i time:%3i ",
		       freq, this_size, this_time);
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      overall_time += time;
      overall_size += size;
      bb_info_vec[bb->index].time = time;
      bb_info_vec[bb->index].size = size;
    }
  find_split_points (overall_time, overall_size);
  if (best_split_point.split_bbs)
    {
      split_function (&best_split_point);
      BITMAP_FREE (best_split_point.ssa_names_to_pass);
      BITMAP_FREE (best_split_point.split_bbs);
      todo = TODO_update_ssa | TODO_cleanup_cfg;
    }
  BITMAP_FREE (forbidden_dominators);
  bb_info_vec.release ();
  return todo;
}
namespace {

const pass_data pass_data_split_functions =
{
  GIMPLE_PASS, /* type */
  "fnsplit", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FNSPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_split_functions : public gimple_opt_pass
{
public:
  pass_split_functions (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_split_functions, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *)
    {
      return execute_split_functions ();
    }

}; // class pass_split_functions

bool
pass_split_functions::gate (function *)
{
  /* When doing profile feedback, we want to execute the pass after profiling
     is read.  So disable the one in early optimization.  */
  return (flag_partial_inlining
	  && !profile_arc_flag && !flag_branch_probabilities);
}

} // anon namespace

gimple_opt_pass *
make_pass_split_functions (gcc::context *ctxt)
{
  return new pass_split_functions (ctxt);
}
/* Execute feedback-directed function splitting pass.  */

static unsigned int
execute_feedback_split_functions (void)
{
  unsigned int retval = execute_split_functions ();
  if (retval)
    retval |= TODO_rebuild_cgraph_edges;
  return retval;
}

namespace {

const pass_data pass_data_feedback_split_functions =
{
  GIMPLE_PASS, /* type */
  "feedback_fnsplit", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_FNSPLIT, /* tv_id */
  PROP_cfg, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_feedback_split_functions : public gimple_opt_pass
{
public:
  pass_feedback_split_functions (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_feedback_split_functions, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *)
    {
      return execute_feedback_split_functions ();
    }

}; // class pass_feedback_split_functions

bool
pass_feedback_split_functions::gate (function *)
{
  /* We don't need to split when profiling; we are producing
     lousy code anyway.  */
  return (flag_partial_inlining
	  && flag_branch_probabilities);
}

} // anon namespace

gimple_opt_pass *
make_pass_feedback_split_functions (gcc::context *ctxt)
{
  return new pass_feedback_split_functions (ctxt);
}