2012-10-06 Janus Weil <janus@gcc.gnu.org>
[official-gcc.git] / gcc / ipa-split.c
blob6750c11381a7bbe7743162ad58d62bcb45bd0bf2
1 /* Function splitting pass
2 Copyright (C) 2010, 2011, 2012
3 Free Software Foundation, Inc.
4 Contributed by Jan Hubicka <jh@suse.cz>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* The purpose of this pass is to split function bodies to improve
23 inlining. I.e. for function of the form:
25 func (...)
27 if (cheap_test)
28 something_small
29 else
30 something_big
33 Produce:
35 func.part (...)
37 something_big
40 func (...)
42 if (cheap_test)
43 something_small
44 else
45 func.part (...);
48 When func becomes inlinable and when cheap_test is often true, inlining func,
49 but not func.part leads to performance improvement similar to inlining
50 original func while the code size growth is smaller.
52 The pass is organized in three stages:
53 1) Collect local info about basic block into BB_INFO structure and
54 compute function body estimated size and time.
55 2) Via DFS walk find all possible basic blocks where we can split
56 and choose the best one.
57 3) If split point is found, split at the specified BB by creating a clone
58 and updating function to call it.
60 The decisions what functions to split are in execute_split_functions
61 and consider_split.
63 There are several possible future improvements for this pass including:
65 1) Splitting to break up large functions
66 2) Splitting to reduce stack frame usage
67 3) Allow split part of function to use values computed in the header part.
68 The values needs to be passed to split function, perhaps via same
69 interface as for nested functions or as argument.
70 4) Support for simple rematerialization. I.e. when split part use
71 value computed in header from function parameter in very cheap way, we
72 can just recompute it.
73 5) Support splitting of nested functions.
74 6) Support non-SSA arguments.
75 7) There is nothing preventing us from producing multiple parts of single function
76 when needed or splitting also the parts. */
78 #include "config.h"
79 #include "system.h"
80 #include "coretypes.h"
81 #include "tree.h"
82 #include "target.h"
83 #include "cgraph.h"
84 #include "ipa-prop.h"
85 #include "tree-flow.h"
86 #include "tree-pass.h"
87 #include "flags.h"
88 #include "diagnostic.h"
89 #include "tree-dump.h"
90 #include "tree-inline.h"
91 #include "params.h"
92 #include "gimple-pretty-print.h"
93 #include "ipa-inline.h"
95 /* Per basic block info. */
97 typedef struct
99 unsigned int size;
100 unsigned int time;
101 } bb_info;
102 DEF_VEC_O(bb_info);
103 DEF_VEC_ALLOC_O(bb_info,heap);
105 static VEC(bb_info, heap) *bb_info_vec;
107 /* Description of split point. */
109 struct split_point
111 /* Size of the partitions. */
112 unsigned int header_time, header_size, split_time, split_size;
114 /* SSA names that need to be passed into spit function. */
115 bitmap ssa_names_to_pass;
117 /* Basic block where we split (that will become entry point of new function. */
118 basic_block entry_bb;
120 /* Basic blocks we are splitting away. */
121 bitmap split_bbs;
123 /* True when return value is computed on split part and thus it needs
124 to be returned. */
125 bool split_part_set_retval;
128 /* Best split point found. */
130 struct split_point best_split_point;
132 /* Set of basic blocks that are not allowed to dominate a split point. */
134 static bitmap forbidden_dominators;
136 static tree find_retval (basic_block return_bb);
138 /* Callback for walk_stmt_load_store_addr_ops. If T is non-SSA automatic
139 variable, check it if it is present in bitmap passed via DATA. */
141 static bool
142 test_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data)
144 t = get_base_address (t);
146 if (!t || is_gimple_reg (t))
147 return false;
149 if (TREE_CODE (t) == PARM_DECL
150 || (TREE_CODE (t) == VAR_DECL
151 && auto_var_in_fn_p (t, current_function_decl))
152 || TREE_CODE (t) == RESULT_DECL
153 || TREE_CODE (t) == LABEL_DECL)
154 return bitmap_bit_p ((bitmap)data, DECL_UID (t));
156 /* For DECL_BY_REFERENCE, the return value is actually a pointer. We want
157 to pretend that the value pointed to is actual result decl. */
158 if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
159 && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
160 && SSA_NAME_VAR (TREE_OPERAND (t, 0))
161 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
162 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
163 return
164 bitmap_bit_p ((bitmap)data,
165 DECL_UID (DECL_RESULT (current_function_decl)));
167 return false;
170 /* Dump split point CURRENT. */
172 static void
173 dump_split_point (FILE * file, struct split_point *current)
175 fprintf (file,
176 "Split point at BB %i\n"
177 " header time: %i header size: %i\n"
178 " split time: %i split size: %i\n bbs: ",
179 current->entry_bb->index, current->header_time,
180 current->header_size, current->split_time, current->split_size);
181 dump_bitmap (file, current->split_bbs);
182 fprintf (file, " SSA names to pass: ");
183 dump_bitmap (file, current->ssa_names_to_pass);
186 /* Look for all BBs in header that might lead to the split part and verify
187 that they are not defining any non-SSA var used by the split part.
188 Parameters are the same as for consider_split. */
190 static bool
191 verify_non_ssa_vars (struct split_point *current, bitmap non_ssa_vars,
192 basic_block return_bb)
194 bitmap seen = BITMAP_ALLOC (NULL);
195 VEC (basic_block,heap) *worklist = NULL;
196 edge e;
197 edge_iterator ei;
198 bool ok = true;
200 FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
201 if (e->src != ENTRY_BLOCK_PTR
202 && !bitmap_bit_p (current->split_bbs, e->src->index))
204 VEC_safe_push (basic_block, heap, worklist, e->src);
205 bitmap_set_bit (seen, e->src->index);
208 while (!VEC_empty (basic_block, worklist))
210 gimple_stmt_iterator bsi;
211 basic_block bb = VEC_pop (basic_block, worklist);
213 FOR_EACH_EDGE (e, ei, bb->preds)
214 if (e->src != ENTRY_BLOCK_PTR
215 && bitmap_set_bit (seen, e->src->index))
217 gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
218 e->src->index));
219 VEC_safe_push (basic_block, heap, worklist, e->src);
221 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
223 gimple stmt = gsi_stmt (bsi);
224 if (is_gimple_debug (stmt))
225 continue;
226 if (walk_stmt_load_store_addr_ops
227 (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
228 test_nonssa_use))
230 ok = false;
231 goto done;
233 if (gimple_code (stmt) == GIMPLE_LABEL
234 && test_nonssa_use (stmt, gimple_label_label (stmt),
235 non_ssa_vars))
237 ok = false;
238 goto done;
241 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
243 if (walk_stmt_load_store_addr_ops
244 (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
245 test_nonssa_use))
247 ok = false;
248 goto done;
251 FOR_EACH_EDGE (e, ei, bb->succs)
253 if (e->dest != return_bb)
254 continue;
255 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi);
256 gsi_next (&bsi))
258 gimple stmt = gsi_stmt (bsi);
259 tree op = gimple_phi_arg_def (stmt, e->dest_idx);
261 if (virtual_operand_p (gimple_phi_result (stmt)))
262 continue;
263 if (TREE_CODE (op) != SSA_NAME
264 && test_nonssa_use (stmt, op, non_ssa_vars))
266 ok = false;
267 goto done;
272 done:
273 BITMAP_FREE (seen);
274 VEC_free (basic_block, heap, worklist);
275 return ok;
278 /* If STMT is a call, check the callee against a list of forbidden
279 predicate functions. If a match is found, look for uses of the
280 call result in condition statements that compare against zero.
281 For each such use, find the block targeted by the condition
282 statement for the nonzero result, and set the bit for this block
283 in the forbidden dominators bitmap. The purpose of this is to avoid
284 selecting a split point where we are likely to lose the chance
285 to optimize away an unused function call. */
287 static void
288 check_forbidden_calls (gimple stmt)
290 imm_use_iterator use_iter;
291 use_operand_p use_p;
292 tree lhs;
294 /* At the moment, __builtin_constant_p is the only forbidden
295 predicate function call (see PR49642). */
296 if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
297 return;
299 lhs = gimple_call_lhs (stmt);
301 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
302 return;
304 FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
306 tree op1;
307 basic_block use_bb, forbidden_bb;
308 enum tree_code code;
309 edge true_edge, false_edge;
310 gimple use_stmt = USE_STMT (use_p);
312 if (gimple_code (use_stmt) != GIMPLE_COND)
313 continue;
315 /* Assuming canonical form for GIMPLE_COND here, with constant
316 in second position. */
317 op1 = gimple_cond_rhs (use_stmt);
318 code = gimple_cond_code (use_stmt);
319 use_bb = gimple_bb (use_stmt);
321 extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);
323 /* We're only interested in comparisons that distinguish
324 unambiguously from zero. */
325 if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
326 continue;
328 if (code == EQ_EXPR)
329 forbidden_bb = false_edge->dest;
330 else
331 forbidden_bb = true_edge->dest;
333 bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
337 /* If BB is dominated by any block in the forbidden dominators set,
338 return TRUE; else FALSE. */
340 static bool
341 dominated_by_forbidden (basic_block bb)
343 unsigned dom_bb;
344 bitmap_iterator bi;
346 EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
348 if (dominated_by_p (CDI_DOMINATORS, bb, BASIC_BLOCK (dom_bb)))
349 return true;
352 return false;
355 /* We found an split_point CURRENT. NON_SSA_VARS is bitmap of all non ssa
356 variables used and RETURN_BB is return basic block.
357 See if we can split function here. */
359 static void
360 consider_split (struct split_point *current, bitmap non_ssa_vars,
361 basic_block return_bb)
363 tree parm;
364 unsigned int num_args = 0;
365 unsigned int call_overhead;
366 edge e;
367 edge_iterator ei;
368 gimple_stmt_iterator bsi;
369 unsigned int i;
370 int incoming_freq = 0;
371 tree retval;
373 if (dump_file && (dump_flags & TDF_DETAILS))
374 dump_split_point (dump_file, current);
376 FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
377 if (!bitmap_bit_p (current->split_bbs, e->src->index))
378 incoming_freq += EDGE_FREQUENCY (e);
380 /* Do not split when we would end up calling function anyway. */
381 if (incoming_freq
382 >= (ENTRY_BLOCK_PTR->frequency
383 * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
385 if (dump_file && (dump_flags & TDF_DETAILS))
386 fprintf (dump_file,
387 " Refused: incoming frequency is too large.\n");
388 return;
391 if (!current->header_size)
393 if (dump_file && (dump_flags & TDF_DETAILS))
394 fprintf (dump_file, " Refused: header empty\n");
395 return;
398 /* Verify that PHI args on entry are either virtual or all their operands
399 incoming from header are the same. */
400 for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
402 gimple stmt = gsi_stmt (bsi);
403 tree val = NULL;
405 if (virtual_operand_p (gimple_phi_result (stmt)))
406 continue;
407 for (i = 0; i < gimple_phi_num_args (stmt); i++)
409 edge e = gimple_phi_arg_edge (stmt, i);
410 if (!bitmap_bit_p (current->split_bbs, e->src->index))
412 tree edge_val = gimple_phi_arg_def (stmt, i);
413 if (val && edge_val != val)
415 if (dump_file && (dump_flags & TDF_DETAILS))
416 fprintf (dump_file,
417 " Refused: entry BB has PHI with multiple variants\n");
418 return;
420 val = edge_val;
426 /* See what argument we will pass to the split function and compute
427 call overhead. */
428 call_overhead = eni_size_weights.call_cost;
429 for (parm = DECL_ARGUMENTS (current_function_decl); parm;
430 parm = DECL_CHAIN (parm))
432 if (!is_gimple_reg (parm))
434 if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
436 if (dump_file && (dump_flags & TDF_DETAILS))
437 fprintf (dump_file,
438 " Refused: need to pass non-ssa param values\n");
439 return;
442 else
444 tree ddef = ssa_default_def (cfun, parm);
445 if (ddef
446 && bitmap_bit_p (current->ssa_names_to_pass,
447 SSA_NAME_VERSION (ddef)))
449 if (!VOID_TYPE_P (TREE_TYPE (parm)))
450 call_overhead += estimate_move_cost (TREE_TYPE (parm));
451 num_args++;
455 if (!VOID_TYPE_P (TREE_TYPE (current_function_decl)))
456 call_overhead += estimate_move_cost (TREE_TYPE (current_function_decl));
458 if (current->split_size <= call_overhead)
460 if (dump_file && (dump_flags & TDF_DETAILS))
461 fprintf (dump_file,
462 " Refused: split size is smaller than call overhead\n");
463 return;
465 if (current->header_size + call_overhead
466 >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
467 ? MAX_INLINE_INSNS_SINGLE
468 : MAX_INLINE_INSNS_AUTO))
470 if (dump_file && (dump_flags & TDF_DETAILS))
471 fprintf (dump_file,
472 " Refused: header size is too large for inline candidate\n");
473 return;
476 /* FIXME: we currently can pass only SSA function parameters to the split
477 arguments. Once parm_adjustment infrastructure is supported by cloning,
478 we can pass more than that. */
479 if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
482 if (dump_file && (dump_flags & TDF_DETAILS))
483 fprintf (dump_file,
484 " Refused: need to pass non-param values\n");
485 return;
488 /* When there are non-ssa vars used in the split region, see if they
489 are used in the header region. If so, reject the split.
490 FIXME: we can use nested function support to access both. */
491 if (!bitmap_empty_p (non_ssa_vars)
492 && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
494 if (dump_file && (dump_flags & TDF_DETAILS))
495 fprintf (dump_file,
496 " Refused: split part has non-ssa uses\n");
497 return;
500 /* If the split point is dominated by a forbidden block, reject
501 the split. */
502 if (!bitmap_empty_p (forbidden_dominators)
503 && dominated_by_forbidden (current->entry_bb))
505 if (dump_file && (dump_flags & TDF_DETAILS))
506 fprintf (dump_file,
507 " Refused: split point dominated by forbidden block\n");
508 return;
511 /* See if retval used by return bb is computed by header or split part.
512 When it is computed by split part, we need to produce return statement
513 in the split part and add code to header to pass it around.
515 This is bit tricky to test:
516 1) When there is no return_bb or no return value, we always pass
517 value around.
518 2) Invariants are always computed by caller.
519 3) For SSA we need to look if defining statement is in header or split part
520 4) For non-SSA we need to look where the var is computed. */
521 retval = find_retval (return_bb);
522 if (!retval)
523 current->split_part_set_retval = true;
524 else if (is_gimple_min_invariant (retval))
525 current->split_part_set_retval = false;
526 /* Special case is value returned by reference we record as if it was non-ssa
527 set to result_decl. */
528 else if (TREE_CODE (retval) == SSA_NAME
529 && SSA_NAME_VAR (retval)
530 && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
531 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
532 current->split_part_set_retval
533 = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
534 else if (TREE_CODE (retval) == SSA_NAME)
535 current->split_part_set_retval
536 = (!SSA_NAME_IS_DEFAULT_DEF (retval)
537 && (bitmap_bit_p (current->split_bbs,
538 gimple_bb (SSA_NAME_DEF_STMT (retval))->index)
539 || gimple_bb (SSA_NAME_DEF_STMT (retval)) == return_bb));
540 else if (TREE_CODE (retval) == PARM_DECL)
541 current->split_part_set_retval = false;
542 else if (TREE_CODE (retval) == VAR_DECL
543 || TREE_CODE (retval) == RESULT_DECL)
544 current->split_part_set_retval
545 = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
546 else
547 current->split_part_set_retval = true;
549 /* split_function fixes up at most one PHI non-virtual PHI node in return_bb,
550 for the return value. If there are other PHIs, give up. */
551 if (return_bb != EXIT_BLOCK_PTR)
553 gimple_stmt_iterator psi;
555 for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
556 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (psi)))
557 && !(retval
558 && current->split_part_set_retval
559 && TREE_CODE (retval) == SSA_NAME
560 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
561 && SSA_NAME_DEF_STMT (retval) == gsi_stmt (psi)))
563 if (dump_file && (dump_flags & TDF_DETAILS))
564 fprintf (dump_file,
565 " Refused: return bb has extra PHIs\n");
566 return;
570 if (dump_file && (dump_flags & TDF_DETAILS))
571 fprintf (dump_file, " Accepted!\n");
573 /* At the moment chose split point with lowest frequency and that leaves
574 out smallest size of header.
575 In future we might re-consider this heuristics. */
576 if (!best_split_point.split_bbs
577 || best_split_point.entry_bb->frequency > current->entry_bb->frequency
578 || (best_split_point.entry_bb->frequency == current->entry_bb->frequency
579 && best_split_point.split_size < current->split_size))
582 if (dump_file && (dump_flags & TDF_DETAILS))
583 fprintf (dump_file, " New best split point!\n");
584 if (best_split_point.ssa_names_to_pass)
586 BITMAP_FREE (best_split_point.ssa_names_to_pass);
587 BITMAP_FREE (best_split_point.split_bbs);
589 best_split_point = *current;
590 best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
591 bitmap_copy (best_split_point.ssa_names_to_pass,
592 current->ssa_names_to_pass);
593 best_split_point.split_bbs = BITMAP_ALLOC (NULL);
594 bitmap_copy (best_split_point.split_bbs, current->split_bbs);
598 /* Return basic block containing RETURN statement. We allow basic blocks
599 of the form:
600 <retval> = tmp_var;
601 return <retval>
602 but return_bb can not be more complex than this.
603 If nothing is found, return EXIT_BLOCK_PTR.
605 When there are multiple RETURN statement, chose one with return value,
606 since that one is more likely shared by multiple code paths.
608 Return BB is special, because for function splitting it is the only
609 basic block that is duplicated in between header and split part of the
610 function.
612 TODO: We might support multiple return blocks. */
614 static basic_block
615 find_return_bb (void)
617 edge e;
618 basic_block return_bb = EXIT_BLOCK_PTR;
619 gimple_stmt_iterator bsi;
620 bool found_return = false;
621 tree retval = NULL_TREE;
623 if (!single_pred_p (EXIT_BLOCK_PTR))
624 return return_bb;
626 e = single_pred_edge (EXIT_BLOCK_PTR);
627 for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
629 gimple stmt = gsi_stmt (bsi);
630 if (gimple_code (stmt) == GIMPLE_LABEL
631 || is_gimple_debug (stmt)
632 || gimple_clobber_p (stmt))
634 else if (gimple_code (stmt) == GIMPLE_ASSIGN
635 && found_return
636 && gimple_assign_single_p (stmt)
637 && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
638 current_function_decl)
639 || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
640 && retval == gimple_assign_lhs (stmt))
642 else if (gimple_code (stmt) == GIMPLE_RETURN)
644 found_return = true;
645 retval = gimple_return_retval (stmt);
647 else
648 break;
650 if (gsi_end_p (bsi) && found_return)
651 return_bb = e->src;
653 return return_bb;
656 /* Given return basic block RETURN_BB, see where return value is really
657 stored. */
658 static tree
659 find_retval (basic_block return_bb)
661 gimple_stmt_iterator bsi;
662 for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
663 if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
664 return gimple_return_retval (gsi_stmt (bsi));
665 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
666 && !gimple_clobber_p (gsi_stmt (bsi)))
667 return gimple_assign_rhs1 (gsi_stmt (bsi));
668 return NULL;
671 /* Callback for walk_stmt_load_store_addr_ops. If T is non-SSA automatic
672 variable, mark it as used in bitmap passed via DATA.
673 Return true when access to T prevents splitting the function. */
675 static bool
676 mark_nonssa_use (gimple stmt ATTRIBUTE_UNUSED, tree t, void *data)
678 t = get_base_address (t);
680 if (!t || is_gimple_reg (t))
681 return false;
683 /* At present we can't pass non-SSA arguments to split function.
684 FIXME: this can be relaxed by passing references to arguments. */
685 if (TREE_CODE (t) == PARM_DECL)
687 if (dump_file && (dump_flags & TDF_DETAILS))
688 fprintf (dump_file,
689 "Cannot split: use of non-ssa function parameter.\n");
690 return true;
693 if ((TREE_CODE (t) == VAR_DECL
694 && auto_var_in_fn_p (t, current_function_decl))
695 || TREE_CODE (t) == RESULT_DECL
696 || TREE_CODE (t) == LABEL_DECL)
697 bitmap_set_bit ((bitmap)data, DECL_UID (t));
699 /* For DECL_BY_REFERENCE, the return value is actually a pointer. We want
700 to pretend that the value pointed to is actual result decl. */
701 if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
702 && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
703 && SSA_NAME_VAR (TREE_OPERAND (t, 0))
704 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
705 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
706 return
707 bitmap_bit_p ((bitmap)data,
708 DECL_UID (DECL_RESULT (current_function_decl)));
710 return false;
713 /* Compute local properties of basic block BB we collect when looking for
714 split points. We look for ssa defs and store them in SET_SSA_NAMES,
715 for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic
716 vars stored in NON_SSA_VARS.
718 When BB has edge to RETURN_BB, collect uses in RETURN_BB too.
720 Return false when BB contains something that prevents it from being put into
721 split function. */
723 static bool
724 visit_bb (basic_block bb, basic_block return_bb,
725 bitmap set_ssa_names, bitmap used_ssa_names,
726 bitmap non_ssa_vars)
728 gimple_stmt_iterator bsi;
729 edge e;
730 edge_iterator ei;
731 bool can_split = true;
733 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
735 gimple stmt = gsi_stmt (bsi);
736 tree op;
737 ssa_op_iter iter;
738 tree decl;
740 if (is_gimple_debug (stmt))
741 continue;
743 if (gimple_clobber_p (stmt))
744 continue;
746 /* FIXME: We can split regions containing EH. We can not however
747 split RESX, EH_DISPATCH and EH_POINTER referring to same region
748 into different partitions. This would require tracking of
749 EH regions and checking in consider_split_point if they
750 are not used elsewhere. */
751 if (gimple_code (stmt) == GIMPLE_RESX)
753 if (dump_file && (dump_flags & TDF_DETAILS))
754 fprintf (dump_file, "Cannot split: resx.\n");
755 can_split = false;
757 if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
759 if (dump_file && (dump_flags & TDF_DETAILS))
760 fprintf (dump_file, "Cannot split: eh dispatch.\n");
761 can_split = false;
764 /* Check builtins that prevent splitting. */
765 if (gimple_code (stmt) == GIMPLE_CALL
766 && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
767 && DECL_BUILT_IN (decl)
768 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
769 switch (DECL_FUNCTION_CODE (decl))
771 /* FIXME: once we will allow passing non-parm values to split part,
772 we need to be sure to handle correct builtin_stack_save and
773 builtin_stack_restore. At the moment we are safe; there is no
774 way to store builtin_stack_save result in non-SSA variable
775 since all calls to those are compiler generated. */
776 case BUILT_IN_APPLY:
777 case BUILT_IN_APPLY_ARGS:
778 case BUILT_IN_VA_START:
779 if (dump_file && (dump_flags & TDF_DETAILS))
780 fprintf (dump_file,
781 "Cannot split: builtin_apply and va_start.\n");
782 can_split = false;
783 break;
784 case BUILT_IN_EH_POINTER:
785 if (dump_file && (dump_flags & TDF_DETAILS))
786 fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n");
787 can_split = false;
788 break;
789 default:
790 break;
793 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
794 bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
795 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
796 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
797 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
798 mark_nonssa_use,
799 mark_nonssa_use,
800 mark_nonssa_use);
802 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
804 gimple stmt = gsi_stmt (bsi);
805 unsigned int i;
807 if (virtual_operand_p (gimple_phi_result (stmt)))
808 continue;
809 bitmap_set_bit (set_ssa_names,
810 SSA_NAME_VERSION (gimple_phi_result (stmt)));
811 for (i = 0; i < gimple_phi_num_args (stmt); i++)
813 tree op = gimple_phi_arg_def (stmt, i);
814 if (TREE_CODE (op) == SSA_NAME)
815 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
817 can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
818 mark_nonssa_use,
819 mark_nonssa_use,
820 mark_nonssa_use);
822 /* Record also uses coming from PHI operand in return BB. */
823 FOR_EACH_EDGE (e, ei, bb->succs)
824 if (e->dest == return_bb)
826 for (bsi = gsi_start_phis (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
828 gimple stmt = gsi_stmt (bsi);
829 tree op = gimple_phi_arg_def (stmt, e->dest_idx);
831 if (virtual_operand_p (gimple_phi_result (stmt)))
832 continue;
833 if (TREE_CODE (op) == SSA_NAME)
834 bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
835 else
836 can_split &= !mark_nonssa_use (stmt, op, non_ssa_vars);
839 return can_split;
842 /* Stack entry for recursive DFS walk in find_split_point. */
844 typedef struct
846 /* Basic block we are examining. */
847 basic_block bb;
849 /* SSA names set and used by the BB and all BBs reachable
850 from it via DFS walk. */
851 bitmap set_ssa_names, used_ssa_names;
852 bitmap non_ssa_vars;
854 /* All BBS visited from this BB via DFS walk. */
855 bitmap bbs_visited;
857 /* Last examined edge in DFS walk. Since we walk unoriented graph,
858 the value is up to sum of incoming and outgoing edges of BB. */
859 unsigned int edge_num;
861 /* Stack entry index of earliest BB reachable from current BB
862 or any BB visited later in DFS walk. */
863 int earliest;
865 /* Overall time and size of all BBs reached from this BB in DFS walk. */
866 int overall_time, overall_size;
868 /* When false we can not split on this BB. */
869 bool can_split;
870 } stack_entry;
871 DEF_VEC_O(stack_entry);
872 DEF_VEC_ALLOC_O(stack_entry,heap);
875 /* Find all articulations and call consider_split on them.
876 OVERALL_TIME and OVERALL_SIZE is time and size of the function.
878 We perform basic algorithm for finding an articulation in a graph
879 created from CFG by considering it to be an unoriented graph.
881 The articulation is discovered via DFS walk. We collect earliest
882 basic block on stack that is reachable via backward edge. Articulation
883 is any basic block such that there is no backward edge bypassing it.
884 To reduce stack usage we maintain heap allocated stack in STACK vector.
885 AUX pointer of BB is set to index it appears in the stack or -1 once
886 it is visited and popped off the stack.
888 The algorithm finds articulation after visiting the whole component
889 reachable by it. This makes it convenient to collect information about
890 the component used by consider_split. */
892 static void
893 find_split_points (int overall_time, int overall_size)
895 stack_entry first;
896 VEC(stack_entry, heap) *stack = NULL;
897 basic_block bb;
898 basic_block return_bb = find_return_bb ();
899 struct split_point current;
901 current.header_time = overall_time;
902 current.header_size = overall_size;
903 current.split_time = 0;
904 current.split_size = 0;
905 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
907 first.bb = ENTRY_BLOCK_PTR;
908 first.edge_num = 0;
909 first.overall_time = 0;
910 first.overall_size = 0;
911 first.earliest = INT_MAX;
912 first.set_ssa_names = 0;
913 first.used_ssa_names = 0;
914 first.bbs_visited = 0;
915 VEC_safe_push (stack_entry, heap, stack, first);
916 ENTRY_BLOCK_PTR->aux = (void *)(intptr_t)-1;
918 while (!VEC_empty (stack_entry, stack))
920 stack_entry *entry = &VEC_last (stack_entry, stack);
922 /* We are walking an acyclic graph, so edge_num counts
923 succ and pred edges together. However when considering
924 articulation, we want to have processed everything reachable
925 from articulation but nothing that reaches into it. */
926 if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
927 && entry->bb != ENTRY_BLOCK_PTR)
929 int pos = VEC_length (stack_entry, stack);
930 entry->can_split &= visit_bb (entry->bb, return_bb,
931 entry->set_ssa_names,
932 entry->used_ssa_names,
933 entry->non_ssa_vars);
934 if (pos <= entry->earliest && !entry->can_split
935 && dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file,
937 "found articulation at bb %i but can not split\n",
938 entry->bb->index);
939 if (pos <= entry->earliest && entry->can_split)
941 if (dump_file && (dump_flags & TDF_DETAILS))
942 fprintf (dump_file, "found articulation at bb %i\n",
943 entry->bb->index);
944 current.entry_bb = entry->bb;
945 current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
946 bitmap_and_compl (current.ssa_names_to_pass,
947 entry->used_ssa_names, entry->set_ssa_names);
948 current.header_time = overall_time - entry->overall_time;
949 current.header_size = overall_size - entry->overall_size;
950 current.split_time = entry->overall_time;
951 current.split_size = entry->overall_size;
952 current.split_bbs = entry->bbs_visited;
953 consider_split (&current, entry->non_ssa_vars, return_bb);
954 BITMAP_FREE (current.ssa_names_to_pass);
957 /* Do actual DFS walk. */
958 if (entry->edge_num
959 < (EDGE_COUNT (entry->bb->succs)
960 + EDGE_COUNT (entry->bb->preds)))
962 edge e;
963 basic_block dest;
964 if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
966 e = EDGE_SUCC (entry->bb, entry->edge_num);
967 dest = e->dest;
969 else
971 e = EDGE_PRED (entry->bb, entry->edge_num
972 - EDGE_COUNT (entry->bb->succs));
973 dest = e->src;
976 entry->edge_num++;
978 /* New BB to visit, push it to the stack. */
979 if (dest != return_bb && dest != EXIT_BLOCK_PTR
980 && !dest->aux)
982 stack_entry new_entry;
984 new_entry.bb = dest;
985 new_entry.edge_num = 0;
986 new_entry.overall_time
987 = VEC_index (bb_info, bb_info_vec, dest->index).time;
988 new_entry.overall_size
989 = VEC_index (bb_info, bb_info_vec, dest->index).size;
990 new_entry.earliest = INT_MAX;
991 new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
992 new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
993 new_entry.bbs_visited = BITMAP_ALLOC (NULL);
994 new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
995 new_entry.can_split = true;
996 bitmap_set_bit (new_entry.bbs_visited, dest->index);
997 VEC_safe_push (stack_entry, heap, stack, new_entry);
998 dest->aux = (void *)(intptr_t)VEC_length (stack_entry, stack);
1000 /* Back edge found, record the earliest point. */
1001 else if ((intptr_t)dest->aux > 0
1002 && (intptr_t)dest->aux < entry->earliest)
1003 entry->earliest = (intptr_t)dest->aux;
1005 /* We are done with examining the edges. Pop off the value from stack
1006 and merge stuff we accumulate during the walk. */
1007 else if (entry->bb != ENTRY_BLOCK_PTR)
1009 stack_entry *prev = &VEC_index (stack_entry, stack,
1010 VEC_length (stack_entry, stack) - 2);
1012 entry->bb->aux = (void *)(intptr_t)-1;
1013 prev->can_split &= entry->can_split;
1014 if (prev->set_ssa_names)
1016 bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
1017 bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
1018 bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
1019 bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
1021 if (prev->earliest > entry->earliest)
1022 prev->earliest = entry->earliest;
1023 prev->overall_time += entry->overall_time;
1024 prev->overall_size += entry->overall_size;
1025 BITMAP_FREE (entry->set_ssa_names);
1026 BITMAP_FREE (entry->used_ssa_names);
1027 BITMAP_FREE (entry->bbs_visited);
1028 BITMAP_FREE (entry->non_ssa_vars);
1029 VEC_pop (stack_entry, stack);
1031 else
1032 VEC_pop (stack_entry, stack);
1034 ENTRY_BLOCK_PTR->aux = NULL;
1035 FOR_EACH_BB (bb)
1036 bb->aux = NULL;
1037 VEC_free (stack_entry, heap, stack);
1038 BITMAP_FREE (current.ssa_names_to_pass);
1041 /* Split function at SPLIT_POINT. */
/* NOTE(review): outlines the blocks recorded in SPLIT_POINT->split_bbs into
   a new ".part" clone of the current function (via cgraph_function_versioning)
   and replaces them in the original body by a call.  Also handles: pruning of
   unused parameters (args_to_skip), wiring the clone's return value back into
   RETURN_BB or a fresh return statement, virtual-operand cleanup at RETURN_BB,
   and debug-info bindings for optimized-away parameters.  */
1043 static void
1044 split_function (struct split_point *split_point)
1046 VEC (tree, heap) *args_to_pass = NULL;
1047 bitmap args_to_skip;
1048 tree parm;
1049 int num = 0;
1050 struct cgraph_node *node, *cur_node = cgraph_get_node (current_function_decl);
1051 basic_block return_bb = find_return_bb ();
1052 basic_block call_bb;
1053 gimple_stmt_iterator gsi;
1054 gimple call;
1055 edge e;
1056 edge_iterator ei;
1057 tree retval = NULL, real_retval = NULL;
1058 bool split_part_return_p = false;
1059 gimple last_stmt = NULL;
1060 unsigned int i;
1061 tree arg, ddef;
1062 VEC(tree, gc) **debug_args = NULL;
1064 if (dump_file)
1066 fprintf (dump_file, "\n\nSplitting function at:\n");
1067 dump_split_point (dump_file, split_point);
/* Parameters may only be dropped from the clone when the signature is
   allowed to change; otherwise args_to_skip stays NULL and every
   parameter is forwarded.  */
1070 if (cur_node->local.can_change_signature)
1071 args_to_skip = BITMAP_ALLOC (NULL);
1072 else
1073 args_to_skip = NULL;
1075 /* Collect the parameters of new function and args_to_skip bitmap. */
1076 for (parm = DECL_ARGUMENTS (current_function_decl);
1077 parm; parm = DECL_CHAIN (parm), num++)
1078 if (args_to_skip
1079 && (!is_gimple_reg (parm)
1080 || (ddef = ssa_default_def (cfun, parm)) == NULL_TREE
1081 || !bitmap_bit_p (split_point->ssa_names_to_pass,
1082 SSA_NAME_VERSION (ddef))))
1083 bitmap_set_bit (args_to_skip, num);
1084 else
1086 /* This parm might not have been used up to now, but is going to be
1087 used, hence register it. */
1088 if (is_gimple_reg (parm))
1089 arg = get_or_create_ssa_default_def (cfun, parm);
1090 else
1091 arg = parm;
1093 if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
1094 arg = fold_convert (DECL_ARG_TYPE (parm), arg);
1095 VEC_safe_push (tree, heap, args_to_pass, arg);
1098 /* See if the split function will return. */
1099 FOR_EACH_EDGE (e, ei, return_bb->preds)
1100 if (bitmap_bit_p (split_point->split_bbs, e->src->index))
1101 break;
1102 if (e)
1103 split_part_return_p = true;
1105 /* Add return block to what will become the split function.
1106 We do not return; no return block is needed. */
1107 if (!split_part_return_p)
1109 /* We have no return block, so nothing is needed. */
1110 else if (return_bb == EXIT_BLOCK_PTR)
1112 /* When we do not want to return value, we need to construct
1113 new return block with empty return statement.
1114 FIXME: Once we are able to change return type, we should change function
1115 to return void instead of just outputting function with undefined return
1116 value. For structures this affects quality of codegen. */
1117 else if (!split_point->split_part_set_retval
1118 && find_retval (return_bb))
1120 bool redirected = true;
1121 basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
1122 gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
1123 gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
/* Restart the predecessor scan after each redirection: FOR_EACH_EDGE
   cannot continue safely once redirect_edge_and_branch has mutated
   the edge list.  */
1124 while (redirected)
1126 redirected = false;
1127 FOR_EACH_EDGE (e, ei, return_bb->preds)
1128 if (bitmap_bit_p (split_point->split_bbs, e->src->index))
1130 new_return_bb->count += e->count;
1131 new_return_bb->frequency += EDGE_FREQUENCY (e);
1132 redirect_edge_and_branch (e, new_return_bb);
1133 redirected = true;
1134 break;
1137 e = make_edge (new_return_bb, EXIT_BLOCK_PTR, 0);
1138 e->probability = REG_BR_PROB_BASE;
1139 e->count = new_return_bb->count;
1140 bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
1142 /* When we pass around the value, use existing return block. */
1143 else
1144 bitmap_set_bit (split_point->split_bbs, return_bb->index);
1146 /* If RETURN_BB has virtual operand PHIs, they must be removed and the
1147 virtual operand marked for renaming as we change the CFG in a way that
1148 tree-inline is not able to compensate for.
1150 Note this can happen whether or not we have a return value. If we have
1151 a return value, then RETURN_BB may have PHIs for real operands too. */
1152 if (return_bb != EXIT_BLOCK_PTR)
1154 bool phi_p = false;
1155 for (gsi = gsi_start_phis (return_bb); !gsi_end_p (gsi);)
1157 gimple stmt = gsi_stmt (gsi);
1158 if (!virtual_operand_p (gimple_phi_result (stmt)))
1160 gsi_next (&gsi);
1161 continue;
1163 mark_virtual_phi_result_for_renaming (stmt);
1164 remove_phi_node (&gsi, true);
1165 phi_p = true;
1167 /* In reality we have to rename the reaching definition of the
1168 virtual operand at return_bb as we will eventually release it
1169 when we remove the code region we outlined.
1170 So we have to rename all immediate virtual uses of that region
1171 if we didn't see a PHI definition yet. */
1172 /* ??? In real reality we want to set the reaching vdef of the
1173 entry of the SESE region as the vuse of the call and the reaching
1174 vdef of the exit of the SESE region as the vdef of the call. */
1175 if (!phi_p)
1176 for (gsi = gsi_start_bb (return_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1178 gimple stmt = gsi_stmt (gsi);
1179 if (gimple_vuse (stmt))
1181 gimple_set_vuse (stmt, NULL_TREE);
1182 update_stmt (stmt);
1184 if (gimple_vdef (stmt))
1185 break;
1189 /* Now create the actual clone. */
1190 rebuild_cgraph_edges ();
1191 node = cgraph_function_versioning (cur_node, NULL, NULL, args_to_skip,
1192 !split_part_return_p,
1193 split_point->split_bbs,
1194 split_point->entry_bb, "part");
1195 /* For usual cloning it is enough to clear builtin only when signature
1196 changes. For partial inlining we however can not expect the part
1197 of builtin implementation to have same semantic as the whole. */
1198 if (DECL_BUILT_IN (node->symbol.decl))
1200 DECL_BUILT_IN_CLASS (node->symbol.decl) = NOT_BUILT_IN;
1201 DECL_FUNCTION_CODE (node->symbol.decl) = (enum built_in_function) 0;
1203 cgraph_node_remove_callees (cur_node);
/* If the split part never reaches a return, mark the clone's decl so
   callers may treat it as noreturn (TREE_THIS_VOLATILE on a
   FUNCTION_DECL carries that meaning in GCC trees).  */
1204 if (!split_part_return_p)
1205 TREE_THIS_VOLATILE (node->symbol.decl) = 1;
1206 if (dump_file)
1207 dump_function_to_file (node->symbol.decl, dump_file, dump_flags);
1209 /* Create the basic block we place call into. It is the entry basic block
1210 split after last label. */
1211 call_bb = split_point->entry_bb;
1212 for (gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
1213 if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
1215 last_stmt = gsi_stmt (gsi);
1216 gsi_next (&gsi);
1218 else
1219 break;
/* The fallthru edge created by split_block leads into the now-outlined
   region; it is replaced by the call built below.  */
1220 e = split_block (split_point->entry_bb, last_stmt);
1221 remove_edge (e);
1223 /* Produce the call statement. */
1224 gsi = gsi_last_bb (call_bb);
/* Gimplify any argument that is not a valid gimple operand (e.g. the
   fold_convert results created above) before building the call.  */
1225 FOR_EACH_VEC_ELT (tree, args_to_pass, i, arg)
1226 if (!is_gimple_val (arg))
1228 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
1229 false, GSI_CONTINUE_LINKING);
1230 VEC_replace (tree, args_to_pass, i, arg);
1232 call = gimple_build_call_vec (node->symbol.decl, args_to_pass);
1233 gimple_set_block (call, DECL_INITIAL (current_function_decl));
1234 VEC_free (tree, heap, args_to_pass);
1236 /* For optimized away parameters, add on the caller side
1237 before the call
1238 DEBUG D#X => parm_Y(D)
1239 stmts and associate D#X with parm in decl_debug_args_lookup
1240 vector to say for debug info that if parameter parm had been passed,
1241 it would have value parm_Y(D). */
1242 if (args_to_skip)
1243 for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
1244 parm; parm = DECL_CHAIN (parm), num++)
1245 if (bitmap_bit_p (args_to_skip, num)
1246 && is_gimple_reg (parm))
1248 tree ddecl;
1249 gimple def_temp;
1251 /* This needs to be done even without MAY_HAVE_DEBUG_STMTS,
1252 otherwise if it didn't exist before, we'd end up with
1253 different SSA_NAME_VERSIONs between -g and -g0. */
1254 arg = get_or_create_ssa_default_def (cfun, parm);
1255 if (!MAY_HAVE_DEBUG_STMTS)
1256 continue;
1258 if (debug_args == NULL)
1259 debug_args = decl_debug_args_insert (node->symbol.decl);
1260 ddecl = make_node (DEBUG_EXPR_DECL);
1261 DECL_ARTIFICIAL (ddecl) = 1;
1262 TREE_TYPE (ddecl) = TREE_TYPE (parm);
1263 DECL_MODE (ddecl) = DECL_MODE (parm);
/* debug_args holds (DECL_ORIGIN, debug-expr) pairs; the callee-side
   loop below walks it two entries at a time.  */
1264 VEC_safe_push (tree, gc, *debug_args, DECL_ORIGIN (parm));
1265 VEC_safe_push (tree, gc, *debug_args, ddecl);
1266 def_temp = gimple_build_debug_bind (ddecl, unshare_expr (arg),
1267 call);
1268 gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
1270 /* And on the callee side, add
1271 DEBUG D#Y s=> parm
1272 DEBUG var => D#Y
1273 stmts to the first bb where var is a VAR_DECL created for the
1274 optimized away parameter in DECL_INITIAL block. This hints
1275 in the debug info that var (whole DECL_ORIGIN is the parm PARM_DECL)
1276 is optimized away, but could be looked up at the call site
1277 as value of D#X there. */
1278 if (debug_args != NULL)
1280 unsigned int i;
1281 tree var, vexpr;
1282 gimple_stmt_iterator cgsi;
1283 gimple def_temp;
1285 push_cfun (DECL_STRUCT_FUNCTION (node->symbol.decl));
1286 var = BLOCK_VARS (DECL_INITIAL (node->symbol.decl));
1287 i = VEC_length (tree, *debug_args);
1288 cgsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1291 i -= 2;
1292 while (var != NULL_TREE
1293 && DECL_ABSTRACT_ORIGIN (var)
1294 != VEC_index (tree, *debug_args, i))
1295 var = TREE_CHAIN (var);
1296 if (var == NULL_TREE)
1297 break;
1298 vexpr = make_node (DEBUG_EXPR_DECL);
1299 parm = VEC_index (tree, *debug_args, i);
1300 DECL_ARTIFICIAL (vexpr) = 1;
1301 TREE_TYPE (vexpr) = TREE_TYPE (parm);
1302 DECL_MODE (vexpr) = DECL_MODE (parm);
1303 def_temp = gimple_build_debug_source_bind (vexpr, parm,
1304 NULL);
1305 gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
1306 def_temp = gimple_build_debug_bind (var, vexpr, NULL);
1307 gsi_insert_before (&cgsi, def_temp, GSI_SAME_STMT);
1309 while (i);
1310 pop_cfun ();
1313 /* We avoid address being taken on any variable used by split part,
1314 so return slot optimization is always possible. Moreover this is
1315 required to make DECL_BY_REFERENCE work. */
1316 if (aggregate_value_p (DECL_RESULT (current_function_decl),
1317 TREE_TYPE (current_function_decl)))
1318 gimple_call_set_return_slot_opt (call, true);
1320 /* Update return value. This is bit tricky. When we do not return,
1321 do nothing. When we return we might need to update return_bb
1322 or produce a new return statement. */
1323 if (!split_part_return_p)
1324 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1325 else
1327 e = make_edge (call_bb, return_bb,
1328 return_bb == EXIT_BLOCK_PTR ? 0 : EDGE_FALLTHRU);
1329 e->count = call_bb->count;
1330 e->probability = REG_BR_PROB_BASE;
1332 /* If there is return basic block, see what value we need to store
1333 return value into and put call just before it. */
1334 if (return_bb != EXIT_BLOCK_PTR)
1336 real_retval = retval = find_retval (return_bb);
1338 if (real_retval && split_point->split_part_set_retval)
1340 gimple_stmt_iterator psi;
1342 /* See if we need new SSA_NAME for the result.
1343 When DECL_BY_REFERENCE is true, retval is actually pointer to
1344 return value and it is constant in whole function. */
1345 if (TREE_CODE (retval) == SSA_NAME
1346 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1348 retval = copy_ssa_name (retval, call);
1350 /* See if there is PHI defining return value. */
1351 for (psi = gsi_start_phis (return_bb);
1352 !gsi_end_p (psi); gsi_next (&psi))
1353 if (!virtual_operand_p (gimple_phi_result (gsi_stmt (psi))))
1354 break;
1356 /* When there is PHI, just update its value. */
1357 if (TREE_CODE (retval) == SSA_NAME
1358 && !gsi_end_p (psi))
1359 add_phi_arg (gsi_stmt (psi), retval, e, UNKNOWN_LOCATION);
1360 /* Otherwise update the return BB itself.
1361 find_return_bb allows at most one assignment to return value,
1362 so update first statement. */
1363 else
1365 gimple_stmt_iterator bsi;
1366 for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
1367 gsi_next (&bsi))
1368 if (gimple_code (gsi_stmt (bsi)) == GIMPLE_RETURN)
1370 gimple_return_set_retval (gsi_stmt (bsi), retval);
1371 break;
1373 else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
1374 && !gimple_clobber_p (gsi_stmt (bsi)))
1376 gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
1377 break;
1379 update_stmt (gsi_stmt (bsi));
1382 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1384 gimple_call_set_lhs (call, build_simple_mem_ref (retval));
1385 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1387 else
1389 tree restype;
1390 restype = TREE_TYPE (DECL_RESULT (current_function_decl));
1391 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
/* Insert an explicit NOP conversion when the computed return value's
   type differs from the function's declared return type.  */
1392 if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
1394 gimple cpy;
1395 tree tem = create_tmp_reg (restype, NULL);
1396 tem = make_ssa_name (tem, call);
1397 cpy = gimple_build_assign_with_ops (NOP_EXPR, retval,
1398 tem, NULL_TREE);
1399 gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
1400 retval = tem;
1402 gimple_call_set_lhs (call, retval);
1403 update_stmt (call);
1406 else
1407 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1409 /* We don't use return block (there is either no return in function or
1410 multiple of them). So create new basic block with return statement.
1412 else
1414 gimple ret;
1415 if (split_point->split_part_set_retval
1416 && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
1418 retval = DECL_RESULT (current_function_decl);
1420 /* We use temporary register to hold value when aggregate_value_p
1421 is false. Similarly for DECL_BY_REFERENCE we must avoid extra
1422 copy. */
1423 if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
1424 && !DECL_BY_REFERENCE (retval))
1425 retval = create_tmp_reg (TREE_TYPE (retval), NULL);
1426 if (is_gimple_reg (retval))
1428 /* When returning by reference, there is only one SSA name
1429 assigned to RESULT_DECL (that is pointer to return value).
1430 Look it up or create new one if it is missing. */
1431 if (DECL_BY_REFERENCE (retval))
1432 retval = get_or_create_ssa_default_def (cfun, retval)
1433 /* Otherwise produce new SSA name for return value. */
1434 else
1435 retval = make_ssa_name (retval, call);
1437 if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1438 gimple_call_set_lhs (call, build_simple_mem_ref (retval));
1439 else
1440 gimple_call_set_lhs (call, retval);
1442 gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1443 ret = gimple_build_return (retval);
1444 gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
/* The CFG was rewritten above; dominator info is stale now.  */
1447 free_dominance_info (CDI_DOMINATORS);
1448 free_dominance_info (CDI_POST_DOMINATORS);
1449 compute_inline_parameters (node, true);
1452 /* Execute function splitting pass. */
/* NOTE(review): top-level driver.  Bails out early for functions that must
   not or cannot usefully be split (noreturn/malloc, main, not inlinable,
   not versionable, nested, unlikely-to-be-inlined).  Then computes per-BB
   size/time estimates into bb_info_vec, searches for the best split point,
   and performs the split if one was recorded in best_split_point.
   Returns a TODO_* mask for the pass manager.  */
1454 static unsigned int
1455 execute_split_functions (void)
1457 gimple_stmt_iterator bsi;
1458 basic_block bb;
1459 int overall_time = 0, overall_size = 0;
1460 int todo = 0;
1461 struct cgraph_node *node = cgraph_get_node (current_function_decl);
1463 if (flags_from_decl_or_type (current_function_decl)
1464 & (ECF_NORETURN|ECF_MALLOC))
1466 if (dump_file)
1467 fprintf (dump_file, "Not splitting: noreturn/malloc function.\n");
1468 return 0;
1470 if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
1472 if (dump_file)
1473 fprintf (dump_file, "Not splitting: main function.\n");
1474 return 0;
1476 /* This can be relaxed; function might become inlinable after splitting
1477 away the uninlinable part. */
1478 if (inline_edge_summary_vec && !inline_summary (node)->inlinable)
1480 if (dump_file)
1481 fprintf (dump_file, "Not splitting: not inlinable.\n");
1482 return 0;
1484 if (DECL_DISREGARD_INLINE_LIMITS (node->symbol.decl))
1486 if (dump_file)
1487 fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
1488 return 0;
1490 /* This can be relaxed; most of versioning tests actually prevents
1491 a duplication. */
1492 if (!tree_versionable_function_p (current_function_decl))
1494 if (dump_file)
1495 fprintf (dump_file, "Not splitting: not versionable.\n");
1496 return 0;
1498 /* FIXME: we could support this. */
1499 if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
1501 if (dump_file)
1502 fprintf (dump_file, "Not splitting: nested function.\n");
1503 return 0;
1506 /* See if it makes sense to try to split.
1507 It makes sense to split if we inline, that is if we have direct calls to
1508 handle or direct calls are possibly going to appear as result of indirect
1509 inlining or LTO. Also handle -fprofile-generate as LTO to allow non-LTO
1510 training for LTO -fprofile-use build.
1512 Note that we are not completely conservative about disqualifying functions
1513 called once. It is possible that the caller is called more then once and
1514 then inlining would still benefit. */
1515 if ((!node->callers || !node->callers->next_caller)
1516 && !node->symbol.address_taken
1517 && (!flag_lto || !node->symbol.externally_visible))
1519 if (dump_file)
1520 fprintf (dump_file, "Not splitting: not called directly "
1521 "or called once.\n");
1522 return 0;
1525 /* FIXME: We can actually split if splitting reduces call overhead. */
1526 if (!flag_inline_small_functions
1527 && !DECL_DECLARED_INLINE_P (current_function_decl))
1529 if (dump_file)
1530 fprintf (dump_file, "Not splitting: not autoinlining and function"
1531 " is not inline.\n");
1532 return 0;
1535 /* Initialize bitmap to track forbidden calls. */
1536 forbidden_dominators = BITMAP_ALLOC (NULL);
1537 calculate_dominance_info (CDI_DOMINATORS);
1539 /* Compute local info about basic blocks and determine function size/time. */
1540 VEC_safe_grow_cleared (bb_info, heap, bb_info_vec, last_basic_block + 1);
1541 memset (&best_split_point, 0, sizeof (best_split_point));
1542 FOR_EACH_BB (bb)
1544 int time = 0;
1545 int size = 0;
1546 int freq = compute_call_stmt_bb_frequency (current_function_decl, bb);
1548 if (dump_file && (dump_flags & TDF_DETAILS))
1549 fprintf (dump_file, "Basic block %i\n", bb->index);
1551 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1553 int this_time, this_size;
1554 gimple stmt = gsi_stmt (bsi);
1556 this_size = estimate_num_insns (stmt, &eni_size_weights);
1557 this_time = estimate_num_insns (stmt, &eni_time_weights) * freq;
1558 size += this_size;
1559 time += this_time;
1560 check_forbidden_calls (stmt);
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, " freq:%6i size:%3i time:%3i ",
1565 freq, this_size, this_time);
1566 print_gimple_stmt (dump_file, stmt, 0, 0);
1569 overall_time += time;
1570 overall_size += size;
1571 VEC_index (bb_info, bb_info_vec, bb->index).time = time;
1572 VEC_index (bb_info, bb_info_vec, bb->index).size = size;
/* Stage 2-3: search for the best split point and apply it if found.  */
1574 find_split_points (overall_time, overall_size);
1575 if (best_split_point.split_bbs)
1577 split_function (&best_split_point);
1578 BITMAP_FREE (best_split_point.ssa_names_to_pass);
1579 BITMAP_FREE (best_split_point.split_bbs);
1580 todo = TODO_update_ssa | TODO_cleanup_cfg;
/* Release per-function scratch data whether or not a split happened.  */
1582 BITMAP_FREE (forbidden_dominators);
1583 VEC_free (bb_info, heap, bb_info_vec);
1584 bb_info_vec = NULL;
1585 return todo;
1588 /* Gate function splitting pass. When doing profile feedback, we want
1589 to execute the pass after profiling is read. So disable one in
1590 early optimization. */
1592 static bool
1593 gate_split_functions (void)
1595 return (flag_partial_inlining
1596 && !profile_arc_flag && !flag_branch_probabilities);
/* Pass descriptor for the plain (non-feedback) function splitting pass;
   gate_split_functions decides when it runs.  */
1599 struct gimple_opt_pass pass_split_functions =
1602 GIMPLE_PASS,
1603 "fnsplit", /* name */
1604 gate_split_functions, /* gate */
1605 execute_split_functions, /* execute */
1606 NULL, /* sub */
1607 NULL, /* next */
1608 0, /* static_pass_number */
1609 TV_IPA_FNSPLIT, /* tv_id */
1610 PROP_cfg, /* properties_required */
1611 0, /* properties_provided */
1612 0, /* properties_destroyed */
1613 0, /* todo_flags_start */
1614 TODO_verify_all /* todo_flags_finish */
1618 /* Gate feedback driven function splitting pass.
1619 We don't need to split when profiling at all, we are producing
1620 lousy code anyway. */
1622 static bool
1623 gate_feedback_split_functions (void)
1625 return (flag_partial_inlining
1626 && flag_branch_probabilities);
1629 /* Execute function splitting pass. */
1631 static unsigned int
1632 execute_feedback_split_functions (void)
1634 unsigned int retval = execute_split_functions ();
1635 if (retval)
1636 retval |= TODO_rebuild_cgraph_edges;
1637 return retval;
/* Pass descriptor for the feedback-driven splitting variant; runs only
   after profile data is read (see gate_feedback_split_functions).  */
1640 struct gimple_opt_pass pass_feedback_split_functions =
1643 GIMPLE_PASS,
1644 "feedback_fnsplit", /* name */
1645 gate_feedback_split_functions, /* gate */
1646 execute_feedback_split_functions, /* execute */
1647 NULL, /* sub */
1648 NULL, /* next */
1649 0, /* static_pass_number */
1650 TV_IPA_FNSPLIT, /* tv_id */
1651 PROP_cfg, /* properties_required */
1652 0, /* properties_provided */
1653 0, /* properties_destroyed */
1654 0, /* todo_flags_start */
1655 TODO_verify_all /* todo_flags_finish */