Merge in trunk.
[official-gcc.git] / gcc / tree-ssa-phiopt.c
blob28a6ea76e85679a797b4062ba2478db707b7be20
1 /* Optimization of PHI nodes by converting them into straightline code.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "hash-table.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "stor-layout.h"
27 #include "flags.h"
28 #include "tm_p.h"
29 #include "basic-block.h"
30 #include "pointer-set.h"
31 #include "tree-ssa-alias.h"
32 #include "internal-fn.h"
33 #include "gimple-expr.h"
34 #include "is-a.h"
35 #include "gimple.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "gimplify-me.h"
39 #include "gimple-ssa.h"
40 #include "tree-cfg.h"
41 #include "tree-phinodes.h"
42 #include "ssa-iterators.h"
43 #include "stringpool.h"
44 #include "tree-ssanames.h"
45 #include "expr.h"
46 #include "tree-dfa.h"
47 #include "tree-pass.h"
48 #include "langhooks.h"
49 #include "domwalk.h"
50 #include "cfgloop.h"
51 #include "tree-data-ref.h"
52 #include "gimple-pretty-print.h"
53 #include "insn-config.h"
54 #include "expr.h"
55 #include "optabs.h"
56 #include "tree-scalar-evolution.h"
/* Provide a default of 0 so that HAVE_conditional_move can be used even
   when nothing has defined it before this point.  */
58 #ifndef HAVE_conditional_move
59 #define HAVE_conditional_move (0)
60 #endif
/* Forward declarations of file-local (static) functions.  */
62 static unsigned int tree_ssa_phiopt_worker (bool, bool);
63 static bool conditional_replacement (basic_block, basic_block,
64 edge, edge, gimple, tree, tree);
65 static int value_replacement (basic_block, basic_block,
66 edge, edge, gimple, tree, tree);
67 static bool minmax_replacement (basic_block, basic_block,
68 edge, edge, gimple, tree, tree);
69 static bool abs_replacement (basic_block, basic_block,
70 edge, edge, gimple, tree, tree);
71 static bool neg_replacement (basic_block, basic_block,
72 edge, edge, gimple, tree, tree);
73 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
74 struct pointer_set_t *);
75 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
76 static struct pointer_set_t * get_non_trapping (void);
77 static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
78 static void hoist_adjacent_loads (basic_block, basic_block,
79 basic_block, basic_block);
80 static bool gate_hoist_loads (void);
82 /* This pass tries to transform conditional stores into unconditional
83 ones, enabling further simplifications with the simpler then and else
84 blocks. In particular it replaces this:
86 bb0:
87 if (cond) goto bb2; else goto bb1;
88 bb1:
89 *p = RHS;
90 bb2:
92 with
94 bb0:
95 if (cond) goto bb1; else goto bb2;
96 bb1:
97 condtmp' = *p;
98 bb2:
99 condtmp = PHI <RHS, condtmp'>
100 *p = condtmp;
102 This transformation can only be done under several constraints,
103 documented below. It also replaces:
105 bb0:
106 if (cond) goto bb2; else goto bb1;
107 bb1:
108 *p = RHS1;
109 goto bb3;
110 bb2:
111 *p = RHS2;
112 bb3:
114 with
116 bb0:
117 if (cond) goto bb3; else goto bb1;
118 bb1:
119 bb3:
120 condtmp = PHI <RHS1, RHS2>
121 *p = condtmp; */
123 static unsigned int
124 tree_ssa_cs_elim (void)
126 unsigned todo;
127 /* ??? We are not interested in loop related info, but the following
128 will create it, ICEing as we didn't init loops with pre-headers.
129 An interfacing issue of find_data_references_in_bb. */
130 loop_optimizer_init (LOOPS_NORMAL);
131 scev_initialize ();
132 todo = tree_ssa_phiopt_worker (true, false);
133 scev_finalize ();
134 loop_optimizer_finalize ();
135 return todo;
138 /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */
140 static gimple
141 single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
143 gimple_stmt_iterator i;
144 gimple phi = NULL;
145 if (gimple_seq_singleton_p (seq))
146 return gsi_stmt (gsi_start (seq));
147 for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
149 gimple p = gsi_stmt (i);
150 /* If the PHI arguments are equal then we can skip this PHI. */
151 if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
152 gimple_phi_arg_def (p, e1->dest_idx)))
153 continue;
155 /* If we already have a PHI that has the two edge arguments are
156 different, then return it is not a singleton for these PHIs. */
157 if (phi)
158 return NULL;
160 phi = p;
162 return phi;
165 /* The core routine of conditional store replacement and normal
166 phi optimizations. Both share much of the infrastructure in how
167 to match applicable basic block patterns. DO_STORE_ELIM is true
168 when we want to do conditional store replacement, false otherwise.
169 DO_HOIST_LOADS is true when we want to hoist adjacent loads out
170 of diamond control flow patterns, false otherwise.
   Returns a mask of TODO_* flags: TODO_cleanup_cfg when CFGCHANGED was
   set by one of the replacements (together with
   TODO_update_ssa_only_virtuals when DO_STORE_ELIM is true), and 0
   otherwise. */
171 static unsigned int
172 tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
174 basic_block bb;
175 basic_block *bb_order;
176 unsigned n, i;
177 bool cfgchanged = false;
178 struct pointer_set_t *nontrap = 0;
180 if (do_store_elim)
181 /* Calculate the set of non-trapping memory accesses. */
182 nontrap = get_non_trapping ();
184 /* The replacement of conditional negation with a non-branching
185 sequence is really only a win when optimizing for speed and we
186 can avoid transformations by gimple if-conversion that result
187 in poor RTL generation.
189 Ideally either gimple if-conversion or the RTL expanders will
190 be improved and the code to emit branchless conditional negation
191 can be removed. */
192 bool replace_conditional_negation = false;
193 if (!do_store_elim)
194 replace_conditional_negation
195 = ((!optimize_size && optimize >= 2)
196 || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
197 && flag_tree_loop_if_convert != 0)
198 || flag_tree_loop_if_convert == 1
199 || flag_tree_loop_if_convert_stores == 1));
201 /* Search every basic block for COND_EXPR we may be able to optimize.
203 We walk the blocks in order that guarantees that a block with
204 a single predecessor is processed before the predecessor.
205 This ensures that we collapse inner ifs before visiting the
206 outer ones, and also that we do not try to visit a removed
207 block. */
208 bb_order = single_pred_before_succ_order ();
209 n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
211 for (i = 0; i < n; i++)
213 gimple cond_stmt, phi;
214 basic_block bb1, bb2;
215 edge e1, e2;
216 tree arg0, arg1;
218 bb = bb_order[i];
220 cond_stmt = last_stmt (bb);
221 /* Check to see if the last statement is a GIMPLE_COND. */
222 if (!cond_stmt
223 || gimple_code (cond_stmt) != GIMPLE_COND)
224 continue;
226 e1 = EDGE_SUCC (bb, 0);
227 bb1 = e1->dest;
228 e2 = EDGE_SUCC (bb, 1);
229 bb2 = e2->dest;
231 /* We cannot do the optimization on abnormal edges. */
232 if ((e1->flags & EDGE_ABNORMAL) != 0
233 || (e2->flags & EDGE_ABNORMAL) != 0)
234 continue;
236 /* Skip this block if bb1 has no successors, or if bb2 is NULL or
   has no successors. */
237 if (EDGE_COUNT (bb1->succs) == 0
238 || bb2 == NULL
239 || EDGE_COUNT (bb2->succs) == 0)
240 continue;
242 /* Find the bb which is the fall through to the other. */
243 if (EDGE_SUCC (bb1, 0)->dest == bb2)
245 else if (EDGE_SUCC (bb2, 0)->dest == bb1)
247 basic_block bb_tmp = bb1;
248 edge e_tmp = e1;
249 bb1 = bb2;
250 bb2 = bb_tmp;
251 e1 = e2;
252 e2 = e_tmp;
254 else if (do_store_elim
255 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
257 basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;
259 if (!single_succ_p (bb1)
260 || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
261 || !single_succ_p (bb2)
262 || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
263 || EDGE_COUNT (bb3->preds) != 2)
264 continue;
265 if (cond_if_else_store_replacement (bb1, bb2, bb3))
266 cfgchanged = true;
267 continue;
269 else if (do_hoist_loads
270 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
272 basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;
274 if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
275 && single_succ_p (bb1)
276 && single_succ_p (bb2)
277 && single_pred_p (bb1)
278 && single_pred_p (bb2)
279 && EDGE_COUNT (bb->succs) == 2
280 && EDGE_COUNT (bb3->preds) == 2
281 /* If one edge or the other is dominant, a conditional move
282 is likely to perform worse than the well-predicted branch. */
283 && !predictable_edge_p (EDGE_SUCC (bb, 0))
284 && !predictable_edge_p (EDGE_SUCC (bb, 1)))
285 hoist_adjacent_loads (bb, bb1, bb2, bb3);
286 continue;
288 else
289 continue;
291 e1 = EDGE_SUCC (bb1, 0);
293 /* Make sure that bb1 is just a fall through. */
294 if (!single_succ_p (bb1)
295 || (e1->flags & EDGE_FALLTHRU) == 0)
296 continue;
298 /* Also make sure that bb1 only have one predecessor and that it
299 is bb. */
300 if (!single_pred_p (bb1)
301 || single_pred (bb1) != bb)
302 continue;
304 if (do_store_elim)
306 /* bb1 is the middle block, bb2 the join block, bb the split block,
307 e1 the fallthrough edge from bb1 to bb2. We can't do the
308 optimization if the join block has more than two predecessors. */
309 if (EDGE_COUNT (bb2->preds) > 2)
310 continue;
311 if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
312 cfgchanged = true;
314 else
316 gimple_seq phis = phi_nodes (bb2);
317 gimple_stmt_iterator gsi;
318 bool candorest = true;
320 /* Value replacement can work with more than one PHI
321 so try that first. */
322 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
324 phi = gsi_stmt (gsi);
325 arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
326 arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
327 if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
329 candorest = false;
330 cfgchanged = true;
331 break;
335 if (!candorest)
336 continue;
338 phi = single_non_singleton_phi_for_edges (phis, e1, e2);
339 if (!phi)
340 continue;
342 arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
343 arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
345 /* Something is wrong if we cannot find the arguments in the PHI
346 node. */
347 gcc_assert (arg0 != NULL && arg1 != NULL);
349 /* Do the replacement of conditional if it can be done. The
   replacements are tried in this order and the first one that
   succeeds wins. */
350 if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
351 cfgchanged = true;
352 else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
353 cfgchanged = true;
354 else if (replace_conditional_negation
355 && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
356 cfgchanged = true;
357 else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
358 cfgchanged = true;
362 free (bb_order);
364 if (do_store_elim)
365 pointer_set_destroy (nontrap);
366 /* If the CFG has changed, we should cleanup the CFG. */
367 if (cfgchanged && do_store_elim)
369 /* In cond-store replacement we have added some loads on edges
370 and new VOPS (as we moved the store, and created a load). */
371 gsi_commit_edge_inserts ();
372 return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
374 else if (cfgchanged)
375 return TODO_cleanup_cfg;
376 return 0;
379 /* Replace PHI node element whose edge is E in block BB with variable NEW.
380 Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
381 is known to have two edges, one of which must reach BB). */
383 static void
384 replace_phi_edge_with_variable (basic_block cond_block,
385 edge e, gimple phi, tree new_tree)
387 basic_block bb = gimple_bb (phi);
388 basic_block block_to_remove;
389 gimple_stmt_iterator gsi;
391 /* Change the PHI argument to new. */
392 SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
394 /* Remove the empty basic block. */
395 if (EDGE_SUCC (cond_block, 0)->dest == bb)
397 EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
398 EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
399 EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
400 EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;
402 block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
404 else
406 EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
407 EDGE_SUCC (cond_block, 1)->flags
408 &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
409 EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
410 EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;
412 block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
414 delete_basic_block (block_to_remove);
416 /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
417 gsi = gsi_last_bb (cond_block);
418 gsi_remove (&gsi, true);
420 if (dump_file && (dump_flags & TDF_DETAILS))
421 fprintf (dump_file,
422 "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
423 cond_block->index,
424 bb->index);
427 /* The function conditional_replacement does the main work of doing the
428 conditional replacement. Return true if the replacement is done.
429 Otherwise return false.
430 BB is the basic block where the replacement is going to be done on. ARG0
431 is argument 0 from PHI. Likewise for ARG1. */
433 static bool
434 conditional_replacement (basic_block cond_bb, basic_block middle_bb,
435 edge e0, edge e1, gimple phi,
436 tree arg0, tree arg1)
438 tree result;
439 gimple stmt, new_stmt;
440 tree cond;
441 gimple_stmt_iterator gsi;
442 edge true_edge, false_edge;
443 tree new_var, new_var2;
444 bool neg;
446 /* FIXME: Gimplification of complex type is too hard for now. */
447 /* We aren't prepared to handle vectors either (and it is a question
448 if it would be worthwhile anyway). */
449 if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
450 || POINTER_TYPE_P (TREE_TYPE (arg0)))
451 || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
452 || POINTER_TYPE_P (TREE_TYPE (arg1))))
453 return false;
455 /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
456 convert it to the conditional. */
457 if ((integer_zerop (arg0) && integer_onep (arg1))
458 || (integer_zerop (arg1) && integer_onep (arg0)))
459 neg = false;
460 else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
461 || (integer_zerop (arg1) && integer_all_onesp (arg0)))
462 neg = true;
463 else
464 return false;
466 if (!empty_block_p (middle_bb))
467 return false;
469 /* At this point we know we have a GIMPLE_COND with two successors.
470 One successor is BB, the other successor is an empty block which
471 falls through into BB.
473 There is a single PHI node at the join point (BB) and its arguments
474 are constants (0, 1) or (0, -1).
476 So, given the condition COND, and the two PHI arguments, we can
477 rewrite this PHI into non-branching code:
479 dest = (COND) or dest = COND'
481 We use the condition as-is if the argument associated with the
482 true edge has the value one or the argument associated with the
483 false edge as the value zero. Note that those conditions are not
484 the same since only one of the outgoing edges from the GIMPLE_COND
485 will directly reach BB and thus be associated with an argument. */
487 stmt = last_stmt (cond_bb);
488 result = PHI_RESULT (phi);
490 /* To handle special cases like floating point comparison, it is easier and
491 less error-prone to build a tree and gimplify it on the fly though it is
492 less efficient. */
493 cond = fold_build2_loc (gimple_location (stmt),
494 gimple_cond_code (stmt), boolean_type_node,
495 gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
497 /* We need to know which is the true edge and which is the false
498 edge so that we know when to invert the condition below. */
499 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
500 if ((e0 == true_edge && integer_zerop (arg0))
501 || (e0 == false_edge && !integer_zerop (arg0))
502 || (e1 == true_edge && integer_zerop (arg1))
503 || (e1 == false_edge && !integer_zerop (arg1)))
504 cond = fold_build1_loc (gimple_location (stmt),
505 TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
507 if (neg)
509 cond = fold_convert_loc (gimple_location (stmt),
510 TREE_TYPE (result), cond);
511 cond = fold_build1_loc (gimple_location (stmt),
512 NEGATE_EXPR, TREE_TYPE (cond), cond);
515 /* Insert our new statements at the end of conditional block before the
516 COND_STMT. */
517 gsi = gsi_for_stmt (stmt);
518 new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
519 GSI_SAME_STMT);
521 if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
523 source_location locus_0, locus_1;
525 new_var2 = make_ssa_name (TREE_TYPE (result), NULL);
526 new_stmt = gimple_build_assign_with_ops (CONVERT_EXPR, new_var2,
527 new_var, NULL);
528 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
529 new_var = new_var2;
531 /* Set the locus to the first argument, unless is doesn't have one. */
532 locus_0 = gimple_phi_arg_location (phi, 0);
533 locus_1 = gimple_phi_arg_location (phi, 1);
534 if (locus_0 == UNKNOWN_LOCATION)
535 locus_0 = locus_1;
536 gimple_set_location (new_stmt, locus_0);
539 replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
541 /* Note that we optimized this PHI. */
542 return true;
545 /* Update *ARG which is defined in STMT so that it contains the
546 computed value if that seems profitable. Return true if the
547 statement is made dead by that rewriting. */
549 static bool
550 jump_function_from_stmt (tree *arg, gimple stmt)
552 enum tree_code code = gimple_assign_rhs_code (stmt);
553 if (code == ADDR_EXPR)
555 /* For arg = &p->i transform it to p, if possible. */
556 tree rhs1 = gimple_assign_rhs1 (stmt);
557 HOST_WIDE_INT offset;
558 tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
559 &offset);
560 if (tem
561 && TREE_CODE (tem) == MEM_REF
562 && (mem_ref_offset (tem) + offset) == 0)
564 *arg = TREE_OPERAND (tem, 0);
565 return true;
568 /* TODO: Much like IPA-CP jump-functions we want to handle constant
569 additions symbolically here, and we'd need to update the comparison
570 code that compares the arg + cst tuples in our caller. For now the
571 code above exactly handles the VEC_BASE pattern from vec.h. */
572 return false;
575 /* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
576 of the form SSA_NAME NE 0.
578 If RHS is fed by a simple EQ_EXPR comparison of two values, see if
579 the two input values of the EQ_EXPR match arg0 and arg1.
581 If so update *code and return TRUE. Otherwise return FALSE. */
583 static bool
584 rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
585 enum tree_code *code, const_tree rhs)
587 /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
588 statement. */
589 if (TREE_CODE (rhs) == SSA_NAME)
591 gimple def1 = SSA_NAME_DEF_STMT (rhs);
593 /* Verify the defining statement has an EQ_EXPR on the RHS. */
594 if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
596 /* Finally verify the source operands of the EQ_EXPR are equal
597 to arg0 and arg1. */
598 tree op0 = gimple_assign_rhs1 (def1);
599 tree op1 = gimple_assign_rhs2 (def1);
600 if ((operand_equal_for_phi_arg_p (arg0, op0)
601 && operand_equal_for_phi_arg_p (arg1, op1))
602 || (operand_equal_for_phi_arg_p (arg0, op1)
603 && operand_equal_for_phi_arg_p (arg1, op0)))
605 /* We will perform the optimization. */
606 *code = gimple_assign_rhs_code (def1);
607 return true;
611 return false;
614 /* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.
616 Also return TRUE if arg0/arg1 are equal to the source arguments of a
617 an EQ comparison feeding a BIT_AND_EXPR which feeds COND.
619 Return FALSE otherwise. */
621 static bool
622 operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
623 enum tree_code *code, gimple cond)
625 gimple def;
626 tree lhs = gimple_cond_lhs (cond);
627 tree rhs = gimple_cond_rhs (cond);
629 if ((operand_equal_for_phi_arg_p (arg0, lhs)
630 && operand_equal_for_phi_arg_p (arg1, rhs))
631 || (operand_equal_for_phi_arg_p (arg1, lhs)
632 && operand_equal_for_phi_arg_p (arg0, rhs)))
633 return true;
635 /* Now handle more complex case where we have an EQ comparison
636 which feeds a BIT_AND_EXPR which feeds COND.
638 First verify that COND is of the form SSA_NAME NE 0. */
639 if (*code != NE_EXPR || !integer_zerop (rhs)
640 || TREE_CODE (lhs) != SSA_NAME)
641 return false;
643 /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR. */
644 def = SSA_NAME_DEF_STMT (lhs);
645 if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
646 return false;
648 /* Now verify arg0/arg1 correspond to the source arguments of an
649 EQ comparison feeding the BIT_AND_EXPR. */
651 tree tmp = gimple_assign_rhs1 (def);
652 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
653 return true;
655 tmp = gimple_assign_rhs2 (def);
656 if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
657 return true;
659 return false;
662 /* The function value_replacement does the main work of doing the value
663 replacement. Return non-zero if the replacement is done. Otherwise return
664 0. If we remove the middle basic block, return 2.
665 BB is the basic block where the replacement is going to be done on. ARG0
666 is argument 0 from the PHI. Likewise for ARG1. */
668 static int
669 value_replacement (basic_block cond_bb, basic_block middle_bb,
670 edge e0, edge e1, gimple phi,
671 tree arg0, tree arg1)
673 gimple_stmt_iterator gsi;
674 gimple cond;
675 edge true_edge, false_edge;
676 enum tree_code code;
677 bool emtpy_or_with_defined_p = true;
679 /* If the type says honor signed zeros we cannot do this
680 optimization. */
681 if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
682 return 0;
684 /* If there is a statement in MIDDLE_BB that defines one of the PHI
685 arguments, then adjust arg0 or arg1. */
686 gsi = gsi_after_labels (middle_bb);
687 if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
688 gsi_next_nondebug (&gsi);
689 while (!gsi_end_p (gsi))
691 gimple stmt = gsi_stmt (gsi);
692 tree lhs;
693 gsi_next_nondebug (&gsi);
694 if (!is_gimple_assign (stmt))
696 emtpy_or_with_defined_p = false;
697 continue;
699 /* Now try to adjust arg0 or arg1 according to the computation
700 in the statement. */
701 lhs = gimple_assign_lhs (stmt);
702 if (!(lhs == arg0
703 && jump_function_from_stmt (&arg0, stmt))
704 || (lhs == arg1
705 && jump_function_from_stmt (&arg1, stmt)))
706 emtpy_or_with_defined_p = false;
709 cond = last_stmt (cond_bb);
710 code = gimple_cond_code (cond);
712 /* This transformation is only valid for equality comparisons. */
713 if (code != NE_EXPR && code != EQ_EXPR)
714 return 0;
716 /* We need to know which is the true edge and which is the false
717 edge so that we know if have abs or negative abs. */
718 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
720 /* At this point we know we have a COND_EXPR with two successors.
721 One successor is BB, the other successor is an empty block which
722 falls through into BB.
724 The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.
726 There is a single PHI node at the join point (BB) with two arguments.
728 We now need to verify that the two arguments in the PHI node match
729 the two arguments to the equality comparison. */
731 if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
733 edge e;
734 tree arg;
736 /* For NE_EXPR, we want to build an assignment result = arg where
737 arg is the PHI argument associated with the true edge. For
738 EQ_EXPR we want the PHI argument associated with the false edge. */
739 e = (code == NE_EXPR ? true_edge : false_edge);
741 /* Unfortunately, E may not reach BB (it may instead have gone to
742 OTHER_BLOCK). If that is the case, then we want the single outgoing
743 edge from OTHER_BLOCK which reaches BB and represents the desired
744 path from COND_BLOCK. */
745 if (e->dest == middle_bb)
746 e = single_succ_edge (e->dest);
748 /* Now we know the incoming edge to BB that has the argument for the
749 RHS of our new assignment statement. */
750 if (e0 == e)
751 arg = arg0;
752 else
753 arg = arg1;
755 /* If the middle basic block was empty or is defining the
756 PHI arguments and this is a single phi where the args are different
757 for the edges e0 and e1 then we can remove the middle basic block. */
758 if (emtpy_or_with_defined_p
759 && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
760 e0, e1))
762 replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
763 /* Note that we optimized this PHI. */
764 return 2;
766 else
768 /* Replace the PHI arguments with arg. */
769 SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
770 SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
771 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "PHI ");
774 print_generic_expr (dump_file, gimple_phi_result (phi), 0);
775 fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
776 cond_bb->index);
777 print_generic_expr (dump_file, arg, 0);
778 fprintf (dump_file, ".\n");
780 return 1;
784 return 0;
787 /* The function minmax_replacement does the main work of doing the minmax
788 replacement. Return true if the replacement is done. Otherwise return
789 false.
   COND_BB is the block ending in the comparison and MIDDLE_BB the block
   between it and the block holding PHI. ARG0 is the PHI argument
   associated with edge E0. Likewise ARG1 for edge E1.
   MIDDLE_BB must either be empty, or contain a single MIN_EXPR/MAX_EXPR
   assignment, which is then moved into COND_BB. */
793 static bool
794 minmax_replacement (basic_block cond_bb, basic_block middle_bb,
795 edge e0, edge e1, gimple phi,
796 tree arg0, tree arg1)
798 tree result, type;
799 gimple cond, new_stmt;
800 edge true_edge, false_edge;
801 enum tree_code cmp, minmax, ass_code;
802 tree smaller, larger, arg_true, arg_false;
803 gimple_stmt_iterator gsi, gsi_from;
805 type = TREE_TYPE (PHI_RESULT (phi));
807 /* The optimization may be unsafe due to NaNs. */
808 if (HONOR_NANS (TYPE_MODE (type)))
809 return false;
811 cond = last_stmt (cond_bb);
812 cmp = gimple_cond_code (cond);
814 /* This transformation is only valid for order comparisons. Record which
815 operand is smaller/larger if the result of the comparison is true. */
816 if (cmp == LT_EXPR || cmp == LE_EXPR)
818 smaller = gimple_cond_lhs (cond);
819 larger = gimple_cond_rhs (cond);
821 else if (cmp == GT_EXPR || cmp == GE_EXPR)
823 smaller = gimple_cond_rhs (cond);
824 larger = gimple_cond_lhs (cond);
826 else
827 return false;
829 /* We need to know which is the true edge and which is the false
830 edge so that we can tell which PHI argument belongs to which
   outcome of the comparison. */
831 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
833 /* Forward the edges over the middle basic block. */
834 if (true_edge->dest == middle_bb)
835 true_edge = EDGE_SUCC (true_edge->dest, 0);
836 if (false_edge->dest == middle_bb)
837 false_edge = EDGE_SUCC (false_edge->dest, 0);
839 if (true_edge == e0)
841 gcc_assert (false_edge == e1);
842 arg_true = arg0;
843 arg_false = arg1;
845 else
847 gcc_assert (false_edge == e0);
848 gcc_assert (true_edge == e1);
849 arg_true = arg1;
850 arg_false = arg0;
853 if (empty_block_p (middle_bb))
855 if (operand_equal_for_phi_arg_p (arg_true, smaller)
856 && operand_equal_for_phi_arg_p (arg_false, larger))
858 /* Case
860 if (smaller < larger)
861 rslt = smaller;
862 else
863 rslt = larger; */
864 minmax = MIN_EXPR;
866 else if (operand_equal_for_phi_arg_p (arg_false, smaller)
867 && operand_equal_for_phi_arg_p (arg_true, larger))
868 minmax = MAX_EXPR;
869 else
870 return false;
872 else
874 /* Recognize the following case, assuming d <= u:
876 if (a <= u)
877 b = MAX (a, d);
878 x = PHI <b, u>
880 This is equivalent to
882 b = MAX (a, d);
883 x = MIN (b, u); */
885 gimple assign = last_and_only_stmt (middle_bb);
886 tree lhs, op0, op1, bound;
888 if (!assign
889 || gimple_code (assign) != GIMPLE_ASSIGN)
890 return false;
892 lhs = gimple_assign_lhs (assign);
893 ass_code = gimple_assign_rhs_code (assign);
894 if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
895 return false;
896 op0 = gimple_assign_rhs1 (assign);
897 op1 = gimple_assign_rhs2 (assign);
899 if (true_edge->src == middle_bb)
901 /* We got here if the condition is true, i.e., SMALLER < LARGER. */
902 if (!operand_equal_for_phi_arg_p (lhs, arg_true))
903 return false;
905 if (operand_equal_for_phi_arg_p (arg_false, larger))
907 /* Case
909 if (smaller < larger)
911 r' = MAX_EXPR (smaller, bound)
913 r = PHI <r', larger> --> to be turned to MIN_EXPR. */
914 if (ass_code != MAX_EXPR)
915 return false;
917 minmax = MIN_EXPR;
918 if (operand_equal_for_phi_arg_p (op0, smaller))
919 bound = op1;
920 else if (operand_equal_for_phi_arg_p (op1, smaller))
921 bound = op0;
922 else
923 return false;
925 /* We need BOUND <= LARGER. */
926 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
927 bound, larger)))
928 return false;
930 else if (operand_equal_for_phi_arg_p (arg_false, smaller))
932 /* Case
934 if (smaller < larger)
936 r' = MIN_EXPR (larger, bound)
938 r = PHI <r', smaller> --> to be turned to MAX_EXPR. */
939 if (ass_code != MIN_EXPR)
940 return false;
942 minmax = MAX_EXPR;
943 if (operand_equal_for_phi_arg_p (op0, larger))
944 bound = op1;
945 else if (operand_equal_for_phi_arg_p (op1, larger))
946 bound = op0;
947 else
948 return false;
950 /* We need BOUND >= SMALLER. */
951 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
952 bound, smaller)))
953 return false;
955 else
956 return false;
958 else
960 /* We got here if the condition is false, i.e., SMALLER > LARGER. */
961 if (!operand_equal_for_phi_arg_p (lhs, arg_false))
962 return false;
964 if (operand_equal_for_phi_arg_p (arg_true, larger))
966 /* Case
968 if (smaller > larger)
970 r' = MIN_EXPR (smaller, bound)
972 r = PHI <r', larger> --> to be turned to MAX_EXPR. */
973 if (ass_code != MIN_EXPR)
974 return false;
976 minmax = MAX_EXPR;
977 if (operand_equal_for_phi_arg_p (op0, smaller))
978 bound = op1;
979 else if (operand_equal_for_phi_arg_p (op1, smaller))
980 bound = op0;
981 else
982 return false;
984 /* We need BOUND >= LARGER. */
985 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
986 bound, larger)))
987 return false;
989 else if (operand_equal_for_phi_arg_p (arg_true, smaller))
991 /* Case
993 if (smaller > larger)
995 r' = MAX_EXPR (larger, bound)
997 r = PHI <r', smaller> --> to be turned to MIN_EXPR. */
998 if (ass_code != MAX_EXPR)
999 return false;
1001 minmax = MIN_EXPR;
1002 if (operand_equal_for_phi_arg_p (op0, larger))
1003 bound = op1;
1004 else if (operand_equal_for_phi_arg_p (op1, larger))
1005 bound = op0;
1006 else
1007 return false;
1009 /* We need BOUND <= SMALLER. */
1010 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
1011 bound, smaller)))
1012 return false;
1014 else
1015 return false;
1018 /* Move the statement from the middle block. */
1019 gsi = gsi_last_bb (cond_bb);
1020 gsi_from = gsi_last_nondebug_bb (middle_bb);
1021 gsi_move_before (&gsi_from, &gsi);
1024 /* Emit the statement to compute min/max. The result gets a fresh
   SSA name duplicated from the PHI result, and the statement is
   inserted before the last statement of COND_BB. */
1025 result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
1026 new_stmt = gimple_build_assign_with_ops (minmax, result, arg0, arg1);
1027 gsi = gsi_last_bb (cond_bb);
1028 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1030 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1031 return true;
1034 /* The function abs_replacement does the main work of doing the absolute
1035 value replacement. Return true if the replacement is done. Otherwise return
1036 false.
1037 COND_BB is the basic block ending in the conditional. ARG0
1038 is argument 0 from the PHI. Likewise for ARG1. */
1040 static bool
1041 abs_replacement (basic_block cond_bb, basic_block middle_bb,
1042 edge e0 ATTRIBUTE_UNUSED, edge e1,
1043 gimple phi, tree arg0, tree arg1)
1045 tree result;
1046 gimple new_stmt, cond;
1047 gimple_stmt_iterator gsi;
1048 edge true_edge, false_edge;
1049 gimple assign;
1050 edge e;
1051 tree rhs, lhs;
1052 bool negate;
1053 enum tree_code cond_code;
1055 /* If the type says honor signed zeros we cannot do this
1056 optimization. */
1057 if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
1058 return false;
1060 /* OTHER_BLOCK must have only one executable statement which must have the
1061 form arg0 = -arg1 or arg1 = -arg0. */
1063 assign = last_and_only_stmt (middle_bb);
1064 /* If we did not find the proper negation assignment, then we can not
1065 optimize. */
1066 if (assign == NULL)
1067 return false;
1069 /* If we got here, then we have found the only executable statement
1070 in OTHER_BLOCK. If it is anything other than arg = -arg1 or
1071 arg1 = -arg0, then we can not optimize. */
1072 if (gimple_code (assign) != GIMPLE_ASSIGN)
1073 return false;
1075 lhs = gimple_assign_lhs (assign);
1077 if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
1078 return false;
1080 rhs = gimple_assign_rhs1 (assign);
1082 /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
1083 if (!(lhs == arg0 && rhs == arg1)
1084 && !(lhs == arg1 && rhs == arg0))
1085 return false;
1087 cond = last_stmt (cond_bb);
1088 result = PHI_RESULT (phi);
1090 /* Only relationals comparing arg[01] against zero are interesting. */
1091 cond_code = gimple_cond_code (cond);
1092 if (cond_code != GT_EXPR && cond_code != GE_EXPR
1093 && cond_code != LT_EXPR && cond_code != LE_EXPR)
1094 return false;
1096 /* Make sure the conditional is arg[01] OP y. */
1097 if (gimple_cond_lhs (cond) != rhs)
1098 return false;
1100 if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
1101 ? real_zerop (gimple_cond_rhs (cond))
1102 : integer_zerop (gimple_cond_rhs (cond)))
1104 else
1105 return false;
1107 /* We need to know which is the true edge and which is the false
1108 edge so that we know if have abs or negative abs. */
1109 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1111 /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
1112 will need to negate the result. Similarly for LT_EXPR/LE_EXPR if
1113 the false edge goes to OTHER_BLOCK. */
1114 if (cond_code == GT_EXPR || cond_code == GE_EXPR)
1115 e = true_edge;
1116 else
1117 e = false_edge;
1119 if (e->dest == middle_bb)
1120 negate = true;
1121 else
1122 negate = false;
1124 result = duplicate_ssa_name (result, NULL);
1126 if (negate)
1127 lhs = make_ssa_name (TREE_TYPE (result), NULL);
1128 else
1129 lhs = result;
1131 /* Build the modify expression with abs expression. */
1132 new_stmt = gimple_build_assign_with_ops (ABS_EXPR, lhs, rhs, NULL);
1134 gsi = gsi_last_bb (cond_bb);
1135 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1137 if (negate)
1139 /* Get the right GSI. We want to insert after the recently
1140 added ABS_EXPR statement (which we know is the first statement
1141 in the block. */
1142 new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, result, lhs, NULL);
1144 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1147 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1149 /* Note that we optimized this PHI. */
1150 return true;
1153 /* The function neg_replacement replaces conditional negation with
1154 equivalent straight line code. Returns TRUE if replacement is done,
1155 otherwise returns FALSE.
1157 COND_BB branches around negation occuring in MIDDLE_BB.
1159 E0 and E1 are edges out of COND_BB. E0 reaches MIDDLE_BB and
1160 E1 reaches the other successor which should contain PHI with
1161 arguments ARG0 and ARG1.
1163 Assuming negation is to occur when the condition is true,
1164 then the non-branching sequence is:
1166 result = (rhs ^ -cond) + cond
1168 Inverting the condition or its result gives us negation
1169 when the original condition is false. */
1171 static bool
1172 neg_replacement (basic_block cond_bb, basic_block middle_bb,
1173 edge e0 ATTRIBUTE_UNUSED, edge e1,
1174 gimple phi, tree arg0, tree arg1)
1176 gimple new_stmt, cond;
1177 gimple_stmt_iterator gsi;
1178 gimple assign;
1179 edge true_edge, false_edge;
1180 tree rhs, lhs;
1181 enum tree_code cond_code;
1182 bool invert = false;
1184 /* This transformation performs logical operations on the
1185 incoming arguments. So force them to be integral types. */
1186 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
1187 return false;
1189 /* OTHER_BLOCK must have only one executable statement which must have the
1190 form arg0 = -arg1 or arg1 = -arg0. */
1192 assign = last_and_only_stmt (middle_bb);
1193 /* If we did not find the proper negation assignment, then we can not
1194 optimize. */
1195 if (assign == NULL)
1196 return false;
1198 /* If we got here, then we have found the only executable statement
1199 in OTHER_BLOCK. If it is anything other than arg0 = -arg1 or
1200 arg1 = -arg0, then we can not optimize. */
1201 if (gimple_code (assign) != GIMPLE_ASSIGN)
1202 return false;
1204 lhs = gimple_assign_lhs (assign);
1206 if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
1207 return false;
1209 rhs = gimple_assign_rhs1 (assign);
1211 /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
1212 if (!(lhs == arg0 && rhs == arg1)
1213 && !(lhs == arg1 && rhs == arg0))
1214 return false;
1216 /* The basic sequence assumes we negate when the condition is true.
1217 If we need the opposite, then we will either need to invert the
1218 condition or its result. */
1219 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1220 invert = false_edge->dest == middle_bb;
1222 /* Unlike abs_replacement, we can handle arbitrary conditionals here. */
1223 cond = last_stmt (cond_bb);
1224 cond_code = gimple_cond_code (cond);
1226 /* If inversion is needed, first try to invert the test since
1227 that's cheapest. */
1228 if (invert)
1230 bool honor_nans
1231 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (cond))));
1232 enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
1234 /* If invert_tree_comparison was successful, then use its return
1235 value as the new code and note that inversion is no longer
1236 needed. */
1237 if (new_code != ERROR_MARK)
1239 cond_code = new_code;
1240 invert = false;
1244 tree cond_val = make_ssa_name (boolean_type_node, NULL);
1245 new_stmt = gimple_build_assign_with_ops (cond_code, cond_val,
1246 gimple_cond_lhs (cond),
1247 gimple_cond_rhs (cond));
1248 gsi = gsi_last_bb (cond_bb);
1249 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1251 /* If we still need inversion, then invert the result of the
1252 condition. */
1253 if (invert)
1255 tree tmp = make_ssa_name (boolean_type_node, NULL);
1256 new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
1257 cond_val, boolean_true_node);
1258 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1259 cond_val = tmp;
1262 /* Get the condition in the right type so that we can perform
1263 logical and arithmetic operations on it. */
1264 tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
1265 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, cond_val_converted,
1266 cond_val, NULL_TREE);
1267 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1269 tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL);
1270 new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, neg_cond_val_converted,
1271 cond_val_converted, NULL_TREE);
1272 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1274 tree tmp = make_ssa_name (TREE_TYPE (rhs), NULL);
1275 new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp,
1276 rhs, neg_cond_val_converted);
1277 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1279 tree new_lhs = make_ssa_name (TREE_TYPE (rhs), NULL);
1280 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, new_lhs,
1281 tmp, cond_val_converted);
1282 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1284 replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
1286 /* Note that we optimized this PHI. */
1287 return true;
1290 /* Auxiliary functions to determine the set of memory accesses which
1291 can't trap because they are preceded by accesses to the same memory
1292 portion. We do that for MEM_REFs, so we only need to track
1293 the SSA_NAME of the pointer indirectly referenced. The algorithm
1294 simply is a walk over all instructions in dominator order. When
1295 we see a MEM_REF we determine if we've already seen the same
1296 ref anywhere up to the root of the dominator tree. If we have, the
1297 current access can't trap. If we don't see any dominating access
1298 the current access might trap, but might also make later accesses
1299 non-trapping, so we remember it. We need to be careful with loads
1300 or stores, for instance a load might not trap, while a store would,
1301 so if we see a dominating read access this doesn't mean that a later
1302 write access would not trap. Hence we also need to differentiate the
1303 type of access(es) seen.
1305 ??? We currently are very conservative and assume that a load might
1306 trap even if a store doesn't (write-only memory). This probably is
1307 overly conservative. */
1309 /* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
1310 through it was seen, which would constitute a no-trap region for
1311 same accesses. */
1312 struct name_to_bb
1314 unsigned int ssa_name_ver;
1315 unsigned int phase;
1316 bool store;
1317 HOST_WIDE_INT offset, size;
1318 basic_block bb;
1321 /* Hashtable helpers. */
1323 struct ssa_names_hasher : typed_free_remove <name_to_bb>
1325 typedef name_to_bb value_type;
1326 typedef name_to_bb compare_type;
1327 static inline hashval_t hash (const value_type *);
1328 static inline bool equal (const value_type *, const compare_type *);
1331 /* Phase counter used for quick clearing of the hash-table when we see
   calls: rather than emptying the table, the counter is incremented and
   hash entries with phase < nt_call_phase are treated as invalid.  It is
   also incremented when a block is entered before all of its
   predecessors have been visited.  */
1333 static unsigned int nt_call_phase;
1335 /* The hash function. */
1337 inline hashval_t
1338 ssa_names_hasher::hash (const value_type *n)
1340 return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
1341 ^ (n->offset << 6) ^ (n->size << 3);
1344 /* The equality function of *P1 and *P2. */
1346 inline bool
1347 ssa_names_hasher::equal (const value_type *n1, const compare_type *n2)
1349 return n1->ssa_name_ver == n2->ssa_name_ver
1350 && n1->store == n2->store
1351 && n1->offset == n2->offset
1352 && n1->size == n2->size;
1355 /* The hash table for remembering what we've seen: the memory accesses
   recorded so far by add_or_mark_expr.  It is created and disposed of in
   get_non_trapping.  */
1356 static hash_table <ssa_names_hasher> seen_ssa_names;
1358 /* We see the expression EXP in basic block BB. If it's an interesting
1359 expression (an MEM_REF through an SSA_NAME) possibly insert the
1360 expression into the set NONTRAP or the hash table of seen expressions.
1361 STORE is true if this expression is on the LHS, otherwise it's on
1362 the RHS. */
1363 static void
1364 add_or_mark_expr (basic_block bb, tree exp,
1365 struct pointer_set_t *nontrap, bool store)
1367 HOST_WIDE_INT size;
1369 if (TREE_CODE (exp) == MEM_REF
1370 && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
1371 && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
1372 && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
1374 tree name = TREE_OPERAND (exp, 0);
1375 struct name_to_bb map;
1376 name_to_bb **slot;
1377 struct name_to_bb *n2bb;
1378 basic_block found_bb = 0;
1380 /* Try to find the last seen MEM_REF through the same
1381 SSA_NAME, which can trap. */
1382 map.ssa_name_ver = SSA_NAME_VERSION (name);
1383 map.phase = 0;
1384 map.bb = 0;
1385 map.store = store;
1386 map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
1387 map.size = size;
1389 slot = seen_ssa_names.find_slot (&map, INSERT);
1390 n2bb = *slot;
1391 if (n2bb && n2bb->phase >= nt_call_phase)
1392 found_bb = n2bb->bb;
1394 /* If we've found a trapping MEM_REF, _and_ it dominates EXP
1395 (it's in a basic block on the path from us to the dominator root)
1396 then we can't trap. */
1397 if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
1399 pointer_set_insert (nontrap, exp);
1401 else
1403 /* EXP might trap, so insert it into the hash table. */
1404 if (n2bb)
1406 n2bb->phase = nt_call_phase;
1407 n2bb->bb = bb;
1409 else
1411 n2bb = XNEW (struct name_to_bb);
1412 n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
1413 n2bb->phase = nt_call_phase;
1414 n2bb->bb = bb;
1415 n2bb->store = store;
1416 n2bb->offset = map.offset;
1417 n2bb->size = size;
1418 *slot = n2bb;
1424 class nontrapping_dom_walker : public dom_walker
1426 public:
1427 nontrapping_dom_walker (cdi_direction direction, pointer_set_t *ps)
1428 : dom_walker (direction), m_nontrapping (ps) {}
1430 virtual void before_dom_children (basic_block);
1431 virtual void after_dom_children (basic_block);
1433 private:
1434 pointer_set_t *m_nontrapping;
1437 /* Called by walk_dominator_tree, when entering the block BB. */
1438 void
1439 nontrapping_dom_walker::before_dom_children (basic_block bb)
1441 edge e;
1442 edge_iterator ei;
1443 gimple_stmt_iterator gsi;
1445 /* If we haven't seen all our predecessors, clear the hash-table. */
1446 FOR_EACH_EDGE (e, ei, bb->preds)
1447 if ((((size_t)e->src->aux) & 2) == 0)
1449 nt_call_phase++;
1450 break;
1453 /* Mark this BB as being on the path to dominator root and as visited. */
1454 bb->aux = (void*)(1 | 2);
1456 /* And walk the statements in order. */
1457 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1459 gimple stmt = gsi_stmt (gsi);
1461 if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
1462 nt_call_phase++;
1463 else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
1465 add_or_mark_expr (bb, gimple_assign_lhs (stmt), m_nontrapping, true);
1466 add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), m_nontrapping, false);
1471 /* Called by walk_dominator_tree, when basic block BB is exited. */
1472 void
1473 nontrapping_dom_walker::after_dom_children (basic_block bb)
1475 /* This BB isn't on the path to dominator root anymore. */
1476 bb->aux = (void*)2;
1479 /* This is the entry point of gathering non trapping memory accesses.
1480 It will do a dominator walk over the whole function, and it will
1481 make use of the bb->aux pointers. It returns a set of trees
1482 (the MEM_REFs itself) which can't trap. */
1483 static struct pointer_set_t *
1484 get_non_trapping (void)
1486 nt_call_phase = 0;
1487 pointer_set_t *nontrap = pointer_set_create ();
1488 seen_ssa_names.create (128);
1489 /* We're going to do a dominator walk, so ensure that we have
1490 dominance information. */
1491 calculate_dominance_info (CDI_DOMINATORS);
1493 nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
1494 .walk (cfun->cfg->x_entry_block_ptr);
1496 seen_ssa_names.dispose ();
1498 clear_aux_for_blocks ();
1499 return nontrap;
1502 /* Do the main work of conditional store replacement. We already know
1503 that the recognized pattern looks like so:
1505 split:
1506 if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
1507 MIDDLE_BB:
1508 something
1509 fallthrough (edge E0)
1510 JOIN_BB:
1511 some more
1513 We check that MIDDLE_BB contains only one store, that that store
1514 doesn't trap (not via NOTRAP, but via checking if an access to the same
1515 memory location dominates us) and that the store has a "simple" RHS. */
1517 static bool
1518 cond_store_replacement (basic_block middle_bb, basic_block join_bb,
1519 edge e0, edge e1, struct pointer_set_t *nontrap)
1521 gimple assign = last_and_only_stmt (middle_bb);
1522 tree lhs, rhs, name, name2;
1523 gimple newphi, new_stmt;
1524 gimple_stmt_iterator gsi;
1525 source_location locus;
1527 /* Check if middle_bb contains of only one store. */
1528 if (!assign
1529 || !gimple_assign_single_p (assign)
1530 || gimple_has_volatile_ops (assign))
1531 return false;
1533 locus = gimple_location (assign);
1534 lhs = gimple_assign_lhs (assign);
1535 rhs = gimple_assign_rhs1 (assign);
1536 if (TREE_CODE (lhs) != MEM_REF
1537 || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
1538 || !is_gimple_reg_type (TREE_TYPE (lhs)))
1539 return false;
1541 /* Prove that we can move the store down. We could also check
1542 TREE_THIS_NOTRAP here, but in that case we also could move stores,
1543 whose value is not available readily, which we want to avoid. */
1544 if (!pointer_set_contains (nontrap, lhs))
1545 return false;
1547 /* Now we've checked the constraints, so do the transformation:
1548 1) Remove the single store. */
1549 gsi = gsi_for_stmt (assign);
1550 unlink_stmt_vdef (assign);
1551 gsi_remove (&gsi, true);
1552 release_defs (assign);
1554 /* 2) Insert a load from the memory of the store to the temporary
1555 on the edge which did not contain the store. */
1556 lhs = unshare_expr (lhs);
1557 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
1558 new_stmt = gimple_build_assign (name, lhs);
1559 gimple_set_location (new_stmt, locus);
1560 gsi_insert_on_edge (e1, new_stmt);
1562 /* 3) Create a PHI node at the join block, with one argument
1563 holding the old RHS, and the other holding the temporary
1564 where we stored the old memory contents. */
1565 name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
1566 newphi = create_phi_node (name2, join_bb);
1567 add_phi_arg (newphi, rhs, e0, locus);
1568 add_phi_arg (newphi, name, e1, locus);
1570 lhs = unshare_expr (lhs);
1571 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
1573 /* 4) Insert that PHI node. */
1574 gsi = gsi_after_labels (join_bb);
1575 if (gsi_end_p (gsi))
1577 gsi = gsi_last_bb (join_bb);
1578 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1580 else
1581 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1583 return true;
1586 /* Do the main work of conditional store replacement. */
1588 static bool
1589 cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
1590 basic_block join_bb, gimple then_assign,
1591 gimple else_assign)
1593 tree lhs_base, lhs, then_rhs, else_rhs, name;
1594 source_location then_locus, else_locus;
1595 gimple_stmt_iterator gsi;
1596 gimple newphi, new_stmt;
1598 if (then_assign == NULL
1599 || !gimple_assign_single_p (then_assign)
1600 || gimple_clobber_p (then_assign)
1601 || gimple_has_volatile_ops (then_assign)
1602 || else_assign == NULL
1603 || !gimple_assign_single_p (else_assign)
1604 || gimple_clobber_p (else_assign)
1605 || gimple_has_volatile_ops (else_assign))
1606 return false;
1608 lhs = gimple_assign_lhs (then_assign);
1609 if (!is_gimple_reg_type (TREE_TYPE (lhs))
1610 || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
1611 return false;
1613 lhs_base = get_base_address (lhs);
1614 if (lhs_base == NULL_TREE
1615 || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
1616 return false;
1618 then_rhs = gimple_assign_rhs1 (then_assign);
1619 else_rhs = gimple_assign_rhs1 (else_assign);
1620 then_locus = gimple_location (then_assign);
1621 else_locus = gimple_location (else_assign);
1623 /* Now we've checked the constraints, so do the transformation:
1624 1) Remove the stores. */
1625 gsi = gsi_for_stmt (then_assign);
1626 unlink_stmt_vdef (then_assign);
1627 gsi_remove (&gsi, true);
1628 release_defs (then_assign);
1630 gsi = gsi_for_stmt (else_assign);
1631 unlink_stmt_vdef (else_assign);
1632 gsi_remove (&gsi, true);
1633 release_defs (else_assign);
1635 /* 2) Create a PHI node at the join block, with one argument
1636 holding the old RHS, and the other holding the temporary
1637 where we stored the old memory contents. */
1638 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
1639 newphi = create_phi_node (name, join_bb);
1640 add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
1641 add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);
1643 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
1645 /* 3) Insert that PHI node. */
1646 gsi = gsi_after_labels (join_bb);
1647 if (gsi_end_p (gsi))
1649 gsi = gsi_last_bb (join_bb);
1650 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1652 else
1653 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1655 return true;
1658 /* Conditional store replacement. We already know
1659 that the recognized pattern looks like so:
1661 split:
1662 if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
1663 THEN_BB:
1665 X = Y;
1667 goto JOIN_BB;
1668 ELSE_BB:
1670 X = Z;
1672 fallthrough (edge E0)
1673 JOIN_BB:
1674 some more
1676 We check that it is safe to sink the store to JOIN_BB by verifying that
1677 there are no read-after-write or write-after-write dependencies in
1678 THEN_BB and ELSE_BB. */
1680 static bool
1681 cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
1682 basic_block join_bb)
1684 gimple then_assign = last_and_only_stmt (then_bb);
1685 gimple else_assign = last_and_only_stmt (else_bb);
1686 vec<data_reference_p> then_datarefs, else_datarefs;
1687 vec<ddr_p> then_ddrs, else_ddrs;
1688 gimple then_store, else_store;
1689 bool found, ok = false, res;
1690 struct data_dependence_relation *ddr;
1691 data_reference_p then_dr, else_dr;
1692 int i, j;
1693 tree then_lhs, else_lhs;
1694 basic_block blocks[3];
1696 if (MAX_STORES_TO_SINK == 0)
1697 return false;
1699 /* Handle the case with single statement in THEN_BB and ELSE_BB. */
1700 if (then_assign && else_assign)
1701 return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
1702 then_assign, else_assign);
1704 /* Find data references. */
1705 then_datarefs.create (1);
1706 else_datarefs.create (1);
1707 if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
1708 == chrec_dont_know)
1709 || !then_datarefs.length ()
1710 || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
1711 == chrec_dont_know)
1712 || !else_datarefs.length ())
1714 free_data_refs (then_datarefs);
1715 free_data_refs (else_datarefs);
1716 return false;
1719 /* Find pairs of stores with equal LHS. */
1720 auto_vec<gimple, 1> then_stores, else_stores;
1721 FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
1723 if (DR_IS_READ (then_dr))
1724 continue;
1726 then_store = DR_STMT (then_dr);
1727 then_lhs = gimple_get_lhs (then_store);
1728 if (then_lhs == NULL_TREE)
1729 continue;
1730 found = false;
1732 FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
1734 if (DR_IS_READ (else_dr))
1735 continue;
1737 else_store = DR_STMT (else_dr);
1738 else_lhs = gimple_get_lhs (else_store);
1739 if (else_lhs == NULL_TREE)
1740 continue;
1742 if (operand_equal_p (then_lhs, else_lhs, 0))
1744 found = true;
1745 break;
1749 if (!found)
1750 continue;
1752 then_stores.safe_push (then_store);
1753 else_stores.safe_push (else_store);
1756 /* No pairs of stores found. */
1757 if (!then_stores.length ()
1758 || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
1760 free_data_refs (then_datarefs);
1761 free_data_refs (else_datarefs);
1762 return false;
1765 /* Compute and check data dependencies in both basic blocks. */
1766 then_ddrs.create (1);
1767 else_ddrs.create (1);
1768 if (!compute_all_dependences (then_datarefs, &then_ddrs,
1769 vNULL, false)
1770 || !compute_all_dependences (else_datarefs, &else_ddrs,
1771 vNULL, false))
1773 free_dependence_relations (then_ddrs);
1774 free_dependence_relations (else_ddrs);
1775 free_data_refs (then_datarefs);
1776 free_data_refs (else_datarefs);
1777 return false;
1779 blocks[0] = then_bb;
1780 blocks[1] = else_bb;
1781 blocks[2] = join_bb;
1782 renumber_gimple_stmt_uids_in_blocks (blocks, 3);
1784 /* Check that there are no read-after-write or write-after-write dependencies
1785 in THEN_BB. */
1786 FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
1788 struct data_reference *dra = DDR_A (ddr);
1789 struct data_reference *drb = DDR_B (ddr);
1791 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1792 && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
1793 && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
1794 || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
1795 && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
1796 || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
1798 free_dependence_relations (then_ddrs);
1799 free_dependence_relations (else_ddrs);
1800 free_data_refs (then_datarefs);
1801 free_data_refs (else_datarefs);
1802 return false;
1806 /* Check that there are no read-after-write or write-after-write dependencies
1807 in ELSE_BB. */
1808 FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
1810 struct data_reference *dra = DDR_A (ddr);
1811 struct data_reference *drb = DDR_B (ddr);
1813 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1814 && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
1815 && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
1816 || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
1817 && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
1818 || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
1820 free_dependence_relations (then_ddrs);
1821 free_dependence_relations (else_ddrs);
1822 free_data_refs (then_datarefs);
1823 free_data_refs (else_datarefs);
1824 return false;
1828 /* Sink stores with same LHS. */
1829 FOR_EACH_VEC_ELT (then_stores, i, then_store)
1831 else_store = else_stores[i];
1832 res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
1833 then_store, else_store);
1834 ok = ok || res;
1837 free_dependence_relations (then_ddrs);
1838 free_dependence_relations (else_ddrs);
1839 free_data_refs (then_datarefs);
1840 free_data_refs (else_datarefs);
1842 return ok;
1845 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */
1847 static bool
1848 local_mem_dependence (gimple stmt, basic_block bb)
1850 tree vuse = gimple_vuse (stmt);
1851 gimple def;
1853 if (!vuse)
1854 return false;
1856 def = SSA_NAME_DEF_STMT (vuse);
1857 return (def && gimple_bb (def) == bb);
1860 /* Given a "diamond" control-flow pattern where BB0 tests a condition,
1861 BB1 and BB2 are "then" and "else" blocks dependent on this test,
1862 and BB3 rejoins control flow following BB1 and BB2, look for
1863 opportunities to hoist loads as follows. If BB3 contains a PHI of
1864 two loads, one each occurring in BB1 and BB2, and the loads are
1865 provably of adjacent fields in the same structure, then move both
1866 loads into BB0. Of course this can only be done if there are no
1867 dependencies preventing such motion.
1869 One of the hoisted loads will always be speculative, so the
1870 transformation is currently conservative:
1872 - The fields must be strictly adjacent.
1873 - The two fields must occupy a single memory block that is
1874 guaranteed to not cross a page boundary.
1876 The last is difficult to prove, as such memory blocks should be
1877 aligned on the minimum of the stack alignment boundary and the
1878 alignment guaranteed by heap allocation interfaces. Thus we rely
1879 on a parameter for the alignment value.
1881 Provided a good value is used for the last case, the first
1882 restriction could possibly be relaxed. */
1884 static void
1885 hoist_adjacent_loads (basic_block bb0, basic_block bb1,
1886 basic_block bb2, basic_block bb3)
1888 int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
1889 unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
1890 gimple_stmt_iterator gsi;
1892 /* Walk the phis in bb3 looking for an opportunity. We are looking
1893 for phis of two SSA names, one each of which is defined in bb1 and
1894 bb2. */
1895 for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
1897 gimple phi_stmt = gsi_stmt (gsi);
1898 gimple def1, def2, defswap;
1899 tree arg1, arg2, ref1, ref2, field1, field2, fieldswap;
1900 tree tree_offset1, tree_offset2, tree_size2, next;
1901 int offset1, offset2, size2;
1902 unsigned align1;
1903 gimple_stmt_iterator gsi2;
1904 basic_block bb_for_def1, bb_for_def2;
1906 if (gimple_phi_num_args (phi_stmt) != 2
1907 || virtual_operand_p (gimple_phi_result (phi_stmt)))
1908 continue;
1910 arg1 = gimple_phi_arg_def (phi_stmt, 0);
1911 arg2 = gimple_phi_arg_def (phi_stmt, 1);
1913 if (TREE_CODE (arg1) != SSA_NAME
1914 || TREE_CODE (arg2) != SSA_NAME
1915 || SSA_NAME_IS_DEFAULT_DEF (arg1)
1916 || SSA_NAME_IS_DEFAULT_DEF (arg2))
1917 continue;
1919 def1 = SSA_NAME_DEF_STMT (arg1);
1920 def2 = SSA_NAME_DEF_STMT (arg2);
1922 if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
1923 && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
1924 continue;
1926 /* Check the mode of the arguments to be sure a conditional move
1927 can be generated for it. */
1928 if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
1929 == CODE_FOR_nothing)
1930 continue;
1932 /* Both statements must be assignments whose RHS is a COMPONENT_REF. */
1933 if (!gimple_assign_single_p (def1)
1934 || !gimple_assign_single_p (def2)
1935 || gimple_has_volatile_ops (def1)
1936 || gimple_has_volatile_ops (def2))
1937 continue;
1939 ref1 = gimple_assign_rhs1 (def1);
1940 ref2 = gimple_assign_rhs1 (def2);
1942 if (TREE_CODE (ref1) != COMPONENT_REF
1943 || TREE_CODE (ref2) != COMPONENT_REF)
1944 continue;
1946 /* The zeroth operand of the two component references must be
1947 identical. It is not sufficient to compare get_base_address of
1948 the two references, because this could allow for different
1949 elements of the same array in the two trees. It is not safe to
1950 assume that the existence of one array element implies the
1951 existence of a different one. */
1952 if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
1953 continue;
1955 field1 = TREE_OPERAND (ref1, 1);
1956 field2 = TREE_OPERAND (ref2, 1);
1958 /* Check for field adjacency, and ensure field1 comes first. */
1959 for (next = DECL_CHAIN (field1);
1960 next && TREE_CODE (next) != FIELD_DECL;
1961 next = DECL_CHAIN (next))
1964 if (next != field2)
1966 for (next = DECL_CHAIN (field2);
1967 next && TREE_CODE (next) != FIELD_DECL;
1968 next = DECL_CHAIN (next))
1971 if (next != field1)
1972 continue;
1974 fieldswap = field1;
1975 field1 = field2;
1976 field2 = fieldswap;
1977 defswap = def1;
1978 def1 = def2;
1979 def2 = defswap;
1982 bb_for_def1 = gimple_bb (def1);
1983 bb_for_def2 = gimple_bb (def2);
1985 /* Check for proper alignment of the first field. */
1986 tree_offset1 = bit_position (field1);
1987 tree_offset2 = bit_position (field2);
1988 tree_size2 = DECL_SIZE (field2);
1990 if (!tree_fits_uhwi_p (tree_offset1)
1991 || !tree_fits_uhwi_p (tree_offset2)
1992 || !tree_fits_uhwi_p (tree_size2))
1993 continue;
1995 offset1 = tree_to_uhwi (tree_offset1);
1996 offset2 = tree_to_uhwi (tree_offset2);
1997 size2 = tree_to_uhwi (tree_size2);
1998 align1 = DECL_ALIGN (field1) % param_align_bits;
2000 if (offset1 % BITS_PER_UNIT != 0)
2001 continue;
2003 /* For profitability, the two field references should fit within
2004 a single cache line. */
2005 if (align1 + offset2 - offset1 + size2 > param_align_bits)
2006 continue;
2008 /* The two expressions cannot be dependent upon vdefs defined
2009 in bb1/bb2. */
2010 if (local_mem_dependence (def1, bb_for_def1)
2011 || local_mem_dependence (def2, bb_for_def2))
2012 continue;
2014 /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
2015 bb0. We hoist the first one first so that a cache miss is handled
2016 efficiently regardless of hardware cache-fill policy. */
2017 gsi2 = gsi_for_stmt (def1);
2018 gsi_move_to_bb_end (&gsi2, bb0);
2019 gsi2 = gsi_for_stmt (def2);
2020 gsi_move_to_bb_end (&gsi2, bb0);
2022 if (dump_file && (dump_flags & TDF_DETAILS))
2024 fprintf (dump_file,
2025 "\nHoisting adjacent loads from %d and %d into %d: \n",
2026 bb_for_def1->index, bb_for_def2->index, bb0->index);
2027 print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
2028 print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
2033 /* Determine whether we should attempt to hoist adjacent loads out of
2034 diamond patterns in pass_phiopt. Always hoist loads if
2035 -fhoist-adjacent-loads is specified and the target machine has
2036 both a conditional move instruction and a defined cache line size. */
2038 static bool
2039 gate_hoist_loads (void)
2041 return (flag_hoist_adjacent_loads == 1
2042 && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
2043 && HAVE_conditional_move);
2046 /* This pass tries to replace an if-then-else block with an
2047 assignment. We have four kinds of transformations. Some of these
2048 transformations are also performed by the ifcvt RTL optimizer.
2050 Conditional Replacement
2051 -----------------------
2053 This transformation, implemented in conditional_replacement,
2054 replaces
2056 bb0:
2057 if (cond) goto bb2; else goto bb1;
2058 bb1:
2059 bb2:
2060 x = PHI <0 (bb1), 1 (bb0), ...>;
2062 with
2064 bb0:
2065 x' = cond;
2066 goto bb2;
2067 bb2:
2068 x = PHI <x' (bb0), ...>;
2070 We remove bb1 as it becomes unreachable. This occurs often due to
2071 gimplification of conditionals.
2073 Value Replacement
2074 -----------------
2076 This transformation, implemented in value_replacement, replaces
2078 bb0:
2079 if (a != b) goto bb2; else goto bb1;
2080 bb1:
2081 bb2:
2082 x = PHI <a (bb1), b (bb0), ...>;
2084 with
2086 bb0:
2087 bb2:
2088 x = PHI <b (bb0), ...>;
2090 This opportunity can sometimes occur as a result of other
2091 optimizations.
2094 Another case caught by value replacement looks like this:
2096 bb0:
2097 t1 = a == CONST;
2098 t2 = b > c;
2099 t3 = t1 & t2;
2100 if (t3 != 0) goto bb1; else goto bb2;
2101 bb1:
2102 bb2:
2103 x = PHI (CONST, a)
2105 Gets replaced with:
2106 bb0:
2107 bb2:
2108 t1 = a == CONST;
2109 t2 = b > c;
2110 t3 = t1 & t2;
2111 x = a;
2113 ABS Replacement
2114 ---------------
2116 This transformation, implemented in abs_replacement, replaces
2118 bb0:
2119 if (a >= 0) goto bb2; else goto bb1;
2120 bb1:
2121 x = -a;
2122 bb2:
2123 x = PHI <x (bb1), a (bb0), ...>;
2125 with
2127 bb0:
2128 x' = ABS_EXPR< a >;
2129 bb2:
2130 x = PHI <x' (bb0), ...>;
2132 MIN/MAX Replacement
2133 -------------------
2135 This transformation, implemented in minmax_replacement, replaces
2137 bb0:
2138 if (a <= b) goto bb2; else goto bb1;
2139 bb1:
2140 bb2:
2141 x = PHI <b (bb1), a (bb0), ...>;
2143 with
2145 bb0:
2146 x' = MIN_EXPR (a, b)
2147 bb2:
2148 x = PHI <x' (bb0), ...>;
2150 A similar transformation is done for MAX_EXPR.
2153 This pass also performs a fifth transformation of a slightly different
2154 flavor.
2156 Adjacent Load Hoisting
2157 ----------------------
2159 This transformation replaces
2161 bb0:
2162 if (...) goto bb2; else goto bb1;
2163 bb1:
2164 x1 = (<expr>).field1;
2165 goto bb3;
2166 bb2:
2167 x2 = (<expr>).field2;
2168 bb3:
2169 # x = PHI <x1, x2>;
2171 with
2173 bb0:
2174 x1 = (<expr>).field1;
2175 x2 = (<expr>).field2;
2176 if (...) goto bb2; else goto bb1;
2177 bb1:
2178 goto bb3;
2179 bb2:
2180 bb3:
2181 # x = PHI <x1, x2>;
2183 The purpose of this transformation is to enable generation of conditional
2184 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of
2185 the loads is speculative, the transformation is restricted to very
2186 specific cases to avoid introducing a page fault. We are looking for
2187 the common idiom:
2189 if (...)
2190 x = y->left;
2191 else
2192 x = y->right;
2194 where left and right are typically adjacent pointers in a tree structure. */
2196 namespace {
2198 const pass_data pass_data_phiopt =
2200 GIMPLE_PASS, /* type */
2201 "phiopt", /* name */
2202 OPTGROUP_NONE, /* optinfo_flags */
2203 true, /* has_execute */
2204 TV_TREE_PHIOPT, /* tv_id */
2205 ( PROP_cfg | PROP_ssa ), /* properties_required */
2206 0, /* properties_provided */
2207 0, /* properties_destroyed */
2208 0, /* todo_flags_start */
2209 0, /* todo_flags_finish */
2212 class pass_phiopt : public gimple_opt_pass
2214 public:
2215 pass_phiopt (gcc::context *ctxt)
2216 : gimple_opt_pass (pass_data_phiopt, ctxt)
2219 /* opt_pass methods: */
2220 opt_pass * clone () { return new pass_phiopt (m_ctxt); }
2221 virtual unsigned int execute (function *)
2223 return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
2226 }; // class pass_phiopt
2228 } // anon namespace
2230 gimple_opt_pass *
2231 make_pass_phiopt (gcc::context *ctxt)
2233 return new pass_phiopt (ctxt);
2236 namespace {
2238 const pass_data pass_data_cselim =
2240 GIMPLE_PASS, /* type */
2241 "cselim", /* name */
2242 OPTGROUP_NONE, /* optinfo_flags */
2243 true, /* has_execute */
2244 TV_TREE_PHIOPT, /* tv_id */
2245 ( PROP_cfg | PROP_ssa ), /* properties_required */
2246 0, /* properties_provided */
2247 0, /* properties_destroyed */
2248 0, /* todo_flags_start */
2249 0, /* todo_flags_finish */
2252 class pass_cselim : public gimple_opt_pass
2254 public:
2255 pass_cselim (gcc::context *ctxt)
2256 : gimple_opt_pass (pass_data_cselim, ctxt)
2259 /* opt_pass methods: */
2260 virtual bool gate (function *) { return flag_tree_cselim; }
2261 virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }
2263 }; // class pass_cselim
2265 } // anon namespace
2267 gimple_opt_pass *
2268 make_pass_cselim (gcc::context *ctxt)
2270 return new pass_cselim (ctxt);