gcc/tree-ssa-phiopt.c
/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "flags.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "hashtab.h"
#include "rtl.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "domwalk.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "gimple-pretty-print.h"
#include "insn-codes.h"
#include "optabs.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool);
static bool conditional_replacement (basic_block, basic_block,
                                     edge, edge, gphi *, tree, tree);
static int value_replacement (basic_block, basic_block,
                              edge, edge, gimple, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
                                edge, edge, gimple, tree, tree);
static bool abs_replacement (basic_block, basic_block,
                             edge, edge, gimple, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
                                    hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
                                  basic_block, basic_block);
static bool gate_hoist_loads (void);
/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.  It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */
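
/* A hypothetical source-level illustration of the first transformation
   (the function and variable names below are invented for this sketch
   and do not appear anywhere in the compiler):

     void
     f (int cond, int *p, int rhs)
     {
       if (cond)
         *p = rhs;
     }

   is rewritten, once a dominating access proves that *p cannot trap,
   as if the source had been:

     void
     f (int cond, int *p, int rhs)
     {
       int condtmp = cond ? rhs : *p;
       *p = condtmp;
     }  */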
static unsigned int
tree_ssa_cs_elim (void)
{
  unsigned todo;
  /* ??? We are not interested in loop related info, but the following
     will create it, ICEing as we didn't init loops with pre-headers.
     An interfacing issue of find_data_references_in_bb.  */
  loop_optimizer_init (LOOPS_NORMAL);
  scev_initialize ();
  todo = tree_ssa_phiopt_worker (true, false);
  scev_finalize ();
  loop_optimizer_finalize ();
  return todo;
}
/* Return the single PHI in the SEQ of PHIs whose arguments for edges
   E0 and E1 differ.  If SEQ contains exactly one PHI, return it.
   Return NULL if two or more PHIs have differing arguments.  */

static gphi *
single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
{
  gimple_stmt_iterator i;
  gphi *phi = NULL;
  if (gimple_seq_singleton_p (seq))
    return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
    {
      gphi *p = as_a <gphi *> (gsi_stmt (i));
      /* If the PHI arguments are equal then we can skip this PHI.  */
      if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
                                       gimple_phi_arg_def (p, e1->dest_idx)))
        continue;

      /* If we already found a PHI whose two edge arguments differ,
         there is no single such PHI; return NULL.  */
      if (phi)
        return NULL;

      phi = p;
    }
  return phi;
}
/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* Search every basic block for COND_EXPR we may be able to optimize.

     We walk the blocks in order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
          || gimple_code (cond_stmt) != GIMPLE_COND)
        continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
          || (e2->flags & EDGE_ABNORMAL) != 0)
        continue;

      /* If bb1 has no successors, bb2 does not exist, or bb2 has no
         successors, there is nothing to do here.  */
      if (EDGE_COUNT (bb1->succs) == 0
          || bb2 == NULL
          || EDGE_COUNT (bb2->succs) == 0)
        continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
        ;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
        {
          basic_block bb_tmp = bb1;
          edge e_tmp = e1;
          bb1 = bb2;
          bb2 = bb_tmp;
          e1 = e2;
          e2 = e_tmp;
        }
      else if (do_store_elim
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!single_succ_p (bb1)
              || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
              || !single_succ_p (bb2)
              || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
              || EDGE_COUNT (bb3->preds) != 2)
            continue;
          if (cond_if_else_store_replacement (bb1, bb2, bb3))
            cfgchanged = true;
          continue;
        }
      else if (do_hoist_loads
               && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
        {
          basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

          if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
              && single_succ_p (bb1)
              && single_succ_p (bb2)
              && single_pred_p (bb1)
              && single_pred_p (bb2)
              && EDGE_COUNT (bb->succs) == 2
              && EDGE_COUNT (bb3->preds) == 2
              /* If one edge or the other is dominant, a conditional move
                 is likely to perform worse than the well-predicted branch.  */
              && !predictable_edge_p (EDGE_SUCC (bb, 0))
              && !predictable_edge_p (EDGE_SUCC (bb, 1)))
            hoist_adjacent_loads (bb, bb1, bb2, bb3);
          continue;
        }
      else
        continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
          || (e1->flags & EDGE_FALLTHRU) == 0)
        continue;

      /* Also make sure that bb1 only has one predecessor and that it
         is bb.  */
      if (!single_pred_p (bb1)
          || single_pred (bb1) != bb)
        continue;

      if (do_store_elim)
        {
          /* bb1 is the middle block, bb2 the join block, bb the split block,
             e1 the fallthrough edge from bb1 to bb2.  We can't do the
             optimization if the join block has more than two predecessors.  */
          if (EDGE_COUNT (bb2->preds) > 2)
            continue;
          if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
            cfgchanged = true;
        }
      else
        {
          gimple_seq phis = phi_nodes (bb2);
          gimple_stmt_iterator gsi;
          bool candorest = true;

          /* Value replacement can work with more than one PHI
             so try that first.  */
          for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              phi = as_a <gphi *> (gsi_stmt (gsi));
              arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
              arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
              if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
                {
                  candorest = false;
                  cfgchanged = true;
                  break;
                }
            }

          if (!candorest)
            continue;

          phi = single_non_singleton_phi_for_edges (phis, e1, e2);
          if (!phi)
            continue;

          arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
          arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

          /* Something is wrong if we cannot find the arguments in the PHI
             node.  */
          gcc_assert (arg0 != NULL && arg1 != NULL);

          /* Do the replacement of conditional if it can be done.  */
          if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
          else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
            cfgchanged = true;
        }
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
         and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}
/* Replace PHI node element whose edge is E in block BB with variable NEW.
   Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
   is known to have two edges, one of which must reach BB).  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
                                edge e, gimple phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
        &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
             cond_block->index,
             bb->index);
}
/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  */

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
                         edge e0, edge e1, gphi *phi,
                         tree arg0, tree arg1)
{
  tree result;
  gimple stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
        || POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
           || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments are the constants 0 and 1, or 0 and -1, then
     the PHI can be converted into a use of the condition.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
           || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge has the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
                          gimple_cond_code (stmt), boolean_type_node,
                          gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
                            TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
                               TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
                              NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
                                      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      source_location locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
        locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}
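
/* For example, the replacement above turns source like this hypothetical
   sketch (names invented for illustration):

     int r;
     if (a < b)
       r = 1;
     else
       r = 0;

   into the straightline form

     r = (a < b);

   and, when the PHI arguments are 0 and -1, into the negated conversion
   r = -(int) (a < b).  */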
/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  */

static bool
jump_function_from_stmt (tree *arg, gimple stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      HOST_WIDE_INT offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
                                                &offset);
      if (tem
          && TREE_CODE (tem) == MEM_REF
          && (mem_ref_offset (tem) + offset) == 0)
        {
          *arg = TREE_OPERAND (tem, 0);
          return true;
        }
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}
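
/* As a hypothetical illustration of the ADDR_EXPR case above (the struct
   is invented for this sketch):

     struct s { int first; int rest[4]; } *p;
     ...
     arg = &p->first;

   Since FIRST lives at offset zero, &p->first denotes the same address
   as P itself, so *ARG is rewritten to P and the address computation
   becomes dead.  */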
/* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
   of the form SSA_NAME NE 0.

   If RHS is fed by a simple EQ_EXPR comparison of two values, see if
   the two input values of the EQ_EXPR match arg0 and arg1.

   If so update *code and return TRUE.  Otherwise return FALSE.  */

static bool
rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
                                  enum tree_code *code, const_tree rhs)
{
  /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
     statement.  */
  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple def1 = SSA_NAME_DEF_STMT (rhs);

      /* Verify the defining statement has an EQ_EXPR on the RHS.  */
      if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
        {
          /* Finally verify the source operands of the EQ_EXPR are equal
             to arg0 and arg1.  */
          tree op0 = gimple_assign_rhs1 (def1);
          tree op1 = gimple_assign_rhs2 (def1);
          if ((operand_equal_for_phi_arg_p (arg0, op0)
               && operand_equal_for_phi_arg_p (arg1, op1))
              || (operand_equal_for_phi_arg_p (arg0, op1)
                  && operand_equal_for_phi_arg_p (arg1, op0)))
            {
              /* We will perform the optimization.  */
              *code = gimple_assign_rhs_code (def1);
              return true;
            }
        }
    }
  return false;
}
/* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.

   Also return TRUE if arg0/arg1 are equal to the source arguments of
   an EQ comparison feeding a BIT_AND_EXPR which feeds COND.

   Return FALSE otherwise.  */

static bool
operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
                                     enum tree_code *code, gimple cond)
{
  gimple def;
  tree lhs = gimple_cond_lhs (cond);
  tree rhs = gimple_cond_rhs (cond);

  if ((operand_equal_for_phi_arg_p (arg0, lhs)
       && operand_equal_for_phi_arg_p (arg1, rhs))
      || (operand_equal_for_phi_arg_p (arg1, lhs)
          && operand_equal_for_phi_arg_p (arg0, rhs)))
    return true;

  /* Now handle the more complex case where we have an EQ comparison
     which feeds a BIT_AND_EXPR which feeds COND.

     First verify that COND is of the form SSA_NAME NE 0.  */
  if (*code != NE_EXPR || !integer_zerop (rhs)
      || TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR.  */
  def = SSA_NAME_DEF_STMT (lhs);
  if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
    return false;

  /* Now verify arg0/arg1 correspond to the source arguments of an
     EQ comparison feeding the BIT_AND_EXPR.  */

  tree tmp = gimple_assign_rhs1 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  tmp = gimple_assign_rhs2 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  return false;
}
/* Returns true if ARG is a neutral element for operation CODE
   on the RIGHT side.  */

static bool
neutral_element_p (tree_code code, tree arg, bool right)
{
  switch (code)
    {
    case PLUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      return integer_zerop (arg);

    case LROTATE_EXPR:
    case RROTATE_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      return right && integer_zerop (arg);

    case MULT_EXPR:
      return integer_onep (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      return right && integer_onep (arg);

    case BIT_AND_EXPR:
      return integer_all_onesp (arg);

    default:
      return false;
    }
}

/* Returns true if ARG is an absorbing element for operation CODE.  */

static bool
absorbing_element_p (tree_code code, tree arg)
{
  switch (code)
    {
    case BIT_IOR_EXPR:
      return integer_all_onesp (arg);

    case MULT_EXPR:
    case BIT_AND_EXPR:
      return integer_zerop (arg);

    default:
      return false;
    }
}
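
/* Concretely, for an integral X the two predicates above recognize
   identities such as (an informal summary, not exhaustive):

     neutral on the right:  x + 0, x - 0, x << 0, x >> 0, x * 1,
                            x / 1, x | 0, x ^ 0, x & -1
     absorbing:             x * 0 == 0, x & 0 == 0, x | -1 == -1  */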
/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise return
   0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
                   edge e0, edge e1, gimple phi,
                   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple cond;
  edge true_edge, false_edge;
  enum tree_code code;
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.  */
  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  while (!gsi_end_p (gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
        {
          empty_or_with_defined_p = false;
          continue;
        }
      /* Now try to adjust arg0 or arg1 according to the computation
         in the statement.  */
      lhs = gimple_assign_lhs (stmt);
      if (!(lhs == arg0
            && jump_function_from_stmt (&arg0, stmt))
          || (lhs == arg1
              && jump_function_from_stmt (&arg1, stmt)))
        empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument belongs to each of them.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
         arg is the PHI argument associated with the true edge.  For
         EQ_EXPR we want the PHI argument associated with the false edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
         OTHER_BLOCK).  If that is the case, then we want the single outgoing
         edge from OTHER_BLOCK which reaches BB and represents the desired
         path from COND_BLOCK.  */
      if (e->dest == middle_bb)
        e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
         RHS of our new assignment statement.  */
      if (e0 == e)
        arg = arg0;
      else
        arg = arg1;

      /* If the middle basic block was empty or is defining the
         PHI arguments and this is a single phi where the args are different
         for the edges e0 and e1 then we can remove the middle basic block.  */
      if (empty_or_with_defined_p
          && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
                                                 e0, e1) == phi)
        {
          replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
          /* Note that we optimized this PHI.  */
          return 2;
        }
      else
        {
          /* Replace the PHI arguments with arg.  */
          SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
          SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "PHI ");
              print_generic_expr (dump_file, gimple_phi_result (phi), 0);
              fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
                       cond_bb->index);
              print_generic_expr (dump_file, arg, 0);
              fprintf (dump_file, ".\n");
            }
          return 1;
        }
    }

  /* Now optimize (x != 0) ? x + y : y to just x + y.
     The following condition is too restrictive, there can easily be another
     stmt in middle_bb, for instance a CONVERT_EXPR for the second argument.  */
  gimple assign = last_and_only_stmt (middle_bb);
  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
          && !POINTER_TYPE_P (TREE_TYPE (arg0))))
    return 0;

  /* Punt if there are (degenerate) PHIs in middle_bb, there should not be.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return 0;

  /* Only transform if it removes the condition.  */
  if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
    return 0;

  /* Size-wise, this is always profitable.  */
  if (optimize_bb_for_speed_p (cond_bb)
      /* The special case is useless if it has a low probability.  */
      && profile_status_for_fn (cfun) != PROFILE_ABSENT
      && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN
      /* If assign is cheap, there is no point avoiding it.  */
      && estimate_num_insns (assign, &eni_time_weights)
         >= 3 * estimate_num_insns (cond, &eni_time_weights))
    return 0;

  tree lhs = gimple_assign_lhs (assign);
  tree rhs1 = gimple_assign_rhs1 (assign);
  tree rhs2 = gimple_assign_rhs2 (assign);
  enum tree_code code_def = gimple_assign_rhs_code (assign);
  tree cond_lhs = gimple_cond_lhs (cond);
  tree cond_rhs = gimple_cond_rhs (cond);

  if (((code == NE_EXPR && e1 == false_edge)
       || (code == EQ_EXPR && e1 == true_edge))
      && arg0 == lhs
      && ((arg1 == rhs1
           && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
           && neutral_element_p (code_def, cond_rhs, true))
          || (arg1 == rhs2
              && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
              && neutral_element_p (code_def, cond_rhs, false))
          || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
              && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
                  || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
              && absorbing_element_p (code_def, cond_rhs))))
    {
      gsi = gsi_for_stmt (cond);
      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
        {
          /* Moving ASSIGN might change VR of lhs, e.g. when moving u_6
             def-stmt in:
             if (n_5 != 0)
               goto <bb 3>;
             else
               goto <bb 4>;

             <bb 3>:
             # RANGE [0, 4294967294]
             u_6 = n_5 + 4294967295;

             <bb 4>:
             # u_3 = PHI <u_6(3), 4294967295(2)>  */
          SSA_NAME_RANGE_INFO (lhs) = NULL;
          SSA_NAME_ANTI_RANGE_P (lhs) = 0;
          /* If available, we can use VR of phi result at least.  */
          tree phires = gimple_phi_result (phi);
          struct range_info_def *phires_range_info
            = SSA_NAME_RANGE_INFO (phires);
          if (phires_range_info)
            duplicate_ssa_name_range_info (lhs, SSA_NAME_RANGE_TYPE (phires),
                                           phires_range_info);
        }
      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
      gsi_move_before (&gsi_from, &gsi);
      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
      return 2;
    }

  return 0;
}
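
/* Two hypothetical source-level examples of the replacements above
   (names invented for the sketch):

     r = (x != y) ? x : y;          becomes   r = x;

   since on the false edge x == y, so both arms yield x; and, via the
   neutral-element logic,

     r = (x != 0) ? x + y : y;      becomes   r = x + y;

   since x + y equals y when x is 0, the addition can be computed
   unconditionally and the branch removed.  */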
/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
                    edge e0, edge e1, gimple phi,
                    tree arg0, tree arg1)
{
  tree result, type;
  gcond *cond;
  gassign *new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.  */
  if (HONOR_NANS (type))
    return false;

  cond = as_a <gcond *> (last_stmt (cond_bb));
  cmp = gimple_cond_code (cond);

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = gimple_cond_rhs (cond);
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = gimple_cond_rhs (cond);
      larger = gimple_cond_lhs (cond);
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument corresponds to each of them.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.  */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      if (operand_equal_for_phi_arg_p (arg_true, smaller)
          && operand_equal_for_phi_arg_p (arg_false, larger))
        {
          /* Case

             if (smaller < larger)
               rslt = smaller;
             else
               rslt = larger;  */
          minmax = MIN_EXPR;
        }
      else if (operand_equal_for_phi_arg_p (arg_false, smaller)
               && operand_equal_for_phi_arg_p (arg_true, larger))
        minmax = MAX_EXPR;
      else
        return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

         if (a <= u)
           b = MAX (a, d);
         x = PHI <b, u>

         This is equivalent to

         b = MAX (a, d);
         x = MIN (b, u);  */

      gimple assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      if (!assign
          || gimple_code (assign) != GIMPLE_ASSIGN)
        return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
        return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
        {
          /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
          if (!operand_equal_for_phi_arg_p (lhs, arg_true))
            return false;

          if (operand_equal_for_phi_arg_p (arg_false, larger))
            {
              /* Case

                 if (smaller < larger)
                   {
                     r' = MAX_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
              if (ass_code != MAX_EXPR)
                return false;

              minmax = MIN_EXPR;
              if (operand_equal_for_phi_arg_p (op0, smaller))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, smaller))
                bound = op0;
              else
                return false;

              /* We need BOUND <= LARGER.  */
              if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                  bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_false, smaller))
            {
              /* Case

                 if (smaller < larger)
                   {
                     r' = MIN_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
              if (ass_code != MIN_EXPR)
                return false;

              minmax = MAX_EXPR;
              if (operand_equal_for_phi_arg_p (op0, larger))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, larger))
                bound = op0;
              else
                return false;

              /* We need BOUND >= SMALLER.  */
              if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                  bound, smaller)))
                return false;
            }
          else
            return false;
        }
      else
        {
          /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
          if (!operand_equal_for_phi_arg_p (lhs, arg_false))
            return false;

          if (operand_equal_for_phi_arg_p (arg_true, larger))
            {
              /* Case

                 if (smaller > larger)
                   {
                     r' = MIN_EXPR (smaller, bound)
                   }
                 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
              if (ass_code != MIN_EXPR)
                return false;

              minmax = MAX_EXPR;
              if (operand_equal_for_phi_arg_p (op0, smaller))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, smaller))
                bound = op0;
              else
                return false;

              /* We need BOUND >= LARGER.  */
              if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
                                                  bound, larger)))
                return false;
            }
          else if (operand_equal_for_phi_arg_p (arg_true, smaller))
            {
              /* Case

                 if (smaller > larger)
                   {
                     r' = MAX_EXPR (larger, bound)
                   }
                 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
              if (ass_code != MAX_EXPR)
                return false;

              minmax = MIN_EXPR;
              if (operand_equal_for_phi_arg_p (op0, larger))
                bound = op1;
              else if (operand_equal_for_phi_arg_p (op1, larger))
                bound = op0;
              else
                return false;

              /* We need BOUND <= SMALLER.  */
              if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
                                                  bound, smaller)))
                return false;
            }
          else
            return false;
        }

      /* Move the statement from the middle block.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Emit the statement to compute min/max.  */
  result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
  return true;
}
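
/* For example (a hypothetical sketch), the empty-middle-block case
   above rewrites

     r = (a < b) ? a : b;                to   r = MIN_EXPR <a, b>;

   and the bounded case, when d <= u is provable, rewrites

     r = (a <= u) ? MAX (a, d) : u;      to   b = MAX (a, d);
                                              r = MIN_EXPR <b, u>;  */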
/* The function abs_replacement does the main work of doing the absolute
   value replacement.  Return true if the replacement is done.  Otherwise
   return false.
   BB is the basic block where the replacement is going to be done on.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
                 edge e0 ATTRIBUTE_UNUSED, edge e1,
                 gimple phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we cannot
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we cannot optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
      ? real_zerop (gimple_cond_rhs (cond))
      : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if we have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  result = duplicate_ssa_name (result, NULL);

  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result));
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
         added ABS_EXPR statement (which we know is the first statement
         in the block).  */
      new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}
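
/* A hypothetical source-level example of the above:

     if (x < 0)
       y = -x;
     else
       y = x;

   becomes y = ABS_EXPR <x>; with the sense of the test flipped, e.g.

     if (x > 0)
       y = -x;
     else
       y = x;

   the negation lands on the true edge, so the result is negated:
   y = -ABS_EXPR <x>.  */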
/* Auxiliary functions to determine the set of memory accesses which
   can't trap because they are preceded by accesses to the same memory
   portion.  We do that for MEM_REFs, so we only need to track
   the SSA_NAME of the pointer indirectly referenced.  The algorithm
   simply is a walk over all instructions in dominator order.  When
   we see a MEM_REF we determine if we've already seen the same
   ref anywhere up to the root of the dominator tree.  If we have, the
   current access can't trap.  If we don't see any dominating access
   the current access might trap, but might also make later accesses
   non-trapping, so we remember it.  We need to be careful with loads
   or stores, for instance a load might not trap, while a store would,
   so if we see a dominating read access this doesn't mean that a later
   write access would not trap.  Hence we also need to differentiate the
   type of access(es) seen.

   ??? We currently are very conservative and assume that a load might
   trap even if a store doesn't (write-only memory).  This probably is
   overly conservative.  */
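
/* For instance (a hypothetical sketch), in

     *p = 0;
     if (cond)
       *p = v;

   the conditional store cannot trap, because a store to the same
   location already executed on every path to it; the first store is
   merely recorded, as nothing dominates it.  */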
/* A hash-table of SSA_NAMEs, and in which basic block a MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  unsigned int ssa_name_ver;
  unsigned int phase;
  bool store;
  HOST_WIDE_INT offset, size;
  basic_block bb;
};

/* Hashtable helpers.  */

struct ssa_names_hasher : typed_free_remove <name_to_bb>
{
  typedef name_to_bb *value_type;
  typedef name_to_bb *compare_type;
  static inline hashval_t hash (const name_to_bb *);
  static inline bool equal (const name_to_bb *, const name_to_bb *);
};

/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The hash function.  */

inline hashval_t
ssa_names_hasher::hash (const name_to_bb *n)
{
  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
         ^ (n->offset << 6) ^ (n->size << 3);
}

/* The equality function of *P1 and *P2.  */

inline bool
ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
{
  return n1->ssa_name_ver == n2->ssa_name_ver
         && n1->store == n2->store
         && n1->offset == n2->offset
         && n1->size == n2->size;
}
class nontrapping_dom_walker : public dom_walker
{
public:
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual void before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (a MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};

/* Called by walk_dominator_tree, when entering the block BB.  */
void
nontrapping_dom_walker::before_dom_children (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
        nt_call_phase++;
        break;
      }

  /* Mark this BB as being on the path to dominator root and as visited.  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
        nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
        {
          add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
          add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
        }
    }
}

/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore.  */
  bb->aux = (void*)2;
}
/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (a MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
         SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      if (n2bb && n2bb->phase >= nt_call_phase)
        found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
         (it's in a basic block on the path from us to the dominator root)
         then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
        {
          m_nontrapping->add (exp);
        }
      else
        {
          /* EXP might trap, so insert it into the hash table.  */
          if (n2bb)
            {
              n2bb->phase = nt_call_phase;
              n2bb->bb = bb;
            }
          else
            {
              n2bb = XNEW (struct name_to_bb);
              n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
              n2bb->phase = nt_call_phase;
              n2bb->bb = bb;
              n2bb->store = store;
              n2bb->offset = map.offset;
              n2bb->size = size;
              *slot = n2bb;
            }
        }
    }
}
/* This is the entry point of gathering non trapping memory accesses.
   It will do a dominator walk over the whole function, and it will
   make use of the bb->aux pointers.  It returns a set of trees
   (the MEM_REFs themselves) which can't trap.  */
static hash_set<tree> *
get_non_trapping (void)
{
  nt_call_phase = 0;
  hash_set<tree> *nontrap = new hash_set<tree>;
  /* We're going to do a dominator walk, so ensure that we have
     dominance information.  */
  calculate_dominance_info (CDI_DOMINATORS);

  nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
    .walk (cfun->cfg->x_entry_block_ptr);

  clear_aux_for_blocks ();
  return nontrap;
}
/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that the store
   doesn't trap (not via TREE_THIS_NOTRAP, but via checking if an
   access to the same memory location dominates us) and that the store
   has a "simple" RHS.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
                        edge e0, edge e1, hash_set<tree> *nontrap)
{
  gimple assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gphi *newphi;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  source_location locus;

  /* Check if middle_bb contains only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!nontrap->contains (lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* 2) Insert a load from the memory of the store to the temporary
        on the edge which did not contain the store.  */
  lhs = unshare_expr (lhs);
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
        holding the old RHS, and the other holding the temporary
        where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
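
/* The net effect of steps 1-4 on a hypothetical input "if (cond) *p = rhs;"
   is that the store disappears from MIDDLE_BB, a load "cstore_1 = *p" is
   inserted on the edge E1 that bypassed the store, and JOIN_BB gets

     # cstore_2 = PHI <rhs(E0), cstore_1(E1)>
     *p = cstore_2;

   so *p is written unconditionally with either the new or the old
   value.  */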
/* Do the main work of conditional store replacement.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
                                  basic_block join_bb, gimple then_assign,
                                  gimple else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  source_location then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  gassign *new_stmt;

  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
        holding the THEN RHS, and the other holding the ELSE RHS.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}
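
/* A hypothetical source-level example of the diamond case handled above:

     if (cond)
       *p = y;
     else
       *p = z;

   becomes

     # cstore = PHI <y, z>
     *p = cstore;

   i.e., a single unconditional store fed by a PHI of the two RHSs.  */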
/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  */

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  gimple then_assign = last_and_only_stmt (then_bb);
  gimple else_assign = last_and_only_stmt (else_bb);
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple then_store, else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Handle the case with single statement in THEN_BB and ELSE_BB.  */
  if (then_assign && else_assign)
    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                             then_assign, else_assign);

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
       == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
          == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
        continue;
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);
          if (else_lhs == NULL_TREE)
            continue;

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
                                vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
                                   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);

  /* Check that there are no read-after-write or write-after-write dependencies
     in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
          free_data_refs (then_datarefs);
          free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Check that there are no read-after-write or write-after-write dependencies
     in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
          free_data_refs (then_datarefs);
          free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}
/* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB.  */

static bool
local_mem_dependence (gimple stmt, basic_block bb)
{
  tree vuse = gimple_vuse (stmt);
  gimple def;

  if (!vuse)
    return false;

  def = SSA_NAME_DEF_STMT (vuse);
  return (def && gimple_bb (def) == bb);
}
1872 /* Given a "diamond" control-flow pattern where BB0 tests a condition,
1873 BB1 and BB2 are "then" and "else" blocks dependent on this test,
1874 and BB3 rejoins control flow following BB1 and BB2, look for
1875 opportunities to hoist loads as follows. If BB3 contains a PHI of
1876 two loads, one each occurring in BB1 and BB2, and the loads are
1877 provably of adjacent fields in the same structure, then move both
1878 loads into BB0. Of course this can only be done if there are no
1879 dependencies preventing such motion.
1881 One of the hoisted loads will always be speculative, so the
1882 transformation is currently conservative:
1884 - The fields must be strictly adjacent.
1885 - The two fields must occupy a single memory block that is
1886 guaranteed to not cross a page boundary.
1888 The last is difficult to prove, as such memory blocks should be
1889 aligned on the minimum of the stack alignment boundary and the
1890 alignment guaranteed by heap allocation interfaces. Thus we rely
1891 on a parameter for the alignment value.
1893 Provided a good value is used for the last case, the first
1894 restriction could possibly be relaxed. */
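/* As a concrete illustration (the type and names here are invented
for this comment, not taken from any testcase), the shape being
targeted is:

struct pair { int first; int second; };

int f (struct pair *p, int cond)
{
int x;
if (cond)
x = p->first;
else
x = p->second;
return x;
}

Both loads read adjacent fields of *P, so they can be hoisted above
the branch and the result selected with a conditional move. */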
1896 static void
1897 hoist_adjacent_loads (basic_block bb0, basic_block bb1,
1898 basic_block bb2, basic_block bb3)
1899 {
1900 int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
1901 unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
1902 gphi_iterator gsi;
1904 /* Walk the phis in bb3 looking for an opportunity. We are looking
1905 for phis of two SSA names, one each of which is defined in bb1 and
1906 bb2. */
1907 for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
1908 {
1909 gphi *phi_stmt = gsi.phi ();
1910 gimple def1, def2;
1911 tree arg1, arg2, ref1, ref2, field1, field2;
1912 tree tree_offset1, tree_offset2, tree_size2, next;
1913 int offset1, offset2, size2;
1914 unsigned align1;
1915 gimple_stmt_iterator gsi2;
1916 basic_block bb_for_def1, bb_for_def2;
1918 if (gimple_phi_num_args (phi_stmt) != 2
1919 || virtual_operand_p (gimple_phi_result (phi_stmt)))
1920 continue;
1922 arg1 = gimple_phi_arg_def (phi_stmt, 0);
1923 arg2 = gimple_phi_arg_def (phi_stmt, 1);
1925 if (TREE_CODE (arg1) != SSA_NAME
1926 || TREE_CODE (arg2) != SSA_NAME
1927 || SSA_NAME_IS_DEFAULT_DEF (arg1)
1928 || SSA_NAME_IS_DEFAULT_DEF (arg2))
1929 continue;
1931 def1 = SSA_NAME_DEF_STMT (arg1);
1932 def2 = SSA_NAME_DEF_STMT (arg2);
1934 if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
1935 && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
1936 continue;
1938 /* Check the mode of the arguments to be sure a conditional move
1939 can be generated for it. */
1940 if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
1941 == CODE_FOR_nothing)
1942 continue;
1944 /* Both statements must be assignments whose RHS is a COMPONENT_REF. */
1945 if (!gimple_assign_single_p (def1)
1946 || !gimple_assign_single_p (def2)
1947 || gimple_has_volatile_ops (def1)
1948 || gimple_has_volatile_ops (def2))
1949 continue;
1951 ref1 = gimple_assign_rhs1 (def1);
1952 ref2 = gimple_assign_rhs1 (def2);
1954 if (TREE_CODE (ref1) != COMPONENT_REF
1955 || TREE_CODE (ref2) != COMPONENT_REF)
1956 continue;
1958 /* The zeroth operand of the two component references must be
1959 identical. It is not sufficient to compare get_base_address of
1960 the two references, because this could allow for different
1961 elements of the same array in the two trees. It is not safe to
1962 assume that the existence of one array element implies the
1963 existence of a different one. */
1964 if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
1965 continue;
1967 field1 = TREE_OPERAND (ref1, 1);
1968 field2 = TREE_OPERAND (ref2, 1);
1970 /* Check for field adjacency, and ensure field1 comes first. */
1971 for (next = DECL_CHAIN (field1);
1972 next && TREE_CODE (next) != FIELD_DECL;
1973 next = DECL_CHAIN (next))
1974 ;
1976 if (next != field2)
1977 {
1978 for (next = DECL_CHAIN (field2);
1979 next && TREE_CODE (next) != FIELD_DECL;
1980 next = DECL_CHAIN (next))
1981 ;
1983 if (next != field1)
1984 continue;
1986 std::swap (field1, field2);
1987 std::swap (def1, def2);
1988 }
1990 bb_for_def1 = gimple_bb (def1);
1991 bb_for_def2 = gimple_bb (def2);
1993 /* Check for proper alignment of the first field. */
1994 tree_offset1 = bit_position (field1);
1995 tree_offset2 = bit_position (field2);
1996 tree_size2 = DECL_SIZE (field2);
1998 if (!tree_fits_uhwi_p (tree_offset1)
1999 || !tree_fits_uhwi_p (tree_offset2)
2000 || !tree_fits_uhwi_p (tree_size2))
2001 continue;
2003 offset1 = tree_to_uhwi (tree_offset1);
2004 offset2 = tree_to_uhwi (tree_offset2);
2005 size2 = tree_to_uhwi (tree_size2);
2006 align1 = DECL_ALIGN (field1) % param_align_bits;
2008 if (offset1 % BITS_PER_UNIT != 0)
2009 continue;
2011 /* For profitability, the two field references should fit within
2012 a single cache line. */
2013 if (align1 + offset2 - offset1 + size2 > param_align_bits)
2014 continue;
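/* For instance (numbers invented for illustration): with a 64-byte
line, param_align_bits is 512. Two adjacent 32-bit fields at bit
offsets 0 and 32, with DECL_ALIGN (field1) == 32 and hence
align1 == 32, give 32 + 32 - 0 + 32 == 96 <= 512, so the pair
passes this test. */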
2016 /* The two expressions cannot be dependent upon vdefs defined
2017 in bb1/bb2. */
2018 if (local_mem_dependence (def1, bb_for_def1)
2019 || local_mem_dependence (def2, bb_for_def2))
2020 continue;
2022 /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
2023 bb0. We hoist the first one first so that a cache miss is handled
2024 efficiently regardless of hardware cache-fill policy. */
2025 gsi2 = gsi_for_stmt (def1);
2026 gsi_move_to_bb_end (&gsi2, bb0);
2027 gsi2 = gsi_for_stmt (def2);
2028 gsi_move_to_bb_end (&gsi2, bb0);
2030 if (dump_file && (dump_flags & TDF_DETAILS))
2031 {
2032 fprintf (dump_file,
2033 "\nHoisting adjacent loads from %d and %d into %d: \n",
2034 bb_for_def1->index, bb_for_def2->index, bb0->index);
2035 print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
2036 print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
2037 }
2038 }
2039 }
2041 /* Determine whether we should attempt to hoist adjacent loads out of
2042 diamond patterns in pass_phiopt. Always hoist loads if
2043 -fhoist-adjacent-loads is specified and the target machine has
2044 both a conditional move instruction and a defined cache line size. */
2046 static bool
2047 gate_hoist_loads (void)
2048 {
2049 return (flag_hoist_adjacent_loads == 1
2050 && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
2051 && HAVE_conditional_move);
2052 }
2054 /* This pass tries to replace an if-then-else block with an
2055 assignment. We have four kinds of transformations. Some of these
2056 transformations are also performed by the ifcvt RTL optimizer.
2058 Conditional Replacement
2059 -----------------------
2061 This transformation, implemented in conditional_replacement,
2062 replaces
2064 bb0:
2065 if (cond) goto bb2; else goto bb1;
2066 bb1:
2067 bb2:
2068 x = PHI <0 (bb1), 1 (bb0), ...>;
2070 with
2072 bb0:
2073 x' = cond;
2074 goto bb2;
2075 bb2:
2076 x = PHI <x' (bb0), ...>;
2078 We remove bb1 as it becomes unreachable. This occurs often due to
2079 gimplification of conditionals.
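As an illustration (variable names invented for this comment), C
source such as

x = cond ? 1 : 0;

gimplifies into exactly this if-then-else shape, and the
transformation folds it back into a single assignment from the
condition value.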
2081 Value Replacement
2082 -----------------
2084 This transformation, implemented in value_replacement, replaces
2086 bb0:
2087 if (a != b) goto bb2; else goto bb1;
2088 bb1:
2089 bb2:
2090 x = PHI <a (bb1), b (bb0), ...>;
2092 with
2094 bb0:
2095 bb2:
2096 x = PHI <b (bb0), ...>;
2098 This opportunity can sometimes occur as a result of other
2099 optimizations.
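As an illustration (variable names invented for this comment), this
catches source like

x = (a != b) ? b : a;

where both arms are provably the same value, so X is simply B.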
2102 Another case caught by value replacement looks like this:
2104 bb0:
2105 t1 = a == CONST;
2106 t2 = b > c;
2107 t3 = t1 & t2;
2108 if (t3 != 0) goto bb1; else goto bb2;
2109 bb1:
2110 bb2:
2111 x = PHI <CONST, a>
2113 Gets replaced with:
2114 bb0:
2115 bb2:
2116 t1 = a == CONST;
2117 t2 = b > c;
2118 t3 = t1 & t2;
2119 x = a;
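As an illustration (names and the constant invented for this comment),
this corresponds to source like

x = (a == 42 && b > c) ? 42 : a;

where the constant selected on the true edge equals A whenever the
condition holds, so X is always A.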
2121 ABS Replacement
2122 ---------------
2124 This transformation, implemented in abs_replacement, replaces
2126 bb0:
2127 if (a >= 0) goto bb2; else goto bb1;
2128 bb1:
2129 x = -a;
2130 bb2:
2131 x = PHI <x (bb1), a (bb0), ...>;
2133 with
2135 bb0:
2136 x' = ABS_EXPR< a >;
2137 bb2:
2138 x = PHI <x' (bb0), ...>;
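As an illustration (variable names invented for this comment), this
catches the usual open-coded absolute value:

x = (a >= 0) ? a : -a;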
2140 MIN/MAX Replacement
2141 -------------------
2143 This transformation, implemented in minmax_replacement, replaces
2145 bb0:
2146 if (a <= b) goto bb2; else goto bb1;
2147 bb1:
2148 bb2:
2149 x = PHI <b (bb1), a (bb0), ...>;
2151 with
2153 bb0:
2154 x' = MIN_EXPR (a, b)
2155 bb2:
2156 x = PHI <x' (bb0), ...>;
2158 A similar transformation is done for MAX_EXPR.
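As an illustration (variable names invented for this comment), these
catch open-coded minimum and maximum:

x = (a <= b) ? a : b; (becomes MIN_EXPR <a, b>)
x = (a >= b) ? a : b; (becomes MAX_EXPR <a, b>)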
2161 This pass also performs a fifth transformation of a slightly different
2162 flavor.
2164 Adjacent Load Hoisting
2165 ----------------------
2167 This transformation replaces
2169 bb0:
2170 if (...) goto bb2; else goto bb1;
2171 bb1:
2172 x1 = (<expr>).field1;
2173 goto bb3;
2174 bb2:
2175 x2 = (<expr>).field2;
2176 bb3:
2177 # x = PHI <x1, x2>;
2179 with
2181 bb0:
2182 x1 = (<expr>).field1;
2183 x2 = (<expr>).field2;
2184 if (...) goto bb2; else goto bb1;
2185 bb1:
2186 goto bb3;
2187 bb2:
2188 bb3:
2189 # x = PHI <x1, x2>;
2191 The purpose of this transformation is to enable generation of conditional
2192 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of
2193 the loads is speculative, the transformation is restricted to very
2194 specific cases to avoid introducing a page fault. We are looking for
2195 the common idiom:
2197 if (...)
2198 x = y->left;
2199 else
2200 x = y->right;
2202 where left and right are typically adjacent pointers in a tree structure. */
2204 namespace {
2206 const pass_data pass_data_phiopt =
2207 {
2208 GIMPLE_PASS, /* type */
2209 "phiopt", /* name */
2210 OPTGROUP_NONE, /* optinfo_flags */
2211 TV_TREE_PHIOPT, /* tv_id */
2212 ( PROP_cfg | PROP_ssa ), /* properties_required */
2213 0, /* properties_provided */
2214 0, /* properties_destroyed */
2215 0, /* todo_flags_start */
2216 0, /* todo_flags_finish */
2217 };
2219 class pass_phiopt : public gimple_opt_pass
2220 {
2221 public:
2222 pass_phiopt (gcc::context *ctxt)
2223 : gimple_opt_pass (pass_data_phiopt, ctxt)
2224 {}
2226 /* opt_pass methods: */
2227 opt_pass * clone () { return new pass_phiopt (m_ctxt); }
2228 virtual bool gate (function *) { return flag_ssa_phiopt; }
2229 virtual unsigned int execute (function *)
2230 {
2231 return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
2232 }
2234 }; // class pass_phiopt
2236 } // anon namespace
2238 gimple_opt_pass *
2239 make_pass_phiopt (gcc::context *ctxt)
2240 {
2241 return new pass_phiopt (ctxt);
2242 }
2244 namespace {
2246 const pass_data pass_data_cselim =
2247 {
2248 GIMPLE_PASS, /* type */
2249 "cselim", /* name */
2250 OPTGROUP_NONE, /* optinfo_flags */
2251 TV_TREE_PHIOPT, /* tv_id */
2252 ( PROP_cfg | PROP_ssa ), /* properties_required */
2253 0, /* properties_provided */
2254 0, /* properties_destroyed */
2255 0, /* todo_flags_start */
2256 0, /* todo_flags_finish */
2257 };
2259 class pass_cselim : public gimple_opt_pass
2260 {
2261 public:
2262 pass_cselim (gcc::context *ctxt)
2263 : gimple_opt_pass (pass_data_cselim, ctxt)
2264 {}
2266 /* opt_pass methods: */
2267 virtual bool gate (function *) { return flag_tree_cselim; }
2268 virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }
2270 }; // class pass_cselim
2272 } // anon namespace
2274 gimple_opt_pass *
2275 make_pass_cselim (gcc::context *ctxt)
2276 {
2277 return new pass_cselim (ctxt);
2278 }