gcc/tree-ssa-phiopt.c

/* Optimization of PHI nodes by converting them into straightline code.
   Copyright (C) 2004-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "flags.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "domwalk.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "gimple-pretty-print.h"
#include "insn-config.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "tree-scalar-evolution.h"
#include "tree-inline.h"

#ifndef HAVE_conditional_move
#define HAVE_conditional_move (0)
#endif

static unsigned int tree_ssa_phiopt_worker (bool, bool);
static bool conditional_replacement (basic_block, basic_block,
				     edge, edge, gphi *, tree, tree);
static int value_replacement (basic_block, basic_block,
			      edge, edge, gimple, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
				edge, edge, gimple, tree, tree);
static bool abs_replacement (basic_block, basic_block,
			     edge, edge, gimple, tree, tree);
static bool neg_replacement (basic_block, basic_block,
			     edge, edge, gimple, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
				    hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
				  basic_block, basic_block);
static bool gate_hoist_loads (void);

/* This pass tries to transform conditional stores into unconditional
   ones, enabling further simplifications with the simpler then and else
   blocks.  In particular it replaces this:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS;
     bb2:

   with

     bb0:
       if (cond) goto bb1; else goto bb2;
     bb1:
       condtmp' = *p;
     bb2:
       condtmp = PHI <RHS, condtmp'>
       *p = condtmp;

   This transformation can only be done under several constraints,
   documented below.  It also replaces:

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
       *p = RHS1;
       goto bb3;
     bb2:
       *p = RHS2;
     bb3:

   with

     bb0:
       if (cond) goto bb3; else goto bb1;
     bb1:
     bb3:
       condtmp = PHI <RHS1, RHS2>
       *p = condtmp;  */

static unsigned int
tree_ssa_cs_elim (void)
{
  unsigned todo;
  /* ??? We are not interested in loop related info, but the following
     will create it, ICEing as we didn't init loops with pre-headers.
     An interfacing issue of find_data_references_in_bb.  */
  loop_optimizer_init (LOOPS_NORMAL);
  scev_initialize ();
  todo = tree_ssa_phiopt_worker (true, false);
  scev_finalize ();
  loop_optimizer_finalize ();
  return todo;
}

/* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1.  */

static gphi *
single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
{
  gimple_stmt_iterator i;
  gphi *phi = NULL;
  if (gimple_seq_singleton_p (seq))
    return as_a <gphi *> (gsi_stmt (gsi_start (seq)));
  for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
    {
      gphi *p = as_a <gphi *> (gsi_stmt (i));
      /* If the PHI arguments are equal then we can skip this PHI.  */
      if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
				       gimple_phi_arg_def (p, e1->dest_idx)))
	continue;

      /* If we have already seen a PHI whose two edge arguments differ,
	 there is no singleton for these edges; return NULL.  */
      if (phi)
	return NULL;

      phi = p;
    }
  return phi;
}

/* The core routine of conditional store replacement and normal
   phi optimizations.  Both share much of the infrastructure in how
   to match applicable basic block patterns.  DO_STORE_ELIM is true
   when we want to do conditional store replacement, false otherwise.
   DO_HOIST_LOADS is true when we want to hoist adjacent loads out
   of diamond control flow patterns, false otherwise.  */
static unsigned int
tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
{
  basic_block bb;
  basic_block *bb_order;
  unsigned n, i;
  bool cfgchanged = false;
  hash_set<tree> *nontrap = 0;

  if (do_store_elim)
    /* Calculate the set of non-trapping memory accesses.  */
    nontrap = get_non_trapping ();

  /* The replacement of conditional negation with a non-branching
     sequence is really only a win when optimizing for speed and we
     can avoid transformations by gimple if-conversion that result
     in poor RTL generation.

     Ideally either gimple if-conversion or the RTL expanders will
     be improved and the code to emit branchless conditional negation
     can be removed.  */
  bool replace_conditional_negation = false;
  if (!do_store_elim)
    replace_conditional_negation
      = ((!optimize_size && optimize >= 2)
	 || (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
	      && flag_tree_loop_if_convert != 0)
	     || flag_tree_loop_if_convert == 1
	     || flag_tree_loop_if_convert_stores == 1));

  /* Search every basic block for COND_EXPRs we may be able to optimize.

     We walk the blocks in an order that guarantees that a block with
     a single predecessor is processed before the predecessor.
     This ensures that we collapse inner ifs before visiting the
     outer ones, and also that we do not try to visit a removed
     block.  */
  bb_order = single_pred_before_succ_order ();
  n = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;

  for (i = 0; i < n; i++)
    {
      gimple cond_stmt;
      gphi *phi;
      basic_block bb1, bb2;
      edge e1, e2;
      tree arg0, arg1;

      bb = bb_order[i];

      cond_stmt = last_stmt (bb);
      /* Check to see if the last statement is a GIMPLE_COND.  */
      if (!cond_stmt
	  || gimple_code (cond_stmt) != GIMPLE_COND)
	continue;

      e1 = EDGE_SUCC (bb, 0);
      bb1 = e1->dest;
      e2 = EDGE_SUCC (bb, 1);
      bb2 = e2->dest;

      /* We cannot do the optimization on abnormal edges.  */
      if ((e1->flags & EDGE_ABNORMAL) != 0
	  || (e2->flags & EDGE_ABNORMAL) != 0)
	continue;

      /* If bb2 is NULL, or either bb1 or bb2 has no successors, give up.  */
      if (EDGE_COUNT (bb1->succs) == 0
	  || bb2 == NULL
	  || EDGE_COUNT (bb2->succs) == 0)
	continue;

      /* Find the bb which is the fall through to the other.  */
      if (EDGE_SUCC (bb1, 0)->dest == bb2)
	;
      else if (EDGE_SUCC (bb2, 0)->dest == bb1)
	{
	  basic_block bb_tmp = bb1;
	  edge e_tmp = e1;
	  bb1 = bb2;
	  bb2 = bb_tmp;
	  e1 = e2;
	  e2 = e_tmp;
	}
      else if (do_store_elim
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!single_succ_p (bb1)
	      || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
	      || !single_succ_p (bb2)
	      || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
	      || EDGE_COUNT (bb3->preds) != 2)
	    continue;
	  if (cond_if_else_store_replacement (bb1, bb2, bb3))
	    cfgchanged = true;
	  continue;
	}
      else if (do_hoist_loads
	       && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
	{
	  basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;

	  if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
	      && single_succ_p (bb1)
	      && single_succ_p (bb2)
	      && single_pred_p (bb1)
	      && single_pred_p (bb2)
	      && EDGE_COUNT (bb->succs) == 2
	      && EDGE_COUNT (bb3->preds) == 2
	      /* If one edge or the other is dominant, a conditional move
		 is likely to perform worse than the well-predicted branch.  */
	      && !predictable_edge_p (EDGE_SUCC (bb, 0))
	      && !predictable_edge_p (EDGE_SUCC (bb, 1)))
	    hoist_adjacent_loads (bb, bb1, bb2, bb3);
	  continue;
	}
      else
	continue;

      e1 = EDGE_SUCC (bb1, 0);

      /* Make sure that bb1 is just a fall through.  */
      if (!single_succ_p (bb1)
	  || (e1->flags & EDGE_FALLTHRU) == 0)
	continue;

      /* Also make sure that bb1 only has one predecessor and that it
	 is bb.  */
      if (!single_pred_p (bb1)
	  || single_pred (bb1) != bb)
	continue;

      if (do_store_elim)
	{
	  /* bb1 is the middle block, bb2 the join block, bb the split block,
	     e1 the fallthrough edge from bb1 to bb2.  We can't do the
	     optimization if the join block has more than two predecessors.  */
	  if (EDGE_COUNT (bb2->preds) > 2)
	    continue;
	  if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
	    cfgchanged = true;
	}
      else
	{
	  gimple_seq phis = phi_nodes (bb2);
	  gimple_stmt_iterator gsi;
	  bool candorest = true;

	  /* Value replacement can work with more than one PHI
	     so try that first.  */
	  for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      phi = as_a <gphi *> (gsi_stmt (gsi));
	      arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	      arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
	      if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
		{
		  candorest = false;
		  cfgchanged = true;
		  break;
		}
	    }

	  if (!candorest)
	    continue;

	  phi = single_non_singleton_phi_for_edges (phis, e1, e2);
	  if (!phi)
	    continue;

	  arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
	  arg1 = gimple_phi_arg_def (phi, e2->dest_idx);

	  /* Something is wrong if we cannot find the arguments in the PHI
	     node.  */
	  gcc_assert (arg0 != NULL && arg1 != NULL);

	  /* Do the replacement of conditional if it can be done.  */
	  if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (replace_conditional_negation
		   && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	  else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
	    cfgchanged = true;
	}
    }

  free (bb_order);

  if (do_store_elim)
    delete nontrap;
  /* If the CFG has changed, we should cleanup the CFG.  */
  if (cfgchanged && do_store_elim)
    {
      /* In cond-store replacement we have added some loads on edges
	 and new VOPS (as we moved the store, and created a load).  */
      gsi_commit_edge_inserts ();
      return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
    }
  else if (cfgchanged)
    return TODO_cleanup_cfg;
  return 0;
}

/* Replace PHI node element whose edge is E in block BB with variable NEW.
   Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
   is known to have two edges, one of which must reach BB).  */

static void
replace_phi_edge_with_variable (basic_block cond_block,
				edge e, gimple phi, tree new_tree)
{
  basic_block bb = gimple_bb (phi);
  basic_block block_to_remove;
  gimple_stmt_iterator gsi;

  /* Change the PHI argument to new.  */
  SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);

  /* Remove the empty basic block.  */
  if (EDGE_SUCC (cond_block, 0)->dest == bb)
    {
      EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;

      block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
    }
  else
    {
      EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
      EDGE_SUCC (cond_block, 1)->flags
	&= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
      EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;

      block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
    }
  delete_basic_block (block_to_remove);

  /* Eliminate the COND_EXPR at the end of COND_BLOCK.  */
  gsi = gsi_last_bb (cond_block);
  gsi_remove (&gsi, true);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
	     cond_block->index,
	     bb->index);
}

/* The function conditional_replacement does the main work of doing the
   conditional replacement.  Return true if the replacement is done.
   Otherwise return false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from PHI.  Likewise for ARG1.  */
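
/* As an illustrative sketch (this example is added for exposition and
   is not from the original sources): given

       if (a_1 > b_2) goto bb1; else goto bb2;
     bb1:
     bb2:
       x_3 = PHI <0 (bb1), 1 (bb2)>

   the PHI is rewritten to

       tmp_4 = a_1 <= b_2;
       x_3 = (TYPE) tmp_4;

   and for the (0, -1) argument pair the converted condition is
   additionally negated.  */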

static bool
conditional_replacement (basic_block cond_bb, basic_block middle_bb,
			 edge e0, edge e1, gphi *phi,
			 tree arg0, tree arg1)
{
  tree result;
  gimple stmt;
  gassign *new_stmt;
  tree cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  tree new_var, new_var2;
  bool neg;

  /* FIXME: Gimplification of complex type is too hard for now.  */
  /* We aren't prepared to handle vectors either (and it is a question
     if it would be worthwhile anyway).  */
  if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	|| POINTER_TYPE_P (TREE_TYPE (arg0)))
      || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
	   || POINTER_TYPE_P (TREE_TYPE (arg1))))
    return false;

  /* If the PHI arguments are the constants 0 and 1, or 0 and -1, then
     convert the PHI into the condition.  */
  if ((integer_zerop (arg0) && integer_onep (arg1))
      || (integer_zerop (arg1) && integer_onep (arg0)))
    neg = false;
  else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
	   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
    neg = true;
  else
    return false;

  if (!empty_block_p (middle_bb))
    return false;

  /* At this point we know we have a GIMPLE_COND with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     There is a single PHI node at the join point (BB) and its arguments
     are constants (0, 1) or (0, -1).

     So, given the condition COND, and the two PHI arguments, we can
     rewrite this PHI into non-branching code:

       dest = (COND) or dest = COND'

     We use the condition as-is if the argument associated with the
     true edge has the value one or the argument associated with the
     false edge has the value zero.  Note that those conditions are not
     the same since only one of the outgoing edges from the GIMPLE_COND
     will directly reach BB and thus be associated with an argument.  */

  stmt = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* To handle special cases like floating point comparison, it is easier and
     less error-prone to build a tree and gimplify it on the fly though it is
     less efficient.  */
  cond = fold_build2_loc (gimple_location (stmt),
			  gimple_cond_code (stmt), boolean_type_node,
			  gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));

  /* We need to know which is the true edge and which is the false
     edge so that we know when to invert the condition below.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  if ((e0 == true_edge && integer_zerop (arg0))
      || (e0 == false_edge && !integer_zerop (arg0))
      || (e1 == true_edge && integer_zerop (arg1))
      || (e1 == false_edge && !integer_zerop (arg1)))
    cond = fold_build1_loc (gimple_location (stmt),
			    TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);

  if (neg)
    {
      cond = fold_convert_loc (gimple_location (stmt),
			       TREE_TYPE (result), cond);
      cond = fold_build1_loc (gimple_location (stmt),
			      NEGATE_EXPR, TREE_TYPE (cond), cond);
    }

  /* Insert our new statements at the end of conditional block before the
     COND_STMT.  */
  gsi = gsi_for_stmt (stmt);
  new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
				      GSI_SAME_STMT);

  if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
    {
      source_location locus_0, locus_1;

      new_var2 = make_ssa_name (TREE_TYPE (result));
      new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
      gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
      new_var = new_var2;

      /* Set the locus to the first argument, unless it doesn't have one.  */
      locus_0 = gimple_phi_arg_location (phi, 0);
      locus_1 = gimple_phi_arg_location (phi, 1);
      if (locus_0 == UNKNOWN_LOCATION)
	locus_0 = locus_1;
      gimple_set_location (new_stmt, locus_0);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);

  /* Note that we optimized this PHI.  */
  return true;
}

/* Update *ARG which is defined in STMT so that it contains the
   computed value if that seems profitable.  Return true if the
   statement is made dead by that rewriting.  */
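
/* E.g., for a statement like  arg_1 = &p_2->i  where field I sits at
   offset zero, *ARG is rewritten to p_2 and the address computation
   becomes dead (an illustrative case, not an exhaustive list).  */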

static bool
jump_function_from_stmt (tree *arg, gimple stmt)
{
  enum tree_code code = gimple_assign_rhs_code (stmt);
  if (code == ADDR_EXPR)
    {
      /* For arg = &p->i transform it to p, if possible.  */
      tree rhs1 = gimple_assign_rhs1 (stmt);
      HOST_WIDE_INT offset;
      tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
						&offset);
      if (tem
	  && TREE_CODE (tem) == MEM_REF
	  && (mem_ref_offset (tem) + offset) == 0)
	{
	  *arg = TREE_OPERAND (tem, 0);
	  return true;
	}
    }
  /* TODO: Much like IPA-CP jump-functions we want to handle constant
     additions symbolically here, and we'd need to update the comparison
     code that compares the arg + cst tuples in our caller.  For now the
     code above exactly handles the VEC_BASE pattern from vec.h.  */
  return false;
}

/* RHS is a source argument in a BIT_AND_EXPR which feeds a conditional
   of the form SSA_NAME NE 0.

   If RHS is fed by a simple EQ_EXPR comparison of two values, see if
   the two input values of the EQ_EXPR match arg0 and arg1.

   If so update *code and return TRUE.  Otherwise return FALSE.  */

static bool
rhs_is_fed_for_value_replacement (const_tree arg0, const_tree arg1,
				  enum tree_code *code, const_tree rhs)
{
  /* Obviously if RHS is not an SSA_NAME, we can't look at the defining
     statement.  */
  if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple def1 = SSA_NAME_DEF_STMT (rhs);

      /* Verify the defining statement has an EQ_EXPR on the RHS.  */
      if (is_gimple_assign (def1) && gimple_assign_rhs_code (def1) == EQ_EXPR)
	{
	  /* Finally verify the source operands of the EQ_EXPR are equal
	     to arg0 and arg1.  */
	  tree op0 = gimple_assign_rhs1 (def1);
	  tree op1 = gimple_assign_rhs2 (def1);
	  if ((operand_equal_for_phi_arg_p (arg0, op0)
	       && operand_equal_for_phi_arg_p (arg1, op1))
	      || (operand_equal_for_phi_arg_p (arg0, op1)
		  && operand_equal_for_phi_arg_p (arg1, op0)))
	    {
	      /* We will perform the optimization.  */
	      *code = gimple_assign_rhs_code (def1);
	      return true;
	    }
	}
    }
  return false;
}

/* Return TRUE if arg0/arg1 are equal to the rhs/lhs or lhs/rhs of COND.

   Also return TRUE if arg0/arg1 are equal to the source arguments of
   an EQ comparison feeding a BIT_AND_EXPR which feeds COND.

   Return FALSE otherwise.  */
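
/* An illustrative instance of the BIT_AND_EXPR case handled below
   (chosen here for exposition):

       t_1 = a_2 == b_3;
       t_4 = t_1 & c_5;
       if (t_4 != 0) ...

   ARG0/ARG1 equal to a_2/b_3 are matched through the EQ_EXPR feeding
   the BIT_AND_EXPR, and *CODE is updated to EQ_EXPR.  */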

static bool
operand_equal_for_value_replacement (const_tree arg0, const_tree arg1,
				     enum tree_code *code, gimple cond)
{
  gimple def;
  tree lhs = gimple_cond_lhs (cond);
  tree rhs = gimple_cond_rhs (cond);

  if ((operand_equal_for_phi_arg_p (arg0, lhs)
       && operand_equal_for_phi_arg_p (arg1, rhs))
      || (operand_equal_for_phi_arg_p (arg1, lhs)
	  && operand_equal_for_phi_arg_p (arg0, rhs)))
    return true;

  /* Now handle more complex case where we have an EQ comparison
     which feeds a BIT_AND_EXPR which feeds COND.

     First verify that COND is of the form SSA_NAME NE 0.  */
  if (*code != NE_EXPR || !integer_zerop (rhs)
      || TREE_CODE (lhs) != SSA_NAME)
    return false;

  /* Now ensure that SSA_NAME is set by a BIT_AND_EXPR.  */
  def = SSA_NAME_DEF_STMT (lhs);
  if (!is_gimple_assign (def) || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
    return false;

  /* Now verify arg0/arg1 correspond to the source arguments of an
     EQ comparison feeding the BIT_AND_EXPR.  */

  tree tmp = gimple_assign_rhs1 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  tmp = gimple_assign_rhs2 (def);
  if (rhs_is_fed_for_value_replacement (arg0, arg1, code, tmp))
    return true;

  return false;
}

/* Returns true if ARG is a neutral element for operation CODE
   on the RIGHT side.  */

static bool
neutral_element_p (tree_code code, tree arg, bool right)
{
  switch (code)
    {
    case PLUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      return integer_zerop (arg);

    case LROTATE_EXPR:
    case RROTATE_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      return right && integer_zerop (arg);

    case MULT_EXPR:
      return integer_onep (arg);

    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
      return right && integer_onep (arg);

    case BIT_AND_EXPR:
      return integer_all_onesp (arg);

    default:
      return false;
    }
}

/* Returns true if ARG is an absorbing element for operation CODE.  */

static bool
absorbing_element_p (tree_code code, tree arg)
{
  switch (code)
    {
    case BIT_IOR_EXPR:
      return integer_all_onesp (arg);

    case MULT_EXPR:
    case BIT_AND_EXPR:
      return integer_zerop (arg);

    default:
      return false;
    }
}
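
/* To make the two predicates above concrete (an illustrative summary
   of the tables they encode): x + 0, x - 0, x * 1, x / 1, x & -1 and
   x >> 0 all leave x unchanged (neutral elements, the shift, minus and
   division only with the element on the right side), whereas x * 0
   and x & 0 are 0 and x | -1 is -1 regardless of x (absorbing
   elements).  */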

/* The function value_replacement does the main work of doing the value
   replacement.  Return non-zero if the replacement is done.  Otherwise
   return 0.  If we remove the middle basic block, return 2.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */
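
/* For instance (an illustrative case, assuming integral types):

       if (a_1 != b_2) goto bb1; else goto bb2;
     bb1:
     bb2:
       r_3 = PHI <a_1 (bb1), b_2 (bb2)>

   can be reduced to  r_3 = a_1,  since on the false edge a_1 == b_2
   holds anyway.  */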

static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
		   edge e0, edge e1, gimple phi,
		   tree arg0, tree arg1)
{
  gimple_stmt_iterator gsi;
  gimple cond;
  edge true_edge, false_edge;
  enum tree_code code;
  bool empty_or_with_defined_p = true;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return 0;

  /* If there is a statement in MIDDLE_BB that defines one of the PHI
     arguments, then adjust arg0 or arg1.  */
  gsi = gsi_start_nondebug_after_labels_bb (middle_bb);
  while (!gsi_end_p (gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      gsi_next_nondebug (&gsi);
      if (!is_gimple_assign (stmt))
	{
	  empty_or_with_defined_p = false;
	  continue;
	}
      /* Now try to adjust arg0 or arg1 according to the computation
	 in the statement.  */
      lhs = gimple_assign_lhs (stmt);
      if (!(lhs == arg0
	    && jump_function_from_stmt (&arg0, stmt))
	  || (lhs == arg1
	      && jump_function_from_stmt (&arg1, stmt)))
	empty_or_with_defined_p = false;
    }

  cond = last_stmt (cond_bb);
  code = gimple_cond_code (cond);

  /* This transformation is only valid for equality comparisons.  */
  if (code != NE_EXPR && code != EQ_EXPR)
    return 0;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument goes with which edge.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* At this point we know we have a COND_EXPR with two successors.
     One successor is BB, the other successor is an empty block which
     falls through into BB.

     The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.

     There is a single PHI node at the join point (BB) with two arguments.

     We now need to verify that the two arguments in the PHI node match
     the two arguments to the equality comparison.  */

  if (operand_equal_for_value_replacement (arg0, arg1, &code, cond))
    {
      edge e;
      tree arg;

      /* For NE_EXPR, we want to build an assignment result = arg where
	 arg is the PHI argument associated with the true edge.  For
	 EQ_EXPR we want the PHI argument associated with the false edge.  */
      e = (code == NE_EXPR ? true_edge : false_edge);

      /* Unfortunately, E may not reach BB (it may instead have gone to
	 OTHER_BLOCK).  If that is the case, then we want the single outgoing
	 edge from OTHER_BLOCK which reaches BB and represents the desired
	 path from COND_BLOCK.  */
      if (e->dest == middle_bb)
	e = single_succ_edge (e->dest);

      /* Now we know the incoming edge to BB that has the argument for the
	 RHS of our new assignment statement.  */
      if (e0 == e)
	arg = arg0;
      else
	arg = arg1;

      /* If the middle basic block was empty or is defining the
	 PHI arguments and this is a single phi where the args are different
	 for the edges e0 and e1 then we can remove the middle basic block.  */
      if (empty_or_with_defined_p
	  && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
						 e0, e1) == phi)
	{
	  replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
	  /* Note that we optimized this PHI.  */
	  return 2;
	}
      else
	{
	  /* Replace the PHI arguments with arg.  */
	  SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
	  SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "PHI ");
	      print_generic_expr (dump_file, gimple_phi_result (phi), 0);
	      fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
		       cond_bb->index);
	      print_generic_expr (dump_file, arg, 0);
	      fprintf (dump_file, ".\n");
	    }
	  return 1;
	}
    }

  /* Now optimize (x != 0) ? x + y : y to just y.
     The following condition is too restrictive, there can easily be another
     stmt in middle_bb, for instance a CONVERT_EXPR for the second argument.  */
  gimple assign = last_and_only_stmt (middle_bb);
  if (!assign || gimple_code (assign) != GIMPLE_ASSIGN
      || gimple_assign_rhs_class (assign) != GIMPLE_BINARY_RHS
      || (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))
	  && !POINTER_TYPE_P (TREE_TYPE (arg0))))
    return 0;

  /* Punt if there are (degenerate) PHIs in middle_bb, there should not be.  */
  if (!gimple_seq_empty_p (phi_nodes (middle_bb)))
    return 0;

  /* Only transform if it removes the condition.  */
  if (!single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)), e0, e1))
    return 0;

  /* Size-wise, this is always profitable.  */
  if (optimize_bb_for_speed_p (cond_bb)
      /* The special case is useless if it has a low probability.  */
      && profile_status_for_fn (cfun) != PROFILE_ABSENT
      && EDGE_PRED (middle_bb, 0)->probability < PROB_EVEN
      /* If assign is cheap, there is no point avoiding it.  */
      && estimate_num_insns (assign, &eni_time_weights)
	 >= 3 * estimate_num_insns (cond, &eni_time_weights))
    return 0;

  tree lhs = gimple_assign_lhs (assign);
  tree rhs1 = gimple_assign_rhs1 (assign);
  tree rhs2 = gimple_assign_rhs2 (assign);
  enum tree_code code_def = gimple_assign_rhs_code (assign);
  tree cond_lhs = gimple_cond_lhs (cond);
  tree cond_rhs = gimple_cond_rhs (cond);

  if (((code == NE_EXPR && e1 == false_edge)
       || (code == EQ_EXPR && e1 == true_edge))
      && arg0 == lhs
      && ((arg1 == rhs1
	   && operand_equal_for_phi_arg_p (rhs2, cond_lhs)
	   && neutral_element_p (code_def, cond_rhs, true))
	  || (arg1 == rhs2
	      && operand_equal_for_phi_arg_p (rhs1, cond_lhs)
	      && neutral_element_p (code_def, cond_rhs, false))
	  || (operand_equal_for_phi_arg_p (arg1, cond_rhs)
	      && (operand_equal_for_phi_arg_p (rhs2, cond_lhs)
		  || operand_equal_for_phi_arg_p (rhs1, cond_lhs))
	      && absorbing_element_p (code_def, cond_rhs))))
    {
      gsi = gsi_for_stmt (cond);
      gimple_stmt_iterator gsi_from = gsi_for_stmt (assign);
      gsi_move_before (&gsi_from, &gsi);
      replace_phi_edge_with_variable (cond_bb, e1, phi, lhs);
      return 2;
    }

  return 0;
}

/* The function minmax_replacement does the main work of doing the minmax
   replacement.  Return true if the replacement is done.  Otherwise return
   false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */
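
/* E.g., with an empty middle block (an illustrative case):

       if (a_1 < b_2) goto bb1; else goto bb2;
     bb1:
     bb2:
       r_3 = PHI <a_1 (bb1), b_2 (bb2)>

   becomes  r_3 = MIN_EXPR <a_1, b_2>.  */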

static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
		    edge e0, edge e1, gimple phi,
		    tree arg0, tree arg1)
{
  tree result, type;
  gcond *cond;
  gassign *new_stmt;
  edge true_edge, false_edge;
  enum tree_code cmp, minmax, ass_code;
  tree smaller, larger, arg_true, arg_false;
  gimple_stmt_iterator gsi, gsi_from;

  type = TREE_TYPE (PHI_RESULT (phi));

  /* The optimization may be unsafe due to NaNs.  */
  if (HONOR_NANS (type))
    return false;

  cond = as_a <gcond *> (last_stmt (cond_bb));
  cmp = gimple_cond_code (cond);

  /* This transformation is only valid for order comparisons.  Record which
     operand is smaller/larger if the result of the comparison is true.  */
  if (cmp == LT_EXPR || cmp == LE_EXPR)
    {
      smaller = gimple_cond_lhs (cond);
      larger = gimple_cond_rhs (cond);
    }
  else if (cmp == GT_EXPR || cmp == GE_EXPR)
    {
      smaller = gimple_cond_rhs (cond);
      larger = gimple_cond_lhs (cond);
    }
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know which PHI argument arrived over which edge.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* Forward the edges over the middle basic block.  */
  if (true_edge->dest == middle_bb)
    true_edge = EDGE_SUCC (true_edge->dest, 0);
  if (false_edge->dest == middle_bb)
    false_edge = EDGE_SUCC (false_edge->dest, 0);

  if (true_edge == e0)
    {
      gcc_assert (false_edge == e1);
      arg_true = arg0;
      arg_false = arg1;
    }
  else
    {
      gcc_assert (false_edge == e0);
      gcc_assert (true_edge == e1);
      arg_true = arg1;
      arg_false = arg0;
    }

  if (empty_block_p (middle_bb))
    {
      if (operand_equal_for_phi_arg_p (arg_true, smaller)
	  && operand_equal_for_phi_arg_p (arg_false, larger))
	{
	  /* Case

	     if (smaller < larger)
	       rslt = smaller;
	     else
	       rslt = larger;  */
	  minmax = MIN_EXPR;
	}
      else if (operand_equal_for_phi_arg_p (arg_false, smaller)
	       && operand_equal_for_phi_arg_p (arg_true, larger))
	minmax = MAX_EXPR;
      else
	return false;
    }
  else
    {
      /* Recognize the following case, assuming d <= u:

	 if (a <= u)
	   b = MAX (a, d);
	 x = PHI <b, u>

	 This is equivalent to

	 b = MAX (a, d);
	 x = MIN (b, u);  */

      gimple assign = last_and_only_stmt (middle_bb);
      tree lhs, op0, op1, bound;

      if (!assign
	  || gimple_code (assign) != GIMPLE_ASSIGN)
	return false;

      lhs = gimple_assign_lhs (assign);
      ass_code = gimple_assign_rhs_code (assign);
      if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
	return false;
      op0 = gimple_assign_rhs1 (assign);
      op1 = gimple_assign_rhs2 (assign);

      if (true_edge->src == middle_bb)
	{
	  /* We got here if the condition is true, i.e., SMALLER < LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_true))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_false, larger))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MAX_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_false, smaller))
	    {
	      /* Case

		 if (smaller < larger)
		   {
		     r' = MIN_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}
      else
	{
	  /* We got here if the condition is false, i.e., SMALLER > LARGER.  */
	  if (!operand_equal_for_phi_arg_p (lhs, arg_false))
	    return false;

	  if (operand_equal_for_phi_arg_p (arg_true, larger))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MIN_EXPR (smaller, bound)
		   }
		 r = PHI <r', larger>  --> to be turned to MAX_EXPR.  */
	      if (ass_code != MIN_EXPR)
		return false;

	      minmax = MAX_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, smaller))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, smaller))
		bound = op0;
	      else
		return false;

	      /* We need BOUND >= LARGER.  */
	      if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
						  bound, larger)))
		return false;
	    }
	  else if (operand_equal_for_phi_arg_p (arg_true, smaller))
	    {
	      /* Case

		 if (smaller > larger)
		   {
		     r' = MAX_EXPR (larger, bound)
		   }
		 r = PHI <r', smaller>  --> to be turned to MIN_EXPR.  */
	      if (ass_code != MAX_EXPR)
		return false;

	      minmax = MIN_EXPR;
	      if (operand_equal_for_phi_arg_p (op0, larger))
		bound = op1;
	      else if (operand_equal_for_phi_arg_p (op1, larger))
		bound = op0;
	      else
		return false;

	      /* We need BOUND <= SMALLER.  */
	      if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
						  bound, smaller)))
		return false;
	    }
	  else
	    return false;
	}

      /* Move the statement from the middle block.  */
      gsi = gsi_last_bb (cond_bb);
      gsi_from = gsi_last_nondebug_bb (middle_bb);
      gsi_move_before (&gsi_from, &gsi);
    }

  /* Emit the statement to compute min/max.  */
  result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
  new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
  return true;
}

/* The function abs_replacement does the main work of doing the absolute
   value replacement.  Return true if the replacement is done.  Otherwise
   return false.
   BB is the basic block where the replacement is going to be done.  ARG0
   is argument 0 from the PHI.  Likewise for ARG1.  */
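
/* E.g. (an illustrative case):

       if (a_1 < 0) goto bb1; else goto bb2;
     bb1:
       a_2 = -a_1;
     bb2:
       r_3 = PHI <a_2 (bb1), a_1 (bb2)>

   becomes  r_3 = ABS_EXPR <a_1>.  */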

static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple phi, tree arg0, tree arg1)
{
  tree result;
  gassign *new_stmt;
  gimple cond;
  gimple_stmt_iterator gsi;
  edge true_edge, false_edge;
  gimple assign;
  edge e;
  tree rhs, lhs;
  bool negate;
  enum tree_code cond_code;

  /* If the type says honor signed zeros we cannot do this
     optimization.  */
  if (HONOR_SIGNED_ZEROS (arg1))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we can not
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we can not optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  cond = last_stmt (cond_bb);
  result = PHI_RESULT (phi);

  /* Only relationals comparing arg[01] against zero are interesting.  */
  cond_code = gimple_cond_code (cond);
  if (cond_code != GT_EXPR && cond_code != GE_EXPR
      && cond_code != LT_EXPR && cond_code != LE_EXPR)
    return false;

  /* Make sure the conditional is arg[01] OP y.  */
  if (gimple_cond_lhs (cond) != rhs)
    return false;

  if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
      ? real_zerop (gimple_cond_rhs (cond))
      : integer_zerop (gimple_cond_rhs (cond)))
    ;
  else
    return false;

  /* We need to know which is the true edge and which is the false
     edge so that we know if we have abs or negative abs.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);

  /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
     will need to negate the result.  Similarly for LT_EXPR/LE_EXPR if
     the false edge goes to OTHER_BLOCK.  */
  if (cond_code == GT_EXPR || cond_code == GE_EXPR)
    e = true_edge;
  else
    e = false_edge;

  if (e->dest == middle_bb)
    negate = true;
  else
    negate = false;

  result = duplicate_ssa_name (result, NULL);

  if (negate)
    lhs = make_ssa_name (TREE_TYPE (result));
  else
    lhs = result;

  /* Build the modify expression with abs expression.  */
  new_stmt = gimple_build_assign (lhs, ABS_EXPR, rhs);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  if (negate)
    {
      /* Get the right GSI.  We want to insert after the recently
	 added ABS_EXPR statement (which we know is the first statement
	 in the block).  */
      new_stmt = gimple_build_assign (result, NEGATE_EXPR, lhs);

      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }

  replace_phi_edge_with_variable (cond_bb, e1, phi, result);

  /* Note that we optimized this PHI.  */
  return true;
}

/* The function neg_replacement replaces conditional negation with
   equivalent straight line code.  Returns TRUE if replacement is done,
   otherwise returns FALSE.

   COND_BB branches around negation occurring in MIDDLE_BB.

   E0 and E1 are edges out of COND_BB.  E0 reaches MIDDLE_BB and
   E1 reaches the other successor which should contain PHI with
   arguments ARG0 and ARG1.

   Assuming negation is to occur when the condition is true,
   then the non-branching sequence is:

     result = (rhs ^ -cond) + cond

   Inverting the condition or its result gives us negation
   when the original condition is false.  */
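
/* To see why the identity holds, note that COND is 0 or 1 and the
   arithmetic is two's complement: if COND is 0 then -COND is 0, so
   (rhs ^ 0) + 0 == rhs; if COND is 1 then -COND is all ones, so
   rhs ^ -1 == ~rhs and ~rhs + 1 == -rhs.  */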

static bool
neg_replacement (basic_block cond_bb, basic_block middle_bb,
		 edge e0 ATTRIBUTE_UNUSED, edge e1,
		 gimple phi, tree arg0, tree arg1)
{
  gimple new_stmt, cond;
  gimple_stmt_iterator gsi;
  gimple assign;
  edge true_edge, false_edge;
  tree rhs, lhs;
  enum tree_code cond_code;
  bool invert = false;

  /* This transformation performs logical operations on the
     incoming arguments.  So force them to be integral types.  */
  if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
    return false;

  /* OTHER_BLOCK must have only one executable statement which must have the
     form arg0 = -arg1 or arg1 = -arg0.  */

  assign = last_and_only_stmt (middle_bb);
  /* If we did not find the proper negation assignment, then we can not
     optimize.  */
  if (assign == NULL)
    return false;

  /* If we got here, then we have found the only executable statement
     in OTHER_BLOCK.  If it is anything other than arg0 = -arg1 or
     arg1 = -arg0, then we can not optimize.  */
  if (gimple_code (assign) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (assign);

  if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
    return false;

  rhs = gimple_assign_rhs1 (assign);

  /* The assignment has to be arg0 = -arg1 or arg1 = -arg0.  */
  if (!(lhs == arg0 && rhs == arg1)
      && !(lhs == arg1 && rhs == arg0))
    return false;

  /* The basic sequence assumes we negate when the condition is true.
     If we need the opposite, then we will either need to invert the
     condition or its result.  */
  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
  invert = false_edge->dest == middle_bb;

  /* Unlike abs_replacement, we can handle arbitrary conditionals here.  */
  cond = last_stmt (cond_bb);
  cond_code = gimple_cond_code (cond);

  /* If inversion is needed, first try to invert the test since
     that's cheapest.  */
  if (invert)
    {
      bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
      enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);

      /* If invert_tree_comparison was successful, then use its return
	 value as the new code and note that inversion is no longer
	 needed.  */
      if (new_code != ERROR_MARK)
	{
	  cond_code = new_code;
	  invert = false;
	}
    }

  tree cond_val = make_ssa_name (boolean_type_node);
  new_stmt = gimple_build_assign (cond_val, cond_code,
				  gimple_cond_lhs (cond),
				  gimple_cond_rhs (cond));
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  /* If we still need inversion, then invert the result of the
     condition.  */
  if (invert)
    {
      tree tmp = make_ssa_name (boolean_type_node);
      new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
				      boolean_true_node);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
      cond_val = tmp;
    }

  /* Get the condition in the right type so that we can perform
     logical and arithmetic operations on it.  */
  tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
				  cond_val_converted);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  tree tmp = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
				  neg_cond_val_converted);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
  new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
  gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);

  replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);

  /* Note that we optimized this PHI.  */
  return true;
}

/* Auxiliary functions to determine the set of memory accesses which
   can't trap because they are preceded by accesses to the same memory
   portion.  We do that for MEM_REFs, so we only need to track
   the SSA_NAME of the pointer indirectly referenced.  The algorithm
   simply is a walk over all instructions in dominator order.  When
   we see a MEM_REF we determine if we've already seen a same
   ref anywhere up to the root of the dominator tree.  If we do, the
   current access can't trap.  If we don't see any dominating access,
   the current access might trap, but might also make later accesses
   non-trapping, so we remember it.  We need to be careful with loads
   or stores, for instance a load might not trap, while a store would,
   so if we see a dominating read access this doesn't mean that a later
   write access would not trap.  Hence we also need to differentiate the
   type of access(es) seen.

   ??? We currently are very conservative and assume that a load might
   trap even if a store doesn't (write-only memory).  This probably is
   overly conservative.  */
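
/* For instance (an illustrative case): once  *p_1 = a  has executed,
   a later store  *p_1 = b  of the same size and offset cannot trap,
   so it is safe to execute the second store unconditionally.  */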

/* A hash-table of SSA_NAMEs, and in which basic block a MEM_REF
   through it was seen, which would constitute a no-trap region for
   same accesses.  */
struct name_to_bb
{
  unsigned int ssa_name_ver;
  unsigned int phase;
  bool store;
  HOST_WIDE_INT offset, size;
  basic_block bb;
};

/* Hashtable helpers.  */

struct ssa_names_hasher : typed_free_remove <name_to_bb>
{
  typedef name_to_bb value_type;
  typedef name_to_bb compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Used for quick clearing of the hash-table when we see calls.
   Hash entries with phase < nt_call_phase are invalid.  */
static unsigned int nt_call_phase;

/* The hash function.  */

inline hashval_t
ssa_names_hasher::hash (const value_type *n)
{
  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
	 ^ (n->offset << 6) ^ (n->size << 3);
}

/* The equality function of *P1 and *P2.  */

inline bool
ssa_names_hasher::equal (const value_type *n1, const compare_type *n2)
{
  return n1->ssa_name_ver == n2->ssa_name_ver
	 && n1->store == n2->store
	 && n1->offset == n2->offset
	 && n1->size == n2->size;
}

class nontrapping_dom_walker : public dom_walker
{
public:
  nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}

  virtual void before_dom_children (basic_block);
  virtual void after_dom_children (basic_block);

private:

  /* We see the expression EXP in basic block BB.  If it's an interesting
     expression (a MEM_REF through an SSA_NAME) possibly insert the
     expression into the set NONTRAP or the hash table of seen expressions.
     STORE is true if this expression is on the LHS, otherwise it's on
     the RHS.  */
  void add_or_mark_expr (basic_block, tree, bool);

  hash_set<tree> *m_nontrapping;

  /* The hash table for remembering what we've seen.  */
  hash_table<ssa_names_hasher> m_seen_ssa_names;
};

/* Called by walk_dominator_tree, when entering the block BB.  */
void
nontrapping_dom_walker::before_dom_children (basic_block bb)
{
  edge e;
  edge_iterator ei;
  gimple_stmt_iterator gsi;

  /* If we haven't seen all our predecessors, clear the hash-table.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if ((((size_t)e->src->aux) & 2) == 0)
      {
	nt_call_phase++;
	break;
      }

  /* Mark this BB as being on the path to dominator root and as visited.  */
  bb->aux = (void*)(1 | 2);

  /* And walk the statements in order.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
	nt_call_phase++;
      else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
	{
	  add_or_mark_expr (bb, gimple_assign_lhs (stmt), true);
	  add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false);
	}
    }
}

/* Called by walk_dominator_tree, when basic block BB is exited.  */
void
nontrapping_dom_walker::after_dom_children (basic_block bb)
{
  /* This BB isn't on the path to dominator root anymore.  */
  bb->aux = (void*)2;
}

/* We see the expression EXP in basic block BB.  If it's an interesting
   expression (a MEM_REF through an SSA_NAME) possibly insert the
   expression into the set NONTRAP or the hash table of seen expressions.
   STORE is true if this expression is on the LHS, otherwise it's on
   the RHS.  */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
  HOST_WIDE_INT size;

  if (TREE_CODE (exp) == MEM_REF
      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
    {
      tree name = TREE_OPERAND (exp, 0);
      struct name_to_bb map;
      name_to_bb **slot;
      struct name_to_bb *n2bb;
      basic_block found_bb = 0;

      /* Try to find the last seen MEM_REF through the same
	 SSA_NAME, which can trap.  */
      map.ssa_name_ver = SSA_NAME_VERSION (name);
      map.phase = 0;
      map.bb = 0;
      map.store = store;
      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
      map.size = size;

      slot = m_seen_ssa_names.find_slot (&map, INSERT);
      n2bb = *slot;
      if (n2bb && n2bb->phase >= nt_call_phase)
	found_bb = n2bb->bb;

      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
	 (it's in a basic block on the path from us to the dominator root)
	 then we can't trap.  */
      if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
	{
	  m_nontrapping->add (exp);
	}
      else
	{
	  /* EXP might trap, so insert it into the hash table.  */
	  if (n2bb)
	    {
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	    }
	  else
	    {
	      n2bb = XNEW (struct name_to_bb);
	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
	      n2bb->phase = nt_call_phase;
	      n2bb->bb = bb;
	      n2bb->store = store;
	      n2bb->offset = map.offset;
	      n2bb->size = size;
	      *slot = n2bb;
	    }
	}
    }
}

/* This is the entry point of gathering non-trapping memory accesses.
   It will do a dominator walk over the whole function, and it will
   make use of the bb->aux pointers.  It returns a set of trees
   (the MEM_REFs themselves) which can't trap.  */
static hash_set<tree> *
get_non_trapping (void)
{
  nt_call_phase = 0;
  hash_set<tree> *nontrap = new hash_set<tree>;
  /* We're going to do a dominator walk, so ensure that we have
     dominance information.  */
  calculate_dominance_info (CDI_DOMINATORS);

  nontrapping_dom_walker (CDI_DOMINATORS, nontrap)
    .walk (cfun->cfg->x_entry_block_ptr);

  clear_aux_for_blocks ();
  return nontrap;
}

/* Do the main work of conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
   MIDDLE_BB:
     something
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that MIDDLE_BB contains only one store, that that store
   doesn't trap (not via NOTRAP, but via checking if an access to the same
   memory location dominates us) and that the store has a "simple" RHS.  */

static bool
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
			edge e0, edge e1, hash_set<tree> *nontrap)
{
  gimple assign = last_and_only_stmt (middle_bb);
  tree lhs, rhs, name, name2;
  gphi *newphi;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  source_location locus;

  /* Check if middle_bb contains only one store.  */
  if (!assign
      || !gimple_assign_single_p (assign)
      || gimple_has_volatile_ops (assign))
    return false;

  locus = gimple_location (assign);
  lhs = gimple_assign_lhs (assign);
  rhs = gimple_assign_rhs1 (assign);
  if (TREE_CODE (lhs) != MEM_REF
      || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
      || !is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Prove that we can move the store down.  We could also check
     TREE_THIS_NOTRAP here, but in that case we also could move stores,
     whose value is not available readily, which we want to avoid.  */
  if (!nontrap->contains (lhs))
    return false;

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the single store.  */
  gsi = gsi_for_stmt (assign);
  unlink_stmt_vdef (assign);
  gsi_remove (&gsi, true);
  release_defs (assign);

  /* 2) Insert a load from the memory of the store to the temporary
	on the edge which did not contain the store.  */
  lhs = unshare_expr (lhs);
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  new_stmt = gimple_build_assign (name, lhs);
  gimple_set_location (new_stmt, locus);
  gsi_insert_on_edge (e1, new_stmt);

  /* 3) Create a PHI node at the join block, with one argument
	holding the old RHS, and the other holding the temporary
	where we stored the old memory contents.  */
  name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name2, join_bb);
  add_phi_arg (newphi, rhs, e0, locus);
  add_phi_arg (newphi, name, e1, locus);

  lhs = unshare_expr (lhs);
  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 4) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}

/* Do the main work of conditional store replacement.  */

static bool
cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
				  basic_block join_bb, gimple then_assign,
				  gimple else_assign)
{
  tree lhs_base, lhs, then_rhs, else_rhs, name;
  source_location then_locus, else_locus;
  gimple_stmt_iterator gsi;
  gphi *newphi;
  gassign *new_stmt;

  if (then_assign == NULL
      || !gimple_assign_single_p (then_assign)
      || gimple_clobber_p (then_assign)
      || gimple_has_volatile_ops (then_assign)
      || else_assign == NULL
      || !gimple_assign_single_p (else_assign)
      || gimple_clobber_p (else_assign)
      || gimple_has_volatile_ops (else_assign))
    return false;

  lhs = gimple_assign_lhs (then_assign);
  if (!is_gimple_reg_type (TREE_TYPE (lhs))
      || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
    return false;

  lhs_base = get_base_address (lhs);
  if (lhs_base == NULL_TREE
      || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
    return false;

  then_rhs = gimple_assign_rhs1 (then_assign);
  else_rhs = gimple_assign_rhs1 (else_assign);
  then_locus = gimple_location (then_assign);
  else_locus = gimple_location (else_assign);

  /* Now we've checked the constraints, so do the transformation:
     1) Remove the stores.  */
  gsi = gsi_for_stmt (then_assign);
  unlink_stmt_vdef (then_assign);
  gsi_remove (&gsi, true);
  release_defs (then_assign);

  gsi = gsi_for_stmt (else_assign);
  unlink_stmt_vdef (else_assign);
  gsi_remove (&gsi, true);
  release_defs (else_assign);

  /* 2) Create a PHI node at the join block, with one argument
	holding the RHS of the THEN branch and the other holding
	the RHS of the ELSE branch.  */
  name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
  newphi = create_phi_node (name, join_bb);
  add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
  add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);

  new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));

  /* 3) Insert that PHI node.  */
  gsi = gsi_after_labels (join_bb);
  if (gsi_end_p (gsi))
    {
      gsi = gsi_last_bb (join_bb);
      gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
    }
  else
    gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);

  return true;
}

/* Conditional store replacement.  We already know
   that the recognized pattern looks like so:

   split:
     if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
   THEN_BB:
     ...
     X = Y;
     ...
     goto JOIN_BB;
   ELSE_BB:
     ...
     X = Z;
     ...
     fallthrough (edge E0)
   JOIN_BB:
     some more

   We check that it is safe to sink the store to JOIN_BB by verifying that
   there are no read-after-write or write-after-write dependencies in
   THEN_BB and ELSE_BB.  */
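
/* For instance (an illustrative case), sinking is refused when THEN_BB
   contains

       *p_1 = x_2;
       y_3 = *p_1;

   because moving the store below the load would break the
   read-after-write dependence.  */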

static bool
cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
                                basic_block join_bb)
{
  gimple then_assign = last_and_only_stmt (then_bb);
  gimple else_assign = last_and_only_stmt (else_bb);
  vec<data_reference_p> then_datarefs, else_datarefs;
  vec<ddr_p> then_ddrs, else_ddrs;
  gimple then_store, else_store;
  bool found, ok = false, res;
  struct data_dependence_relation *ddr;
  data_reference_p then_dr, else_dr;
  int i, j;
  tree then_lhs, else_lhs;
  basic_block blocks[3];

  if (MAX_STORES_TO_SINK == 0)
    return false;

  /* Handle the case with single statement in THEN_BB and ELSE_BB.  */
  if (then_assign && else_assign)
    return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                             then_assign, else_assign);

  /* Find data references.  */
  then_datarefs.create (1);
  else_datarefs.create (1);
  if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
       == chrec_dont_know)
      || !then_datarefs.length ()
      || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
          == chrec_dont_know)
      || !else_datarefs.length ())
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Find pairs of stores with equal LHS.  */
  auto_vec<gimple, 1> then_stores, else_stores;
  FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
    {
      if (DR_IS_READ (then_dr))
        continue;

      then_store = DR_STMT (then_dr);
      then_lhs = gimple_get_lhs (then_store);
      if (then_lhs == NULL_TREE)
        continue;
      found = false;

      FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
        {
          if (DR_IS_READ (else_dr))
            continue;

          else_store = DR_STMT (else_dr);
          else_lhs = gimple_get_lhs (else_store);
          if (else_lhs == NULL_TREE)
            continue;

          if (operand_equal_p (then_lhs, else_lhs, 0))
            {
              found = true;
              break;
            }
        }

      if (!found)
        continue;

      then_stores.safe_push (then_store);
      else_stores.safe_push (else_store);
    }

  /* No pairs of stores found.  */
  if (!then_stores.length ()
      || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
    {
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }

  /* Compute and check data dependencies in both basic blocks.  */
  then_ddrs.create (1);
  else_ddrs.create (1);
  if (!compute_all_dependences (then_datarefs, &then_ddrs,
                                vNULL, false)
      || !compute_all_dependences (else_datarefs, &else_ddrs,
                                   vNULL, false))
    {
      free_dependence_relations (then_ddrs);
      free_dependence_relations (else_ddrs);
      free_data_refs (then_datarefs);
      free_data_refs (else_datarefs);
      return false;
    }
  blocks[0] = then_bb;
  blocks[1] = else_bb;
  blocks[2] = join_bb;
  renumber_gimple_stmt_uids_in_blocks (blocks, 3);
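
  /* As a (hypothetical) illustration of what the checks below reject:
     a THEN_BB of the form

       *p = 1;
       tmp = *p;

     contains a read of *p after the write, so sinking the store to
     JOIN_BB would change the value tmp observes; we must give up.  */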

  /* Check that there are no read-after-write or write-after-write
     dependencies in THEN_BB.  */
  FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
          free_data_refs (then_datarefs);
          free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Check that there are no read-after-write or write-after-write
     dependencies in ELSE_BB.  */
  FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
    {
      struct data_reference *dra = DDR_A (ddr);
      struct data_reference *drb = DDR_B (ddr);

      if (DDR_ARE_DEPENDENT (ddr) != chrec_known
          && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
               && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
              || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
                  && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
              || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
        {
          free_dependence_relations (then_ddrs);
          free_dependence_relations (else_ddrs);
          free_data_refs (then_datarefs);
          free_data_refs (else_datarefs);
          return false;
        }
    }

  /* Sink stores with same LHS.  */
  FOR_EACH_VEC_ELT (then_stores, i, then_store)
    {
      else_store = else_stores[i];
      res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
                                              then_store, else_store);
      ok = ok || res;
    }

  free_dependence_relations (then_ddrs);
  free_dependence_relations (else_ddrs);
  free_data_refs (then_datarefs);
  free_data_refs (else_datarefs);

  return ok;
}
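
/* Note: the number of store pairs sunk above is capped by
   MAX_STORES_TO_SINK (presumably exposed as the
   --param max-stores-to-sink knob); a value of 0 disables the
   transformation entirely, as checked at function entry.  */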

/* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB.  */

static bool
local_mem_dependence (gimple stmt, basic_block bb)
{
  tree vuse = gimple_vuse (stmt);
  gimple def;

  if (!vuse)
    return false;

  def = SSA_NAME_DEF_STMT (vuse);
  return (def && gimple_bb (def) == bb);
}

/* Given a "diamond" control-flow pattern where BB0 tests a condition,
   BB1 and BB2 are "then" and "else" blocks dependent on this test,
   and BB3 rejoins control flow following BB1 and BB2, look for
   opportunities to hoist loads as follows.  If BB3 contains a PHI of
   two loads, one each occurring in BB1 and BB2, and the loads are
   provably of adjacent fields in the same structure, then move both
   loads into BB0.  Of course this can only be done if there are no
   dependencies preventing such motion.

   One of the hoisted loads will always be speculative, so the
   transformation is currently conservative:

    - The fields must be strictly adjacent.
    - The two fields must occupy a single memory block that is
      guaranteed to not cross a page boundary.

   The last is difficult to prove, as such memory blocks should be
   aligned on the minimum of the stack alignment boundary and the
   alignment guaranteed by heap allocation interfaces.  Thus we rely
   on a parameter for the alignment value.

   Provided a good value is used for the last case, the first
   restriction could possibly be relaxed.  */
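
/* A worked instance of the profitability arithmetic below (hypothetical
   type and field names; assumes a 64-bit target and an L1 line of
   64 bytes, i.e. param_align_bits == 512):

     struct node { struct node *left, *right; };

   For loads of n->left and n->right we get offset1 == 0, offset2 == 64,
   size2 == 64 and align1 == 64 (all in bits), so
   align1 + offset2 - offset1 + size2 == 192 <= 512: both fields fit in
   one cache line and the pair is eligible for hoisting.  */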

static void
hoist_adjacent_loads (basic_block bb0, basic_block bb1,
                      basic_block bb2, basic_block bb3)
{
  int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
  unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
  gphi_iterator gsi;

  /* Walk the phis in bb3 looking for an opportunity.  We are looking
     for phis of two SSA names, one each of which is defined in bb1 and
     bb2.  */
  for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi_stmt = gsi.phi ();
      gimple def1, def2, defswap;
      tree arg1, arg2, ref1, ref2, field1, field2, fieldswap;
      tree tree_offset1, tree_offset2, tree_size2, next;
      int offset1, offset2, size2;
      unsigned align1;
      gimple_stmt_iterator gsi2;
      basic_block bb_for_def1, bb_for_def2;

      if (gimple_phi_num_args (phi_stmt) != 2
          || virtual_operand_p (gimple_phi_result (phi_stmt)))
        continue;

      arg1 = gimple_phi_arg_def (phi_stmt, 0);
      arg2 = gimple_phi_arg_def (phi_stmt, 1);

      if (TREE_CODE (arg1) != SSA_NAME
          || TREE_CODE (arg2) != SSA_NAME
          || SSA_NAME_IS_DEFAULT_DEF (arg1)
          || SSA_NAME_IS_DEFAULT_DEF (arg2))
        continue;

      def1 = SSA_NAME_DEF_STMT (arg1);
      def2 = SSA_NAME_DEF_STMT (arg2);

      if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
          && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
        continue;

      /* Check the mode of the arguments to be sure a conditional move
         can be generated for it.  */
      if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
          == CODE_FOR_nothing)
        continue;

      /* Both statements must be assignments whose RHS is a COMPONENT_REF.  */
      if (!gimple_assign_single_p (def1)
          || !gimple_assign_single_p (def2)
          || gimple_has_volatile_ops (def1)
          || gimple_has_volatile_ops (def2))
        continue;

      ref1 = gimple_assign_rhs1 (def1);
      ref2 = gimple_assign_rhs1 (def2);

      if (TREE_CODE (ref1) != COMPONENT_REF
          || TREE_CODE (ref2) != COMPONENT_REF)
        continue;

      /* The zeroth operand of the two component references must be
         identical.  It is not sufficient to compare get_base_address of
         the two references, because this could allow for different
         elements of the same array in the two trees.  It is not safe to
         assume that the existence of one array element implies the
         existence of a different one.  */
      if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
        continue;

      field1 = TREE_OPERAND (ref1, 1);
      field2 = TREE_OPERAND (ref2, 1);

      /* Check for field adjacency, and ensure field1 comes first.  */
      for (next = DECL_CHAIN (field1);
           next && TREE_CODE (next) != FIELD_DECL;
           next = DECL_CHAIN (next))
        ;

      if (next != field2)
        {
          for (next = DECL_CHAIN (field2);
               next && TREE_CODE (next) != FIELD_DECL;
               next = DECL_CHAIN (next))
            ;

          if (next != field1)
            continue;

          fieldswap = field1;
          field1 = field2;
          field2 = fieldswap;
          defswap = def1;
          def1 = def2;
          def2 = defswap;
        }

      bb_for_def1 = gimple_bb (def1);
      bb_for_def2 = gimple_bb (def2);

      /* Check for proper alignment of the first field.  */
      tree_offset1 = bit_position (field1);
      tree_offset2 = bit_position (field2);
      tree_size2 = DECL_SIZE (field2);

      if (!tree_fits_uhwi_p (tree_offset1)
          || !tree_fits_uhwi_p (tree_offset2)
          || !tree_fits_uhwi_p (tree_size2))
        continue;

      offset1 = tree_to_uhwi (tree_offset1);
      offset2 = tree_to_uhwi (tree_offset2);
      size2 = tree_to_uhwi (tree_size2);
      align1 = DECL_ALIGN (field1) % param_align_bits;

      if (offset1 % BITS_PER_UNIT != 0)
        continue;

      /* For profitability, the two field references should fit within
         a single cache line.  */
      if (align1 + offset2 - offset1 + size2 > param_align_bits)
        continue;

      /* The two expressions cannot be dependent upon vdefs defined
         in bb1/bb2.  */
      if (local_mem_dependence (def1, bb_for_def1)
          || local_mem_dependence (def2, bb_for_def2))
        continue;

      /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
         bb0.  We hoist the first one first so that a cache miss is handled
         efficiently regardless of hardware cache-fill policy.  */
      gsi2 = gsi_for_stmt (def1);
      gsi_move_to_bb_end (&gsi2, bb0);
      gsi2 = gsi_for_stmt (def2);
      gsi_move_to_bb_end (&gsi2, bb0);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "\nHoisting adjacent loads from %d and %d into %d: \n",
                   bb_for_def1->index, bb_for_def2->index, bb0->index);
          print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
          print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
        }
    }
}

/* Determine whether we should attempt to hoist adjacent loads out of
   diamond patterns in pass_phiopt.  Always hoist loads if
   -fhoist-adjacent-loads is specified and the target machine has
   both a conditional move instruction and a defined cache line size.  */

static bool
gate_hoist_loads (void)
{
  return (flag_hoist_adjacent_loads == 1
          && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
          && HAVE_conditional_move);
}

/* This pass tries to replace an if-then-else block with an
   assignment.  We have four kinds of transformations.  Some of these
   transformations are also performed by the ifcvt RTL optimizer.

   Conditional Replacement
   -----------------------

   This transformation, implemented in conditional_replacement,
   replaces

     bb0:
       if (cond) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <0 (bb1), 1 (bb0), ...>;

   with

     bb0:
       x' = cond;
       goto bb2;
     bb2:
       x = PHI <x' (bb0), ...>;

   We remove bb1 as it becomes unreachable.  This occurs often due to
   gimplification of conditionals.
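
   For instance (one hypothetical source form among several that can
   produce this shape), the C expression

     x = cond ? 1 : 0;

   may be gimplified into such a diamond and then collapses back into
   a single assignment of the condition's value.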

   Value Replacement
   -----------------

   This transformation, implemented in value_replacement, replaces

     bb0:
       if (a != b) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <a (bb1), b (bb0), ...>;

   with

     bb0:
     bb2:
       x = PHI <b (bb0), ...>;

   This opportunity can sometimes occur as a result of other
   optimizations.


   Another case caught by value replacement looks like this:

     bb0:
       t1 = a == CONST;
       t2 = b > c;
       t3 = t1 & t2;
       if (t3 != 0) goto bb1; else goto bb2;
     bb1:
     bb2:
       x = PHI <CONST, a>

   Gets replaced with:

     bb0:
     bb2:
       t1 = a == CONST;
       t2 = b > c;
       t3 = t1 & t2;
       x = a;

   ABS Replacement
   ---------------

   This transformation, implemented in abs_replacement, replaces

     bb0:
       if (a >= 0) goto bb2; else goto bb1;
     bb1:
       x = -a;
     bb2:
       x = PHI <x (bb1), a (bb0), ...>;

   with

     bb0:
       x' = ABS_EXPR< a >;
     bb2:
       x = PHI <x' (bb0), ...>;

   MIN/MAX Replacement
   -------------------

   This transformation, implemented in minmax_replacement, replaces

     bb0:
       if (a <= b) goto bb2; else goto bb1;
     bb1:
     bb2:
       x = PHI <b (bb1), a (bb0), ...>;

   with

     bb0:
       x' = MIN_EXPR (a, b)
     bb2:
       x = PHI <x' (bb0), ...>;

   A similar transformation is done for MAX_EXPR.

   This pass also performs a fifth transformation of a slightly different
   flavor.

   Adjacent Load Hoisting
   ----------------------

   This transformation replaces

     bb0:
       if (...) goto bb2; else goto bb1;
     bb1:
       x1 = (<expr>).field1;
       goto bb3;
     bb2:
       x2 = (<expr>).field2;
     bb3:
       # x = PHI <x1, x2>;

   with

     bb0:
       x1 = (<expr>).field1;
       x2 = (<expr>).field2;
       if (...) goto bb2; else goto bb1;
     bb1:
       goto bb3;
     bb2:
     bb3:
       # x = PHI <x1, x2>;

   The purpose of this transformation is to enable generation of conditional
   move instructions such as Intel CMOV or PowerPC ISEL.  Because one of
   the loads is speculative, the transformation is restricted to very
   specific cases to avoid introducing a page fault.  We are looking for
   the common idiom:

     if (...)
       x = y->left;
     else
       x = y->right;

   where left and right are typically adjacent pointers in a tree structure.  */

namespace {

const pass_data pass_data_phiopt =
{
  GIMPLE_PASS, /* type */
  "phiopt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_phiopt : public gimple_opt_pass
{
public:
  pass_phiopt (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_phiopt, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_phiopt (m_ctxt); }
  virtual bool gate (function *) { return flag_ssa_phiopt; }
  virtual unsigned int execute (function *)
    {
      return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
    }

}; // class pass_phiopt

} // anon namespace

gimple_opt_pass *
make_pass_phiopt (gcc::context *ctxt)
{
  return new pass_phiopt (ctxt);
}

namespace {

const pass_data pass_data_cselim =
{
  GIMPLE_PASS, /* type */
  "cselim", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_PHIOPT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_cselim : public gimple_opt_pass
{
public:
  pass_cselim (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cselim, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_cselim; }
  virtual unsigned int execute (function *) { return tree_ssa_cs_elim (); }

}; // class pass_cselim

} // anon namespace

gimple_opt_pass *
make_pass_cselim (gcc::context *ctxt)
{
  return new pass_cselim (ctxt);
}