PR libstdc++/56278
[official-gcc.git] / gcc / tree-ssa-phiopt.c
blob61199437dbe4c40031a47bde7543e2c7d2a026b9
1 /* Optimization of PHI nodes by converting them into straightline code.
2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "ggc.h"
25 #include "tree.h"
26 #include "flags.h"
27 #include "tm_p.h"
28 #include "basic-block.h"
29 #include "tree-flow.h"
30 #include "tree-pass.h"
31 #include "langhooks.h"
32 #include "pointer-set.h"
33 #include "domwalk.h"
34 #include "cfgloop.h"
35 #include "tree-data-ref.h"
36 #include "gimple-pretty-print.h"
37 #include "insn-config.h"
38 #include "expr.h"
39 #include "optabs.h"
41 #ifndef HAVE_conditional_move
42 #define HAVE_conditional_move (0)
43 #endif
45 static unsigned int tree_ssa_phiopt (void);
46 static unsigned int tree_ssa_phiopt_worker (bool, bool);
47 static bool conditional_replacement (basic_block, basic_block,
48 edge, edge, gimple, tree, tree);
49 static int value_replacement (basic_block, basic_block,
50 edge, edge, gimple, tree, tree);
51 static bool minmax_replacement (basic_block, basic_block,
52 edge, edge, gimple, tree, tree);
53 static bool abs_replacement (basic_block, basic_block,
54 edge, edge, gimple, tree, tree);
55 static bool cond_store_replacement (basic_block, basic_block, edge, edge,
56 struct pointer_set_t *);
57 static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
58 static struct pointer_set_t * get_non_trapping (void);
59 static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree);
60 static void hoist_adjacent_loads (basic_block, basic_block,
61 basic_block, basic_block);
62 static bool gate_hoist_loads (void);
64 /* This pass tries to replaces an if-then-else block with an
65 assignment. We have four kinds of transformations. Some of these
66 transformations are also performed by the ifcvt RTL optimizer.
68 Conditional Replacement
69 -----------------------
71 This transformation, implemented in conditional_replacement,
72 replaces
74 bb0:
75 if (cond) goto bb2; else goto bb1;
76 bb1:
77 bb2:
78 x = PHI <0 (bb1), 1 (bb0), ...>;
80 with
82 bb0:
83 x' = cond;
84 goto bb2;
85 bb2:
86 x = PHI <x' (bb0), ...>;
88 We remove bb1 as it becomes unreachable. This occurs often due to
89 gimplification of conditionals.
91 Value Replacement
92 -----------------
94 This transformation, implemented in value_replacement, replaces
96 bb0:
97 if (a != b) goto bb2; else goto bb1;
98 bb1:
99 bb2:
100 x = PHI <a (bb1), b (bb0), ...>;
102 with
104 bb0:
105 bb2:
106 x = PHI <b (bb0), ...>;
108 This opportunity can sometimes occur as a result of other
109 optimizations.
111 ABS Replacement
112 ---------------
114 This transformation, implemented in abs_replacement, replaces
116 bb0:
117 if (a >= 0) goto bb2; else goto bb1;
118 bb1:
119 x = -a;
120 bb2:
121 x = PHI <x (bb1), a (bb0), ...>;
123 with
125 bb0:
126 x' = ABS_EXPR< a >;
127 bb2:
128 x = PHI <x' (bb0), ...>;
130 MIN/MAX Replacement
131 -------------------
133 This transformation, minmax_replacement replaces
135 bb0:
136 if (a <= b) goto bb2; else goto bb1;
137 bb1:
138 bb2:
139 x = PHI <b (bb1), a (bb0), ...>;
141 with
143 bb0:
144 x' = MIN_EXPR (a, b)
145 bb2:
146 x = PHI <x' (bb0), ...>;
148 A similar transformation is done for MAX_EXPR.
151 This pass also performs a fifth transformation of a slightly different
152 flavor.
154 Adjacent Load Hoisting
155 ----------------------
157 This transformation replaces
159 bb0:
160 if (...) goto bb2; else goto bb1;
161 bb1:
162 x1 = (<expr>).field1;
163 goto bb3;
164 bb2:
165 x2 = (<expr>).field2;
166 bb3:
167 # x = PHI <x1, x2>;
169 with
171 bb0:
172 x1 = (<expr>).field1;
173 x2 = (<expr>).field2;
174 if (...) goto bb2; else goto bb1;
175 bb1:
176 goto bb3;
177 bb2:
178 bb3:
179 # x = PHI <x1, x2>;
181 The purpose of this transformation is to enable generation of conditional
182 move instructions such as Intel CMOVE or PowerPC ISEL. Because one of
183 the loads is speculative, the transformation is restricted to very
184 specific cases to avoid introducing a page fault. We are looking for
185 the common idiom:
187 if (...)
188 x = y->left;
189 else
190 x = y->right;
192 where left and right are typically adjacent pointers in a tree structure. */
194 static unsigned int
195 tree_ssa_phiopt (void)
197 return tree_ssa_phiopt_worker (false, gate_hoist_loads ());
200 /* This pass tries to transform conditional stores into unconditional
201 ones, enabling further simplifications with the simpler then and else
202 blocks. In particular it replaces this:
204 bb0:
205 if (cond) goto bb2; else goto bb1;
206 bb1:
207 *p = RHS;
208 bb2:
210 with
212 bb0:
213 if (cond) goto bb1; else goto bb2;
214 bb1:
215 condtmp' = *p;
216 bb2:
217 condtmp = PHI <RHS, condtmp'>
218 *p = condtmp;
220 This transformation can only be done under several constraints,
221 documented below. It also replaces:
223 bb0:
224 if (cond) goto bb2; else goto bb1;
225 bb1:
226 *p = RHS1;
227 goto bb3;
228 bb2:
229 *p = RHS2;
230 bb3:
232 with
234 bb0:
235 if (cond) goto bb3; else goto bb1;
236 bb1:
237 bb3:
238 condtmp = PHI <RHS1, RHS2>
239 *p = condtmp; */
241 static unsigned int
242 tree_ssa_cs_elim (void)
244 return tree_ssa_phiopt_worker (true, false);
247 /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */
249 static gimple
250 single_non_singleton_phi_for_edges (gimple_seq seq, edge e0, edge e1)
252 gimple_stmt_iterator i;
253 gimple phi = NULL;
254 if (gimple_seq_singleton_p (seq))
255 return gsi_stmt (gsi_start (seq));
256 for (i = gsi_start (seq); !gsi_end_p (i); gsi_next (&i))
258 gimple p = gsi_stmt (i);
259 /* If the PHI arguments are equal then we can skip this PHI. */
260 if (operand_equal_for_phi_arg_p (gimple_phi_arg_def (p, e0->dest_idx),
261 gimple_phi_arg_def (p, e1->dest_idx)))
262 continue;
264 /* If we already have a PHI that has the two edge arguments are
265 different, then return it is not a singleton for these PHIs. */
266 if (phi)
267 return NULL;
269 phi = p;
271 return phi;
274 /* The core routine of conditional store replacement and normal
275 phi optimizations. Both share much of the infrastructure in how
276 to match applicable basic block patterns. DO_STORE_ELIM is true
277 when we want to do conditional store replacement, false otherwise.
278 DO_HOIST_LOADS is true when we want to hoist adjacent loads out
279 of diamond control flow patterns, false otherwise. */
280 static unsigned int
281 tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
283 basic_block bb;
284 basic_block *bb_order;
285 unsigned n, i;
286 bool cfgchanged = false;
287 struct pointer_set_t *nontrap = 0;
289 if (do_store_elim)
290 /* Calculate the set of non-trapping memory accesses. */
291 nontrap = get_non_trapping ();
293 /* Search every basic block for COND_EXPR we may be able to optimize.
295 We walk the blocks in order that guarantees that a block with
296 a single predecessor is processed before the predecessor.
297 This ensures that we collapse inner ifs before visiting the
298 outer ones, and also that we do not try to visit a removed
299 block. */
300 bb_order = blocks_in_phiopt_order ();
301 n = n_basic_blocks - NUM_FIXED_BLOCKS;
303 for (i = 0; i < n; i++)
305 gimple cond_stmt, phi;
306 basic_block bb1, bb2;
307 edge e1, e2;
308 tree arg0, arg1;
310 bb = bb_order[i];
312 cond_stmt = last_stmt (bb);
313 /* Check to see if the last statement is a GIMPLE_COND. */
314 if (!cond_stmt
315 || gimple_code (cond_stmt) != GIMPLE_COND)
316 continue;
318 e1 = EDGE_SUCC (bb, 0);
319 bb1 = e1->dest;
320 e2 = EDGE_SUCC (bb, 1);
321 bb2 = e2->dest;
323 /* We cannot do the optimization on abnormal edges. */
324 if ((e1->flags & EDGE_ABNORMAL) != 0
325 || (e2->flags & EDGE_ABNORMAL) != 0)
326 continue;
328 /* If either bb1's succ or bb2 or bb2's succ is non NULL. */
329 if (EDGE_COUNT (bb1->succs) == 0
330 || bb2 == NULL
331 || EDGE_COUNT (bb2->succs) == 0)
332 continue;
334 /* Find the bb which is the fall through to the other. */
335 if (EDGE_SUCC (bb1, 0)->dest == bb2)
337 else if (EDGE_SUCC (bb2, 0)->dest == bb1)
339 basic_block bb_tmp = bb1;
340 edge e_tmp = e1;
341 bb1 = bb2;
342 bb2 = bb_tmp;
343 e1 = e2;
344 e2 = e_tmp;
346 else if (do_store_elim
347 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
349 basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;
351 if (!single_succ_p (bb1)
352 || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0
353 || !single_succ_p (bb2)
354 || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0
355 || EDGE_COUNT (bb3->preds) != 2)
356 continue;
357 if (cond_if_else_store_replacement (bb1, bb2, bb3))
358 cfgchanged = true;
359 continue;
361 else if (do_hoist_loads
362 && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
364 basic_block bb3 = EDGE_SUCC (bb1, 0)->dest;
366 if (!FLOAT_TYPE_P (TREE_TYPE (gimple_cond_lhs (cond_stmt)))
367 && single_succ_p (bb1)
368 && single_succ_p (bb2)
369 && single_pred_p (bb1)
370 && single_pred_p (bb2)
371 && EDGE_COUNT (bb->succs) == 2
372 && EDGE_COUNT (bb3->preds) == 2
373 /* If one edge or the other is dominant, a conditional move
374 is likely to perform worse than the well-predicted branch. */
375 && !predictable_edge_p (EDGE_SUCC (bb, 0))
376 && !predictable_edge_p (EDGE_SUCC (bb, 1)))
377 hoist_adjacent_loads (bb, bb1, bb2, bb3);
378 continue;
380 else
381 continue;
383 e1 = EDGE_SUCC (bb1, 0);
385 /* Make sure that bb1 is just a fall through. */
386 if (!single_succ_p (bb1)
387 || (e1->flags & EDGE_FALLTHRU) == 0)
388 continue;
390 /* Also make sure that bb1 only have one predecessor and that it
391 is bb. */
392 if (!single_pred_p (bb1)
393 || single_pred (bb1) != bb)
394 continue;
396 if (do_store_elim)
398 /* bb1 is the middle block, bb2 the join block, bb the split block,
399 e1 the fallthrough edge from bb1 to bb2. We can't do the
400 optimization if the join block has more than two predecessors. */
401 if (EDGE_COUNT (bb2->preds) > 2)
402 continue;
403 if (cond_store_replacement (bb1, bb2, e1, e2, nontrap))
404 cfgchanged = true;
406 else
408 gimple_seq phis = phi_nodes (bb2);
409 gimple_stmt_iterator gsi;
410 bool candorest = true;
412 /* Value replacement can work with more than one PHI
413 so try that first. */
414 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
416 phi = gsi_stmt (gsi);
417 arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
418 arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
419 if (value_replacement (bb, bb1, e1, e2, phi, arg0, arg1) == 2)
421 candorest = false;
422 cfgchanged = true;
423 break;
427 if (!candorest)
428 continue;
430 phi = single_non_singleton_phi_for_edges (phis, e1, e2);
431 if (!phi)
432 continue;
434 arg0 = gimple_phi_arg_def (phi, e1->dest_idx);
435 arg1 = gimple_phi_arg_def (phi, e2->dest_idx);
437 /* Something is wrong if we cannot find the arguments in the PHI
438 node. */
439 gcc_assert (arg0 != NULL && arg1 != NULL);
441 /* Do the replacement of conditional if it can be done. */
442 if (conditional_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
443 cfgchanged = true;
444 else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
445 cfgchanged = true;
446 else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
447 cfgchanged = true;
451 free (bb_order);
453 if (do_store_elim)
454 pointer_set_destroy (nontrap);
455 /* If the CFG has changed, we should cleanup the CFG. */
456 if (cfgchanged && do_store_elim)
458 /* In cond-store replacement we have added some loads on edges
459 and new VOPS (as we moved the store, and created a load). */
460 gsi_commit_edge_inserts ();
461 return TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
463 else if (cfgchanged)
464 return TODO_cleanup_cfg;
465 return 0;
468 /* Returns the list of basic blocks in the function in an order that guarantees
469 that if a block X has just a single predecessor Y, then Y is after X in the
470 ordering. */
472 basic_block *
473 blocks_in_phiopt_order (void)
475 basic_block x, y;
476 basic_block *order = XNEWVEC (basic_block, n_basic_blocks);
477 unsigned n = n_basic_blocks - NUM_FIXED_BLOCKS;
478 unsigned np, i;
479 sbitmap visited = sbitmap_alloc (last_basic_block);
481 #define MARK_VISITED(BB) (bitmap_set_bit (visited, (BB)->index))
482 #define VISITED_P(BB) (bitmap_bit_p (visited, (BB)->index))
484 bitmap_clear (visited);
486 MARK_VISITED (ENTRY_BLOCK_PTR);
487 FOR_EACH_BB (x)
489 if (VISITED_P (x))
490 continue;
492 /* Walk the predecessors of x as long as they have precisely one
493 predecessor and add them to the list, so that they get stored
494 after x. */
495 for (y = x, np = 1;
496 single_pred_p (y) && !VISITED_P (single_pred (y));
497 y = single_pred (y))
498 np++;
499 for (y = x, i = n - np;
500 single_pred_p (y) && !VISITED_P (single_pred (y));
501 y = single_pred (y), i++)
503 order[i] = y;
504 MARK_VISITED (y);
506 order[i] = y;
507 MARK_VISITED (y);
509 gcc_assert (i == n - 1);
510 n -= np;
513 sbitmap_free (visited);
514 gcc_assert (n == 0);
515 return order;
517 #undef MARK_VISITED
518 #undef VISITED_P
521 /* Replace PHI node element whose edge is E in block BB with variable NEW.
522 Remove the edge from COND_BLOCK which does not lead to BB (COND_BLOCK
523 is known to have two edges, one of which must reach BB). */
525 static void
526 replace_phi_edge_with_variable (basic_block cond_block,
527 edge e, gimple phi, tree new_tree)
529 basic_block bb = gimple_bb (phi);
530 basic_block block_to_remove;
531 gimple_stmt_iterator gsi;
533 /* Change the PHI argument to new. */
534 SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
536 /* Remove the empty basic block. */
537 if (EDGE_SUCC (cond_block, 0)->dest == bb)
539 EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
540 EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
541 EDGE_SUCC (cond_block, 0)->probability = REG_BR_PROB_BASE;
542 EDGE_SUCC (cond_block, 0)->count += EDGE_SUCC (cond_block, 1)->count;
544 block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
546 else
548 EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
549 EDGE_SUCC (cond_block, 1)->flags
550 &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
551 EDGE_SUCC (cond_block, 1)->probability = REG_BR_PROB_BASE;
552 EDGE_SUCC (cond_block, 1)->count += EDGE_SUCC (cond_block, 0)->count;
554 block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
556 delete_basic_block (block_to_remove);
558 /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
559 gsi = gsi_last_bb (cond_block);
560 gsi_remove (&gsi, true);
562 if (dump_file && (dump_flags & TDF_DETAILS))
563 fprintf (dump_file,
564 "COND_EXPR in block %d and PHI in block %d converted to straightline code.\n",
565 cond_block->index,
566 bb->index);
569 /* The function conditional_replacement does the main work of doing the
570 conditional replacement. Return true if the replacement is done.
571 Otherwise return false.
572 BB is the basic block where the replacement is going to be done on. ARG0
573 is argument 0 from PHI. Likewise for ARG1. */
575 static bool
576 conditional_replacement (basic_block cond_bb, basic_block middle_bb,
577 edge e0, edge e1, gimple phi,
578 tree arg0, tree arg1)
580 tree result;
581 gimple stmt, new_stmt;
582 tree cond;
583 gimple_stmt_iterator gsi;
584 edge true_edge, false_edge;
585 tree new_var, new_var2;
586 bool neg;
588 /* FIXME: Gimplification of complex type is too hard for now. */
589 /* We aren't prepared to handle vectors either (and it is a question
590 if it would be worthwhile anyway). */
591 if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
592 || POINTER_TYPE_P (TREE_TYPE (arg0)))
593 || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
594 || POINTER_TYPE_P (TREE_TYPE (arg1))))
595 return false;
597 /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
598 convert it to the conditional. */
599 if ((integer_zerop (arg0) && integer_onep (arg1))
600 || (integer_zerop (arg1) && integer_onep (arg0)))
601 neg = false;
602 else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
603 || (integer_zerop (arg1) && integer_all_onesp (arg0)))
604 neg = true;
605 else
606 return false;
608 if (!empty_block_p (middle_bb))
609 return false;
611 /* At this point we know we have a GIMPLE_COND with two successors.
612 One successor is BB, the other successor is an empty block which
613 falls through into BB.
615 There is a single PHI node at the join point (BB) and its arguments
616 are constants (0, 1) or (0, -1).
618 So, given the condition COND, and the two PHI arguments, we can
619 rewrite this PHI into non-branching code:
621 dest = (COND) or dest = COND'
623 We use the condition as-is if the argument associated with the
624 true edge has the value one or the argument associated with the
625 false edge as the value zero. Note that those conditions are not
626 the same since only one of the outgoing edges from the GIMPLE_COND
627 will directly reach BB and thus be associated with an argument. */
629 stmt = last_stmt (cond_bb);
630 result = PHI_RESULT (phi);
632 /* To handle special cases like floating point comparison, it is easier and
633 less error-prone to build a tree and gimplify it on the fly though it is
634 less efficient. */
635 cond = fold_build2_loc (gimple_location (stmt),
636 gimple_cond_code (stmt), boolean_type_node,
637 gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
639 /* We need to know which is the true edge and which is the false
640 edge so that we know when to invert the condition below. */
641 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
642 if ((e0 == true_edge && integer_zerop (arg0))
643 || (e0 == false_edge && !integer_zerop (arg0))
644 || (e1 == true_edge && integer_zerop (arg1))
645 || (e1 == false_edge && !integer_zerop (arg1)))
646 cond = fold_build1_loc (gimple_location (stmt),
647 TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
649 if (neg)
651 cond = fold_convert_loc (gimple_location (stmt),
652 TREE_TYPE (result), cond);
653 cond = fold_build1_loc (gimple_location (stmt),
654 NEGATE_EXPR, TREE_TYPE (cond), cond);
657 /* Insert our new statements at the end of conditional block before the
658 COND_STMT. */
659 gsi = gsi_for_stmt (stmt);
660 new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
661 GSI_SAME_STMT);
663 if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
665 source_location locus_0, locus_1;
667 new_var2 = make_ssa_name (TREE_TYPE (result), NULL);
668 new_stmt = gimple_build_assign_with_ops (CONVERT_EXPR, new_var2,
669 new_var, NULL);
670 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
671 new_var = new_var2;
673 /* Set the locus to the first argument, unless is doesn't have one. */
674 locus_0 = gimple_phi_arg_location (phi, 0);
675 locus_1 = gimple_phi_arg_location (phi, 1);
676 if (locus_0 == UNKNOWN_LOCATION)
677 locus_0 = locus_1;
678 gimple_set_location (new_stmt, locus_0);
681 replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
683 /* Note that we optimized this PHI. */
684 return true;
687 /* Update *ARG which is defined in STMT so that it contains the
688 computed value if that seems profitable. Return true if the
689 statement is made dead by that rewriting. */
691 static bool
692 jump_function_from_stmt (tree *arg, gimple stmt)
694 enum tree_code code = gimple_assign_rhs_code (stmt);
695 if (code == ADDR_EXPR)
697 /* For arg = &p->i transform it to p, if possible. */
698 tree rhs1 = gimple_assign_rhs1 (stmt);
699 HOST_WIDE_INT offset;
700 tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (rhs1, 0),
701 &offset);
702 if (tem
703 && TREE_CODE (tem) == MEM_REF
704 && (mem_ref_offset (tem) + double_int::from_shwi (offset)).is_zero ())
706 *arg = TREE_OPERAND (tem, 0);
707 return true;
710 /* TODO: Much like IPA-CP jump-functions we want to handle constant
711 additions symbolically here, and we'd need to update the comparison
712 code that compares the arg + cst tuples in our caller. For now the
713 code above exactly handles the VEC_BASE pattern from vec.h. */
714 return false;
717 /* The function value_replacement does the main work of doing the value
718 replacement. Return non-zero if the replacement is done. Otherwise return
719 0. If we remove the middle basic block, return 2.
720 BB is the basic block where the replacement is going to be done on. ARG0
721 is argument 0 from the PHI. Likewise for ARG1. */
723 static int
724 value_replacement (basic_block cond_bb, basic_block middle_bb,
725 edge e0, edge e1, gimple phi,
726 tree arg0, tree arg1)
728 gimple_stmt_iterator gsi;
729 gimple cond;
730 edge true_edge, false_edge;
731 enum tree_code code;
732 bool emtpy_or_with_defined_p = true;
734 /* If the type says honor signed zeros we cannot do this
735 optimization. */
736 if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
737 return 0;
739 /* If there is a statement in MIDDLE_BB that defines one of the PHI
740 arguments, then adjust arg0 or arg1. */
741 gsi = gsi_after_labels (middle_bb);
742 if (!gsi_end_p (gsi) && is_gimple_debug (gsi_stmt (gsi)))
743 gsi_next_nondebug (&gsi);
744 while (!gsi_end_p (gsi))
746 gimple stmt = gsi_stmt (gsi);
747 tree lhs;
748 gsi_next_nondebug (&gsi);
749 if (!is_gimple_assign (stmt))
751 emtpy_or_with_defined_p = false;
752 continue;
754 /* Now try to adjust arg0 or arg1 according to the computation
755 in the statement. */
756 lhs = gimple_assign_lhs (stmt);
757 if (!(lhs == arg0
758 && jump_function_from_stmt (&arg0, stmt))
759 || (lhs == arg1
760 && jump_function_from_stmt (&arg1, stmt)))
761 emtpy_or_with_defined_p = false;
764 cond = last_stmt (cond_bb);
765 code = gimple_cond_code (cond);
767 /* This transformation is only valid for equality comparisons. */
768 if (code != NE_EXPR && code != EQ_EXPR)
769 return 0;
771 /* We need to know which is the true edge and which is the false
772 edge so that we know if have abs or negative abs. */
773 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
775 /* At this point we know we have a COND_EXPR with two successors.
776 One successor is BB, the other successor is an empty block which
777 falls through into BB.
779 The condition for the COND_EXPR is known to be NE_EXPR or EQ_EXPR.
781 There is a single PHI node at the join point (BB) with two arguments.
783 We now need to verify that the two arguments in the PHI node match
784 the two arguments to the equality comparison. */
786 if ((operand_equal_for_phi_arg_p (arg0, gimple_cond_lhs (cond))
787 && operand_equal_for_phi_arg_p (arg1, gimple_cond_rhs (cond)))
788 || (operand_equal_for_phi_arg_p (arg1, gimple_cond_lhs (cond))
789 && operand_equal_for_phi_arg_p (arg0, gimple_cond_rhs (cond))))
791 edge e;
792 tree arg;
794 /* For NE_EXPR, we want to build an assignment result = arg where
795 arg is the PHI argument associated with the true edge. For
796 EQ_EXPR we want the PHI argument associated with the false edge. */
797 e = (code == NE_EXPR ? true_edge : false_edge);
799 /* Unfortunately, E may not reach BB (it may instead have gone to
800 OTHER_BLOCK). If that is the case, then we want the single outgoing
801 edge from OTHER_BLOCK which reaches BB and represents the desired
802 path from COND_BLOCK. */
803 if (e->dest == middle_bb)
804 e = single_succ_edge (e->dest);
806 /* Now we know the incoming edge to BB that has the argument for the
807 RHS of our new assignment statement. */
808 if (e0 == e)
809 arg = arg0;
810 else
811 arg = arg1;
813 /* If the middle basic block was empty or is defining the
814 PHI arguments and this is a single phi where the args are different
815 for the edges e0 and e1 then we can remove the middle basic block. */
816 if (emtpy_or_with_defined_p
817 && single_non_singleton_phi_for_edges (phi_nodes (gimple_bb (phi)),
818 e0, e1))
820 replace_phi_edge_with_variable (cond_bb, e1, phi, arg);
821 /* Note that we optimized this PHI. */
822 return 2;
824 else
826 /* Replace the PHI arguments with arg. */
827 SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
828 SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
829 if (dump_file && (dump_flags & TDF_DETAILS))
831 fprintf (dump_file, "PHI ");
832 print_generic_expr (dump_file, gimple_phi_result (phi), 0);
833 fprintf (dump_file, " reduced for COND_EXPR in block %d to ",
834 cond_bb->index);
835 print_generic_expr (dump_file, arg, 0);
836 fprintf (dump_file, ".\n");
838 return 1;
842 return 0;
845 /* The function minmax_replacement does the main work of doing the minmax
846 replacement. Return true if the replacement is done. Otherwise return
847 false.
848 BB is the basic block where the replacement is going to be done on. ARG0
849 is argument 0 from the PHI. Likewise for ARG1. */
851 static bool
852 minmax_replacement (basic_block cond_bb, basic_block middle_bb,
853 edge e0, edge e1, gimple phi,
854 tree arg0, tree arg1)
856 tree result, type;
857 gimple cond, new_stmt;
858 edge true_edge, false_edge;
859 enum tree_code cmp, minmax, ass_code;
860 tree smaller, larger, arg_true, arg_false;
861 gimple_stmt_iterator gsi, gsi_from;
863 type = TREE_TYPE (PHI_RESULT (phi));
865 /* The optimization may be unsafe due to NaNs. */
866 if (HONOR_NANS (TYPE_MODE (type)))
867 return false;
869 cond = last_stmt (cond_bb);
870 cmp = gimple_cond_code (cond);
872 /* This transformation is only valid for order comparisons. Record which
873 operand is smaller/larger if the result of the comparison is true. */
874 if (cmp == LT_EXPR || cmp == LE_EXPR)
876 smaller = gimple_cond_lhs (cond);
877 larger = gimple_cond_rhs (cond);
879 else if (cmp == GT_EXPR || cmp == GE_EXPR)
881 smaller = gimple_cond_rhs (cond);
882 larger = gimple_cond_lhs (cond);
884 else
885 return false;
887 /* We need to know which is the true edge and which is the false
888 edge so that we know if have abs or negative abs. */
889 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
891 /* Forward the edges over the middle basic block. */
892 if (true_edge->dest == middle_bb)
893 true_edge = EDGE_SUCC (true_edge->dest, 0);
894 if (false_edge->dest == middle_bb)
895 false_edge = EDGE_SUCC (false_edge->dest, 0);
897 if (true_edge == e0)
899 gcc_assert (false_edge == e1);
900 arg_true = arg0;
901 arg_false = arg1;
903 else
905 gcc_assert (false_edge == e0);
906 gcc_assert (true_edge == e1);
907 arg_true = arg1;
908 arg_false = arg0;
911 if (empty_block_p (middle_bb))
913 if (operand_equal_for_phi_arg_p (arg_true, smaller)
914 && operand_equal_for_phi_arg_p (arg_false, larger))
916 /* Case
918 if (smaller < larger)
919 rslt = smaller;
920 else
921 rslt = larger; */
922 minmax = MIN_EXPR;
924 else if (operand_equal_for_phi_arg_p (arg_false, smaller)
925 && operand_equal_for_phi_arg_p (arg_true, larger))
926 minmax = MAX_EXPR;
927 else
928 return false;
930 else
932 /* Recognize the following case, assuming d <= u:
934 if (a <= u)
935 b = MAX (a, d);
936 x = PHI <b, u>
938 This is equivalent to
940 b = MAX (a, d);
941 x = MIN (b, u); */
943 gimple assign = last_and_only_stmt (middle_bb);
944 tree lhs, op0, op1, bound;
946 if (!assign
947 || gimple_code (assign) != GIMPLE_ASSIGN)
948 return false;
950 lhs = gimple_assign_lhs (assign);
951 ass_code = gimple_assign_rhs_code (assign);
952 if (ass_code != MAX_EXPR && ass_code != MIN_EXPR)
953 return false;
954 op0 = gimple_assign_rhs1 (assign);
955 op1 = gimple_assign_rhs2 (assign);
957 if (true_edge->src == middle_bb)
959 /* We got here if the condition is true, i.e., SMALLER < LARGER. */
960 if (!operand_equal_for_phi_arg_p (lhs, arg_true))
961 return false;
963 if (operand_equal_for_phi_arg_p (arg_false, larger))
965 /* Case
967 if (smaller < larger)
969 r' = MAX_EXPR (smaller, bound)
971 r = PHI <r', larger> --> to be turned to MIN_EXPR. */
972 if (ass_code != MAX_EXPR)
973 return false;
975 minmax = MIN_EXPR;
976 if (operand_equal_for_phi_arg_p (op0, smaller))
977 bound = op1;
978 else if (operand_equal_for_phi_arg_p (op1, smaller))
979 bound = op0;
980 else
981 return false;
983 /* We need BOUND <= LARGER. */
984 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
985 bound, larger)))
986 return false;
988 else if (operand_equal_for_phi_arg_p (arg_false, smaller))
990 /* Case
992 if (smaller < larger)
994 r' = MIN_EXPR (larger, bound)
996 r = PHI <r', smaller> --> to be turned to MAX_EXPR. */
997 if (ass_code != MIN_EXPR)
998 return false;
1000 minmax = MAX_EXPR;
1001 if (operand_equal_for_phi_arg_p (op0, larger))
1002 bound = op1;
1003 else if (operand_equal_for_phi_arg_p (op1, larger))
1004 bound = op0;
1005 else
1006 return false;
1008 /* We need BOUND >= SMALLER. */
1009 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
1010 bound, smaller)))
1011 return false;
1013 else
1014 return false;
1016 else
1018 /* We got here if the condition is false, i.e., SMALLER > LARGER. */
1019 if (!operand_equal_for_phi_arg_p (lhs, arg_false))
1020 return false;
1022 if (operand_equal_for_phi_arg_p (arg_true, larger))
1024 /* Case
1026 if (smaller > larger)
1028 r' = MIN_EXPR (smaller, bound)
1030 r = PHI <r', larger> --> to be turned to MAX_EXPR. */
1031 if (ass_code != MIN_EXPR)
1032 return false;
1034 minmax = MAX_EXPR;
1035 if (operand_equal_for_phi_arg_p (op0, smaller))
1036 bound = op1;
1037 else if (operand_equal_for_phi_arg_p (op1, smaller))
1038 bound = op0;
1039 else
1040 return false;
1042 /* We need BOUND >= LARGER. */
1043 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
1044 bound, larger)))
1045 return false;
1047 else if (operand_equal_for_phi_arg_p (arg_true, smaller))
1049 /* Case
1051 if (smaller > larger)
1053 r' = MAX_EXPR (larger, bound)
1055 r = PHI <r', smaller> --> to be turned to MIN_EXPR. */
1056 if (ass_code != MAX_EXPR)
1057 return false;
1059 minmax = MIN_EXPR;
1060 if (operand_equal_for_phi_arg_p (op0, larger))
1061 bound = op1;
1062 else if (operand_equal_for_phi_arg_p (op1, larger))
1063 bound = op0;
1064 else
1065 return false;
1067 /* We need BOUND <= SMALLER. */
1068 if (!integer_nonzerop (fold_build2 (LE_EXPR, boolean_type_node,
1069 bound, smaller)))
1070 return false;
1072 else
1073 return false;
1076 /* Move the statement from the middle block. */
1077 gsi = gsi_last_bb (cond_bb);
1078 gsi_from = gsi_last_nondebug_bb (middle_bb);
1079 gsi_move_before (&gsi_from, &gsi);
1082 /* Emit the statement to compute min/max. */
1083 result = duplicate_ssa_name (PHI_RESULT (phi), NULL);
1084 new_stmt = gimple_build_assign_with_ops (minmax, result, arg0, arg1);
1085 gsi = gsi_last_bb (cond_bb);
1086 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1088 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1089 return true;
1092 /* The function absolute_replacement does the main work of doing the absolute
1093 replacement. Return true if the replacement is done. Otherwise return
1094 false.
1095 bb is the basic block where the replacement is going to be done on. arg0
1096 is argument 0 from the phi. Likewise for arg1. */
1098 static bool
1099 abs_replacement (basic_block cond_bb, basic_block middle_bb,
1100 edge e0 ATTRIBUTE_UNUSED, edge e1,
1101 gimple phi, tree arg0, tree arg1)
1103 tree result;
1104 gimple new_stmt, cond;
1105 gimple_stmt_iterator gsi;
1106 edge true_edge, false_edge;
1107 gimple assign;
1108 edge e;
1109 tree rhs, lhs;
1110 bool negate;
1111 enum tree_code cond_code;
1113 /* If the type says honor signed zeros we cannot do this
1114 optimization. */
1115 if (HONOR_SIGNED_ZEROS (TYPE_MODE (TREE_TYPE (arg1))))
1116 return false;
1118 /* OTHER_BLOCK must have only one executable statement which must have the
1119 form arg0 = -arg1 or arg1 = -arg0. */
1121 assign = last_and_only_stmt (middle_bb);
1122 /* If we did not find the proper negation assignment, then we can not
1123 optimize. */
1124 if (assign == NULL)
1125 return false;
1127 /* If we got here, then we have found the only executable statement
1128 in OTHER_BLOCK. If it is anything other than arg = -arg1 or
1129 arg1 = -arg0, then we can not optimize. */
1130 if (gimple_code (assign) != GIMPLE_ASSIGN)
1131 return false;
1133 lhs = gimple_assign_lhs (assign);
1135 if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
1136 return false;
1138 rhs = gimple_assign_rhs1 (assign);
1140 /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
1141 if (!(lhs == arg0 && rhs == arg1)
1142 && !(lhs == arg1 && rhs == arg0))
1143 return false;
1145 cond = last_stmt (cond_bb);
1146 result = PHI_RESULT (phi);
1148 /* Only relationals comparing arg[01] against zero are interesting. */
1149 cond_code = gimple_cond_code (cond);
1150 if (cond_code != GT_EXPR && cond_code != GE_EXPR
1151 && cond_code != LT_EXPR && cond_code != LE_EXPR)
1152 return false;
1154 /* Make sure the conditional is arg[01] OP y. */
1155 if (gimple_cond_lhs (cond) != rhs)
1156 return false;
1158 if (FLOAT_TYPE_P (TREE_TYPE (gimple_cond_rhs (cond)))
1159 ? real_zerop (gimple_cond_rhs (cond))
1160 : integer_zerop (gimple_cond_rhs (cond)))
1162 else
1163 return false;
1165 /* We need to know which is the true edge and which is the false
1166 edge so that we know if have abs or negative abs. */
1167 extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1169 /* For GT_EXPR/GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
1170 will need to negate the result. Similarly for LT_EXPR/LE_EXPR if
1171 the false edge goes to OTHER_BLOCK. */
1172 if (cond_code == GT_EXPR || cond_code == GE_EXPR)
1173 e = true_edge;
1174 else
1175 e = false_edge;
1177 if (e->dest == middle_bb)
1178 negate = true;
1179 else
1180 negate = false;
1182 result = duplicate_ssa_name (result, NULL);
1184 if (negate)
1185 lhs = make_ssa_name (TREE_TYPE (result), NULL);
1186 else
1187 lhs = result;
1189 /* Build the modify expression with abs expression. */
1190 new_stmt = gimple_build_assign_with_ops (ABS_EXPR, lhs, rhs, NULL);
1192 gsi = gsi_last_bb (cond_bb);
1193 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1195 if (negate)
1197 /* Get the right GSI. We want to insert after the recently
1198 added ABS_EXPR statement (which we know is the first statement
1199 in the block. */
1200 new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, result, lhs, NULL);
1202 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1205 replace_phi_edge_with_variable (cond_bb, e1, phi, result);
1207 /* Note that we optimized this PHI. */
1208 return true;
1211 /* Auxiliary functions to determine the set of memory accesses which
1212 can't trap because they are preceded by accesses to the same memory
1213 portion. We do that for MEM_REFs, so we only need to track
1214 the SSA_NAME of the pointer indirectly referenced. The algorithm
1215 simply is a walk over all instructions in dominator order. When
1216 we see an MEM_REF we determine if we've already seen a same
1217 ref anywhere up to the root of the dominator tree. If we do the
1218 current access can't trap. If we don't see any dominating access
1219 the current access might trap, but might also make later accesses
1220 non-trapping, so we remember it. We need to be careful with loads
1221 or stores, for instance a load might not trap, while a store would,
1222 so if we see a dominating read access this doesn't mean that a later
1223 write access would not trap. Hence we also need to differentiate the
1224 type of access(es) seen.
1226 ??? We currently are very conservative and assume that a load might
1227 trap even if a store doesn't (write-only memory). This probably is
1228 overly conservative. */
1230 /* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
1231 through it was seen, which would constitute a no-trap region for
1232 same accesses. */
1233 struct name_to_bb
1235 unsigned int ssa_name_ver;
1236 unsigned int phase;
1237 bool store;
1238 HOST_WIDE_INT offset, size;
1239 basic_block bb;
1242 /* The hash table for remembering what we've seen. */
1243 static htab_t seen_ssa_names;
1245 /* Used for quick clearing of the hash-table when we see calls.
1246 Hash entries with phase < nt_call_phase are invalid. */
1247 static unsigned int nt_call_phase;
1249 /* The set of MEM_REFs which can't trap. */
1250 static struct pointer_set_t *nontrap_set;
1252 /* The hash function. */
1253 static hashval_t
1254 name_to_bb_hash (const void *p)
1256 const struct name_to_bb *n = (const struct name_to_bb *) p;
1257 return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
1258 ^ (n->offset << 6) ^ (n->size << 3);
1261 /* The equality function of *P1 and *P2. */
1262 static int
1263 name_to_bb_eq (const void *p1, const void *p2)
1265 const struct name_to_bb *n1 = (const struct name_to_bb *)p1;
1266 const struct name_to_bb *n2 = (const struct name_to_bb *)p2;
1268 return n1->ssa_name_ver == n2->ssa_name_ver
1269 && n1->store == n2->store
1270 && n1->offset == n2->offset
1271 && n1->size == n2->size;
1274 /* We see the expression EXP in basic block BB. If it's an interesting
1275 expression (an MEM_REF through an SSA_NAME) possibly insert the
1276 expression into the set NONTRAP or the hash table of seen expressions.
1277 STORE is true if this expression is on the LHS, otherwise it's on
1278 the RHS. */
1279 static void
1280 add_or_mark_expr (basic_block bb, tree exp,
1281 struct pointer_set_t *nontrap, bool store)
1283 HOST_WIDE_INT size;
1285 if (TREE_CODE (exp) == MEM_REF
1286 && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
1287 && host_integerp (TREE_OPERAND (exp, 1), 0)
1288 && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
1290 tree name = TREE_OPERAND (exp, 0);
1291 struct name_to_bb map;
1292 void **slot;
1293 struct name_to_bb *n2bb;
1294 basic_block found_bb = 0;
1296 /* Try to find the last seen MEM_REF through the same
1297 SSA_NAME, which can trap. */
1298 map.ssa_name_ver = SSA_NAME_VERSION (name);
1299 map.phase = 0;
1300 map.bb = 0;
1301 map.store = store;
1302 map.offset = tree_low_cst (TREE_OPERAND (exp, 1), 0);
1303 map.size = size;
1305 slot = htab_find_slot (seen_ssa_names, &map, INSERT);
1306 n2bb = (struct name_to_bb *) *slot;
1307 if (n2bb && n2bb->phase >= nt_call_phase)
1308 found_bb = n2bb->bb;
1310 /* If we've found a trapping MEM_REF, _and_ it dominates EXP
1311 (it's in a basic block on the path from us to the dominator root)
1312 then we can't trap. */
1313 if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
1315 pointer_set_insert (nontrap, exp);
1317 else
1319 /* EXP might trap, so insert it into the hash table. */
1320 if (n2bb)
1322 n2bb->phase = nt_call_phase;
1323 n2bb->bb = bb;
1325 else
1327 n2bb = XNEW (struct name_to_bb);
1328 n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
1329 n2bb->phase = nt_call_phase;
1330 n2bb->bb = bb;
1331 n2bb->store = store;
1332 n2bb->offset = map.offset;
1333 n2bb->size = size;
1334 *slot = n2bb;
1340 /* Return true when CALL is a call stmt that definitely doesn't
1341 free any memory or makes it unavailable otherwise. */
1342 static bool
1343 nonfreeing_call_p (gimple call)
1345 if (gimple_call_builtin_p (call, BUILT_IN_NORMAL)
1346 && gimple_call_flags (call) & ECF_LEAF)
1347 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call)))
1349 /* Just in case these become ECF_LEAF in the future. */
1350 case BUILT_IN_FREE:
1351 case BUILT_IN_TM_FREE:
1352 case BUILT_IN_REALLOC:
1353 case BUILT_IN_STACK_RESTORE:
1354 return false;
1355 default:
1356 return true;
1359 return false;
1362 /* Called by walk_dominator_tree, when entering the block BB. */
1363 static void
1364 nt_init_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
1366 edge e;
1367 edge_iterator ei;
1368 gimple_stmt_iterator gsi;
1370 /* If we haven't seen all our predecessors, clear the hash-table. */
1371 FOR_EACH_EDGE (e, ei, bb->preds)
1372 if ((((size_t)e->src->aux) & 2) == 0)
1374 nt_call_phase++;
1375 break;
1378 /* Mark this BB as being on the path to dominator root and as visited. */
1379 bb->aux = (void*)(1 | 2);
1381 /* And walk the statements in order. */
1382 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1384 gimple stmt = gsi_stmt (gsi);
1386 if (is_gimple_call (stmt) && !nonfreeing_call_p (stmt))
1387 nt_call_phase++;
1388 else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt))
1390 add_or_mark_expr (bb, gimple_assign_lhs (stmt), nontrap_set, true);
1391 add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), nontrap_set, false);
1396 /* Called by walk_dominator_tree, when basic block BB is exited. */
1397 static void
1398 nt_fini_block (struct dom_walk_data *data ATTRIBUTE_UNUSED, basic_block bb)
1400 /* This BB isn't on the path to dominator root anymore. */
1401 bb->aux = (void*)2;
1404 /* This is the entry point of gathering non trapping memory accesses.
1405 It will do a dominator walk over the whole function, and it will
1406 make use of the bb->aux pointers. It returns a set of trees
1407 (the MEM_REFs itself) which can't trap. */
1408 static struct pointer_set_t *
1409 get_non_trapping (void)
1411 struct pointer_set_t *nontrap;
1412 struct dom_walk_data walk_data;
1414 nt_call_phase = 0;
1415 nontrap = pointer_set_create ();
1416 seen_ssa_names = htab_create (128, name_to_bb_hash, name_to_bb_eq,
1417 free);
1418 /* We're going to do a dominator walk, so ensure that we have
1419 dominance information. */
1420 calculate_dominance_info (CDI_DOMINATORS);
1422 /* Setup callbacks for the generic dominator tree walker. */
1423 nontrap_set = nontrap;
1424 walk_data.dom_direction = CDI_DOMINATORS;
1425 walk_data.initialize_block_local_data = NULL;
1426 walk_data.before_dom_children = nt_init_block;
1427 walk_data.after_dom_children = nt_fini_block;
1428 walk_data.global_data = NULL;
1429 walk_data.block_local_data_size = 0;
1431 init_walk_dominator_tree (&walk_data);
1432 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
1433 fini_walk_dominator_tree (&walk_data);
1434 htab_delete (seen_ssa_names);
1436 clear_aux_for_blocks ();
1437 return nontrap;
1440 /* Do the main work of conditional store replacement. We already know
1441 that the recognized pattern looks like so:
1443 split:
1444 if (cond) goto MIDDLE_BB; else goto JOIN_BB (edge E1)
1445 MIDDLE_BB:
1446 something
1447 fallthrough (edge E0)
1448 JOIN_BB:
1449 some more
1451 We check that MIDDLE_BB contains only one store, that that store
1452 doesn't trap (not via NOTRAP, but via checking if an access to the same
1453 memory location dominates us) and that the store has a "simple" RHS. */
1455 static bool
1456 cond_store_replacement (basic_block middle_bb, basic_block join_bb,
1457 edge e0, edge e1, struct pointer_set_t *nontrap)
1459 gimple assign = last_and_only_stmt (middle_bb);
1460 tree lhs, rhs, name, name2;
1461 gimple newphi, new_stmt;
1462 gimple_stmt_iterator gsi;
1463 source_location locus;
1465 /* Check if middle_bb contains of only one store. */
1466 if (!assign
1467 || !gimple_assign_single_p (assign)
1468 || gimple_has_volatile_ops (assign))
1469 return false;
1471 locus = gimple_location (assign);
1472 lhs = gimple_assign_lhs (assign);
1473 rhs = gimple_assign_rhs1 (assign);
1474 if (TREE_CODE (lhs) != MEM_REF
1475 || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
1476 || !is_gimple_reg_type (TREE_TYPE (lhs)))
1477 return false;
1479 /* Prove that we can move the store down. We could also check
1480 TREE_THIS_NOTRAP here, but in that case we also could move stores,
1481 whose value is not available readily, which we want to avoid. */
1482 if (!pointer_set_contains (nontrap, lhs))
1483 return false;
1485 /* Now we've checked the constraints, so do the transformation:
1486 1) Remove the single store. */
1487 gsi = gsi_for_stmt (assign);
1488 unlink_stmt_vdef (assign);
1489 gsi_remove (&gsi, true);
1490 release_defs (assign);
1492 /* 2) Insert a load from the memory of the store to the temporary
1493 on the edge which did not contain the store. */
1494 lhs = unshare_expr (lhs);
1495 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
1496 new_stmt = gimple_build_assign (name, lhs);
1497 gimple_set_location (new_stmt, locus);
1498 gsi_insert_on_edge (e1, new_stmt);
1500 /* 3) Create a PHI node at the join block, with one argument
1501 holding the old RHS, and the other holding the temporary
1502 where we stored the old memory contents. */
1503 name2 = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
1504 newphi = create_phi_node (name2, join_bb);
1505 add_phi_arg (newphi, rhs, e0, locus);
1506 add_phi_arg (newphi, name, e1, locus);
1508 lhs = unshare_expr (lhs);
1509 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
1511 /* 4) Insert that PHI node. */
1512 gsi = gsi_after_labels (join_bb);
1513 if (gsi_end_p (gsi))
1515 gsi = gsi_last_bb (join_bb);
1516 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1518 else
1519 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1521 return true;
1524 /* Do the main work of conditional store replacement. */
1526 static bool
1527 cond_if_else_store_replacement_1 (basic_block then_bb, basic_block else_bb,
1528 basic_block join_bb, gimple then_assign,
1529 gimple else_assign)
1531 tree lhs_base, lhs, then_rhs, else_rhs, name;
1532 source_location then_locus, else_locus;
1533 gimple_stmt_iterator gsi;
1534 gimple newphi, new_stmt;
1536 if (then_assign == NULL
1537 || !gimple_assign_single_p (then_assign)
1538 || gimple_clobber_p (then_assign)
1539 || gimple_has_volatile_ops (then_assign)
1540 || else_assign == NULL
1541 || !gimple_assign_single_p (else_assign)
1542 || gimple_clobber_p (else_assign)
1543 || gimple_has_volatile_ops (else_assign))
1544 return false;
1546 lhs = gimple_assign_lhs (then_assign);
1547 if (!is_gimple_reg_type (TREE_TYPE (lhs))
1548 || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0))
1549 return false;
1551 lhs_base = get_base_address (lhs);
1552 if (lhs_base == NULL_TREE
1553 || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF))
1554 return false;
1556 then_rhs = gimple_assign_rhs1 (then_assign);
1557 else_rhs = gimple_assign_rhs1 (else_assign);
1558 then_locus = gimple_location (then_assign);
1559 else_locus = gimple_location (else_assign);
1561 /* Now we've checked the constraints, so do the transformation:
1562 1) Remove the stores. */
1563 gsi = gsi_for_stmt (then_assign);
1564 unlink_stmt_vdef (then_assign);
1565 gsi_remove (&gsi, true);
1566 release_defs (then_assign);
1568 gsi = gsi_for_stmt (else_assign);
1569 unlink_stmt_vdef (else_assign);
1570 gsi_remove (&gsi, true);
1571 release_defs (else_assign);
1573 /* 2) Create a PHI node at the join block, with one argument
1574 holding the old RHS, and the other holding the temporary
1575 where we stored the old memory contents. */
1576 name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
1577 newphi = create_phi_node (name, join_bb);
1578 add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus);
1579 add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus);
1581 new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
1583 /* 3) Insert that PHI node. */
1584 gsi = gsi_after_labels (join_bb);
1585 if (gsi_end_p (gsi))
1587 gsi = gsi_last_bb (join_bb);
1588 gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
1590 else
1591 gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
1593 return true;
1596 /* Conditional store replacement. We already know
1597 that the recognized pattern looks like so:
1599 split:
1600 if (cond) goto THEN_BB; else goto ELSE_BB (edge E1)
1601 THEN_BB:
1603 X = Y;
1605 goto JOIN_BB;
1606 ELSE_BB:
1608 X = Z;
1610 fallthrough (edge E0)
1611 JOIN_BB:
1612 some more
1614 We check that it is safe to sink the store to JOIN_BB by verifying that
1615 there are no read-after-write or write-after-write dependencies in
1616 THEN_BB and ELSE_BB. */
1618 static bool
1619 cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb,
1620 basic_block join_bb)
1622 gimple then_assign = last_and_only_stmt (then_bb);
1623 gimple else_assign = last_and_only_stmt (else_bb);
1624 vec<data_reference_p> then_datarefs, else_datarefs;
1625 vec<ddr_p> then_ddrs, else_ddrs;
1626 gimple then_store, else_store;
1627 bool found, ok = false, res;
1628 struct data_dependence_relation *ddr;
1629 data_reference_p then_dr, else_dr;
1630 int i, j;
1631 tree then_lhs, else_lhs;
1632 vec<gimple> then_stores, else_stores;
1633 basic_block blocks[3];
1635 if (MAX_STORES_TO_SINK == 0)
1636 return false;
1638 /* Handle the case with single statement in THEN_BB and ELSE_BB. */
1639 if (then_assign && else_assign)
1640 return cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
1641 then_assign, else_assign);
1643 /* Find data references. */
1644 then_datarefs.create (1);
1645 else_datarefs.create (1);
1646 if ((find_data_references_in_bb (NULL, then_bb, &then_datarefs)
1647 == chrec_dont_know)
1648 || !then_datarefs.length ()
1649 || (find_data_references_in_bb (NULL, else_bb, &else_datarefs)
1650 == chrec_dont_know)
1651 || !else_datarefs.length ())
1653 free_data_refs (then_datarefs);
1654 free_data_refs (else_datarefs);
1655 return false;
1658 /* Find pairs of stores with equal LHS. */
1659 then_stores.create (1);
1660 else_stores.create (1);
1661 FOR_EACH_VEC_ELT (then_datarefs, i, then_dr)
1663 if (DR_IS_READ (then_dr))
1664 continue;
1666 then_store = DR_STMT (then_dr);
1667 then_lhs = gimple_get_lhs (then_store);
1668 found = false;
1670 FOR_EACH_VEC_ELT (else_datarefs, j, else_dr)
1672 if (DR_IS_READ (else_dr))
1673 continue;
1675 else_store = DR_STMT (else_dr);
1676 else_lhs = gimple_get_lhs (else_store);
1678 if (operand_equal_p (then_lhs, else_lhs, 0))
1680 found = true;
1681 break;
1685 if (!found)
1686 continue;
1688 then_stores.safe_push (then_store);
1689 else_stores.safe_push (else_store);
1692 /* No pairs of stores found. */
1693 if (!then_stores.length ()
1694 || then_stores.length () > (unsigned) MAX_STORES_TO_SINK)
1696 free_data_refs (then_datarefs);
1697 free_data_refs (else_datarefs);
1698 then_stores.release ();
1699 else_stores.release ();
1700 return false;
1703 /* Compute and check data dependencies in both basic blocks. */
1704 then_ddrs.create (1);
1705 else_ddrs.create (1);
1706 if (!compute_all_dependences (then_datarefs, &then_ddrs,
1707 vNULL, false)
1708 || !compute_all_dependences (else_datarefs, &else_ddrs,
1709 vNULL, false))
1711 free_dependence_relations (then_ddrs);
1712 free_dependence_relations (else_ddrs);
1713 free_data_refs (then_datarefs);
1714 free_data_refs (else_datarefs);
1715 then_stores.release ();
1716 else_stores.release ();
1717 return false;
1719 blocks[0] = then_bb;
1720 blocks[1] = else_bb;
1721 blocks[2] = join_bb;
1722 renumber_gimple_stmt_uids_in_blocks (blocks, 3);
1724 /* Check that there are no read-after-write or write-after-write dependencies
1725 in THEN_BB. */
1726 FOR_EACH_VEC_ELT (then_ddrs, i, ddr)
1728 struct data_reference *dra = DDR_A (ddr);
1729 struct data_reference *drb = DDR_B (ddr);
1731 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1732 && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
1733 && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
1734 || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
1735 && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
1736 || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
1738 free_dependence_relations (then_ddrs);
1739 free_dependence_relations (else_ddrs);
1740 free_data_refs (then_datarefs);
1741 free_data_refs (else_datarefs);
1742 then_stores.release ();
1743 else_stores.release ();
1744 return false;
1748 /* Check that there are no read-after-write or write-after-write dependencies
1749 in ELSE_BB. */
1750 FOR_EACH_VEC_ELT (else_ddrs, i, ddr)
1752 struct data_reference *dra = DDR_A (ddr);
1753 struct data_reference *drb = DDR_B (ddr);
1755 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
1756 && ((DR_IS_READ (dra) && DR_IS_WRITE (drb)
1757 && gimple_uid (DR_STMT (dra)) > gimple_uid (DR_STMT (drb)))
1758 || (DR_IS_READ (drb) && DR_IS_WRITE (dra)
1759 && gimple_uid (DR_STMT (drb)) > gimple_uid (DR_STMT (dra)))
1760 || (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))))
1762 free_dependence_relations (then_ddrs);
1763 free_dependence_relations (else_ddrs);
1764 free_data_refs (then_datarefs);
1765 free_data_refs (else_datarefs);
1766 then_stores.release ();
1767 else_stores.release ();
1768 return false;
1772 /* Sink stores with same LHS. */
1773 FOR_EACH_VEC_ELT (then_stores, i, then_store)
1775 else_store = else_stores[i];
1776 res = cond_if_else_store_replacement_1 (then_bb, else_bb, join_bb,
1777 then_store, else_store);
1778 ok = ok || res;
1781 free_dependence_relations (then_ddrs);
1782 free_dependence_relations (else_ddrs);
1783 free_data_refs (then_datarefs);
1784 free_data_refs (else_datarefs);
1785 then_stores.release ();
1786 else_stores.release ();
1788 return ok;
1791 /* Return TRUE if STMT has a VUSE whose corresponding VDEF is in BB. */
1793 static bool
1794 local_mem_dependence (gimple stmt, basic_block bb)
1796 tree vuse = gimple_vuse (stmt);
1797 gimple def;
1799 if (!vuse)
1800 return false;
1802 def = SSA_NAME_DEF_STMT (vuse);
1803 return (def && gimple_bb (def) == bb);
1806 /* Given a "diamond" control-flow pattern where BB0 tests a condition,
1807 BB1 and BB2 are "then" and "else" blocks dependent on this test,
1808 and BB3 rejoins control flow following BB1 and BB2, look for
1809 opportunities to hoist loads as follows. If BB3 contains a PHI of
1810 two loads, one each occurring in BB1 and BB2, and the loads are
1811 provably of adjacent fields in the same structure, then move both
1812 loads into BB0. Of course this can only be done if there are no
1813 dependencies preventing such motion.
1815 One of the hoisted loads will always be speculative, so the
1816 transformation is currently conservative:
1818 - The fields must be strictly adjacent.
1819 - The two fields must occupy a single memory block that is
1820 guaranteed to not cross a page boundary.
1822 The last is difficult to prove, as such memory blocks should be
1823 aligned on the minimum of the stack alignment boundary and the
1824 alignment guaranteed by heap allocation interfaces. Thus we rely
1825 on a parameter for the alignment value.
1827 Provided a good value is used for the last case, the first
1828 restriction could possibly be relaxed. */
1830 static void
1831 hoist_adjacent_loads (basic_block bb0, basic_block bb1,
1832 basic_block bb2, basic_block bb3)
1834 int param_align = PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE);
1835 unsigned param_align_bits = (unsigned) (param_align * BITS_PER_UNIT);
1836 gimple_stmt_iterator gsi;
1838 /* Walk the phis in bb3 looking for an opportunity. We are looking
1839 for phis of two SSA names, one each of which is defined in bb1 and
1840 bb2. */
1841 for (gsi = gsi_start_phis (bb3); !gsi_end_p (gsi); gsi_next (&gsi))
1843 gimple phi_stmt = gsi_stmt (gsi);
1844 gimple def1, def2, defswap;
1845 tree arg1, arg2, ref1, ref2, field1, field2, fieldswap;
1846 tree tree_offset1, tree_offset2, tree_size2, next;
1847 int offset1, offset2, size2;
1848 unsigned align1;
1849 gimple_stmt_iterator gsi2;
1850 basic_block bb_for_def1, bb_for_def2;
1852 if (gimple_phi_num_args (phi_stmt) != 2
1853 || virtual_operand_p (gimple_phi_result (phi_stmt)))
1854 continue;
1856 arg1 = gimple_phi_arg_def (phi_stmt, 0);
1857 arg2 = gimple_phi_arg_def (phi_stmt, 1);
1859 if (TREE_CODE (arg1) != SSA_NAME
1860 || TREE_CODE (arg2) != SSA_NAME
1861 || SSA_NAME_IS_DEFAULT_DEF (arg1)
1862 || SSA_NAME_IS_DEFAULT_DEF (arg2))
1863 continue;
1865 def1 = SSA_NAME_DEF_STMT (arg1);
1866 def2 = SSA_NAME_DEF_STMT (arg2);
1868 if ((gimple_bb (def1) != bb1 || gimple_bb (def2) != bb2)
1869 && (gimple_bb (def2) != bb1 || gimple_bb (def1) != bb2))
1870 continue;
1872 /* Check the mode of the arguments to be sure a conditional move
1873 can be generated for it. */
1874 if (optab_handler (movcc_optab, TYPE_MODE (TREE_TYPE (arg1)))
1875 == CODE_FOR_nothing)
1876 continue;
1878 /* Both statements must be assignments whose RHS is a COMPONENT_REF. */
1879 if (!gimple_assign_single_p (def1)
1880 || !gimple_assign_single_p (def2)
1881 || gimple_has_volatile_ops (def1)
1882 || gimple_has_volatile_ops (def2))
1883 continue;
1885 ref1 = gimple_assign_rhs1 (def1);
1886 ref2 = gimple_assign_rhs1 (def2);
1888 if (TREE_CODE (ref1) != COMPONENT_REF
1889 || TREE_CODE (ref2) != COMPONENT_REF)
1890 continue;
1892 /* The zeroth operand of the two component references must be
1893 identical. It is not sufficient to compare get_base_address of
1894 the two references, because this could allow for different
1895 elements of the same array in the two trees. It is not safe to
1896 assume that the existence of one array element implies the
1897 existence of a different one. */
1898 if (!operand_equal_p (TREE_OPERAND (ref1, 0), TREE_OPERAND (ref2, 0), 0))
1899 continue;
1901 field1 = TREE_OPERAND (ref1, 1);
1902 field2 = TREE_OPERAND (ref2, 1);
1904 /* Check for field adjacency, and ensure field1 comes first. */
1905 for (next = DECL_CHAIN (field1);
1906 next && TREE_CODE (next) != FIELD_DECL;
1907 next = DECL_CHAIN (next))
1910 if (next != field2)
1912 for (next = DECL_CHAIN (field2);
1913 next && TREE_CODE (next) != FIELD_DECL;
1914 next = DECL_CHAIN (next))
1917 if (next != field1)
1918 continue;
1920 fieldswap = field1;
1921 field1 = field2;
1922 field2 = fieldswap;
1923 defswap = def1;
1924 def1 = def2;
1925 def2 = defswap;
1928 bb_for_def1 = gimple_bb (def1);
1929 bb_for_def2 = gimple_bb (def2);
1931 /* Check for proper alignment of the first field. */
1932 tree_offset1 = bit_position (field1);
1933 tree_offset2 = bit_position (field2);
1934 tree_size2 = DECL_SIZE (field2);
1936 if (!host_integerp (tree_offset1, 1)
1937 || !host_integerp (tree_offset2, 1)
1938 || !host_integerp (tree_size2, 1))
1939 continue;
1941 offset1 = TREE_INT_CST_LOW (tree_offset1);
1942 offset2 = TREE_INT_CST_LOW (tree_offset2);
1943 size2 = TREE_INT_CST_LOW (tree_size2);
1944 align1 = DECL_ALIGN (field1) % param_align_bits;
1946 if (offset1 % BITS_PER_UNIT != 0)
1947 continue;
1949 /* For profitability, the two field references should fit within
1950 a single cache line. */
1951 if (align1 + offset2 - offset1 + size2 > param_align_bits)
1952 continue;
1954 /* The two expressions cannot be dependent upon vdefs defined
1955 in bb1/bb2. */
1956 if (local_mem_dependence (def1, bb_for_def1)
1957 || local_mem_dependence (def2, bb_for_def2))
1958 continue;
1960 /* The conditions are satisfied; hoist the loads from bb1 and bb2 into
1961 bb0. We hoist the first one first so that a cache miss is handled
1962 efficiently regardless of hardware cache-fill policy. */
1963 gsi2 = gsi_for_stmt (def1);
1964 gsi_move_to_bb_end (&gsi2, bb0);
1965 gsi2 = gsi_for_stmt (def2);
1966 gsi_move_to_bb_end (&gsi2, bb0);
1968 if (dump_file && (dump_flags & TDF_DETAILS))
1970 fprintf (dump_file,
1971 "\nHoisting adjacent loads from %d and %d into %d: \n",
1972 bb_for_def1->index, bb_for_def2->index, bb0->index);
1973 print_gimple_stmt (dump_file, def1, 0, TDF_VOPS|TDF_MEMSYMS);
1974 print_gimple_stmt (dump_file, def2, 0, TDF_VOPS|TDF_MEMSYMS);
1979 /* Determine whether we should attempt to hoist adjacent loads out of
1980 diamond patterns in pass_phiopt. Always hoist loads if
1981 -fhoist-adjacent-loads is specified and the target machine has
1982 both a conditional move instruction and a defined cache line size. */
1984 static bool
1985 gate_hoist_loads (void)
1987 return (flag_hoist_adjacent_loads == 1
1988 && PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
1989 && HAVE_conditional_move);
1992 /* Always do these optimizations if we have SSA
1993 trees to work on. */
1994 static bool
1995 gate_phiopt (void)
1997 return 1;
2000 struct gimple_opt_pass pass_phiopt =
2003 GIMPLE_PASS,
2004 "phiopt", /* name */
2005 OPTGROUP_NONE, /* optinfo_flags */
2006 gate_phiopt, /* gate */
2007 tree_ssa_phiopt, /* execute */
2008 NULL, /* sub */
2009 NULL, /* next */
2010 0, /* static_pass_number */
2011 TV_TREE_PHIOPT, /* tv_id */
2012 PROP_cfg | PROP_ssa, /* properties_required */
2013 0, /* properties_provided */
2014 0, /* properties_destroyed */
2015 0, /* todo_flags_start */
2016 TODO_ggc_collect
2017 | TODO_verify_ssa
2018 | TODO_verify_flow
2019 | TODO_verify_stmts /* todo_flags_finish */
2023 static bool
2024 gate_cselim (void)
2026 return flag_tree_cselim;
2029 struct gimple_opt_pass pass_cselim =
2032 GIMPLE_PASS,
2033 "cselim", /* name */
2034 OPTGROUP_NONE, /* optinfo_flags */
2035 gate_cselim, /* gate */
2036 tree_ssa_cs_elim, /* execute */
2037 NULL, /* sub */
2038 NULL, /* next */
2039 0, /* static_pass_number */
2040 TV_TREE_PHIOPT, /* tv_id */
2041 PROP_cfg | PROP_ssa, /* properties_required */
2042 0, /* properties_provided */
2043 0, /* properties_destroyed */
2044 0, /* todo_flags_start */
2045 TODO_ggc_collect
2046 | TODO_verify_ssa
2047 | TODO_verify_flow
2048 | TODO_verify_stmts /* todo_flags_finish */