/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;
   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, with some experiments it was found out that the
   transformation is not always useful if there are only two divisions
   by the same divisor.  This is probably because modern processors
   can pipeline the divisions; on older, in-order processors it should
   still be effective to optimize two divisions by the same number.
   We make this a param, and it shall be called N in the remainder of
   this comment.
   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.
   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.cc, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "targhooks.h"
#include "tree-ssa-math-opts.h"
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence
{
  /* The basic block represented by this structure.  */
  basic_block bb = basic_block ();

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def = tree ();

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def = tree ();

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt = nullptr;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children = nullptr;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next = nullptr;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions = 0;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division = false;

  /* Construct a struct occurrence for basic block BB, and whose
     children list is headed by CHILDREN.  */
  occurrence (basic_block bb, struct occurrence *children)
    : bb (bb), children (children)
  {
    bb->aux = this;
  }

  /* Destroy a struct occurrence and remove it from its basic block.  */
  ~occurrence ()
  {
    bb->aux = NULL;
  }

  /* Allocate memory for a struct occurrence from OCC_POOL.  */
  static void* operator new (size_t);

  /* Return memory for a struct occurrence to OCC_POOL.  */
  static void operator delete (void*, size_t);
};
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;
  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
  /* Number of conversions removed.  */
  int conv_removed;
} sincos_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;
  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;
  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;
  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;
  /* Number of highpart multiplication ops inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;
void* occurrence::operator new (size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  return occ_pool->allocate_raw ();
}

void occurrence::operator delete (void *occ, size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  occ_pool->remove_raw (occ);
}
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = new occurrence (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the division is going to be found multiple
   times use 1 (as it is with squares).  */
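/* For example, given

     t = x * x;
     a = 1.0 / t;

   the same division is discovered twice (once through the uses of X and
   once through the uses of T), so each discovery registers it with
   IMPORTANCE 1, and the accumulated weight matches a plain division
   registered once with IMPORTANCE 2.  */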
static void
register_division_in (basic_block bb, int importance)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = new occurrence (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions += importance;
}
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	/* Don't clobber the EXIT_BLOCK.  */
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}
/* Return whether USE_STMT is a floating-point division by DEF.  */
static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def
	 && !stmt_can_throw_internal (cfun, use_stmt);
}
/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
static inline bool
is_mult_by (gimple *use_stmt, tree def, tree a)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
    {
      tree op0 = gimple_assign_rhs1 (use_stmt);
      tree op1 = gimple_assign_rhs2 (use_stmt);

      return (op0 == def && op1 == a)
	      || (op0 == a && op1 == def);
    }
  return 0;
}
/* Return whether USE_STMT is DEF * DEF.  */
static inline bool
is_square_of (gimple *use_stmt, tree def)
{
  return is_mult_by (use_stmt, def, def);
}
/* Return whether USE_STMT is a floating-point division by
   DEF * DEF.  */
static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
      && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
      && !stmt_can_throw_internal (cfun, use_stmt))
    {
      tree denominator = gimple_assign_rhs2 (use_stmt);
      if (TREE_CODE (denominator) == SSA_NAME)
	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
    }
  return 0;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.

   If should_insert_square_recip is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */
static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costs.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}
/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   Take as argument the use for (x * x).  */
static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
      && occ->recip_def_stmt != use_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
      SET_USE (use_p, occ->square_recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  delete occ;

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}
/* Transform sequences like
   x = 1.0 / sqrt (a);
   r1 = x * x;
   r2 = a * x;

   into

   tmp1 = 1.0 / a;
   tmp2 = sqrt (a);
   tmp3 = tmp1 * tmp2;
   x = tmp3;
   r1 = tmp1;
   r2 = tmp2;

   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  */
573 optimize_recip_sqrt (gimple_stmt_iterator
*def_gsi
, tree def
)
576 imm_use_iterator use_iter
;
577 gimple
*stmt
= gsi_stmt (*def_gsi
);
579 tree orig_sqrt_ssa_name
= gimple_assign_rhs2 (stmt
);
580 tree div_rhs1
= gimple_assign_rhs1 (stmt
);
582 if (TREE_CODE (orig_sqrt_ssa_name
) != SSA_NAME
583 || TREE_CODE (div_rhs1
) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1
), &dconst1
))
588 = dyn_cast
<gcall
*> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name
));
590 if (!sqrt_stmt
|| !gimple_call_lhs (sqrt_stmt
))
593 switch (gimple_call_combined_fn (sqrt_stmt
))
602 tree a
= gimple_call_arg (sqrt_stmt
, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec
<gimple
*> sqr_stmts
;
608 /* Statements that use x in a * x. */
609 auto_vec
<gimple
*> mult_stmts
;
610 bool has_other_use
= false;
611 bool mult_on_main_path
= false;
613 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, x
)
615 if (is_gimple_debug (use_stmt
))
617 if (is_square_of (use_stmt
, x
))
619 sqr_stmts
.safe_push (use_stmt
);
620 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
621 mult_on_main_path
= true;
623 else if (is_mult_by (use_stmt
, x
, a
))
625 mult_stmts
.safe_push (use_stmt
);
626 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
627 mult_on_main_path
= true;
630 has_other_use
= true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use
&& !mult_on_main_path
)
640 if (sqr_stmts
.is_empty () && mult_stmts
.is_empty ())
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use
&& (sqr_stmts
.is_empty () || mult_stmts
.is_empty ()))
650 fprintf (dump_file
, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file
, sqrt_stmt
, 0, TDF_NONE
);
652 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
653 fprintf (dump_file
, "\n");
656 bool delete_div
= !has_other_use
;
657 tree sqr_ssa_name
= NULL_TREE
;
658 if (!sqr_stmts
.is_empty ())
660 /* r1 = x * x. Transform the original
667 = make_temp_ssa_name (TREE_TYPE (a
), NULL
, "recip_sqrt_sqr");
671 fprintf (dump_file
, "Replacing original division\n");
672 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
673 fprintf (dump_file
, "with new division\n");
676 = gimple_build_assign (sqr_ssa_name
, gimple_assign_rhs_code (stmt
),
677 gimple_assign_rhs1 (stmt
), a
);
678 gsi_insert_before (def_gsi
, stmt
, GSI_SAME_STMT
);
679 gsi_remove (def_gsi
, true);
680 *def_gsi
= gsi_for_stmt (stmt
);
681 fold_stmt_inplace (def_gsi
);
685 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
690 FOR_EACH_VEC_ELT (sqr_stmts
, i
, sqr_stmt
)
692 gimple_stmt_iterator gsi2
= gsi_for_stmt (sqr_stmt
);
693 gimple_assign_set_rhs_from_tree (&gsi2
, sqr_ssa_name
);
694 update_stmt (sqr_stmt
);
697 if (!mult_stmts
.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
702 gimple
*mult_stmt
= NULL
;
703 FOR_EACH_VEC_ELT (mult_stmts
, i
, mult_stmt
)
705 gimple_stmt_iterator gsi2
= gsi_for_stmt (mult_stmt
);
709 fprintf (dump_file
, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
711 fprintf (dump_file
, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2
, orig_sqrt_ssa_name
);
714 fold_stmt_inplace (&gsi2
);
715 update_stmt (mult_stmt
);
717 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
      /* Using the two temporaries tmp1, tmp2 from above
	 the original x is now:
	 x = tmp1 * tmp2.  */
726 gcc_assert (orig_sqrt_ssa_name
);
727 gcc_assert (sqr_ssa_name
);
730 = gimple_build_assign (x
, MULT_EXPR
,
731 orig_sqrt_ssa_name
, sqr_ssa_name
);
732 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
739 gsi_remove (&gsi2
, true);
743 release_ssa_name (x
);
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */
753 execute_cse_reciprocals_1 (gimple_stmt_iterator
*def_gsi
, tree def
)
755 use_operand_p use_p
, square_use_p
;
756 imm_use_iterator use_iter
, square_use_iter
;
758 struct occurrence
*occ
;
761 int square_recip_count
= 0;
762 int sqrt_recip_count
= 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def
)) && TREE_CODE (def
) == SSA_NAME
);
765 threshold
= targetm
.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def
)));
  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       sinh (x) / tanh (x)
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
775 gimple
*def_stmt
= SSA_NAME_DEF_STMT (def
);
777 if (is_gimple_assign (def_stmt
)
778 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt
) == gimple_assign_rhs2 (def_stmt
))
782 tree op0
= gimple_assign_rhs1 (def_stmt
);
784 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, op0
)
786 gimple
*use_stmt
= USE_STMT (use_p
);
787 if (is_division_by (use_stmt
, op0
))
792 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, def
)
794 gimple
*use_stmt
= USE_STMT (use_p
);
795 if (is_division_by (use_stmt
, def
))
797 register_division_in (gimple_bb (use_stmt
), 2);
801 if (is_square_of (use_stmt
, def
))
803 square_def
= gimple_assign_lhs (use_stmt
);
804 FOR_EACH_IMM_USE_FAST (square_use_p
, square_use_iter
, square_def
)
806 gimple
*square_use_stmt
= USE_STMT (square_use_p
);
807 if (is_division_by (square_use_stmt
, square_def
))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt
), 1);
811 square_recip_count
++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count
/= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count
> square_recip_count
)
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count
+ square_recip_count
>= threshold
&& count
>= 1)
828 for (occ
= occ_head
; occ
; occ
= occ
->next
)
831 insert_reciprocals (def_gsi
, occ
, def
, NULL
, NULL
,
832 square_recip_count
, threshold
);
835 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, def
)
837 if (is_division_by (use_stmt
, def
))
839 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
840 replace_reciprocal (use_p
);
842 else if (square_recip_count
> 0 && is_square_of (use_stmt
, def
))
844 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator
;
849 gimple
*powmult_use_stmt
= USE_STMT (use_p
);
850 tree powmult_def_name
= gimple_assign_lhs (powmult_use_stmt
);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt
,
853 square_iterator
, powmult_def_name
)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p
, square_iterator
)
856 gimple
*powmult_use_stmt
= USE_STMT (square_use_p
);
857 if (is_division_by (powmult_use_stmt
, powmult_def_name
))
858 replace_reciprocal_squares (square_use_p
);
866 for (occ
= occ_head
; occ
; )
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
876 internal_fn_reciprocal (gcall
*call
)
880 switch (gimple_call_combined_fn (call
))
891 tree_pair types
= direct_internal_fn_types (ifn
, call
);
892 if (!direct_internal_fn_supported_p (ifn
, types
, OPTIMIZE_FOR_SPEED
))
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
902 const pass_data pass_data_cse_reciprocals
=
904 GIMPLE_PASS
, /* type */
906 OPTGROUP_NONE
, /* optinfo_flags */
907 TV_TREE_RECIP
, /* tv_id */
908 PROP_ssa
, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa
, /* todo_flags_finish */
915 class pass_cse_reciprocals
: public gimple_opt_pass
918 pass_cse_reciprocals (gcc::context
*ctxt
)
919 : gimple_opt_pass (pass_data_cse_reciprocals
, ctxt
)
922 /* opt_pass methods: */
923 virtual bool gate (function
*) { return optimize
&& flag_reciprocal_math
; }
924 virtual unsigned int execute (function
*);
926 }; // class pass_cse_reciprocals
929 pass_cse_reciprocals::execute (function
*fun
)
934 occ_pool
= new object_allocator
<occurrence
> ("dominators for recip");
936 memset (&reciprocal_stats
, 0, sizeof (reciprocal_stats
));
937 calculate_dominance_info (CDI_DOMINATORS
);
938 calculate_dominance_info (CDI_POST_DOMINATORS
);
941 FOR_EACH_BB_FN (bb
, fun
)
942 gcc_assert (!bb
->aux
);
944 for (arg
= DECL_ARGUMENTS (fun
->decl
); arg
; arg
= DECL_CHAIN (arg
))
945 if (FLOAT_TYPE_P (TREE_TYPE (arg
))
946 && is_gimple_reg (arg
))
948 tree name
= ssa_default_def (fun
, arg
);
950 execute_cse_reciprocals_1 (NULL
, name
);
953 FOR_EACH_BB_FN (bb
, fun
)
957 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
960 gphi
*phi
= gsi
.phi ();
961 def
= PHI_RESULT (phi
);
962 if (! virtual_operand_p (def
)
963 && FLOAT_TYPE_P (TREE_TYPE (def
)))
964 execute_cse_reciprocals_1 (NULL
, def
);
967 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
970 gimple
*stmt
= gsi_stmt (gsi
);
972 if (gimple_has_lhs (stmt
)
973 && (def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
)) != NULL
974 && FLOAT_TYPE_P (TREE_TYPE (def
))
975 && TREE_CODE (def
) == SSA_NAME
)
977 execute_cse_reciprocals_1 (&gsi
, def
);
978 stmt
= gsi_stmt (gsi
);
979 if (flag_unsafe_math_optimizations
980 && is_gimple_assign (stmt
)
981 && gimple_assign_lhs (stmt
) == def
982 && !stmt_can_throw_internal (cfun
, stmt
)
983 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
984 optimize_recip_sqrt (&gsi
, def
);
988 if (optimize_bb_for_size_p (bb
))
991 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
992 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
995 gimple
*stmt
= gsi_stmt (gsi
);
997 if (is_gimple_assign (stmt
)
998 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
1000 tree arg1
= gimple_assign_rhs2 (stmt
);
1003 if (TREE_CODE (arg1
) != SSA_NAME
)
1006 stmt1
= SSA_NAME_DEF_STMT (arg1
);
1008 if (is_gimple_call (stmt1
)
1009 && gimple_call_lhs (stmt1
))
1012 imm_use_iterator ui
;
1013 use_operand_p use_p
;
1014 tree fndecl
= NULL_TREE
;
1016 gcall
*call
= as_a
<gcall
*> (stmt1
);
1017 internal_fn ifn
= internal_fn_reciprocal (call
);
1018 if (ifn
== IFN_LAST
)
1020 fndecl
= gimple_call_fndecl (call
);
1022 || !fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
1024 fndecl
= targetm
.builtin_reciprocal (fndecl
);
	      /* Check that all uses of the SSA name are divisions,
		 otherwise replacing the defining statement will do
		 the wrong thing.  */
1033 FOR_EACH_IMM_USE_FAST (use_p
, ui
, arg1
)
1035 gimple
*stmt2
= USE_STMT (use_p
);
1036 if (is_gimple_debug (stmt2
))
1038 if (!is_gimple_assign (stmt2
)
1039 || gimple_assign_rhs_code (stmt2
) != RDIV_EXPR
1040 || gimple_assign_rhs1 (stmt2
) == arg1
1041 || gimple_assign_rhs2 (stmt2
) != arg1
)
1050 gimple_replace_ssa_lhs (call
, arg1
);
1051 if (gimple_call_internal_p (call
) != (ifn
!= IFN_LAST
))
1053 auto_vec
<tree
, 4> args
;
1054 for (unsigned int i
= 0;
1055 i
< gimple_call_num_args (call
); i
++)
1056 args
.safe_push (gimple_call_arg (call
, i
));
1058 if (ifn
== IFN_LAST
)
1059 stmt2
= gimple_build_call_vec (fndecl
, args
);
1061 stmt2
= gimple_build_call_internal_vec (ifn
, args
);
1062 gimple_call_set_lhs (stmt2
, arg1
);
1063 gimple_move_vops (stmt2
, call
);
1064 gimple_call_set_nothrow (stmt2
,
1065 gimple_call_nothrow_p (call
));
1066 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
1067 gsi_replace (&gsi2
, stmt2
, true);
1071 if (ifn
== IFN_LAST
)
1072 gimple_call_set_fndecl (call
, fndecl
);
1074 gimple_call_set_internal_fn (call
, ifn
);
1077 reciprocal_stats
.rfuncs_inserted
++;
1079 FOR_EACH_IMM_USE_STMT (stmt
, ui
, arg1
)
1081 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1082 gimple_assign_set_rhs_code (stmt
, MULT_EXPR
);
1083 fold_stmt_inplace (&gsi
);
1091 statistics_counter_event (fun
, "reciprocal divs inserted",
1092 reciprocal_stats
.rdivs_inserted
);
1093 statistics_counter_event (fun
, "reciprocal functions inserted",
1094 reciprocal_stats
.rfuncs_inserted
);
1096 free_dominance_info (CDI_DOMINATORS
);
1097 free_dominance_info (CDI_POST_DOMINATORS
);
1105 make_pass_cse_reciprocals (gcc::context
*ctxt
)
1107 return new pass_cse_reciprocals (ctxt
);
/* If NAME is the result of a type conversion, look for other
   equivalent dominating or dominated conversions, and replace all
   uses with the earliest dominating name, removing the redundant
   conversions.  Return the prevailing name.  */
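/* As a small illustration of the intent (SSA names below are made up):
   if the function contains

     a_1 = (float) x_2;    <-- dominates the conversion below
     ...
     b_3 = (float) x_2;

   then uses of b_3 are rewritten to use a_1 and the second, redundant
   conversion is removed.  */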
1116 execute_cse_conv_1 (tree name
, bool *cfg_changed
)
1118 if (SSA_NAME_IS_DEFAULT_DEF (name
)
1119 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name
))
1122 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
1124 if (!gimple_assign_cast_p (def_stmt
))
1127 tree src
= gimple_assign_rhs1 (def_stmt
);
1129 if (TREE_CODE (src
) != SSA_NAME
)
1132 imm_use_iterator use_iter
;
1135 /* Find the earliest dominating def. */
1136 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1138 if (use_stmt
== def_stmt
1139 || !gimple_assign_cast_p (use_stmt
))
1142 tree lhs
= gimple_assign_lhs (use_stmt
);
1144 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1145 || (gimple_assign_rhs1 (use_stmt
)
1146 != gimple_assign_rhs1 (def_stmt
))
1147 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1151 if (gimple_bb (def_stmt
) == gimple_bb (use_stmt
))
1153 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1154 while (!gsi_end_p (gsi
) && gsi_stmt (gsi
) != def_stmt
)
1156 use_dominates
= !gsi_end_p (gsi
);
1158 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
),
1159 gimple_bb (def_stmt
)))
1160 use_dominates
= false;
1161 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (def_stmt
),
1162 gimple_bb (use_stmt
)))
1163 use_dominates
= true;
1169 std::swap (name
, lhs
);
1170 std::swap (def_stmt
, use_stmt
);
  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
1178 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1180 if (use_stmt
== def_stmt
1181 || !gimple_assign_cast_p (use_stmt
))
1184 tree lhs
= gimple_assign_lhs (use_stmt
);
1186 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1187 || (gimple_assign_rhs1 (use_stmt
)
1188 != gimple_assign_rhs1 (def_stmt
))
1189 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1192 basic_block use_bb
= gimple_bb (use_stmt
);
1193 if (gimple_bb (def_stmt
) == use_bb
1194 || dominated_by_p (CDI_DOMINATORS
, use_bb
, gimple_bb (def_stmt
)))
1196 sincos_stats
.conv_removed
++;
1198 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1199 replace_uses_by (lhs
, name
);
1200 if (gsi_remove (&gsi
, true)
1201 && gimple_purge_dead_eh_edges (use_bb
))
1202 *cfg_changed
= true;
1203 release_defs (use_stmt
);
/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */
1217 maybe_record_sincos (vec
<gimple
*> *stmts
,
1218 basic_block
*top_bb
, gimple
*use_stmt
)
1220 basic_block use_bb
= gimple_bb (use_stmt
);
1222 && (*top_bb
== use_bb
1223 || dominated_by_p (CDI_DOMINATORS
, use_bb
, *top_bb
)))
1224 stmts
->safe_push (use_stmt
);
1226 || dominated_by_p (CDI_DOMINATORS
, *top_bb
, use_bb
))
1228 stmts
->safe_push (use_stmt
);
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */
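/* As an illustration (assuming float operands; the cexpi variant shown
   is then the float builtin):

     s_1 = sinf (x_2);
     c_3 = cosf (x_2);

   becomes, roughly,

     t_4 = __builtin_cexpif (x_2);
     s_1 = IMAGPART_EXPR <t_4>;
     c_3 = REALPART_EXPR <t_4>;  */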
1246 execute_cse_sincos_1 (tree name
)
1248 gimple_stmt_iterator gsi
;
1249 imm_use_iterator use_iter
;
1250 tree fndecl
, res
, type
= NULL_TREE
;
1251 gimple
*def_stmt
, *use_stmt
, *stmt
;
1252 int seen_cos
= 0, seen_sin
= 0, seen_cexpi
= 0;
1253 auto_vec
<gimple
*> stmts
;
1254 basic_block top_bb
= NULL
;
1256 bool cfg_changed
= false;
1258 name
= execute_cse_conv_1 (name
, &cfg_changed
);
1260 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, name
)
1262 if (gimple_code (use_stmt
) != GIMPLE_CALL
1263 || !gimple_call_lhs (use_stmt
))
1266 switch (gimple_call_combined_fn (use_stmt
))
1269 seen_cos
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1273 seen_sin
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1277 seen_cexpi
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1284 tree t
= mathfn_built_in_type (gimple_call_combined_fn (use_stmt
));
1288 t
= TREE_TYPE (name
);
1290 /* This checks that NAME has the right type in the first round,
1291 and, in subsequent rounds, that the built_in type is the same
1292 type, or a compatible type. */
1293 if (type
!= t
&& !types_compatible_p (type
, t
))
1296 if (seen_cos
+ seen_sin
+ seen_cexpi
<= 1)
1299 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1300 the name def statement. */
1301 fndecl
= mathfn_built_in (type
, BUILT_IN_CEXPI
);
1304 stmt
= gimple_build_call (fndecl
, 1, name
);
1305 res
= make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl
)), stmt
, "sincostmp");
1306 gimple_call_set_lhs (stmt
, res
);
1308 def_stmt
= SSA_NAME_DEF_STMT (name
);
1309 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
1310 && gimple_code (def_stmt
) != GIMPLE_PHI
1311 && gimple_bb (def_stmt
) == top_bb
)
1313 gsi
= gsi_for_stmt (def_stmt
);
1314 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1318 gsi
= gsi_after_labels (top_bb
);
1319 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1321 sincos_stats
.inserted
++;
1323 /* And adjust the recorded old call sites. */
1324 for (i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
1328 switch (gimple_call_combined_fn (use_stmt
))
1331 rhs
= fold_build1 (REALPART_EXPR
, type
, res
);
1335 rhs
= fold_build1 (IMAGPART_EXPR
, type
, res
);
1346 /* Replace call with a copy. */
1347 stmt
= gimple_build_assign (gimple_call_lhs (use_stmt
), rhs
);
1349 gsi
= gsi_for_stmt (use_stmt
);
1350 gsi_replace (&gsi
, stmt
, true);
1351 if (gimple_purge_dead_eh_edges (gimple_bb (stmt
)))
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */
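/* The 2*bits(n)-2 bound follows from plain binary exponentiation: an
   exponent with b significant bits needs b-1 squarings plus at most
   b-1 extra multiplications by x, one per set bit below the leading
   one.  */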
#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif
/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */
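/* For instance, powi_table[7] == 4, so x**7 is evaluated as
   x**4 * x**3; recursively, x**3 == x * x**2 and x**4 == x**2 * x**2,
   and the shared x**2 is computed only once, giving four
   multiplications in total instead of the six a naive expansion uses.  */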
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
    0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
    4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
    8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
    12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
    16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
    20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
    24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
    28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
    32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
    36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
    40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
    44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
    48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
    52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
    56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
    60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
    64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
    68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
    72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
    76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
    80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
    84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
    88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
    92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
    96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
  };
/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */
1437 powi_lookup_cost (unsigned HOST_WIDE_INT n
, bool *cache
)
1439 /* If we've already calculated this exponent, then this evaluation
1440 doesn't require any additional multiplications. */
1445 return powi_lookup_cost (n
- powi_table
[n
], cache
)
1446 + powi_lookup_cost (powi_table
[n
], cache
) + 1;
1449 /* Return the number of multiplications required to calculate
1450 powi(x,n) for an arbitrary x, given the exponent N. This
1451 function needs to be kept in sync with powi_as_mults below. */
1454 powi_cost (HOST_WIDE_INT n
)
1456 bool cache
[POWI_TABLE_SIZE
];
1457 unsigned HOST_WIDE_INT digit
;
1458 unsigned HOST_WIDE_INT val
;
1464 /* Ignore the reciprocal when calculating the cost. */
1467 /* Initialize the exponent cache. */
1468 memset (cache
, 0, POWI_TABLE_SIZE
* sizeof (bool));
1473 while (val
>= POWI_TABLE_SIZE
)
1477 digit
= val
& ((1 << POWI_WINDOW_SIZE
) - 1);
1478 result
+= powi_lookup_cost (digit
, cache
)
1479 + POWI_WINDOW_SIZE
+ 1;
1480 val
>>= POWI_WINDOW_SIZE
;
1489 return result
+ powi_lookup_cost (val
, cache
);
1492 /* Recursive subroutine of powi_as_mults. This function takes the
1493 array, CACHE, of already calculated exponents and an exponent N and
1494 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1497 powi_as_mults_1 (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1498 unsigned HOST_WIDE_INT n
, tree
*cache
)
1500 tree op0
, op1
, ssa_target
;
1501 unsigned HOST_WIDE_INT digit
;
1504 if (n
< POWI_TABLE_SIZE
&& cache
[n
])
1507 ssa_target
= make_temp_ssa_name (type
, NULL
, "powmult");
1509 if (n
< POWI_TABLE_SIZE
)
1511 cache
[n
] = ssa_target
;
1512 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- powi_table
[n
], cache
);
1513 op1
= powi_as_mults_1 (gsi
, loc
, type
, powi_table
[n
], cache
);
1517 digit
= n
& ((1 << POWI_WINDOW_SIZE
) - 1);
1518 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- digit
, cache
);
1519 op1
= powi_as_mults_1 (gsi
, loc
, type
, digit
, cache
);
1523 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
>> 1, cache
);
1527 mult_stmt
= gimple_build_assign (ssa_target
, MULT_EXPR
, op0
, op1
);
1528 gimple_set_location (mult_stmt
, loc
);
1529 gsi_insert_before (gsi
, mult_stmt
, GSI_SAME_STMT
);
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */
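/* For example, for N == 5 the table-driven expansion emits

     powmult_1 = arg0 * arg0;             <-- x**2
     powmult_2 = arg0 * powmult_1;         <-- x**3
     powmult_3 = powmult_1 * powmult_2;    <-- x**5

   i.e. three multiplications instead of the four a naive expansion
   would use (temporary names here are illustrative).  */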
1538 powi_as_mults (gimple_stmt_iterator
*gsi
, location_t loc
,
1539 tree arg0
, HOST_WIDE_INT n
)
1541 tree cache
[POWI_TABLE_SIZE
], result
, type
= TREE_TYPE (arg0
);
1546 return build_one_cst (type
);
1548 memset (cache
, 0, sizeof (cache
));
1551 result
= powi_as_mults_1 (gsi
, loc
, type
, absu_hwi (n
), cache
);
1555 /* If the original exponent was negative, reciprocate the result. */
1556 target
= make_temp_ssa_name (type
, NULL
, "powmult");
1557 div_stmt
= gimple_build_assign (target
, RDIV_EXPR
,
1558 build_real (type
, dconst1
), result
);
1559 gimple_set_location (div_stmt
, loc
);
1560 gsi_insert_before (gsi
, div_stmt
, GSI_SAME_STMT
);
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */
1572 gimple_expand_builtin_powi (gimple_stmt_iterator
*gsi
, location_t loc
,
1573 tree arg0
, HOST_WIDE_INT n
)
1575 if ((n
>= -1 && n
<= 2)
1576 || (optimize_function_for_speed_p (cfun
)
1577 && powi_cost (n
) <= POWI_MAX_MULTS
))
1578 return powi_as_mults (gsi
, loc
, arg0
, n
);
/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */
1589 build_and_insert_call (gimple_stmt_iterator
*gsi
, location_t loc
,
1595 call_stmt
= gimple_build_call (fn
, 1, arg
);
1596 ssa_target
= make_temp_ssa_name (TREE_TYPE (arg
), NULL
, "powroot");
1597 gimple_set_lhs (call_stmt
, ssa_target
);
1598 gimple_set_location (call_stmt
, loc
);
1599 gsi_insert_before (gsi
, call_stmt
, GSI_SAME_STMT
);
1604 /* Build a gimple binary operation with the given CODE and arguments
1605 ARG0, ARG1, assigning the result to a new SSA name for variable
1606 TARGET. Insert the statement prior to GSI's current position, and
1607 return the fresh SSA name.*/
1610 build_and_insert_binop (gimple_stmt_iterator
*gsi
, location_t loc
,
1611 const char *name
, enum tree_code code
,
1612 tree arg0
, tree arg1
)
1614 tree result
= make_temp_ssa_name (TREE_TYPE (arg0
), NULL
, name
);
1615 gassign
*stmt
= gimple_build_assign (result
, code
, arg0
, arg1
);
1616 gimple_set_location (stmt
, loc
);
1617 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1621 /* Build a gimple reference operation with the given CODE and argument
1622 ARG, assigning the result to a new SSA name of TYPE with NAME.
1623 Insert the statement prior to GSI's current position, and return
1624 the fresh SSA name. */
1627 build_and_insert_ref (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1628 const char *name
, enum tree_code code
, tree arg0
)
1630 tree result
= make_temp_ssa_name (type
, NULL
, name
);
1631 gimple
*stmt
= gimple_build_assign (result
, build1 (code
, type
, arg0
));
1632 gimple_set_location (stmt
, loc
);
1633 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1637 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1638 prior to GSI's current position, and return the fresh SSA name. */
1641 build_and_insert_cast (gimple_stmt_iterator
*gsi
, location_t loc
,
1642 tree type
, tree val
)
1644 tree result
= make_ssa_name (type
);
1645 gassign
*stmt
= gimple_build_assign (result
, NOP_EXPR
, val
);
1646 gimple_set_location (stmt
, loc
);
1647 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
struct pow_synth_sqrt_info
{
  /* Which 0.5**i terms appear in the decomposition of the exponent.  */
  bool *factors;
  /* The deepest sqrt chain needed.  */
  unsigned int deepest;
  /* The number of multiplications the expansion will need.  */
  unsigned int num_mults;
};
/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */
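/* For example, C == 0.625 is handled with N >= 3: 0.625 == 0.5 + 0.125,
   so the factors are {1, 0, 1}, DEEPEST == 3 and NUM_MULTS == 1, and the
   fractional power expands to sqrt (x) * sqrt (sqrt (sqrt (x))).  */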
1666 representable_as_half_series_p (REAL_VALUE_TYPE c
, unsigned n
,
1667 struct pow_synth_sqrt_info
*info
)
1669 REAL_VALUE_TYPE factor
= dconsthalf
;
1670 REAL_VALUE_TYPE remainder
= c
;
1673 info
->num_mults
= 0;
1674 memset (info
->factors
, 0, n
* sizeof (bool));
1676 for (unsigned i
= 0; i
< n
; i
++)
1678 REAL_VALUE_TYPE res
;
1680 /* If something inexact happened bail out now. */
1681 if (real_arithmetic (&res
, MINUS_EXPR
, &remainder
, &factor
))
1684 /* We have hit zero. The number is representable as a sum
1685 of powers of 0.5. */
1686 if (real_equal (&res
, &dconst0
))
1688 info
->factors
[i
] = true;
1689 info
->deepest
= i
+ 1;
1692 else if (!REAL_VALUE_NEGATIVE (res
))
1695 info
->factors
[i
] = true;
1699 info
->factors
[i
] = false;
1701 real_arithmetic (&factor
, MULT_EXPR
, &factor
, &dconsthalf
);
/* Return the tree corresponding to FN being applied
   to ARG N times at GSI and LOC.
   Look up previous results from CACHE if need be.
   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
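/* For example, get_fn_chain (x, 2, gsi, sqrtfn, loc, cache) returns an
   SSA name holding sqrt (sqrt (x)), inserting the two calls before GSI
   and memoizing the intermediate results in CACHE.  */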
1712 get_fn_chain (tree arg
, unsigned int n
, gimple_stmt_iterator
*gsi
,
1713 tree fn
, location_t loc
, tree
*cache
)
1715 tree res
= cache
[n
];
1718 tree prev
= get_fn_chain (arg
, n
- 1, gsi
, fn
, loc
, cache
);
1719 res
= build_and_insert_call (gsi
, loc
, fn
, prev
);
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */
1731 print_nested_fn (FILE* stream
, const char *fname
, const char* arg
,
1735 fprintf (stream
, "%s", arg
);
1738 fprintf (stream
, "%s (", fname
);
1739 print_nested_fn (stream
, fname
, arg
, n
- 1);
1740 fprintf (stream
, ")");
1744 /* Print to STREAM the fractional sequence of sqrt chains
1745 applied to ARG, described by INFO. Used for the dump file. */
1748 dump_fractional_sqrt_sequence (FILE *stream
, const char *arg
,
1749 struct pow_synth_sqrt_info
*info
)
1751 for (unsigned int i
= 0; i
< info
->deepest
; i
++)
1753 bool is_set
= info
->factors
[i
];
1756 print_nested_fn (stream
, "sqrt", arg
, i
+ 1);
1757 if (i
!= info
->deepest
- 1)
1758 fprintf (stream
, " * ");
1763 /* Print to STREAM a representation of raising ARG to an integer
1764 power N. Used for the dump file. */
1767 dump_integer_part (FILE *stream
, const char* arg
, HOST_WIDE_INT n
)
1770 fprintf (stream
, "powi (%s, " HOST_WIDE_INT_PRINT_DEC
")", arg
, n
);
1772 fprintf (stream
, "%s", arg
);
/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
   square roots.  Place at GSI and LOC.  Limit the maximum depth
   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
   result of the expanded sequence or NULL_TREE if the expansion failed.

   This routine assumes that ARG1 is a real number with a fractional part
   (the integer exponent case will have been handled earlier in
   gimple_expand_builtin_pow).

   For ARG1 > 0.0:
   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
     FRAC_PART == ARG1 - WHOLE_PART:
     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
     if it can be expressed as such, that is if FRAC_PART satisfies:
     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
     where integer a[i] is either 0 or 1.

     Example:
     POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
     --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))

   For ARG1 < 0.0 there are two approaches:
   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
     is calculated as above.

     Example:
     POW (x, -5.625) == 1.0 / POW (x, 5.625)
     --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))

   * (B) : WHOLE_PART := - ceil (abs (ARG1))
           FRAC_PART := ARG1 - WHOLE_PART
     and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).

     Example:
     POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
     --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))

   For ARG1 < 0.0 we choose between (A) and (B) depending on
   how many multiplications we'd have to do.
   So, for the example in (B): POW (x, -5.875), if we were to
   follow algorithm (A) we would produce:
   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
   which contains more multiplications than approach (B).

   Hopefully, this approach will eliminate potentially expensive POW library
   calls when unsafe floating point math is enabled and allow the compiler to
   further optimise the multiplies, square roots and divides produced by this
   pass.  */
1826 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1827 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1829 tree type
= TREE_TYPE (arg0
);
1830 machine_mode mode
= TYPE_MODE (type
);
1831 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1832 bool one_over
= true;
1837 if (TREE_CODE (arg1
) != REAL_CST
)
1840 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1842 gcc_assert (max_depth
> 0);
1843 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1845 struct pow_synth_sqrt_info synth_info
;
1846 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1847 synth_info
.deepest
= 0;
1848 synth_info
.num_mults
= 0;
1850 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1851 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1853 /* The whole and fractional parts of exp. */
1854 REAL_VALUE_TYPE whole_part
;
1855 REAL_VALUE_TYPE frac_part
;
1857 real_floor (&whole_part
, mode
, &exp
);
1858 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1861 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1862 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1866 real_ceil (&ceil_whole
, mode
, &exp
);
1867 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1870 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1873 /* Check whether it's more profitable to not use 1.0 / ... */
1876 struct pow_synth_sqrt_info alt_synth_info
;
1877 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1878 alt_synth_info
.deepest
= 0;
1879 alt_synth_info
.num_mults
= 0;
1881 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1883 && alt_synth_info
.deepest
<= synth_info
.deepest
1884 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1886 whole_part
= ceil_whole
;
1887 frac_part
= ceil_fract
;
1888 synth_info
.deepest
= alt_synth_info
.deepest
;
1889 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1890 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1891 (max_depth
+ 1) * sizeof (bool));
1896 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1897 REAL_VALUE_TYPE cint
;
1898 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1900 if (!real_identical (&whole_part
, &cint
))
1903 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1906 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1908 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1910 /* Calculate the integer part of the exponent. */
1913 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1922 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1923 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1929 fprintf (dump_file
, "1.0 / (");
1930 dump_integer_part (dump_file
, "x", n
);
1932 fprintf (dump_file
, " * ");
1933 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1934 fprintf (dump_file
, ")");
1938 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1939 fprintf (dump_file
, " / (");
1940 dump_integer_part (dump_file
, "x", n
);
1941 fprintf (dump_file
, ")");
1946 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1948 fprintf (dump_file
, " * ");
1949 dump_integer_part (dump_file
, "x", n
);
1952 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1956 tree fract_res
= NULL_TREE
;
1959 /* Calculate the fractional part of the exponent. */
1960 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1962 if (synth_info
.factors
[i
])
1964 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1967 fract_res
= sqrt_chain
;
1970 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1971 fract_res
, sqrt_chain
);
1975 tree res
= NULL_TREE
;
1982 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1983 fract_res
, integer_res
);
1987 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1988 build_real (type
, dconst1
), res
);
1992 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1993 fract_res
, integer_res
);
1997 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1998 fract_res
, integer_res
);
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */
2008 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
2009 tree arg0
, tree arg1
)
2011 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
2012 REAL_VALUE_TYPE c2
, dconst3
;
2014 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
2016 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
2017 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
2019 dconst1_4
= dconst1
;
2020 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
2024 if (TREE_CODE (arg1
) != REAL_CST
)
2027 /* Don't perform the operation if flag_signaling_nans is on
2028 and the operand is a signaling NaN. */
2029 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
2030 && ((TREE_CODE (arg0
) == REAL_CST
2031 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
2032 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
2035 /* If the exponent is equivalent to an integer, expand to an optimal
2036 multiplication sequence when profitable. */
2037 c
= TREE_REAL_CST (arg1
);
2038 n
= real_to_integer (&c
);
2039 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2040 c_is_int
= real_identical (&c
, &cint
);
2043 && ((n
>= -1 && n
<= 2)
2044 || (flag_unsafe_math_optimizations
2046 && powi_cost (n
) <= POWI_MAX_MULTS
)))
2047 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
2049 /* Attempt various optimizations using sqrt and cbrt. */
2050 type
= TREE_TYPE (arg0
);
2051 mode
= TYPE_MODE (type
);
2052 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
2058 && real_equal (&c
, &dconsthalf
)
2059 && !HONOR_SIGNED_ZEROS (mode
))
2060 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2062 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
2064 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2065 optimizations since 1./3. is not exactly representable. If x
2066 is negative and finite, the correct value of pow(x,1./3.) is
2067 a NaN with the "invalid" exception raised, because the value
2068 of 1./3. actually has an even denominator. The correct value
2069 of cbrt(x) is a negative real value. */
2070 cbrtfn
= mathfn_built_in (type
, BUILT_IN_CBRT
);
2071 dconst1_3
= real_value_truncate (mode
, dconst_third ());
2073 if (flag_unsafe_math_optimizations
2075 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2076 && real_equal (&c
, &dconst1_3
))
2077 return build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
/* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
   if we don't have a hardware sqrt insn.  */
2081 dconst1_6
= dconst1_3
;
2082 SET_REAL_EXP (&dconst1_6
, REAL_EXP (&dconst1_6
) - 1);
2084 if (flag_unsafe_math_optimizations
2087 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2090 && real_equal (&c
, &dconst1_6
))
2093 sqrt_arg0
= build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2096 return build_and_insert_call (gsi
, loc
, cbrtfn
, sqrt_arg0
);
/* Attempt to expand the POW as a product of square root chains.
   Expand the 0.25 case even when optimising for size.  */
2102 if (flag_unsafe_math_optimizations
2105 && (speed_p
|| real_equal (&c
, &dconst1_4
))
2106 && !HONOR_SIGNED_ZEROS (mode
))
2108 unsigned int max_depth
= speed_p
2109 ? param_max_pow_sqrt_depth
2112 tree expand_with_sqrts
2113 = expand_pow_as_sqrts (gsi
, loc
, arg0
, arg1
, max_depth
);
2115 if (expand_with_sqrts
)
2116 return expand_with_sqrts
;
2119 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst2
);
2120 n
= real_to_integer (&c2
);
2121 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2122 c2_is_int
= real_identical (&c2
, &cint
);
/* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

   Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
   different from pow(x, 1./3.) due to rounding and behavior with
   negative x, we need to constrain this transformation to unsafe
   math and positive x or finite math.  */
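/* For example (illustrative only), with n = 5 this turns
     pow (x, 5./3.)
   into
     powi (x, 1) * powi (cbrt (x), 2)
   i.e. x * (cbrt (x) * cbrt (x)), avoiding the libm pow call.  */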
2133 real_from_integer (&dconst3
, VOIDmode
, 3, SIGNED
);
2134 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst3
);
2135 real_round (&c2
, mode
, &c2
);
2136 n
= real_to_integer (&c2
);
2137 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2138 real_arithmetic (&c2
, RDIV_EXPR
, &cint
, &dconst3
);
2139 real_convert (&c2
, mode
, &c2
);
2141 if (flag_unsafe_math_optimizations
2143 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2144 && real_identical (&c2
, &c
)
2146 && optimize_function_for_speed_p (cfun
)
2147 && powi_cost (n
/ 3) <= POWI_MAX_MULTS
)
2149 tree powi_x_ndiv3
= NULL_TREE
;
/* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
   possible or profitable, give up.  Skip the degenerate case when
   abs(n) < 3, where the result is always 1.  */
2154 if (absu_hwi (n
) >= 3)
2156 powi_x_ndiv3
= gimple_expand_builtin_powi (gsi
, loc
, arg0
,
/* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
   as that creates an unnecessary variable.  Instead, just produce
   either cbrt(x) or cbrt(x) * cbrt(x).  */
2165 cbrt_x
= build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2167 if (absu_hwi (n
) % 3 == 1)
2168 powi_cbrt_x
= cbrt_x
;
2170 powi_cbrt_x
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
/* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2174 if (absu_hwi (n
) < 3)
2175 result
= powi_cbrt_x
;
2177 result
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2178 powi_x_ndiv3
, powi_cbrt_x
);
/* If n is negative, reciprocate the result.  */
2182 result
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
2183 build_real (type
, dconst1
), result
);
/* No optimizations succeeded.  */
/* ARG is the argument to a cabs builtin call in GSI with location info
   LOC.  Create a sequence of statements prior to GSI that calculates
   sqrt(R*R + I*I), where R and I are the real and imaginary components
   of ARG, respectively.  Return an expression holding the result.  */
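/* Illustrative source-level sketch of the expansion (assuming unsafe math
   optimizations and a hardware sqrt):

     double r = cabs (z);

   becomes roughly

     double re = __real__ z, im = __imag__ z;
     double r = sqrt (re * re + im * im);  */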
2198 gimple_expand_builtin_cabs (gimple_stmt_iterator
*gsi
, location_t loc
, tree arg
)
2200 tree real_part
, imag_part
, addend1
, addend2
, sum
, result
;
2201 tree type
= TREE_TYPE (TREE_TYPE (arg
));
2202 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2203 machine_mode mode
= TYPE_MODE (type
);
2205 if (!flag_unsafe_math_optimizations
2206 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi
)))
2208 || optab_handler (sqrt_optab
, mode
) == CODE_FOR_nothing
)
2211 real_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2212 REALPART_EXPR
, arg
);
2213 addend1
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2214 real_part
, real_part
);
2215 imag_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2216 IMAGPART_EXPR
, arg
);
2217 addend2
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2218 imag_part
, imag_part
);
2219 sum
= build_and_insert_binop (gsi
, loc
, "cabs", PLUS_EXPR
, addend1
, addend2
);
2220 result
= build_and_insert_call (gsi
, loc
, sqrtfn
, sum
);
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  */
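/* Rough sketch of the powi expansion mentioned above (illustrative):
   powi (x, 5) with a constant exponent is rewritten into a chain of
   multiplies instead of a call, e.g.

     t2 = x * x;
     t4 = t2 * t2;
     res = t4 * x;

   three multiplies rather than the four of the naive product; the exact
   chain is chosen via powi_cost and gimple_expand_builtin_powi.  */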
2231 const pass_data pass_data_cse_sincos
=
2233 GIMPLE_PASS
, /* type */
2234 "sincos", /* name */
2235 OPTGROUP_NONE
, /* optinfo_flags */
2236 TV_TREE_SINCOS
, /* tv_id */
2237 PROP_ssa
, /* properties_required */
2238 PROP_gimple_opt_math
, /* properties_provided */
2239 0, /* properties_destroyed */
2240 0, /* todo_flags_start */
2241 TODO_update_ssa
, /* todo_flags_finish */
2244 class pass_cse_sincos
: public gimple_opt_pass
2247 pass_cse_sincos (gcc::context
*ctxt
)
2248 : gimple_opt_pass (pass_data_cse_sincos
, ctxt
)
2251 /* opt_pass methods: */
2252 virtual bool gate (function
*)
2254 /* We no longer require either sincos or cexp, since powi expansion
2255 piggybacks on this pass. */
2259 virtual unsigned int execute (function
*);
2261 }; // class pass_cse_sincos
2264 pass_cse_sincos::execute (function
*fun
)
2267 bool cfg_changed
= false;
2269 calculate_dominance_info (CDI_DOMINATORS
);
2270 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2272 FOR_EACH_BB_FN (bb
, fun
)
2274 gimple_stmt_iterator gsi
;
2275 bool cleanup_eh
= false;
2277 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2279 gimple
*stmt
= gsi_stmt (gsi
);
/* Only the last stmt in a bb could throw, no need to call
   gimple_purge_dead_eh_edges if we change something in the middle
   of a basic block.  */
2286 if (is_gimple_call (stmt
)
2287 && gimple_call_lhs (stmt
))
2289 tree arg
, arg0
, arg1
, result
;
2293 switch (gimple_call_combined_fn (stmt
))
2298 arg
= gimple_call_arg (stmt
, 0);
/* Make sure we have either sincos or cexp.  */
2300 if (!targetm
.libc_has_function (function_c99_math_complex
,
2302 && !targetm
.libc_has_function (function_sincos
,
2306 if (TREE_CODE (arg
) == SSA_NAME
)
2307 cfg_changed
|= execute_cse_sincos_1 (arg
);
2311 arg0
= gimple_call_arg (stmt
, 0);
2312 arg1
= gimple_call_arg (stmt
, 1);
2314 loc
= gimple_location (stmt
);
2315 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2319 tree lhs
= gimple_get_lhs (stmt
);
2320 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2321 gimple_set_location (new_stmt
, loc
);
2322 unlink_stmt_vdef (stmt
);
2323 gsi_replace (&gsi
, new_stmt
, true);
2325 if (gimple_vdef (stmt
))
2326 release_ssa_name (gimple_vdef (stmt
));
2331 arg0
= gimple_call_arg (stmt
, 0);
2332 arg1
= gimple_call_arg (stmt
, 1);
2333 loc
= gimple_location (stmt
);
2335 if (real_minus_onep (arg0
))
2337 tree t0
, t1
, cond
, one
, minus_one
;
2340 t0
= TREE_TYPE (arg0
);
2341 t1
= TREE_TYPE (arg1
);
2342 one
= build_real (t0
, dconst1
);
2343 minus_one
= build_real (t0
, dconstm1
);
2345 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2346 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2347 arg1
, build_int_cst (t1
, 1));
2348 gimple_set_location (stmt
, loc
);
2349 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2351 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2352 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2354 gimple_set_location (stmt
, loc
);
2355 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2359 if (!tree_fits_shwi_p (arg1
))
2362 n
= tree_to_shwi (arg1
);
2363 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2368 tree lhs
= gimple_get_lhs (stmt
);
2369 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2370 gimple_set_location (new_stmt
, loc
);
2371 unlink_stmt_vdef (stmt
);
2372 gsi_replace (&gsi
, new_stmt
, true);
2374 if (gimple_vdef (stmt
))
2375 release_ssa_name (gimple_vdef (stmt
));
2380 arg0
= gimple_call_arg (stmt
, 0);
2381 loc
= gimple_location (stmt
);
2382 result
= gimple_expand_builtin_cabs (&gsi
, loc
, arg0
);
2386 tree lhs
= gimple_get_lhs (stmt
);
2387 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2388 gimple_set_location (new_stmt
, loc
);
2389 unlink_stmt_vdef (stmt
);
2390 gsi_replace (&gsi
, new_stmt
, true);
2392 if (gimple_vdef (stmt
))
2393 release_ssa_name (gimple_vdef (stmt
));
2402 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2405 statistics_counter_event (fun
, "sincos statements inserted",
2406 sincos_stats
.inserted
);
2407 statistics_counter_event (fun
, "conv statements removed",
2408 sincos_stats
.conv_removed
);
2410 return cfg_changed
? TODO_cleanup_cfg
: 0;
2416 make_pass_cse_sincos (gcc::context
*ctxt
)
2418 return new pass_cse_sincos (ctxt
);
/* Return true if stmt is a type conversion operation that can be stripped
   when used in a widening multiply operation.  */
2424 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2426 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2428 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2433 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2436 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
/* If the type of OP has the same precision as the result, then
   we can strip this conversion.  The multiply operation will be
   selected to create the correct extension as a by-product.  */
2441 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
/* We can also strip a conversion if it preserves the signed-ness of
   the operation and doesn't narrow the range.  */
2446 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
/* If the inner-most type is unsigned, then we can strip any
   intermediate widening operation.  If it's signed, then the
   intermediate widening operation must also be signed.  */
2451 if ((TYPE_UNSIGNED (inner_op_type
)
2452 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2453 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2459 return rhs_code
== FIXED_CONVERT_EXPR
;
/* Return true if RHS is a suitable operand for a widening multiplication,
   assuming a target type of TYPE.
   There are two cases:

     - RHS makes some value at least twice as wide.  Store that value
       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.

     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
       but leave *TYPE_OUT untouched.  */
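/* Illustrative example of an operand this accepts, at the source level:

     int a, b;
     long long p = (long long) a * (long long) b;

   Each (long long) cast "makes some value at least twice as wide", so
   *NEW_RHS_OUT is the narrow int value and *TYPE_OUT its int type,
   letting the multiply later become a WIDEN_MULT_EXPR.  */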
2473 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2479 if (TREE_CODE (rhs
) == SSA_NAME
)
2481 stmt
= SSA_NAME_DEF_STMT (rhs
);
2482 if (is_gimple_assign (stmt
))
2484 if (! widening_mult_conversion_strippable_p (type
, stmt
))
2488 rhs1
= gimple_assign_rhs1 (stmt
);
2490 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2492 *new_rhs_out
= rhs1
;
2501 type1
= TREE_TYPE (rhs1
);
2503 if (TREE_CODE (type1
) != TREE_CODE (type
)
2504 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2507 *new_rhs_out
= rhs1
;
2512 if (TREE_CODE (rhs
) == INTEGER_CST
)
/* Return true if STMT performs a widening multiplication, assuming the
   output type is TYPE.  If so, store the unwidened types of the operands
   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
   and *TYPE2_OUT would give the operands of the multiplication.  */
2529 is_widening_mult_p (gimple
*stmt
,
2530 tree
*type1_out
, tree
*rhs1_out
,
2531 tree
*type2_out
, tree
*rhs2_out
)
2533 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2535 if (TREE_CODE (type
) == INTEGER_TYPE
)
2537 if (TYPE_OVERFLOW_TRAPS (type
))
2540 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2543 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2547 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2551 if (*type1_out
== NULL
)
2553 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2555 *type1_out
= *type2_out
;
2558 if (*type2_out
== NULL
)
2560 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2562 *type2_out
= *type1_out
;
/* Ensure that the larger of the two operands comes first.  */
2566 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2568 std::swap (*type1_out
, *type2_out
);
2569 std::swap (*rhs1_out
, *rhs2_out
);
/* Check to see if the CALL statement is an invocation of copysign
   with 1. being the first argument.  */
2578 is_copysign_call_with_1 (gimple
*call
)
2580 gcall
*c
= dyn_cast
<gcall
*> (call
);
2584 enum combined_fn code
= gimple_call_combined_fn (c
);
2586 if (code
== CFN_LAST
)
2589 if (builtin_fn_p (code
))
2591 switch (as_builtin_fn (code
))
2593 CASE_FLT_FN (BUILT_IN_COPYSIGN
):
2594 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN
):
2595 return real_onep (gimple_call_arg (c
, 0));
2601 if (internal_fn_p (code
))
2603 switch (as_internal_fn (code
))
2606 return real_onep (gimple_call_arg (c
, 0));
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined; if the
   pattern is not a xorsign pattern or if expansion fails, FALSE is
   returned, otherwise TRUE is returned.  */
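/* Source-level sketch of the pattern being matched (illustrative):

     double f (double x, double y)
     {
       return x * __builtin_copysign (1.0, y);
     }

   which, when the xorsign optab exists, becomes a single internal
   .XORSIGN call that merely applies the sign bit of y to x.  */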
2620 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2622 tree treeop0
, treeop1
, lhs
, type
;
2623 location_t loc
= gimple_location (stmt
);
2624 lhs
= gimple_assign_lhs (stmt
);
2625 treeop0
= gimple_assign_rhs1 (stmt
);
2626 treeop1
= gimple_assign_rhs2 (stmt
);
2627 type
= TREE_TYPE (lhs
);
2628 machine_mode mode
= TYPE_MODE (type
);
2630 if (HONOR_SNANS (type
))
2633 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2635 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2636 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2638 call0
= SSA_NAME_DEF_STMT (treeop1
);
2639 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2644 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2647 gcall
*c
= as_a
<gcall
*> (call0
);
2648 treeop0
= gimple_call_arg (c
, 1);
2651 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2652 gimple_set_lhs (call_stmt
, lhs
);
2653 gimple_set_location (call_stmt
, loc
);
2654 gsi_replace (gsi
, call_stmt
, true);
/* Process a single gimple statement STMT, which has a MULT_EXPR as
   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
   value is true iff we converted the statement.  */
2666 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2668 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2669 enum insn_code handler
;
2670 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2672 int actual_precision
;
2673 location_t loc
= gimple_location (stmt
);
2674 bool from_unsigned1
, from_unsigned2
;
2676 lhs
= gimple_assign_lhs (stmt
);
2677 type
= TREE_TYPE (lhs
);
2678 if (TREE_CODE (type
) != INTEGER_TYPE
)
2681 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
2684 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2685 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2686 if (to_mode
== from_mode
)
2689 from_unsigned1
= TYPE_UNSIGNED (type1
);
2690 from_unsigned2
= TYPE_UNSIGNED (type2
);
2692 if (from_unsigned1
&& from_unsigned2
)
2693 op
= umul_widen_optab
;
2694 else if (!from_unsigned1
&& !from_unsigned2
)
2695 op
= smul_widen_optab
;
2697 op
= usmul_widen_optab
;
2699 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2702 if (handler
== CODE_FOR_nothing
)
2704 if (op
!= smul_widen_optab
)
/* We can use a signed multiply with unsigned types as long as
   there is a wider mode to use, or it is the smaller of the two
   types that is unsigned.  Note that type1 >= type2, always.  */
2709 if ((TYPE_UNSIGNED (type1
)
2710 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2711 || (TYPE_UNSIGNED (type2
)
2712 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2714 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2715 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2719 op
= smul_widen_optab
;
2720 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2724 if (handler
== CODE_FOR_nothing
)
2727 from_unsigned1
= from_unsigned2
= false;
/* Expand can synthesize smul_widen_optab if the target
   supports umul_widen_optab.  */
2733 op
= umul_widen_optab
;
2734 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2737 if (handler
== CODE_FOR_nothing
)
/* Ensure that the inputs to the handler are in the correct precision
   for the opcode.  This will be the full mode size.  */
2744 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2745 if (2 * actual_precision
> TYPE_PRECISION (type
))
2747 if (actual_precision
!= TYPE_PRECISION (type1
)
2748 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2749 rhs1
= build_and_insert_cast (gsi
, loc
,
2750 build_nonstandard_integer_type
2751 (actual_precision
, from_unsigned1
), rhs1
);
2752 if (actual_precision
!= TYPE_PRECISION (type2
)
2753 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2754 rhs2
= build_and_insert_cast (gsi
, loc
,
2755 build_nonstandard_integer_type
2756 (actual_precision
, from_unsigned2
), rhs2
);
/* Handle constants.  */
2759 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2760 rhs1
= fold_convert (type1
, rhs1
);
2761 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2762 rhs2
= fold_convert (type2
, rhs2
);
2764 gimple_assign_set_rhs1 (stmt
, rhs1
);
2765 gimple_assign_set_rhs2 (stmt
, rhs2
);
2766 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2768 widen_mul_stats
.widen_mults_inserted
++;
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */
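/* Illustrative example of the statement shapes this targets:

     int a, b;
     long long acc;
     acc = acc + (long long) a * (long long) b;

   On targets with a widening multiply-accumulate instruction the
   PLUS_EXPR above is rewritten into
     acc = WIDEN_MULT_PLUS_EXPR <a, b, acc>;
   and a MINUS_EXPR similarly becomes WIDEN_MULT_MINUS_EXPR.  */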
2779 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2780 enum tree_code code
)
2782 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2783 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2784 tree type
, type1
, type2
, optype
;
2785 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2786 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2788 enum tree_code wmult_code
;
2789 enum insn_code handler
;
2790 scalar_mode to_mode
, from_mode
, actual_mode
;
2791 location_t loc
= gimple_location (stmt
);
2792 int actual_precision
;
2793 bool from_unsigned1
, from_unsigned2
;
2795 lhs
= gimple_assign_lhs (stmt
);
2796 type
= TREE_TYPE (lhs
);
2797 if (TREE_CODE (type
) != INTEGER_TYPE
2798 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2801 if (code
== MINUS_EXPR
)
2802 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2804 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2806 rhs1
= gimple_assign_rhs1 (stmt
);
2807 rhs2
= gimple_assign_rhs2 (stmt
);
2809 if (TREE_CODE (rhs1
) == SSA_NAME
)
2811 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2812 if (is_gimple_assign (rhs1_stmt
))
2813 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2816 if (TREE_CODE (rhs2
) == SSA_NAME
)
2818 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2819 if (is_gimple_assign (rhs2_stmt
))
2820 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
/* Allow for one conversion statement between the multiply
   and the addition/subtraction statement.  If there is more than
   one conversion then we assume they would invalidate this
   transformation.  If that's not the case then they should have
   been folded before now.  */
2828 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2830 conv1_stmt
= rhs1_stmt
;
2831 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2832 if (TREE_CODE (rhs1
) == SSA_NAME
)
2834 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2835 if (is_gimple_assign (rhs1_stmt
))
2836 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2841 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2843 conv2_stmt
= rhs2_stmt
;
2844 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2845 if (TREE_CODE (rhs2
) == SSA_NAME
)
2847 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2848 if (is_gimple_assign (rhs2_stmt
))
2849 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
/* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
   is_widening_mult_p, but we still need the rhs returns.

   It might also appear that it would be sufficient to use the existing
   operands of the widening multiply, but that would limit the choice of
   multiply-and-accumulate instructions.

   If the widened-multiplication result has more than one use, it is
   probably wiser not to do the conversion.  Also restrict this operation
   to a single basic block to avoid moving the multiply to a different
   block with a higher execution frequency.  */
2866 if (code
== PLUS_EXPR
2867 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2869 if (!has_single_use (rhs1
)
2870 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2871 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2872 &type2
, &mult_rhs2
))
2875 conv_stmt
= conv1_stmt
;
2877 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
2879 if (!has_single_use (rhs2
)
2880 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
2881 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
2882 &type2
, &mult_rhs2
))
2885 conv_stmt
= conv2_stmt
;
2890 to_mode
= SCALAR_TYPE_MODE (type
);
2891 from_mode
= SCALAR_TYPE_MODE (type1
);
2892 if (to_mode
== from_mode
)
2895 from_unsigned1
= TYPE_UNSIGNED (type1
);
2896 from_unsigned2
= TYPE_UNSIGNED (type2
);
/* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2900 if (from_unsigned1
!= from_unsigned2
)
2902 if (!INTEGRAL_TYPE_P (type
))
/* We can use a signed multiply with unsigned types as long as
   there is a wider mode to use, or it is the smaller of the two
   types that is unsigned.  Note that type1 >= type2, always.  */
2908 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2910 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2912 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2913 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
2917 from_unsigned1
= from_unsigned2
= false;
2918 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
/* If there was a conversion between the multiply and addition
   then we need to make sure it fits a multiply-and-accumulate.
   There should be a single mode change which does not change the
   value.  */

/* We use the original, unmodified data types for this.  */
2929 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
2930 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
2931 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
2932 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
2934 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
/* Conversion is a truncate.  */
2937 if (TYPE_PRECISION (to_type
) < data_size
)
2940 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
/* Conversion is an extend.  Check it's the right sort.  */
2943 if (TYPE_UNSIGNED (from_type
) != is_unsigned
2944 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
/* else convert is a no-op for our purposes.  */
/* Verify that the machine can perform a widening multiply
   accumulate in this mode/signedness combination, otherwise
   this transformation is likely to pessimize code.  */
2953 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
2954 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
2955 from_mode
, &actual_mode
);
2957 if (handler
== CODE_FOR_nothing
)
/* Ensure that the inputs to the handler are in the correct precision
   for the opcode.  This will be the full mode size.  */
2962 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2963 if (actual_precision
!= TYPE_PRECISION (type1
)
2964 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2965 mult_rhs1
= build_and_insert_cast (gsi
, loc
,
2966 build_nonstandard_integer_type
2967 (actual_precision
, from_unsigned1
),
2969 if (actual_precision
!= TYPE_PRECISION (type2
)
2970 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2971 mult_rhs2
= build_and_insert_cast (gsi
, loc
,
2972 build_nonstandard_integer_type
2973 (actual_precision
, from_unsigned2
),
2976 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
2977 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
/* Handle constants.  */
2980 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
2981 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
2982 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
2983 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
2985 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
2987 update_stmt (gsi_stmt (*gsi
));
2988 widen_mul_stats
.maccs_inserted
++;
/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
   OP2 and which we know is used in statements that can be, together with the
   multiplication, converted to FMAs, perform the transformation.  */
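/* Sketch of the rewrite performed here (illustrative GIMPLE, SSA names
   made up for the example):

     tmp_1 = a_2 * b_3;
     x_4 = tmp_1 + c_5;
     y_6 = d_7 - tmp_1;

   becomes, once every use of tmp_1 is known to be convertible,

     x_4 = .FMA (a_2, b_3, c_5);
     y_6 = .FNMA (a_2, b_3, d_7);

   (the FNMA form falls out of fold_stmt following SSA edges), after
   which tmp_1 and its defining multiply are dead.  */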
2997 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
2999 tree type
= TREE_TYPE (mul_result
);
3001 imm_use_iterator imm_iter
;
3004 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
3006 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
3007 tree addop
, mulop1
= op1
, result
= mul_result
;
3008 bool negate_p
= false;
3009 gimple_seq seq
= NULL
;
3011 if (is_gimple_debug (use_stmt
))
3014 if (is_gimple_assign (use_stmt
)
3015 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3017 result
= gimple_assign_lhs (use_stmt
);
3018 use_operand_p use_p
;
3019 gimple
*neguse_stmt
;
3020 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
3021 gsi_remove (&gsi
, true);
3022 release_defs (use_stmt
);
3024 use_stmt
= neguse_stmt
;
3025 gsi
= gsi_for_stmt (use_stmt
);
3029 tree cond
, else_value
, ops
[3];
3031 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
3034 addop
= ops
[0] == result
? ops
[1] : ops
[0];
3036 if (code
== MINUS_EXPR
)
3038 if (ops
[0] == result
)
/* a * b - c -> a * b + (-c)  */
3040 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
/* a - b * c -> (-b) * c + a  */
3043 negate_p
= !negate_p
;
3047 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
3050 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3053 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
3054 op2
, addop
, else_value
);
3056 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
3057 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
3058 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
3060 gsi_replace (&gsi
, fma_stmt
, true);
/* Follow all SSA edges so that we generate FMS, FNMA and FNMS
   regardless of where the negation occurs.  */
3063 gimple
*orig_stmt
= gsi_stmt (gsi
);
3064 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3066 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
3068 update_stmt (gsi_stmt (gsi
));
3071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3073 fprintf (dump_file
, "Generated FMA ");
3074 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3075 fprintf (dump_file
, "\n");
/* If the FMA result is negated in a single use, fold the negation
   too.  */
3080 orig_stmt
= gsi_stmt (gsi
);
3081 use_operand_p use_p
;
3083 if (is_gimple_call (orig_stmt
)
3084 && gimple_call_internal_p (orig_stmt
)
3085 && gimple_call_lhs (orig_stmt
)
3086 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
3087 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
3088 && is_gimple_assign (neg_stmt
)
3089 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
3090 && !stmt_could_throw_p (cfun
, neg_stmt
))
3092 gsi
= gsi_for_stmt (neg_stmt
);
3093 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3095 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
3097 update_stmt (gsi_stmt (gsi
));
3098 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3100 fprintf (dump_file
, "Folded FMA negation ");
3101 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3102 fprintf (dump_file
, "\n");
3107 widen_mul_stats
.fmas_inserted
++;
/* Data necessary to perform the actual transformation from a multiplication
   and an addition to an FMA after the decision that it should be done has
   been taken, and to then delete the multiplication statement from the
   function IL.  */
3115 struct fma_transformation_info
/* Structure containing the current state of FMA deferring, i.e. whether we
   are deferring, whether to continue deferring, and all data necessary to
   come back and perform all deferred transformations.  */
3127 class fma_deferring_state
/* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
   do any deferring.  */
3133 fma_deferring_state (bool perform_deferring
)
3134 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3135 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
/* List of FMA candidates for which the transformation has been determined
   to be possible but which, at this point in the BB analysis, we do not
   yet consider profitable.  */
3140 auto_vec
<fma_transformation_info
, 8> m_candidates
;
/* Set of results of multiplication that are part of an already deferred FMA
   candidate.  */
3144 hash_set
<tree
> m_mul_result_set
;
/* The PHI that supposedly feeds back the result of a FMA to another over
   a loop boundary.  */
3148 gphi
*m_initial_phi
;
/* Result of the last produced FMA candidate or NULL if there has not been
   one yet.  */

/* If true, deferring might still be profitable.  If false, transform all
   candidates and no longer defer.  */
/* Transform all deferred FMA candidates and mark STATE as no longer
   deferring.  */
3163 cancel_fma_deferring (fma_deferring_state
*state
)
3165 if (!state
->m_deferring_p
)
3168 for (unsigned i
= 0; i
< state
->m_candidates
.length (); i
++)
3170 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3171 fprintf (dump_file
, "Generating deferred FMA\n");
3173 const fma_transformation_info
&fti
= state
->m_candidates
[i
];
3174 convert_mult_to_fma_1 (fti
.mul_result
, fti
.op1
, fti
.op2
);
3176 gimple_stmt_iterator gsi
= gsi_for_stmt (fti
.mul_stmt
);
3177 gsi_remove (&gsi
, true);
3178 release_defs (fti
.mul_stmt
);
3180 state
->m_deferring_p
= false;
/* If OP is an SSA name defined by a PHI node, return the PHI statement.
   Otherwise return NULL.  */
3187 result_of_phi (tree op
)
3189 if (TREE_CODE (op
) != SSA_NAME
)
3192 return dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (op
));
/* After processing statements of a BB and recording STATE, return true if the
   initial phi is fed by the last FMA candidate result or one such result from
   previously processed BBs marked in LAST_RESULT_SET.  */
3200 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3201 hash_set
<tree
> *last_result_set
)
3205 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3207 tree t
= USE_FROM_PTR (use
);
3208 if (t
== state
->m_last_result
3209 || last_result_set
->contains (t
))
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.
   If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
   on MUL_COND, otherwise it is unconditional.

   If STATE indicates that we are deferring FMA transformation, that means
   that we do not produce FMAs for basic blocks which look like:

     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
     ...
     accumulator_66 = _65 + accumulator_111;

   or its unrolled version, i.e. with several FMA candidates that feed the
   result of one into the addend of another.  Instead, we add them to a list
   in STATE and if we later discover an FMA candidate that is not part of
   such a chain, we go back and perform all deferred past candidates.  */
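/* For reference, the four fused forms that can ultimately result from one
   multiplication combined with its negations (illustrative):

     a * b + c    ->  .FMA  (a, b, c)
     a * b - c    ->  .FMS  (a, b, c)
     -(a * b) + c ->  .FNMA (a, b, c)
     -(a * b) - c ->  .FNMS (a, b, c)

   The deferring machinery only decides *when* to commit to these, so that
   loop-carried chains like the accumulator example above are not turned
   into a string of dependent FMAs prematurely.  */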
3236 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3237 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
)
3239 tree mul_result
= gimple_get_lhs (mul_stmt
);
/* If there isn't a LHS then this can't be an FMA.  There can be no LHS
   if the statement was left just for the side-effects.  */
3244 tree type
= TREE_TYPE (mul_result
);
3245 gimple
*use_stmt
, *neguse_stmt
;
3246 use_operand_p use_p
;
3247 imm_use_iterator imm_iter
;
3249 if (FLOAT_TYPE_P (type
)
3250 && flag_fp_contract_mode
== FP_CONTRACT_OFF
)
/* We don't want to do bitfield reduction ops.  */
3254 if (INTEGRAL_TYPE_P (type
)
3255 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
/* If the target doesn't support it, don't generate it.  We assume that
   if fma isn't available then fms, fnma or fnms are not either.  */
3260 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3261 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
/* If the multiplication has zero uses, it is kept around probably because
   of -fnon-call-exceptions.  Don't optimize it away in that case, that is
   DCE's job.  */
3267 if (has_zero_uses (mul_result
))
3271 = (state
->m_deferring_p
3272 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type
)),
3273 param_avoid_fma_max_bits
));
3274 bool defer
= check_defer
;
3275 bool seen_negate_p
= false;
/* Make sure that the multiplication statement becomes dead after
   the transformation, thus that all uses are transformed to FMAs.
   This means we assume that an FMA operation has the same cost
   as an addition.  */
3280 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3282 tree result
= mul_result
;
3283 bool negate_p
= false;
3285 use_stmt
= USE_STMT (use_p
);
3287 if (is_gimple_debug (use_stmt
))
/* For now restrict this operation to single basic blocks.  In theory
   we would want to support sinking the multiplication in
     ...
   to form a fma in the then block and sink the multiplication to the
   else block.  */
3299 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
/* A negate on the multiplication leads to FNMA.  */
3303 if (is_gimple_assign (use_stmt
)
3304 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
/* If (due to earlier missed optimizations) we have two
   negates of the same value, treat them as equivalent
   to a single negate with multiple uses.  */
3315 result
= gimple_assign_lhs (use_stmt
);
/* Make sure the negate statement becomes dead with this
   single transformation.  */
3319 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3320 &use_p
, &neguse_stmt
))
/* Make sure the multiplication isn't also used on that stmt.  */
3324 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3325 if (USE_FROM_PTR (usep
) == mul_result
)
3329 use_stmt
= neguse_stmt
;
3330 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3333 negate_p
= seen_negate_p
= true;
3336 tree cond
, else_value
, ops
[3];
3338 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3345 if (ops
[1] == result
)
3346 negate_p
= !negate_p
;
/* FMA can only be formed from PLUS and MINUS.  */
3355 if (mul_cond
&& cond
!= mul_cond
)
3360 if (cond
== result
|| else_value
== result
)
3362 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
, opt_type
))
/* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
   we'll visit later, we might be able to get a more profitable
   match with fnma.
   OTOH, if we don't, a negate / fma pair has likely lower latency
   than a mult / subtract pair.  */
3371 if (code
== MINUS_EXPR
3374 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3375 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3376 && TREE_CODE (ops
[1]) == SSA_NAME
3377 && has_single_use (ops
[1]))
3379 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3380 if (is_gimple_assign (stmt2
)
3381 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
/* We can't handle a * b + a * b.  */
3386 if (ops
[0] == ops
[1])
/* If deferring, make sure we are not looking at an instruction that
   wouldn't have existed if we were not.  */
3390 if (state
->m_deferring_p
3391 && (state
->m_mul_result_set
.contains (ops
[0])
3392 || state
->m_mul_result_set
.contains (ops
[1])))
3397 tree use_lhs
= gimple_get_lhs (use_stmt
);
3398 if (state
->m_last_result
)
3400 if (ops
[1] == state
->m_last_result
3401 || ops
[0] == state
->m_last_result
)
3408 gcc_checking_assert (!state
->m_initial_phi
);
3410 if (ops
[0] == result
)
3411 phi
= result_of_phi (ops
[1]);
3414 gcc_assert (ops
[1] == result
);
3415 phi
= result_of_phi (ops
[0]);
3420 state
->m_initial_phi
= phi
;
3427 state
->m_last_result
= use_lhs
;
3428 check_defer
= false;
/* While it is possible to validate whether or not the exact form that
   we've recognized is available in the backend, the assumption is that
   if the deferring logic above did not trigger, the transformation is
   never a loss.  For instance, suppose the target only has the plain FMA
   pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
   MUL+SUB for FMA+NEG, which is still two operations.  Consider
   -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
   form the two NEGs are independent and could be run in parallel.  */
3445 fma_transformation_info fti
;
3446 fti
.mul_stmt
= mul_stmt
;
3447 fti
.mul_result
= mul_result
;
3450 state
->m_candidates
.safe_push (fti
);
3451 state
->m_mul_result_set
.add (mul_result
);
3453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3455 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3456 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3457 fprintf (dump_file
, "\n");
3464 if (state
->m_deferring_p
)
3465 cancel_fma_deferring (state
);
3466 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
/* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
   a check for non-zero like:

     _1 = x_4(D) * y_5(D);
     ...
       goto <bb 3>; [50.00%]
     ...
       goto <bb 4>; [50.00%]

     <bb 3> [local count: 536870913]:
     ...

     <bb 4> [local count: 1073741824]:
     # iftmp.0_3 = PHI <_10(3), 0(2)>

   then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
   optimize the x_4(D) != 0 condition to 1.  */
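/* At the source level the guarded pattern typically comes from something
   like (illustrative, names made up):

     unsigned long r = x * y;
     bool ovf = x != 0 && r / x != y;

   Once the multiply/divide pair is replaced by .MUL_OVERFLOW, the
   "x != 0" guard no longer protects a division and can be folded to a
   constant, which is what this helper detects and performs.  */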
3492 maybe_optimize_guarding_check (vec
<gimple
*> &mul_stmts
, gimple
*cond_stmt
,
3493 gimple
*div_stmt
, bool *cfg_changed
)
3495 basic_block bb
= gimple_bb (cond_stmt
);
3496 if (gimple_bb (div_stmt
) != bb
|| !single_pred_p (bb
))
3498 edge pred_edge
= single_pred_edge (bb
);
3499 basic_block pred_bb
= pred_edge
->src
;
3500 if (EDGE_COUNT (pred_bb
->succs
) != 2)
3502 edge other_edge
= EDGE_SUCC (pred_bb
, EDGE_SUCC (pred_bb
, 0) == pred_edge
);
3503 edge other_succ_edge
= NULL
;
3504 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3506 if (EDGE_COUNT (bb
->succs
) != 2)
3508 other_succ_edge
= EDGE_SUCC (bb
, 0);
3509 if (gimple_cond_code (cond_stmt
) == NE_EXPR
)
3511 if (other_succ_edge
->flags
& EDGE_TRUE_VALUE
)
3512 other_succ_edge
= EDGE_SUCC (bb
, 1);
3514 else if (other_succ_edge
->flags
& EDGE_FALSE_VALUE
)
3515 other_succ_edge
= EDGE_SUCC (bb
, 0);
3516 if (other_edge
->dest
!= other_succ_edge
->dest
)
3519 else if (!single_succ_p (bb
) || other_edge
->dest
!= single_succ (bb
))
3521 gimple
*zero_cond
= last_stmt (pred_bb
);
3522 if (zero_cond
== NULL
3523 || gimple_code (zero_cond
) != GIMPLE_COND
3524 || (gimple_cond_code (zero_cond
)
3525 != ((pred_edge
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
))
3526 || !integer_zerop (gimple_cond_rhs (zero_cond
)))
3528 tree zero_cond_lhs
= gimple_cond_lhs (zero_cond
);
3529 if (TREE_CODE (zero_cond_lhs
) != SSA_NAME
)
3531 if (gimple_assign_rhs2 (div_stmt
) != zero_cond_lhs
)
/* Allow the divisor to be the result of a same precision cast
   from zero_cond_lhs.  */
3535 tree rhs2
= gimple_assign_rhs2 (div_stmt
);
3536 if (TREE_CODE (rhs2
) != SSA_NAME
)
3538 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3539 if (!gimple_assign_cast_p (g
)
3540 || gimple_assign_rhs1 (g
) != gimple_cond_lhs (zero_cond
)
3541 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs
))
3542 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs
))
3543 != TYPE_PRECISION (TREE_TYPE (rhs2
))))
3546 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
3547 mul_stmts
.quick_push (div_stmt
);
3548 if (is_gimple_debug (gsi_stmt (gsi
)))
3549 gsi_next_nondebug (&gsi
);
3550 unsigned cast_count
= 0;
3551 while (gsi_stmt (gsi
) != cond_stmt
)
/* If the original mul_stmt has a single use, allow it in the same bb;
   we are then looking just at __builtin_mul_overflow_p.
   Though, in that case the original mul_stmt will be replaced
   by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3560 FOR_EACH_VEC_ELT (mul_stmts
, i
, mul_stmt
)
3562 if (gsi_stmt (gsi
) == mul_stmt
)
3568 if (!ok
&& gimple_assign_cast_p (gsi_stmt (gsi
)) && ++cast_count
< 4)
3572 gsi_next_nondebug (&gsi
);
3574 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3576 basic_block succ_bb
= other_edge
->dest
;
3577 for (gphi_iterator gpi
= gsi_start_phis (succ_bb
); !gsi_end_p (gpi
);
3580 gphi
*phi
= gpi
.phi ();
3581 tree v1
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3582 tree v2
= gimple_phi_arg_def (phi
, other_succ_edge
->dest_idx
);
3583 if (!operand_equal_p (v1
, v2
, 0))
3589 tree lhs
= gimple_assign_lhs (cond_stmt
);
3590 if (!lhs
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
3592 gsi_next_nondebug (&gsi
);
3593 if (!gsi_end_p (gsi
))
3595 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3597 gimple
*cast_stmt
= gsi_stmt (gsi
);
3598 if (!gimple_assign_cast_p (cast_stmt
))
3600 tree new_lhs
= gimple_assign_lhs (cast_stmt
);
3601 gsi_next_nondebug (&gsi
);
3602 if (!gsi_end_p (gsi
)
3604 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs
))
3605 || TYPE_PRECISION (TREE_TYPE (new_lhs
)) <= 1)
3609 edge succ_edge
= single_succ_edge (bb
);
3610 basic_block succ_bb
= succ_edge
->dest
;
3611 gsi
= gsi_start_phis (succ_bb
);
3612 if (gsi_end_p (gsi
))
3614 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
3616 if (!gsi_end_p (gsi
))
3618 if (gimple_phi_arg_def (phi
, succ_edge
->dest_idx
) != lhs
)
3620 tree other_val
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3621 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3623 tree cond
= gimple_assign_rhs1 (cond_stmt
);
3624 if (TREE_CODE (cond
) == NE_EXPR
)
3626 if (!operand_equal_p (other_val
,
3627 gimple_assign_rhs3 (cond_stmt
), 0))
3630 else if (!operand_equal_p (other_val
,
3631 gimple_assign_rhs2 (cond_stmt
), 0))
3634 else if (gimple_assign_rhs_code (cond_stmt
) == NE_EXPR
)
3636 if (!integer_zerop (other_val
))
3639 else if (!integer_onep (other_val
))
3642 gcond
*zero_gcond
= as_a
<gcond
*> (zero_cond
);
3643 if (pred_edge
->flags
& EDGE_TRUE_VALUE
)
3644 gimple_cond_make_true (zero_gcond
);
3646 gimple_cond_make_false (zero_gcond
);
3647 update_stmt (zero_cond
);
3648 *cfg_changed
= true;
/* Helper function for arith_overflow_check_p.  Return true
   if VAL1 is equal to VAL2 cast to corresponding integral type
   with other signedness or vice versa.  */
3656 arith_cast_equal_p (tree val1
, tree val2
)
3658 if (TREE_CODE (val1
) == INTEGER_CST
&& TREE_CODE (val2
) == INTEGER_CST
)
3659 return wi::eq_p (wi::to_wide (val1
), wi::to_wide (val2
));
3660 else if (TREE_CODE (val1
) != SSA_NAME
|| TREE_CODE (val2
) != SSA_NAME
)
3662 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1
))
3663 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1
)) == val2
)
3665 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2
))
3666 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2
)) == val1
)
/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
   for STMT and 0 otherwise.  */
3677 arith_overflow_check_p (gimple
*stmt
, gimple
*cast_stmt
, gimple
*&use_stmt
,
3678 tree maxval
, tree
*other
)
3680 enum tree_code ccode
= ERROR_MARK
;
3681 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3682 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3683 tree lhs
= gimple_assign_lhs (cast_stmt
? cast_stmt
: stmt
);
3684 tree rhs1
= gimple_assign_rhs1 (stmt
);
3685 tree rhs2
= gimple_assign_rhs2 (stmt
);
3686 tree multop
= NULL_TREE
, divlhs
= NULL_TREE
;
3687 gimple
*cur_use_stmt
= use_stmt
;
3689 if (code
== MULT_EXPR
)
3691 if (!is_gimple_assign (use_stmt
))
3693 if (gimple_assign_rhs_code (use_stmt
) != TRUNC_DIV_EXPR
)
3695 if (gimple_assign_rhs1 (use_stmt
) != lhs
)
3699 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs1
))
3701 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
))
3706 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
3708 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
, 0))
3712 if (stmt_ends_bb_p (use_stmt
))
3714 divlhs
= gimple_assign_lhs (use_stmt
);
3718 if (!single_imm_use (divlhs
, &use
, &cur_use_stmt
))
3721 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3723 ccode
= gimple_cond_code (cur_use_stmt
);
3724 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3725 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3727 else if (is_gimple_assign (cur_use_stmt
))
3729 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3731 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3732 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3733 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3735 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3737 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3738 if (COMPARISON_CLASS_P (cond
))
3740 ccode
= TREE_CODE (cond
);
3741 crhs1
= TREE_OPERAND (cond
, 0);
3742 crhs2
= TREE_OPERAND (cond
, 1);
3753 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
/* r = a + b; r > maxval or r <= maxval  */
3764 && TREE_CODE (crhs2
) == INTEGER_CST
3765 && tree_int_cst_equal (crhs2
, maxval
))
3766 return ccode
== GT_EXPR
? 1 : -1;
/* r = a - b; r > a or r <= a
   r = a + b; a > r or a <= r or b > r or b <= r.  */
3771 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3772 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3774 return ccode
== GT_EXPR
? 1 : -1;
/* r = ~a; b > r or b <= r.  */
3776 if (code
== BIT_NOT_EXPR
&& crhs2
== lhs
)
3780 return ccode
== GT_EXPR
? 1 : -1;
/* r = a - b; a < r or a >= r
   r = a + b; r < a or r >= a or r < b or r >= b.  */
3789 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3790 || (code
== PLUS_EXPR
&& crhs1
== lhs
3791 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3792 return ccode
== LT_EXPR
? 1 : -1;
/* r = ~a; r < b or r >= b.  */
3794 if (code
== BIT_NOT_EXPR
&& crhs1
== lhs
)
3798 return ccode
== LT_EXPR
? 1 : -1;
/* r = a * b; _1 = r / a; _1 == b
   r = a * b; _1 = r / b; _1 == a
   r = a * b; _1 = r / a; _1 != b
   r = a * b; _1 = r / b; _1 != a.  */
3807 if (code
== MULT_EXPR
)
3811 if ((crhs1
== divlhs
&& arith_cast_equal_p (crhs2
, multop
))
3812 || (crhs2
== divlhs
&& arith_cast_equal_p (crhs1
, multop
)))
3814 use_stmt
= cur_use_stmt
;
3815 return ccode
== NE_EXPR
? 1 : -1;
3818 else if ((crhs1
== divlhs
&& operand_equal_p (crhs2
, multop
, 0))
3819 || (crhs2
== divlhs
&& crhs1
== multop
))
3821 use_stmt
= cur_use_stmt
;
3822 return ccode
== NE_EXPR
? 1 : -1;
/* Recognize for unsigned x
     ...
   where there are other uses of x and replace it with
     _7 = .SUB_OVERFLOW (y, z);
     x = REALPART_EXPR <_7>;
     _8 = IMAGPART_EXPR <_7>;
     ...
   and similarly for addition.

   Also recognize:
     ...
   where y and z have unsigned types with maximum max
   and there are other uses of x and all of those cast x
   back to that unsigned type and again replace it with
     _7 = .ADD_OVERFLOW (y, z);
     _9 = REALPART_EXPR <_7>;
     _8 = IMAGPART_EXPR <_7>;
     ...
   and replace (utype) x with _9.

   Also recognize:
     ...
     _7 = .ADD_OVERFLOW (y, z);
     _8 = IMAGPART_EXPR <_7>;
     ...
       goto <bb 3>; [50.00%]
     ...
       goto <bb 4>; [50.00%]

     <bb 3> [local count: 536870913]:
     ...

     <bb 4> [local count: 1073741824]:
     # iftmp.0_3 = PHI <_10(3), 0(2)>
     ...
     _7 = .MUL_OVERFLOW (x, y);
     z = IMAGPART_EXPR <_7>;
     _8 = IMAGPART_EXPR <_7>;
     ...
     iftmp.0_3 = (int) _9;  */
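/* A minimal source-level example of the addition case (illustrative;
   handle_overflow is a made-up user function):

     unsigned int x = y + z;
     if (x < y)          /* overflow check */
       handle_overflow ();

   is rewritten so that the compare tests the IMAGPART_EXPR of a single
   .ADD_OVERFLOW (y, z) call instead of re-deriving the overflow from the
   wrapped sum.  */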
3886 match_arith_overflow (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
3887 enum tree_code code
, bool *cfg_changed
)
3889 tree lhs
= gimple_assign_lhs (stmt
);
3890 tree type
= TREE_TYPE (lhs
);
3891 use_operand_p use_p
;
3892 imm_use_iterator iter
;
3893 bool use_seen
= false;
3894 bool ovf_use_seen
= false;
3896 gimple
*add_stmt
= NULL
;
3897 bool add_first
= false;
3898 gimple
*cond_stmt
= NULL
;
3899 gimple
*cast_stmt
= NULL
;
3900 tree cast_lhs
= NULL_TREE
;
3902 gcc_checking_assert (code
== PLUS_EXPR
3903 || code
== MINUS_EXPR
3904 || code
== MULT_EXPR
3905 || code
== BIT_NOT_EXPR
);
3906 if (!INTEGRAL_TYPE_P (type
)
3907 || !TYPE_UNSIGNED (type
)
3908 || has_zero_uses (lhs
)
3909 || (code
!= PLUS_EXPR
3910 && code
!= MULT_EXPR
3911 && optab_handler (code
== MINUS_EXPR
? usubv4_optab
: uaddv4_optab
,
3912 TYPE_MODE (type
)) == CODE_FOR_nothing
))
3915 tree rhs1
= gimple_assign_rhs1 (stmt
);
3916 tree rhs2
= gimple_assign_rhs2 (stmt
);
3917 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
3919 use_stmt
= USE_STMT (use_p
);
3920 if (is_gimple_debug (use_stmt
))
3923 tree other
= NULL_TREE
;
3924 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, NULL_TREE
, &other
))
3926 if (code
== BIT_NOT_EXPR
)
3929 if (TREE_CODE (other
) != SSA_NAME
)
3935 cond_stmt
= use_stmt
;
3937 ovf_use_seen
= true;
3942 if (code
== MULT_EXPR
3943 && cast_stmt
== NULL
3944 && gimple_assign_cast_p (use_stmt
))
3946 cast_lhs
= gimple_assign_lhs (use_stmt
);
3947 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
3948 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
3949 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
3950 == TYPE_PRECISION (TREE_TYPE (lhs
))))
3951 cast_stmt
= use_stmt
;
3953 cast_lhs
= NULL_TREE
;
3956 if (ovf_use_seen
&& use_seen
)
3961 && code
== MULT_EXPR
3964 if (TREE_CODE (rhs1
) != SSA_NAME
3965 || (TREE_CODE (rhs2
) != SSA_NAME
&& TREE_CODE (rhs2
) != INTEGER_CST
))
3967 FOR_EACH_IMM_USE_FAST (use_p
, iter
, cast_lhs
)
3969 use_stmt
= USE_STMT (use_p
);
3970 if (is_gimple_debug (use_stmt
))
3973 if (arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
3975 ovf_use_seen
= true;
3981 cast_lhs
= NULL_TREE
;
3984 tree maxval
= NULL_TREE
;
3986 || (code
!= MULT_EXPR
&& (code
== BIT_NOT_EXPR
? use_seen
: !use_seen
))
3987 || (code
== PLUS_EXPR
3988 && optab_handler (uaddv4_optab
,
3989 TYPE_MODE (type
)) == CODE_FOR_nothing
)
3990 || (code
== MULT_EXPR
3991 && optab_handler (cast_stmt
? mulv4_optab
: umulv4_optab
,
3992 TYPE_MODE (type
)) == CODE_FOR_nothing
))
3994 if (code
!= PLUS_EXPR
)
3996 if (TREE_CODE (rhs1
) != SSA_NAME
3997 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1
)))
3999 rhs1
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1
));
4000 tree type1
= TREE_TYPE (rhs1
);
4001 if (!INTEGRAL_TYPE_P (type1
)
4002 || !TYPE_UNSIGNED (type1
)
4003 || TYPE_PRECISION (type1
) >= TYPE_PRECISION (type
)
4004 || (TYPE_PRECISION (type1
)
4005 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1
))))
4007 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4009 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2
),
4010 TYPE_PRECISION (type1
),
4013 rhs2
= fold_convert (type1
, rhs2
);
4017 if (TREE_CODE (rhs2
) != SSA_NAME
4018 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2
)))
4020 rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2
));
4021 tree type2
= TREE_TYPE (rhs2
);
4022 if (!INTEGRAL_TYPE_P (type2
)
4023 || !TYPE_UNSIGNED (type2
)
4024 || TYPE_PRECISION (type2
) >= TYPE_PRECISION (type
)
4025 || (TYPE_PRECISION (type2
)
4026 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2
))))
4029 if (TYPE_PRECISION (type1
) >= TYPE_PRECISION (TREE_TYPE (rhs2
)))
4032 type
= TREE_TYPE (rhs2
);
4034 if (TREE_CODE (type
) != INTEGER_TYPE
4035 || optab_handler (uaddv4_optab
,
4036 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4039 maxval
= wide_int_to_tree (type
, wi::max_value (TYPE_PRECISION (type
),
4041 ovf_use_seen
= false;
4043 basic_block use_bb
= NULL
;
4044 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4046 use_stmt
= USE_STMT (use_p
);
4047 if (is_gimple_debug (use_stmt
))
4050 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, maxval
, NULL
))
4052 ovf_use_seen
= true;
4053 use_bb
= gimple_bb (use_stmt
);
4057 if (!gimple_assign_cast_p (use_stmt
)
4058 || gimple_assign_rhs_code (use_stmt
) == VIEW_CONVERT_EXPR
)
4060 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4061 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs
))
4062 || (TYPE_PRECISION (TREE_TYPE (use_lhs
))
4063 > TYPE_PRECISION (type
)))
4070 if (!useless_type_conversion_p (type
, TREE_TYPE (rhs1
)))
4074 tree new_rhs1
= make_ssa_name (type
);
4075 gimple
*g
= gimple_build_assign (new_rhs1
, NOP_EXPR
, rhs1
);
4076 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4079 else if (!useless_type_conversion_p (type
, TREE_TYPE (rhs2
)))
4083 tree new_rhs2
= make_ssa_name (type
);
4084 gimple
*g
= gimple_build_assign (new_rhs2
, NOP_EXPR
, rhs2
);
4085 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
/* If there are no uses of the wider addition, check if
   forwprop has not created a narrower addition.
   Require it to be in the same bb as the overflow check.  */
4093 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs1
)
4095 use_stmt
= USE_STMT (use_p
);
4096 if (is_gimple_debug (use_stmt
))
4099 if (use_stmt
== stmt
)
4102 if (!is_gimple_assign (use_stmt
)
4103 || gimple_bb (use_stmt
) != use_bb
4104 || gimple_assign_rhs_code (use_stmt
) != PLUS_EXPR
)
4107 if (gimple_assign_rhs1 (use_stmt
) == rhs1
)
4109 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt
),
4113 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
4115 if (gimple_assign_rhs1 (use_stmt
) != rhs2
)
4121 add_stmt
= use_stmt
;
4124 if (add_stmt
== NULL
)
/* If stmt and add_stmt are in the same bb, we need to find out
   which one is earlier.  If they are in different bbs, we've
   checked add_stmt is in the same bb as one of the uses of the
   stmt lhs, so stmt needs to dominate add_stmt too.  */
4131 if (gimple_bb (stmt
) == gimple_bb (add_stmt
))
4133 gimple_stmt_iterator gsif
= *gsi
;
4134 gimple_stmt_iterator gsib
= *gsi
;
/* Search both forward and backward from stmt and have a small
   upper bound.  */
4138 for (i
= 0; i
< 128; i
++)
4140 if (!gsi_end_p (gsib
))
4142 gsi_prev_nondebug (&gsib
);
4143 if (gsi_stmt (gsib
) == add_stmt
)
4149 else if (gsi_end_p (gsif
))
4151 if (!gsi_end_p (gsif
))
4153 gsi_next_nondebug (&gsif
);
4154 if (gsi_stmt (gsif
) == add_stmt
)
4161 *gsi
= gsi_for_stmt (add_stmt
);
  if (code == BIT_NOT_EXPR)
    *gsi = gsi_for_stmt (cond_stmt);

  auto_vec<gimple *, 8> mul_stmts;
  if (code == MULT_EXPR && cast_stmt)
    {
      type = TREE_TYPE (cast_lhs);
      gimple *g = SSA_NAME_DEF_STMT (rhs1);
      if (gimple_assign_cast_p (g)
          && useless_type_conversion_p (type,
                                        TREE_TYPE (gimple_assign_rhs1 (g)))
          && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
        rhs1 = gimple_assign_rhs1 (g);
      else
        {
          g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
          gsi_insert_before (gsi, g, GSI_SAME_STMT);
          rhs1 = gimple_assign_lhs (g);
          mul_stmts.quick_push (g);
        }
      if (TREE_CODE (rhs2) == INTEGER_CST)
        rhs2 = fold_convert (type, rhs2);
      else
        {
          g = SSA_NAME_DEF_STMT (rhs2);
          if (gimple_assign_cast_p (g)
              && useless_type_conversion_p (type,
                                            TREE_TYPE (gimple_assign_rhs1 (g)))
              && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
            rhs2 = gimple_assign_rhs1 (g);
          else
            {
              g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
              gsi_insert_before (gsi, g, GSI_SAME_STMT);
              rhs2 = gimple_assign_lhs (g);
              mul_stmts.quick_push (g);
            }
        }
    }
  tree ctype = build_complex_type (type);
  gcall *g = gimple_build_call_internal (code == MULT_EXPR
                                         ? IFN_MUL_OVERFLOW
                                         : code != MINUS_EXPR
                                         ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
                                         2, rhs1, rhs2);
  tree ctmp = make_ssa_name (ctype);
  gimple_call_set_lhs (g, ctmp);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
  gimple *g2;
  if (code != BIT_NOT_EXPR)
    {
      g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
                                build1 (REALPART_EXPR, type, ctmp));
      if (maxval || cast_stmt)
        {
          gsi_insert_before (gsi, g2, GSI_SAME_STMT);
          if (add_first)
            *gsi = gsi_for_stmt (stmt);
        }
      else
        gsi_replace (gsi, g2, true);
      if (code == MULT_EXPR)
        {
          mul_stmts.quick_push (g);
          mul_stmts.quick_push (g2);
          if (cast_stmt)
            {
              g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
              gsi_replace (gsi, g2, true);
              mul_stmts.quick_push (g2);
            }
        }
    }
  tree ovf = make_ssa_name (type);
  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
                            build1 (IMAGPART_EXPR, type, ctmp));
  if (code != BIT_NOT_EXPR)
    gsi_insert_after (gsi, g2, GSI_NEW_STMT);
  else
    gsi_insert_before (gsi, g2, GSI_SAME_STMT);
  if (code == MULT_EXPR)
    mul_stmts.quick_push (g2);

  FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
    {
      if (is_gimple_debug (use_stmt))
        continue;

      gimple *orig_use_stmt = use_stmt;
      int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
                                            maxval, NULL);
      if (ovf_use == 0)
        {
          gcc_assert (code != BIT_NOT_EXPR);
          if (maxval)
            {
              tree use_lhs = gimple_assign_lhs (use_stmt);
              gimple_assign_set_rhs1 (use_stmt, new_lhs);
              if (useless_type_conversion_p (TREE_TYPE (use_lhs),
                                             TREE_TYPE (new_lhs)))
                gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
              update_stmt (use_stmt);
            }
          continue;
        }
      if (gimple_code (use_stmt) == GIMPLE_COND)
        {
          gcond *cond_stmt = as_a <gcond *> (use_stmt);
          gimple_cond_set_lhs (cond_stmt, ovf);
          gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
          gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
        }
      else
        {
          gcc_checking_assert (is_gimple_assign (use_stmt));
          if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
            {
              gimple_assign_set_rhs1 (use_stmt, ovf);
              gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
              gimple_assign_set_rhs_code (use_stmt,
                                          ovf_use == 1 ? NE_EXPR : EQ_EXPR);
            }
          else
            {
              gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
                                   == COND_EXPR);
              tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
                                  boolean_type_node, ovf,
                                  build_int_cst (type, 0));
              gimple_assign_set_rhs1 (use_stmt, cond);
            }
        }
      update_stmt (use_stmt);
      if (code == MULT_EXPR && use_stmt != orig_use_stmt)
        {
          gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
          maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
                                         cfg_changed);
          gsi_remove (&gsi2, true);
          release_ssa_name (gimple_assign_lhs (orig_use_stmt));
        }
    }
  if (maxval)
    {
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      if (add_stmt)
        {
          gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
                                           new_lhs);
          gsi2 = gsi_for_stmt (add_stmt);
          gsi_replace (&gsi2, g, true);
        }
    }
  else if (code == BIT_NOT_EXPR)
    {
      *gsi = gsi_for_stmt (stmt);
      gsi_remove (gsi, true);
      release_ssa_name (lhs);
      return true;
    }
  return true;
}
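
/* For illustration only (hypothetical example, not part of the original
   sources): the matching above turns an open-coded overflow check such as

     unsigned sum = x + y;
     if (sum < x)
       overflow ();

   into a use of the internal function, roughly

     _c = .ADD_OVERFLOW (x, y);
     sum = REALPART_EXPR <_c>;
     if (IMAGPART_EXPR <_c> != 0)
       overflow ();

   so expansion can use the target's add-with-overflow pattern.  */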
/* Return true if target has support for divmod.  */

static bool
target_supports_divmod_p (optab divmod_optab, optab div_optab,
                          machine_mode mode)
{
  /* If target supports hardware divmod insn, use it for divmod.  */
  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
    return true;

  /* Check if libfunc for divmod is available.  */
  rtx libfunc = optab_libfunc (divmod_optab, mode);
  if (libfunc != NULL_RTX)
    {
      /* If optab_handler exists for div_optab, perhaps in a wider mode,
         we don't want to use the libfunc even if it exists for given mode.  */
      machine_mode div_mode;
      FOR_EACH_MODE_FROM (div_mode, mode)
        if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
          return false;

      return targetm.expand_divmod_libfunc != NULL;
    }

  return false;
}
/* Check if stmt is candidate for divmod transform.  */

static bool
divmod_candidate_p (gassign *stmt)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  machine_mode mode = TYPE_MODE (type);
  optab divmod_optab, div_optab;

  if (TYPE_UNSIGNED (type))
    {
      divmod_optab = udivmod_optab;
      div_optab = udiv_optab;
    }
  else
    {
      divmod_optab = sdivmod_optab;
      div_optab = sdiv_optab;
    }

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  /* Disable the transform if either is a constant, since division-by-constant
     may have specialized expansion.  */
  if (CONSTANT_CLASS_P (op1))
    return false;

  if (CONSTANT_CLASS_P (op2))
    {
      if (integer_pow2p (op2))
        return false;

      if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
          && TYPE_PRECISION (type) <= BITS_PER_WORD)
        return false;

      /* If the divisor is not power of 2 and the precision wider than
         HWI, expand_divmod punts on that, so in that case it is better
         to use divmod optab or libfunc.  Similarly if choose_multiplier
         might need pre/post shifts of BITS_PER_WORD or more.  */
    }

  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
     expand using the [su]divv optabs.  */
  if (TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
    return false;

  return true;
}
/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR (complex_tmp);
   t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p ().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
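
/* For illustration only (hypothetical example, not part of the original
   sources): for source code like

     int q = a / b, r = a % b;

   on a target where target_supports_divmod_p returns true, the two
   statements share a single DIVMOD internal call after this transform:

     divmod_tmp = DIVMOD (a, b);
     q = REALPART_EXPR <divmod_tmp>;
     r = IMAGPART_EXPR <divmod_tmp>;  */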
static bool
convert_to_divmod (gassign *stmt)
{
  if (stmt_can_throw_internal (cfun, stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
          && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
              || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
          && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
          && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
        {
          if (stmt_can_throw_internal (cfun, use_stmt))
            continue;

          basic_block bb = gimple_bb (use_stmt);

          if (bb == top_bb)
            {
              if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
                top_stmt = use_stmt;
            }
          else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
            {
              top_bb = bb;
              top_stmt = use_stmt;
            }
        }
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
          && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
              || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
          && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
          && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
        {
          if (use_stmt == top_stmt
              || stmt_can_throw_internal (cfun, use_stmt)
              || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
            continue;

          stmts.safe_push (use_stmt);
          if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
            div_seen = true;
        }
    }

  if (!div_seen)
    return false;

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
                                 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
        {
        case TRUNC_DIV_EXPR:
          new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
          break;

        case TRUNC_MOD_EXPR:
          new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
          break;

        default:
          gcc_unreachable ();
        }

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}
/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
   value is true iff we converted the statement.  */
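
/* For illustration only (hypothetical example, not part of the original
   sources): with 32-bit int and a target providing a high-part multiply,

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   is the kind of code this matches: the widening multiply feeding the
   shift by 32 becomes a single MULT_HIGHPART_EXPR.  */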
static bool
convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree stype = TREE_TYPE (lhs);
  tree sarg0 = gimple_assign_rhs1 (stmt);
  tree sarg1 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (stype) != INTEGER_TYPE
      || TREE_CODE (sarg1) != INTEGER_CST
      || TREE_CODE (sarg0) != SSA_NAME
      || !tree_fits_uhwi_p (sarg1)
      || !has_single_use (sarg0))
    return false;

  gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
  if (!def)
    return false;

  enum tree_code mcode = gimple_assign_rhs_code (def);
  if (mcode == NOP_EXPR)
    {
      tree tmp = gimple_assign_rhs1 (def);
      if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
        return false;
      def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
      if (!def)
        return false;
      mcode = gimple_assign_rhs_code (def);
    }

  if (mcode != WIDEN_MULT_EXPR
      || gimple_bb (def) != gimple_bb (stmt))
    return false;

  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
  if (TREE_CODE (mtype) != INTEGER_TYPE
      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
    return false;

  tree mop1 = gimple_assign_rhs1 (def);
  tree mop2 = gimple_assign_rhs2 (def);
  tree optype = TREE_TYPE (mop1);
  bool unsignedp = TYPE_UNSIGNED (optype);
  unsigned int prec = TYPE_PRECISION (optype);

  if (unsignedp != TYPE_UNSIGNED (mtype)
      || TYPE_PRECISION (mtype) != 2 * prec)
    return false;

  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
  if (bits < prec || bits >= 2 * prec)
    return false;

  /* For the time being, require operands to have the same sign.  */
  if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
    return false;

  machine_mode mode = TYPE_MODE (optype);
  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
  if (optab_handler (tab, mode) == CODE_FOR_nothing)
    return false;

  location_t loc = gimple_location (stmt);
  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
                                           MULT_HIGHPART_EXPR, mop1, mop2);
  tree highpart2 = highpart1;
  tree ntype = optype;

  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
    {
      ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
                                    : signed_type_for (optype);
      highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
    }
  if (bits > prec)
    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
                                        RSHIFT_EXPR, highpart2,
                                        build_int_cst (ntype, bits - prec));

  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
  gsi_replace (gsi, new_stmt, true);

  widen_mul_stats.highpart_mults_inserted++;
  return true;
}
/* If target has spaceship<MODE>3 expander, pattern recognize
   <bb 2> [local count: 1073741824]:
   if (a_2(D) == b_3(D))
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (a_2(D) < b_3(D))
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (a_2(D) > b_3(D))
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   and turn it into:
   <bb 2> [local count: 1073741824]:
   _1 = .SPACESHIP (a_2(D), b_3(D));
   if (_1 == 0)
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (_1 == -1)
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (_1 == 1)
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   so that the backend can emit optimal comparison and
   conditional jump sequence.  */
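
/* For illustration only (hypothetical note, not part of the original
   sources): GIMPLE of the shape shown above typically results from the
   C++ three-way comparison operator on floating-point operands, e.g.

     auto cmp (double a, double b) { return a <=> b; }

   where the ==, < and > tests materialize std::partial_ordering; the
   rewrite lets the backend reuse one comparison for all three tests.  */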
static void
optimize_spaceship (gimple *stmt)
{
  enum tree_code code = gimple_cond_code (stmt);
  if (code != EQ_EXPR && code != NE_EXPR)
    return;
  tree arg1 = gimple_cond_lhs (stmt);
  tree arg2 = gimple_cond_rhs (stmt);
  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
      || optab_handler (spaceship_optab,
                        TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
      || operand_equal_p (arg1, arg2, 0))
    return;

  basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
  edge em1 = NULL, e1 = NULL, e2 = NULL;
  bb1 = EDGE_SUCC (bb0, 1)->dest;
  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
    bb1 = EDGE_SUCC (bb0, 0)->dest;

  gimple *g = last_stmt (bb1);
  if (g == NULL
      || gimple_code (g) != GIMPLE_COND
      || !single_pred_p (bb1)
      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
          ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
          : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
             || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
      || !cond_only_block_p (bb1))
    return;

  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
                          ? LT_EXPR : GT_EXPR);
  switch (gimple_cond_code (g))
    {
    case LT_EXPR:
    case LE_EXPR:
      break;
    case GT_EXPR:
    case GE_EXPR:
      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
      break;
    default:
      return;
    }

  for (int i = 0; i < 2; ++i)
    {
      /* With NaNs, </<=/>/>= are false, so we need to look for the
         third comparison on the false edge from whatever non-equality
         comparison the second comparison is.  */
      if (HONOR_NANS (TREE_TYPE (arg1))
          && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
        continue;

      bb2 = EDGE_SUCC (bb1, i)->dest;
      g = last_stmt (bb2);
      if (g == NULL
          || gimple_code (g) != GIMPLE_COND
          || !single_pred_p (bb2)
          || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
              ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
              : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
                 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
          || !cond_only_block_p (bb2)
          || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
        continue;

      enum tree_code ccode2
        = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
      switch (gimple_cond_code (g))
        {
        case LT_EXPR:
        case LE_EXPR:
          break;
        case GT_EXPR:
        case GE_EXPR:
          ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
          break;
        default:
          continue;
        }
      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
        continue;

      if ((ccode == LT_EXPR)
          ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
        {
          em1 = EDGE_SUCC (bb1, 1 - i);
          e1 = EDGE_SUCC (bb2, 0);
          e2 = EDGE_SUCC (bb2, 1);
          if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
            std::swap (e1, e2);
        }
      else
        {
          e1 = EDGE_SUCC (bb1, 1 - i);
          em1 = EDGE_SUCC (bb2, 0);
          e2 = EDGE_SUCC (bb2, 1);
          if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
            std::swap (em1, e2);
        }
      break;
    }

  if (em1 == NULL)
    {
      if ((ccode == LT_EXPR)
          ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
        {
          em1 = EDGE_SUCC (bb1, 1);
          e1 = EDGE_SUCC (bb1, 0);
          e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
        }
      else
        {
          em1 = EDGE_SUCC (bb1, 0);
          e1 = EDGE_SUCC (bb1, 1);
          e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
        }
    }

  g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
  tree lhs = make_ssa_name (integer_type_node);
  gimple_call_set_lhs (g, lhs);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  gcond *cond = as_a <gcond *> (stmt);
  gimple_cond_set_lhs (cond, lhs);
  gimple_cond_set_rhs (cond, integer_zero_node);
  update_stmt (stmt);

  g = last_stmt (bb1);
  cond = as_a <gcond *> (g);
  gimple_cond_set_lhs (cond, lhs);
  if (em1->src == bb1 && e2 != em1)
    {
      gimple_cond_set_rhs (cond, integer_minus_one_node);
      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
                                  ? EQ_EXPR : NE_EXPR);
    }
  else
    {
      gcc_assert (e1->src == bb1 && e2 != e1);
      gimple_cond_set_rhs (cond, integer_one_node);
      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
                                  ? EQ_EXPR : NE_EXPR);
    }
  update_stmt (g);

  if (e2 != e1 && e2 != em1)
    {
      g = last_stmt (bb2);
      cond = as_a <gcond *> (g);
      gimple_cond_set_lhs (cond, lhs);
      if (em1->src == bb2)
        gimple_cond_set_rhs (cond, integer_minus_one_node);
      else
        {
          gcc_assert (e1->src == bb2);
          gimple_cond_set_rhs (cond, integer_one_node);
        }
      gimple_cond_set_code (cond,
                            (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
      update_stmt (g);
    }

  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
  wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
  value_range vr (TREE_TYPE (lhs), wm1, w2);
  set_range_info (lhs, vr);
}
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   or MULT_HIGHPART_EXPR where appropriate.  */
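
/* For illustration only (hypothetical example, not part of the original
   sources): on a target with a widening multiply instruction,

     long long
     wmul (int a, int b)
     {
       return (long long) a * b;
     }

   keeps the 32x32->64 product as one WIDEN_MULT_EXPR instead of
   sign-extending both operands and doing a full 64x64 multiply.  */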
namespace {

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul
/* Walker class to perform the transformation in reverse dominance order.  */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */

  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  virtual void after_dom_children (basic_block);

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
        {
          code = gimple_assign_rhs_code (stmt);
          switch (code)
            {
            case MULT_EXPR:
              if (!convert_mult_to_widen (stmt, &gsi)
                  && !convert_expand_mult_copysign (stmt, &gsi)
                  && convert_mult_to_fma (stmt,
                                          gimple_assign_rhs1 (stmt),
                                          gimple_assign_rhs2 (stmt),
                                          &fma_state))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  continue;
                }
              match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
              break;

            case PLUS_EXPR:
            case MINUS_EXPR:
              if (!convert_plusminus_to_widen (&gsi, stmt, code))
                match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
              break;

            case BIT_NOT_EXPR:
              if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
                continue;
              break;

            case TRUNC_MOD_EXPR:
              convert_to_divmod (as_a <gassign *> (stmt));
              break;

            case RSHIFT_EXPR:
              convert_mult_to_highpart (as_a <gassign *> (stmt), &gsi);
              break;

            default:;
            }
        }
      else if (is_gimple_call (stmt))
        {
          switch (gimple_call_combined_fn (stmt))
            {
            CASE_CFN_POW:
              if (gimple_call_lhs (stmt)
                  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
                  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
                                 &dconst2)
                  && convert_mult_to_fma (stmt,
                                          gimple_call_arg (stmt, 0),
                                          gimple_call_arg (stmt, 0),
                                          &fma_state))
                {
                  unlink_stmt_vdef (stmt);
                  if (gsi_remove (&gsi, true)
                      && gimple_purge_dead_eh_edges (bb))
                    *m_cfg_changed_p = true;
                  release_defs (stmt);
                  continue;
                }
              break;

            case CFN_COND_MUL:
              if (convert_mult_to_fma (stmt,
                                       gimple_call_arg (stmt, 1),
                                       gimple_call_arg (stmt, 2),
                                       &fma_state,
                                       gimple_call_arg (stmt, 0)))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  continue;
                }
              break;

            case CFN_LAST:
              cancel_fma_deferring (&fma_state);
              break;

            default:
              break;
            }
        }
      else if (gimple_code (stmt) == GIMPLE_COND)
        optimize_spaceship (stmt);
      gsi_next (&gsi);
    }
  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
                                                 &m_last_result_set))
        cancel_fma_deferring (&fma_state);
      else
        m_last_result_set.add (fma_state.m_last_result);
    }
}
unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  renumber_gimple_stmt_uids (cfun);

  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  statistics_counter_event (fun, "widening multiplications inserted",
                            widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
                            widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
                            widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
                            widen_mul_stats.divmod_calls_inserted);
  statistics_counter_event (fun, "highpart multiplications inserted",
                            widen_mul_stats.highpart_mults_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}