/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments showed that the transformation is not always
   useful if there are only two divisions by the same divisor.  This is
   probably because modern processors can pipeline the divisions; on older,
   in-order processors it should still be effective to optimize two
   divisions by the same number.  We make this a param, and it shall be
   called N in the remainder of this comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst-case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.c, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
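/* Illustrative sketch (not part of the pass sources): at the source level,
   with -freciprocal-math the transformation described above turns

	a = b / d;
	c = e / d;

   into the equivalent of

	t = 1.0 / d;
	a = b * t;
	c = e * t;

   provided enough divisions by D post-dominate the chosen insertion point
   to meet the target's threshold N.  */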
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "targhooks.h"
#include "tree-ssa-math-opts.h"
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence
{
  /* The basic block represented by this structure.  */
  basic_block bb = basic_block();

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def = tree();

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def = tree();

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt = nullptr;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children = nullptr;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next = nullptr;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions = 0;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division = false;

  /* Construct a struct occurrence for basic block BB, and whose
     children list is headed by CHILDREN.  */
  occurrence (basic_block bb, struct occurrence *children)
    : bb (bb), children (children)
  {
    bb->aux = this;
  }

  /* Destroy a struct occurrence and remove it from its basic block.  */
  ~occurrence ()
  {
    bb->aux = NULL;
  }

  /* Allocate memory for a struct occurrence from OCC_POOL.  */
  static void* operator new (size_t);

  /* Return memory for a struct occurrence to OCC_POOL.  */
  static void operator delete (void*, size_t);
};
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;

  /* Number of conversions removed.  */
  int conv_removed;

} sincos_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;
void* occurrence::operator new (size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  return occ_pool->allocate_raw ();
}

void occurrence::operator delete (void *occ, size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  occ_pool->remove_raw (occ);
}
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly. Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = new occurrence (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the division is going to be found multiple
   times use 1 (as it is with squares).  */

static inline void
register_division_in (basic_block bb, int importance)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (occ == NULL)
    {
      occ = new occurrence (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions += importance;
}
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}
/* Return whether USE_STMT is a floating-point division by DEF.  */

static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def
	 && !stmt_can_throw_internal (cfun, use_stmt);
}
/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */

static inline bool
is_mult_by (gimple *use_stmt, tree def, tree a)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
    {
      tree op0 = gimple_assign_rhs1 (use_stmt);
      tree op1 = gimple_assign_rhs2 (use_stmt);

      return (op0 == def && op1 == a)
	      || (op0 == a && op1 == def);
    }
  return false;
}
/* Return whether USE_STMT is DEF * DEF.  */

static inline bool
is_square_of (gimple *use_stmt, tree def)
{
  return is_mult_by (use_stmt, def, def);
}
/* Return whether USE_STMT is a floating-point division by
   DEF * DEF.  */

static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
      && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
      && !stmt_can_throw_internal (cfun, use_stmt))
    {
      tree denominator = gimple_assign_rhs2 (use_stmt);
      if (TREE_CODE (denominator) == SSA_NAME)
	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
    }
  return false;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.

   If should_insert_square_recip is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costs.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}
/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   Take as argument the use for (x * x).  */

static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
      && occ->recip_def)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
      SET_USE (use_p, occ->square_recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  delete occ;

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}
/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = sqrt (a);
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  */
570 optimize_recip_sqrt (gimple_stmt_iterator
*def_gsi
, tree def
)
573 imm_use_iterator use_iter
;
574 gimple
*stmt
= gsi_stmt (*def_gsi
);
576 tree orig_sqrt_ssa_name
= gimple_assign_rhs2 (stmt
);
577 tree div_rhs1
= gimple_assign_rhs1 (stmt
);
579 if (TREE_CODE (orig_sqrt_ssa_name
) != SSA_NAME
580 || TREE_CODE (div_rhs1
) != REAL_CST
581 || !real_equal (&TREE_REAL_CST (div_rhs1
), &dconst1
))
585 = dyn_cast
<gcall
*> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name
));
587 if (!sqrt_stmt
|| !gimple_call_lhs (sqrt_stmt
))
590 switch (gimple_call_combined_fn (sqrt_stmt
))
599 tree a
= gimple_call_arg (sqrt_stmt
, 0);
601 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
603 /* Statements that use x in x * x. */
604 auto_vec
<gimple
*> sqr_stmts
;
605 /* Statements that use x in a * x. */
606 auto_vec
<gimple
*> mult_stmts
;
607 bool has_other_use
= false;
608 bool mult_on_main_path
= false;
610 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, x
)
612 if (is_gimple_debug (use_stmt
))
614 if (is_square_of (use_stmt
, x
))
616 sqr_stmts
.safe_push (use_stmt
);
617 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
618 mult_on_main_path
= true;
620 else if (is_mult_by (use_stmt
, x
, a
))
622 mult_stmts
.safe_push (use_stmt
);
623 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
624 mult_on_main_path
= true;
627 has_other_use
= true;
630 /* In the x * x and a * x cases we just rewire stmt operands or
631 remove multiplications. In the has_other_use case we introduce
632 a multiplication so make sure we don't introduce a multiplication
633 on a path where there was none. */
634 if (has_other_use
&& !mult_on_main_path
)
637 if (sqr_stmts
.is_empty () && mult_stmts
.is_empty ())
640 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
641 to be able to compose it from the sqr and mult cases. */
642 if (has_other_use
&& (sqr_stmts
.is_empty () || mult_stmts
.is_empty ()))
647 fprintf (dump_file
, "Optimizing reciprocal sqrt multiplications of\n");
648 print_gimple_stmt (dump_file
, sqrt_stmt
, 0, TDF_NONE
);
649 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
650 fprintf (dump_file
, "\n");
653 bool delete_div
= !has_other_use
;
654 tree sqr_ssa_name
= NULL_TREE
;
655 if (!sqr_stmts
.is_empty ())
657 /* r1 = x * x. Transform the original
664 = make_temp_ssa_name (TREE_TYPE (a
), NULL
, "recip_sqrt_sqr");
668 fprintf (dump_file
, "Replacing original division\n");
669 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
670 fprintf (dump_file
, "with new division\n");
673 = gimple_build_assign (sqr_ssa_name
, gimple_assign_rhs_code (stmt
),
674 gimple_assign_rhs1 (stmt
), a
);
675 gsi_insert_before (def_gsi
, stmt
, GSI_SAME_STMT
);
676 gsi_remove (def_gsi
, true);
677 *def_gsi
= gsi_for_stmt (stmt
);
678 fold_stmt_inplace (def_gsi
);
682 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
687 FOR_EACH_VEC_ELT (sqr_stmts
, i
, sqr_stmt
)
689 gimple_stmt_iterator gsi2
= gsi_for_stmt (sqr_stmt
);
690 gimple_assign_set_rhs_from_tree (&gsi2
, sqr_ssa_name
);
691 update_stmt (sqr_stmt
);
694 if (!mult_stmts
.is_empty ())
696 /* r2 = a * x. Transform this into:
697 r2 = t (The original sqrt (a)). */
699 gimple
*mult_stmt
= NULL
;
700 FOR_EACH_VEC_ELT (mult_stmts
, i
, mult_stmt
)
702 gimple_stmt_iterator gsi2
= gsi_for_stmt (mult_stmt
);
706 fprintf (dump_file
, "Replacing squaring multiplication\n");
707 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
708 fprintf (dump_file
, "with assignment\n");
710 gimple_assign_set_rhs_from_tree (&gsi2
, orig_sqrt_ssa_name
);
711 fold_stmt_inplace (&gsi2
);
712 update_stmt (mult_stmt
);
714 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
720 /* Using the two temporaries tmp1, tmp2 from above
721 the original x is now:
723 gcc_assert (orig_sqrt_ssa_name
);
724 gcc_assert (sqr_ssa_name
);
727 = gimple_build_assign (x
, MULT_EXPR
,
728 orig_sqrt_ssa_name
, sqr_ssa_name
);
729 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
734 /* Remove the original division. */
735 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
736 gsi_remove (&gsi2
, true);
740 release_ssa_name (x
);
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */
static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases where both the
     square and x itself are used as divisors (see the illustrative example
     below): reciprocal optimization of x then results in 1 division rather
     than 2 or 3.  */
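  /* Illustrative example (an assumption, not from the original comment):

	d  = x * x;
	t0 = a / d;
	t1 = b / x;
	t2 = c / x;

     Here x has more reciprocal uses than d, so the divisions are better
     served by a single 1.0 / x, with (1.0 / x) * (1.0 / x) standing in
     for 1.0 / d.  */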
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);

  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		   * replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt,
					    powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
/* Return an internal function that implements the reciprocal of CALL,
   or IFN_LAST if there is no such function that the target supports.  */
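/* For instance (an illustration, not an exhaustive list): a call to sqrt
   can be served by the IFN_RSQRT internal function on targets that provide
   a reciprocal square-root instruction, which lets the caller below turn
   a / sqrt (b) into a * rsqrt (b).  */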
873 internal_fn_reciprocal (gcall
*call
)
877 switch (gimple_call_combined_fn (call
))
888 tree_pair types
= direct_internal_fn_types (ifn
, call
);
889 if (!direct_internal_fn_supported_p (ifn
, types
, OPTIMIZE_FOR_SPEED
))
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals
926 pass_cse_reciprocals::execute (function
*fun
)
931 occ_pool
= new object_allocator
<occurrence
> ("dominators for recip");
933 memset (&reciprocal_stats
, 0, sizeof (reciprocal_stats
));
934 calculate_dominance_info (CDI_DOMINATORS
);
935 calculate_dominance_info (CDI_POST_DOMINATORS
);
938 FOR_EACH_BB_FN (bb
, fun
)
939 gcc_assert (!bb
->aux
);
941 for (arg
= DECL_ARGUMENTS (fun
->decl
); arg
; arg
= DECL_CHAIN (arg
))
942 if (FLOAT_TYPE_P (TREE_TYPE (arg
))
943 && is_gimple_reg (arg
))
945 tree name
= ssa_default_def (fun
, arg
);
947 execute_cse_reciprocals_1 (NULL
, name
);
950 FOR_EACH_BB_FN (bb
, fun
)
954 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
957 gphi
*phi
= gsi
.phi ();
958 def
= PHI_RESULT (phi
);
959 if (! virtual_operand_p (def
)
960 && FLOAT_TYPE_P (TREE_TYPE (def
)))
961 execute_cse_reciprocals_1 (NULL
, def
);
964 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
967 gimple
*stmt
= gsi_stmt (gsi
);
969 if (gimple_has_lhs (stmt
)
970 && (def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
)) != NULL
971 && FLOAT_TYPE_P (TREE_TYPE (def
))
972 && TREE_CODE (def
) == SSA_NAME
)
974 execute_cse_reciprocals_1 (&gsi
, def
);
975 stmt
= gsi_stmt (gsi
);
976 if (flag_unsafe_math_optimizations
977 && is_gimple_assign (stmt
)
978 && gimple_assign_lhs (stmt
) == def
979 && !stmt_can_throw_internal (cfun
, stmt
)
980 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
981 optimize_recip_sqrt (&gsi
, def
);
985 if (optimize_bb_for_size_p (bb
))
988 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
989 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
992 gimple
*stmt
= gsi_stmt (gsi
);
994 if (is_gimple_assign (stmt
)
995 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
997 tree arg1
= gimple_assign_rhs2 (stmt
);
1000 if (TREE_CODE (arg1
) != SSA_NAME
)
1003 stmt1
= SSA_NAME_DEF_STMT (arg1
);
1005 if (is_gimple_call (stmt1
)
1006 && gimple_call_lhs (stmt1
))
1009 imm_use_iterator ui
;
1010 use_operand_p use_p
;
1011 tree fndecl
= NULL_TREE
;
1013 gcall
*call
= as_a
<gcall
*> (stmt1
);
1014 internal_fn ifn
= internal_fn_reciprocal (call
);
1015 if (ifn
== IFN_LAST
)
1017 fndecl
= gimple_call_fndecl (call
);
1019 || !fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
1021 fndecl
= targetm
.builtin_reciprocal (fndecl
);
1026 /* Check that all uses of the SSA name are divisions,
1027 otherwise replacing the defining statement will do
1030 FOR_EACH_IMM_USE_FAST (use_p
, ui
, arg1
)
1032 gimple
*stmt2
= USE_STMT (use_p
);
1033 if (is_gimple_debug (stmt2
))
1035 if (!is_gimple_assign (stmt2
)
1036 || gimple_assign_rhs_code (stmt2
) != RDIV_EXPR
1037 || gimple_assign_rhs1 (stmt2
) == arg1
1038 || gimple_assign_rhs2 (stmt2
) != arg1
)
1047 gimple_replace_ssa_lhs (call
, arg1
);
1048 if (gimple_call_internal_p (call
) != (ifn
!= IFN_LAST
))
1050 auto_vec
<tree
, 4> args
;
1051 for (unsigned int i
= 0;
1052 i
< gimple_call_num_args (call
); i
++)
1053 args
.safe_push (gimple_call_arg (call
, i
));
1055 if (ifn
== IFN_LAST
)
1056 stmt2
= gimple_build_call_vec (fndecl
, args
);
1058 stmt2
= gimple_build_call_internal_vec (ifn
, args
);
1059 gimple_call_set_lhs (stmt2
, arg1
);
1060 gimple_move_vops (stmt2
, call
);
1061 gimple_call_set_nothrow (stmt2
,
1062 gimple_call_nothrow_p (call
));
1063 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
1064 gsi_replace (&gsi2
, stmt2
, true);
1068 if (ifn
== IFN_LAST
)
1069 gimple_call_set_fndecl (call
, fndecl
);
1071 gimple_call_set_internal_fn (call
, ifn
);
1074 reciprocal_stats
.rfuncs_inserted
++;
1076 FOR_EACH_IMM_USE_STMT (stmt
, ui
, arg1
)
1078 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1079 gimple_assign_set_rhs_code (stmt
, MULT_EXPR
);
1080 fold_stmt_inplace (&gsi
);
1088 statistics_counter_event (fun
, "reciprocal divs inserted",
1089 reciprocal_stats
.rdivs_inserted
);
1090 statistics_counter_event (fun
, "reciprocal functions inserted",
1091 reciprocal_stats
.rfuncs_inserted
);
1093 free_dominance_info (CDI_DOMINATORS
);
1094 free_dominance_info (CDI_POST_DOMINATORS
);
gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
/* If NAME is the result of a type conversion, look for other
   equivalent dominating or dominated conversions, and replace all
   uses with the earliest dominating name, removing the redundant
   conversions.  Return the prevailing name.  */
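/* Illustrative sketch (not from the original sources): given

	_1 = (float) d_5;
	...
	_7 = (float) d_5;

   where the conversion defining _1 dominates the one defining _7, all uses
   of _7 are replaced by _1 and the redundant conversion is removed.  */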
1113 execute_cse_conv_1 (tree name
)
1115 if (SSA_NAME_IS_DEFAULT_DEF (name
)
1116 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name
))
1119 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
1121 if (!gimple_assign_cast_p (def_stmt
))
1124 tree src
= gimple_assign_rhs1 (def_stmt
);
1126 if (TREE_CODE (src
) != SSA_NAME
)
1129 imm_use_iterator use_iter
;
1132 /* Find the earliest dominating def. */
1133 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1135 if (use_stmt
== def_stmt
1136 || !gimple_assign_cast_p (use_stmt
))
1139 tree lhs
= gimple_assign_lhs (use_stmt
);
1141 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1142 || (gimple_assign_rhs1 (use_stmt
)
1143 != gimple_assign_rhs1 (def_stmt
))
1144 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1148 if (gimple_bb (def_stmt
) == gimple_bb (use_stmt
))
1150 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1151 while (!gsi_end_p (gsi
) && gsi_stmt (gsi
) != def_stmt
)
1153 use_dominates
= !gsi_end_p (gsi
);
1155 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
),
1156 gimple_bb (def_stmt
)))
1157 use_dominates
= false;
1158 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (def_stmt
),
1159 gimple_bb (use_stmt
)))
1160 use_dominates
= true;
1166 std::swap (name
, lhs
);
1167 std::swap (def_stmt
, use_stmt
);
1171 /* Now go through all uses of SRC again, replacing the equivalent
1172 dominated conversions. We may replace defs that were not
1173 dominated by the then-prevailing defs when we first visited
1175 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1177 if (use_stmt
== def_stmt
1178 || !gimple_assign_cast_p (use_stmt
))
1181 tree lhs
= gimple_assign_lhs (use_stmt
);
1183 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1184 || (gimple_assign_rhs1 (use_stmt
)
1185 != gimple_assign_rhs1 (def_stmt
))
1186 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1189 if (gimple_bb (def_stmt
) == gimple_bb (use_stmt
)
1190 || dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
),
1191 gimple_bb (def_stmt
)))
1193 sincos_stats
.conv_removed
++;
1195 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1196 replace_uses_by (lhs
, name
);
1197 gsi_remove (&gsi
, true);
/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */
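/* Illustrative sketch (not from the original sources): for

	s_1 = sin (x_2);
	c_3 = cos (x_2);

   a single call is inserted before the dominating use and the old calls
   become component extractions of its result:

	t_4 = cexpi (x_2);
	s_1 = IMAGPART_EXPR <t_4>;
	c_3 = REALPART_EXPR <t_4>;  */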
1240 execute_cse_sincos_1 (tree name
)
1242 gimple_stmt_iterator gsi
;
1243 imm_use_iterator use_iter
;
1244 tree fndecl
, res
, type
= NULL_TREE
;
1245 gimple
*def_stmt
, *use_stmt
, *stmt
;
1246 int seen_cos
= 0, seen_sin
= 0, seen_cexpi
= 0;
1247 auto_vec
<gimple
*> stmts
;
1248 basic_block top_bb
= NULL
;
1250 bool cfg_changed
= false;
1252 name
= execute_cse_conv_1 (name
);
1254 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, name
)
1256 if (gimple_code (use_stmt
) != GIMPLE_CALL
1257 || !gimple_call_lhs (use_stmt
))
1260 switch (gimple_call_combined_fn (use_stmt
))
1263 seen_cos
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1267 seen_sin
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1271 seen_cexpi
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1278 tree t
= mathfn_built_in_type (gimple_call_combined_fn (use_stmt
));
1282 t
= TREE_TYPE (name
);
1284 /* This checks that NAME has the right type in the first round,
1285 and, in subsequent rounds, that the built_in type is the same
1286 type, or a compatible type. */
1287 if (type
!= t
&& !types_compatible_p (type
, t
))
1290 if (seen_cos
+ seen_sin
+ seen_cexpi
<= 1)
1293 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1294 the name def statement. */
1295 fndecl
= mathfn_built_in (type
, BUILT_IN_CEXPI
);
1298 stmt
= gimple_build_call (fndecl
, 1, name
);
1299 res
= make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl
)), stmt
, "sincostmp");
1300 gimple_call_set_lhs (stmt
, res
);
1302 def_stmt
= SSA_NAME_DEF_STMT (name
);
1303 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
1304 && gimple_code (def_stmt
) != GIMPLE_PHI
1305 && gimple_bb (def_stmt
) == top_bb
)
1307 gsi
= gsi_for_stmt (def_stmt
);
1308 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1312 gsi
= gsi_after_labels (top_bb
);
1313 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1315 sincos_stats
.inserted
++;
1317 /* And adjust the recorded old call sites. */
1318 for (i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
1322 switch (gimple_call_combined_fn (use_stmt
))
1325 rhs
= fold_build1 (REALPART_EXPR
, type
, res
);
1329 rhs
= fold_build1 (IMAGPART_EXPR
, type
, res
);
1340 /* Replace call with a copy. */
1341 stmt
= gimple_build_assign (gimple_call_lhs (use_stmt
), rhs
);
1343 gsi
= gsi_for_stmt (use_stmt
);
1344 gsi_replace (&gsi
, stmt
, true);
1345 if (gimple_purge_dead_eh_edges (gimple_bb (stmt
)))
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
    0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
    4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
    8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
    12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
    16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
    20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
    24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
    28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
    32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
    36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
    40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
    44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
    48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
    52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
    56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
    60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
    64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
    68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
    72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
    76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
    80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
    84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
    88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
    92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
    96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
  };
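/* Worked example (illustrative): for n = 13 the table gives
   powi_table[13] = 10, powi_table[10] = 5, powi_table[5] = 3,
   powi_table[3] = 2 and powi_table[2] = 1, so powi (x, 13) is built as

	x2  = x * x
	x3  = x2 * x
	x5  = x3 * x2
	x10 = x5 * x5
	x13 = x10 * x3

   i.e. five multiplications, which is also the value that
   powi_lookup_cost below computes for this exponent.  */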
/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}
/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = (n < 0) ? -n : n;

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_one_cst (type);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  /* Avoid largest negative number.  */
  if (n != -n
      && ((n >= -1 && n <= 2)
	  || (optimize_function_for_speed_p (cfun)
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name built from NAME.
   Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
/* Build a gimple reference operation with the given CODE and argument
   ARG, assigning the result to a new SSA name of TYPE with NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
		      const char *name, enum tree_code code, tree arg0)
{
  tree result = make_temp_ssa_name (type, NULL, name);
  gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type);
  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
struct pow_synth_sqrt_info
{
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};
/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */
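/* Worked example (illustrative): for C == 0.625 and N >= 3 the routine
   finds 0.625 == 0.5 + 0.125, i.e. factors a[1] = 1, a[2] = 0, a[3] = 1,
   DEEPEST == 3 and NUM_MULTS == 1, corresponding to the product
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */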
static bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
         of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}
/* Return the tree corresponding to FN being applied
   to ARG N times at GSI and LOC.
   Look up previous results from CACHE if need be.
   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
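/* For example (illustrative): get_fn_chain (x, 3, gsi, sqrtfn, loc, cache)
   returns an SSA name holding sqrt (sqrt (sqrt (x))), building and caching
   the intermediate sqrt (x) and sqrt (sqrt (x)) so that shallower chains
   requested later can reuse them.  */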
static tree
get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
	      tree fn, location_t loc, tree *cache)
{
  tree res = cache[n];
  if (!res)
    {
      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
      res = build_and_insert_call (gsi, loc, fn, prev);
      cache[n] = res;
    }

  return res;
}
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}
/* Print to STREAM the fractional sequence of sqrt chains
   applied to ARG, described by INFO.  Used for the dump file.  */

static void
dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
			       struct pow_synth_sqrt_info *info)
{
  for (unsigned int i = 0; i < info->deepest; i++)
    {
      bool is_set = info->factors[i];
      if (is_set)
	{
	  print_nested_fn (stream, "sqrt", arg, i + 1);
	  if (i != info->deepest - 1)
	    fprintf (stream, " * ");
	}
    }
}
/* Print to STREAM a representation of raising ARG to an integer
   power N.  Used for the dump file.  */

static void
dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
{
  if (n > 1)
    fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
  else if (n == 1)
    fprintf (stream, "%s", arg);
}
/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
   square roots.  Place at GSI and LOC.  Limit the maximum depth
   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
   result of the expanded sequence or NULL_TREE if the expansion failed.

   This routine assumes that ARG1 is a real number with a fractional part
   (the integer exponent case will have been handled earlier in
   gimple_expand_builtin_pow).

   For ARG1 > 0.0:
   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
     FRAC_PART == ARG1 - WHOLE_PART:
     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
     if it can be expressed as such, that is if FRAC_PART satisfies:
     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
     where integer a[i] is either 0 or 1.

     Example:
	POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
	--> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))

   For ARG1 < 0.0 there are two approaches:
   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
	 is calculated as above.

     Example:
	POW (x, -5.625) == 1.0 / POW (x, 5.625)
	--> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))

   * (B) : WHOLE_PART := - ceil (abs (ARG1))
	   FRAC_PART  := ARG1 - WHOLE_PART
     and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).

     Example:
	POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
	--> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))

   For ARG1 < 0.0 we choose between (A) and (B) depending on
   how many multiplications we'd have to do.
   So, for the example in (B): POW (x, -5.875), if we were to
   follow algorithm (A) we would produce:
   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
   which contains more multiplications than approach (B).

   Hopefully, this approach will eliminate potentially expensive POW library
   calls when unsafe floating point math is enabled and allow the compiler to
   further optimise the multiplies, square roots and divides produced by this
   function.  */
1822 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1823 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1825 tree type
= TREE_TYPE (arg0
);
1826 machine_mode mode
= TYPE_MODE (type
);
1827 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1828 bool one_over
= true;
1833 if (TREE_CODE (arg1
) != REAL_CST
)
1836 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1838 gcc_assert (max_depth
> 0);
1839 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1841 struct pow_synth_sqrt_info synth_info
;
1842 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1843 synth_info
.deepest
= 0;
1844 synth_info
.num_mults
= 0;
1846 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1847 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1849 /* The whole and fractional parts of exp. */
1850 REAL_VALUE_TYPE whole_part
;
1851 REAL_VALUE_TYPE frac_part
;
1853 real_floor (&whole_part
, mode
, &exp
);
1854 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1857 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1858 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1862 real_ceil (&ceil_whole
, mode
, &exp
);
1863 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1866 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1869 /* Check whether it's more profitable to not use 1.0 / ... */
1872 struct pow_synth_sqrt_info alt_synth_info
;
1873 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1874 alt_synth_info
.deepest
= 0;
1875 alt_synth_info
.num_mults
= 0;
1877 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1879 && alt_synth_info
.deepest
<= synth_info
.deepest
1880 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1882 whole_part
= ceil_whole
;
1883 frac_part
= ceil_fract
;
1884 synth_info
.deepest
= alt_synth_info
.deepest
;
1885 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1886 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1887 (max_depth
+ 1) * sizeof (bool));
1892 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1893 REAL_VALUE_TYPE cint
;
1894 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1896 if (!real_identical (&whole_part
, &cint
))
1899 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1902 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1904 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1906 /* Calculate the integer part of the exponent. */
1909 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1918 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1919 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1925 fprintf (dump_file
, "1.0 / (");
1926 dump_integer_part (dump_file
, "x", n
);
1928 fprintf (dump_file
, " * ");
1929 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1930 fprintf (dump_file
, ")");
1934 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1935 fprintf (dump_file
, " / (");
1936 dump_integer_part (dump_file
, "x", n
);
1937 fprintf (dump_file
, ")");
1942 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1944 fprintf (dump_file
, " * ");
1945 dump_integer_part (dump_file
, "x", n
);
1948 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1952 tree fract_res
= NULL_TREE
;
1955 /* Calculate the fractional part of the exponent. */
1956 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1958 if (synth_info
.factors
[i
])
1960 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1963 fract_res
= sqrt_chain
;
1966 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1967 fract_res
, sqrt_chain
);
1971 tree res
= NULL_TREE
;
1978 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1979 fract_res
, integer_res
);
1983 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1984 build_real (type
, dconst1
), res
);
1988 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1989 fract_res
, integer_res
);
1993 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1994 fract_res
, integer_res
);
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */
2004 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
2005 tree arg0
, tree arg1
)
2007 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
2008 REAL_VALUE_TYPE c2
, dconst3
;
2010 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
2012 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
2013 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
2015 dconst1_4
= dconst1
;
2016 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
2020 if (TREE_CODE (arg1
) != REAL_CST
)
2023 /* Don't perform the operation if flag_signaling_nans is on
2024 and the operand is a signaling NaN. */
2025 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
2026 && ((TREE_CODE (arg0
) == REAL_CST
2027 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
2028 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
2031 /* If the exponent is equivalent to an integer, expand to an optimal
2032 multiplication sequence when profitable. */
2033 c
= TREE_REAL_CST (arg1
);
2034 n
= real_to_integer (&c
);
2035 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2036 c_is_int
= real_identical (&c
, &cint
);
2039 && ((n
>= -1 && n
<= 2)
2040 || (flag_unsafe_math_optimizations
2042 && powi_cost (n
) <= POWI_MAX_MULTS
)))
2043 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
2045 /* Attempt various optimizations using sqrt and cbrt. */
2046 type
= TREE_TYPE (arg0
);
2047 mode
= TYPE_MODE (type
);
2048 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
2054 && real_equal (&c
, &dconsthalf
)
2055 && !HONOR_SIGNED_ZEROS (mode
))
2056 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2058 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
2060 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2061 optimizations since 1./3. is not exactly representable. If x
2062 is negative and finite, the correct value of pow(x,1./3.) is
2063 a NaN with the "invalid" exception raised, because the value
2064 of 1./3. actually has an even denominator. The correct value
2065 of cbrt(x) is a negative real value. */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);
  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_6))
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimizing for size.  */
  if (flag_unsafe_math_optimizations
      && (speed_p || real_equal (&c, &dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
      unsigned int max_depth = speed_p
			       ? param_max_pow_sqrt_depth

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);
  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                   n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
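  /* Illustrative example (added; not from the original sources): for
     c = 5./3. we have n = 5, so the expansion sketched above is the
     equivalent of

       double f (double x)
       {
	 double c = __builtin_cbrt (x);
	 return x * (c * c);	// powi (x, 5/3) = powi (x, 1) * cbrt(x)^2
       }

     subject to the unsafe-math and sign constraints listed in the comment.  */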
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);
  if (flag_unsafe_math_optimizations
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_identical (&c2, &c)
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);
2184 /* No optimizations succeeded. */
2188 /* ARG is the argument to a cabs builtin call in GSI with location info
2189 LOC. Create a sequence of statements prior to GSI that calculates
2190 sqrt(R*R + I*I), where R and I are the real and imaginary components
2191 of ARG, respectively. Return an expression holding the result. */
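/* Illustrative example (added; not from the original sources): with
   -funsafe-math-optimizations and a hardware sqrt instruction, a call such as

     double f (double _Complex z) { return __builtin_cabs (z); }

   is expanded inline into the equivalent of

     double r = __real__ z, i = __imag__ z;
     return __builtin_sqrt (r * r + i * i);

   instead of a call to the cabs library function.  */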
gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
  tree real_part, imag_part, addend1, addend2, sum, result;
  tree type = TREE_TYPE (TREE_TYPE (arg));
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  machine_mode mode = TYPE_MODE (type);

  if (!flag_unsafe_math_optimizations
      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)

  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    REALPART_EXPR, arg);
  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    real_part, real_part);
  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    IMAGPART_EXPR, arg);
  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    imag_part, imag_part);
  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
  result = build_and_insert_call (gsi, loc, sqrtfn, sum);
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  */
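/* Illustrative example (added; not from the original sources): the powi
   expansion mentioned above turns a constant integer power into a short
   multiplication chain, e.g.

     double f (double x) { return __builtin_powi (x, 8); }

   becomes the equivalent of

     double t1 = x * x;		// x^2
     double t2 = t1 * t1;	// x^4
     return t2 * t2;		// x^8

   i.e. three multiplies instead of a loop or a library call, as long as
   powi_cost stays within POWI_MAX_MULTS.  */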
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_cse_sincos : public gimple_opt_pass
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)

  /* opt_pass methods: */
  virtual bool gate (function *)
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos
2260 pass_cse_sincos::execute (function
*fun
)
2263 bool cfg_changed
= false;
2265 calculate_dominance_info (CDI_DOMINATORS
);
2266 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2268 FOR_EACH_BB_FN (bb
, fun
)
2270 gimple_stmt_iterator gsi
;
2271 bool cleanup_eh
= false;
2273 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2275 gimple
*stmt
= gsi_stmt (gsi
);
2277 /* Only the last stmt in a bb could throw, no need to call
2278 gimple_purge_dead_eh_edges if we change something in the middle
2279 of a basic block. */
2282 if (is_gimple_call (stmt
)
2283 && gimple_call_lhs (stmt
))
2285 tree arg
, arg0
, arg1
, result
;
2289 switch (gimple_call_combined_fn (stmt
))
2294 arg
= gimple_call_arg (stmt
, 0);
2295 /* Make sure we have either sincos or cexp. */
2296 if (!targetm
.libc_has_function (function_c99_math_complex
,
2298 && !targetm
.libc_has_function (function_sincos
,
2302 if (TREE_CODE (arg
) == SSA_NAME
)
2303 cfg_changed
|= execute_cse_sincos_1 (arg
);
2307 arg0
= gimple_call_arg (stmt
, 0);
2308 arg1
= gimple_call_arg (stmt
, 1);
2310 loc
= gimple_location (stmt
);
2311 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2315 tree lhs
= gimple_get_lhs (stmt
);
2316 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2317 gimple_set_location (new_stmt
, loc
);
2318 unlink_stmt_vdef (stmt
);
2319 gsi_replace (&gsi
, new_stmt
, true);
2321 if (gimple_vdef (stmt
))
2322 release_ssa_name (gimple_vdef (stmt
));
2327 arg0
= gimple_call_arg (stmt
, 0);
2328 arg1
= gimple_call_arg (stmt
, 1);
2329 loc
= gimple_location (stmt
);
2331 if (real_minus_onep (arg0
))
2333 tree t0
, t1
, cond
, one
, minus_one
;
2336 t0
= TREE_TYPE (arg0
);
2337 t1
= TREE_TYPE (arg1
);
2338 one
= build_real (t0
, dconst1
);
2339 minus_one
= build_real (t0
, dconstm1
);
2341 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2342 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2343 arg1
, build_int_cst (t1
, 1));
2344 gimple_set_location (stmt
, loc
);
2345 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2347 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2348 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2350 gimple_set_location (stmt
, loc
);
2351 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2355 if (!tree_fits_shwi_p (arg1
))
2358 n
= tree_to_shwi (arg1
);
2359 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2364 tree lhs
= gimple_get_lhs (stmt
);
2365 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2366 gimple_set_location (new_stmt
, loc
);
2367 unlink_stmt_vdef (stmt
);
2368 gsi_replace (&gsi
, new_stmt
, true);
2370 if (gimple_vdef (stmt
))
2371 release_ssa_name (gimple_vdef (stmt
));
2376 arg0
= gimple_call_arg (stmt
, 0);
2377 loc
= gimple_location (stmt
);
2378 result
= gimple_expand_builtin_cabs (&gsi
, loc
, arg0
);
2382 tree lhs
= gimple_get_lhs (stmt
);
2383 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2384 gimple_set_location (new_stmt
, loc
);
2385 unlink_stmt_vdef (stmt
);
2386 gsi_replace (&gsi
, new_stmt
, true);
2388 if (gimple_vdef (stmt
))
2389 release_ssa_name (gimple_vdef (stmt
));
2398 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2401 statistics_counter_event (fun
, "sincos statements inserted",
2402 sincos_stats
.inserted
);
2403 statistics_counter_event (fun
, "conv statements removed",
2404 sincos_stats
.conv_removed
);
2406 return cfg_changed
? TODO_cleanup_cfg
: 0;
2412 make_pass_cse_sincos (gcc::context
*ctxt
)
2414 return new pass_cse_sincos (ctxt
);
2417 /* Return true if stmt is a type conversion operation that can be stripped
2418 when used in a widening multiply operation. */
2420 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2422 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2424 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2429 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2432 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2434 /* If the type of OP has the same precision as the result, then
2435 we can strip this conversion. The multiply operation will be
2436 selected to create the correct extension as a by-product. */
2437 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
2440 /* We can also strip a conversion if it preserves the signed-ness of
2441 the operation and doesn't narrow the range. */
2442 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
2444 /* If the inner-most type is unsigned, then we can strip any
2445 intermediate widening operation. If it's signed, then the
2446 intermediate widening operation must also be signed. */
2447 if ((TYPE_UNSIGNED (inner_op_type
)
2448 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2449 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2455 return rhs_code
== FIXED_CONVERT_EXPR
;
2458 /* Return true if RHS is a suitable operand for a widening multiplication,
2459 assuming a target type of TYPE.
2460 There are two cases:
2462 - RHS makes some value at least twice as wide. Store that value
2463 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2465 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2466 but leave *TYPE_OUT untouched. */
2469 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2475 if (TREE_CODE (rhs
) == SSA_NAME
)
2477 stmt
= SSA_NAME_DEF_STMT (rhs
);
2478 if (is_gimple_assign (stmt
))
2480 if (! widening_mult_conversion_strippable_p (type
, stmt
))
2484 rhs1
= gimple_assign_rhs1 (stmt
);
2486 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2488 *new_rhs_out
= rhs1
;
2497 type1
= TREE_TYPE (rhs1
);
2499 if (TREE_CODE (type1
) != TREE_CODE (type
)
2500 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2503 *new_rhs_out
= rhs1
;
2508 if (TREE_CODE (rhs
) == INTEGER_CST
)
/* Return true if STMT performs a widening multiplication, assuming the
   output type is TYPE.  If so, store the unwidened types of the operands
   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
   and *TYPE2_OUT would give the operands of the multiplication.  */
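/* Illustrative example (added; not from the original sources): on a target
   with a 32x32->64 widening multiply pattern, the source-level multiplication

     long long f (int a, int b) { return (long long) a * (long long) b; }

   is recognized here with *TYPE1_OUT == *TYPE2_OUT == int, so that the caller
   can rewrite it as a single WIDEN_MULT_EXPR instead of two sign extensions
   followed by a 64-bit multiply.  */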
2525 is_widening_mult_p (gimple
*stmt
,
2526 tree
*type1_out
, tree
*rhs1_out
,
2527 tree
*type2_out
, tree
*rhs2_out
)
2529 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2531 if (TREE_CODE (type
) == INTEGER_TYPE
)
2533 if (TYPE_OVERFLOW_TRAPS (type
))
2536 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2539 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2543 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2547 if (*type1_out
== NULL
)
2549 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2551 *type1_out
= *type2_out
;
2554 if (*type2_out
== NULL
)
2556 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2558 *type2_out
= *type1_out
;
2561 /* Ensure that the larger of the two operands comes first. */
2562 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2564 std::swap (*type1_out
, *type2_out
);
2565 std::swap (*rhs1_out
, *rhs2_out
);
2571 /* Check to see if the CALL statement is an invocation of copysign
2572 with 1. being the first argument. */
2574 is_copysign_call_with_1 (gimple
*call
)
2576 gcall
*c
= dyn_cast
<gcall
*> (call
);
2580 enum combined_fn code
= gimple_call_combined_fn (c
);
2582 if (code
== CFN_LAST
)
2585 if (builtin_fn_p (code
))
2587 switch (as_builtin_fn (code
))
2589 CASE_FLT_FN (BUILT_IN_COPYSIGN
):
2590 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN
):
2591 return real_onep (gimple_call_arg (c
, 0));
2597 if (internal_fn_p (code
))
2599 switch (as_internal_fn (code
))
2602 return real_onep (gimple_call_arg (c
, 0));
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined; if the
   pattern is not a xorsign pattern or if expansion fails, FALSE is
   returned, otherwise TRUE is returned.  */
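/* Illustrative example (added; not from the original sources): the pattern
   targeted here is, at the source level,

     double f (double x, double y) { return x * __builtin_copysign (1.0, y); }

   which, when the target provides the xorsign optab, can be implemented as a
   single XOR of x with the sign bit of y (IFN_XORSIGN) instead of a copysign
   followed by a multiply.  */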
2616 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2618 tree treeop0
, treeop1
, lhs
, type
;
2619 location_t loc
= gimple_location (stmt
);
2620 lhs
= gimple_assign_lhs (stmt
);
2621 treeop0
= gimple_assign_rhs1 (stmt
);
2622 treeop1
= gimple_assign_rhs2 (stmt
);
2623 type
= TREE_TYPE (lhs
);
2624 machine_mode mode
= TYPE_MODE (type
);
2626 if (HONOR_SNANS (type
))
2629 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2631 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2632 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2634 call0
= SSA_NAME_DEF_STMT (treeop1
);
2635 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2640 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2643 gcall
*c
= as_a
<gcall
*> (call0
);
2644 treeop0
= gimple_call_arg (c
, 1);
2647 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2648 gimple_set_lhs (call_stmt
, lhs
);
2649 gimple_set_location (call_stmt
, loc
);
2650 gsi_replace (gsi
, call_stmt
, true);
2657 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2658 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2659 value is true iff we converted the statement. */
2662 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2664 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2665 enum insn_code handler
;
2666 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2668 int actual_precision
;
2669 location_t loc
= gimple_location (stmt
);
2670 bool from_unsigned1
, from_unsigned2
;
2672 lhs
= gimple_assign_lhs (stmt
);
2673 type
= TREE_TYPE (lhs
);
2674 if (TREE_CODE (type
) != INTEGER_TYPE
)
2677 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
2680 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2681 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2682 if (to_mode
== from_mode
)
2685 from_unsigned1
= TYPE_UNSIGNED (type1
);
2686 from_unsigned2
= TYPE_UNSIGNED (type2
);
2688 if (from_unsigned1
&& from_unsigned2
)
2689 op
= umul_widen_optab
;
2690 else if (!from_unsigned1
&& !from_unsigned2
)
2691 op
= smul_widen_optab
;
2693 op
= usmul_widen_optab
;
2695 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2698 if (handler
== CODE_FOR_nothing
)
2700 if (op
!= smul_widen_optab
)
2702 /* We can use a signed multiply with unsigned types as long as
2703 there is a wider mode to use, or it is the smaller of the two
2704 types that is unsigned. Note that type1 >= type2, always. */
2705 if ((TYPE_UNSIGNED (type1
)
2706 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2707 || (TYPE_UNSIGNED (type2
)
2708 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2710 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2711 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2715 op
= smul_widen_optab
;
2716 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2720 if (handler
== CODE_FOR_nothing
)
2723 from_unsigned1
= from_unsigned2
= false;
  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
2731 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2732 if (2 * actual_precision
> TYPE_PRECISION (type
))
2734 if (actual_precision
!= TYPE_PRECISION (type1
)
2735 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2736 rhs1
= build_and_insert_cast (gsi
, loc
,
2737 build_nonstandard_integer_type
2738 (actual_precision
, from_unsigned1
), rhs1
);
2739 if (actual_precision
!= TYPE_PRECISION (type2
)
2740 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2741 rhs2
= build_and_insert_cast (gsi
, loc
,
2742 build_nonstandard_integer_type
2743 (actual_precision
, from_unsigned2
), rhs2
);
2745 /* Handle constants. */
2746 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2747 rhs1
= fold_convert (type1
, rhs1
);
2748 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2749 rhs2
= fold_convert (type2
, rhs2
);
2751 gimple_assign_set_rhs1 (stmt
, rhs1
);
2752 gimple_assign_set_rhs2 (stmt
, rhs2
);
2753 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2755 widen_mul_stats
.widen_mults_inserted
++;
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */
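/* Illustrative example (added; not from the original sources): a source-level
   sketch of the multiply-and-accumulate pattern handled below, assuming the
   target provides a widening multiply-add instruction:

     long long f (long long acc, int a, int b)
     {
       return acc + (long long) a * (long long) b;
     }

   The PLUS_EXPR whose operand is the widening multiply is rewritten into a
   single WIDEN_MULT_PLUS_EXPR.  */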
2766 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2767 enum tree_code code
)
2769 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2770 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2771 tree type
, type1
, type2
, optype
;
2772 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2773 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2775 enum tree_code wmult_code
;
2776 enum insn_code handler
;
2777 scalar_mode to_mode
, from_mode
, actual_mode
;
2778 location_t loc
= gimple_location (stmt
);
2779 int actual_precision
;
2780 bool from_unsigned1
, from_unsigned2
;
2782 lhs
= gimple_assign_lhs (stmt
);
2783 type
= TREE_TYPE (lhs
);
2784 if (TREE_CODE (type
) != INTEGER_TYPE
2785 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2788 if (code
== MINUS_EXPR
)
2789 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2791 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2793 rhs1
= gimple_assign_rhs1 (stmt
);
2794 rhs2
= gimple_assign_rhs2 (stmt
);
2796 if (TREE_CODE (rhs1
) == SSA_NAME
)
2798 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2799 if (is_gimple_assign (rhs1_stmt
))
2800 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2803 if (TREE_CODE (rhs2
) == SSA_NAME
)
2805 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2806 if (is_gimple_assign (rhs2_stmt
))
2807 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there is more than
     one conversion then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
2815 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2817 conv1_stmt
= rhs1_stmt
;
2818 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2819 if (TREE_CODE (rhs1
) == SSA_NAME
)
2821 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2822 if (is_gimple_assign (rhs1_stmt
))
2823 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2828 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2830 conv2_stmt
= rhs2_stmt
;
2831 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2832 if (TREE_CODE (rhs2
) == SSA_NAME
)
2834 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2835 if (is_gimple_assign (rhs2_stmt
))
2836 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  Also restrict this operation
     to a single basic block to avoid moving the multiply to a different
     block with a higher execution frequency.  */
2853 if (code
== PLUS_EXPR
2854 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2856 if (!has_single_use (rhs1
)
2857 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2858 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2859 &type2
, &mult_rhs2
))
2862 conv_stmt
= conv1_stmt
;
2864 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
2866 if (!has_single_use (rhs2
)
2867 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
2868 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
2869 &type2
, &mult_rhs2
))
2872 conv_stmt
= conv2_stmt
;
2877 to_mode
= SCALAR_TYPE_MODE (type
);
2878 from_mode
= SCALAR_TYPE_MODE (type1
);
2879 if (to_mode
== from_mode
)
2882 from_unsigned1
= TYPE_UNSIGNED (type1
);
2883 from_unsigned2
= TYPE_UNSIGNED (type2
);
2886 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2887 if (from_unsigned1
!= from_unsigned2
)
2889 if (!INTEGRAL_TYPE_P (type
))
2891 /* We can use a signed multiply with unsigned types as long as
2892 there is a wider mode to use, or it is the smaller of the two
2893 types that is unsigned. Note that type1 >= type2, always. */
2895 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2897 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2899 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2900 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
2904 from_unsigned1
= from_unsigned2
= false;
2905 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
2915 /* We use the original, unmodified data types for this. */
2916 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
2917 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
2918 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
2919 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
2921 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
2923 /* Conversion is a truncate. */
2924 if (TYPE_PRECISION (to_type
) < data_size
)
2927 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
2929 /* Conversion is an extend. Check it's the right sort. */
2930 if (TYPE_UNSIGNED (from_type
) != is_unsigned
2931 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
2934 /* else convert is a no-op for our purposes. */
2937 /* Verify that the machine can perform a widening multiply
2938 accumulate in this mode/signedness combination, otherwise
2939 this transformation is likely to pessimize code. */
2940 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
2941 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
2942 from_mode
, &actual_mode
);
2944 if (handler
== CODE_FOR_nothing
)
  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
2949 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2950 if (actual_precision
!= TYPE_PRECISION (type1
)
2951 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2952 mult_rhs1
= build_and_insert_cast (gsi
, loc
,
2953 build_nonstandard_integer_type
2954 (actual_precision
, from_unsigned1
),
2956 if (actual_precision
!= TYPE_PRECISION (type2
)
2957 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2958 mult_rhs2
= build_and_insert_cast (gsi
, loc
,
2959 build_nonstandard_integer_type
2960 (actual_precision
, from_unsigned2
),
2963 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
2964 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
2966 /* Handle constants. */
2967 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
2968 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
2969 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
2970 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
2972 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
2974 update_stmt (gsi_stmt (*gsi
));
2975 widen_mul_stats
.maccs_inserted
++;
/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
   OP2 and which we know is used in statements that can be, together with the
   multiplication, converted to FMAs, perform the transformation.  */
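/* Illustrative example (added; not from the original sources): with
   -ffp-contract=fast on an FMA-capable target, the single use of the product
   in

     double f (double a, double b, double c)
     {
       double p = a * b;
       return p + c;
     }

   is rewritten into the equivalent of __builtin_fma (a, b, c); a negated
   product or a MINUS_EXPR use instead yields the FNMA/FMS/FNMS forms.  */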
2984 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
2986 tree type
= TREE_TYPE (mul_result
);
2988 imm_use_iterator imm_iter
;
2991 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
2993 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
2994 tree addop
, mulop1
= op1
, result
= mul_result
;
2995 bool negate_p
= false;
2996 gimple_seq seq
= NULL
;
2998 if (is_gimple_debug (use_stmt
))
3001 if (is_gimple_assign (use_stmt
)
3002 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3004 result
= gimple_assign_lhs (use_stmt
);
3005 use_operand_p use_p
;
3006 gimple
*neguse_stmt
;
3007 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
3008 gsi_remove (&gsi
, true);
3009 release_defs (use_stmt
);
3011 use_stmt
= neguse_stmt
;
3012 gsi
= gsi_for_stmt (use_stmt
);
3016 tree cond
, else_value
, ops
[3];
3018 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
3021 addop
= ops
[0] == result
? ops
[1] : ops
[0];
3023 if (code
== MINUS_EXPR
)
3025 if (ops
[0] == result
)
3026 /* a * b - c -> a * b + (-c) */
3027 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
3029 /* a - b * c -> (-b) * c + a */
3030 negate_p
= !negate_p
;
3034 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
3037 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3040 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
3041 op2
, addop
, else_value
);
3043 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
3044 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
3045 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
3047 gsi_replace (&gsi
, fma_stmt
, true);
3048 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3049 regardless of where the negation occurs. */
3050 gimple
*orig_stmt
= gsi_stmt (gsi
);
3051 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3053 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
3055 update_stmt (gsi_stmt (gsi
));
3058 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3060 fprintf (dump_file
, "Generated FMA ");
3061 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3062 fprintf (dump_file
, "\n");
3065 /* If the FMA result is negated in a single use, fold the negation
3067 orig_stmt
= gsi_stmt (gsi
);
3068 use_operand_p use_p
;
3070 if (is_gimple_call (orig_stmt
)
3071 && gimple_call_internal_p (orig_stmt
)
3072 && gimple_call_lhs (orig_stmt
)
3073 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
3074 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
3075 && is_gimple_assign (neg_stmt
)
3076 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
3077 && !stmt_could_throw_p (cfun
, neg_stmt
))
3079 gsi
= gsi_for_stmt (neg_stmt
);
3080 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3082 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
3084 update_stmt (gsi_stmt (gsi
));
3085 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3087 fprintf (dump_file
, "Folded FMA negation ");
3088 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3089 fprintf (dump_file
, "\n");
3094 widen_mul_stats
.fmas_inserted
++;
3098 /* Data necessary to perform the actual transformation from a multiplication
3099 and an addition to an FMA after decision is taken it should be done and to
3100 then delete the multiplication statement from the function IL. */
3102 struct fma_transformation_info
3110 /* Structure containing the current state of FMA deferring, i.e. whether we are
3111 deferring, whether to continue deferring, and all data necessary to come
3112 back and perform all deferred transformations. */
3114 class fma_deferring_state
3117 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3118 do any deferring. */
3120 fma_deferring_state (bool perform_deferring
)
3121 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3122 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
  /* List of FMA candidates for which the transformation has been determined
     to be possible but which, at this point in the BB analysis, we have
     deferred rather than transformed.  */
3127 auto_vec
<fma_transformation_info
, 8> m_candidates
;
  /* Set of results of multiplication that are part of an already deferred FMA
     candidate.  */
3131 hash_set
<tree
> m_mul_result_set
;
  /* The PHI that supposedly feeds back the result of one FMA to another over
     a loop boundary.  */
3135 gphi
*m_initial_phi
;
  /* Result of the last produced FMA candidate or NULL if there has not been
     one.  */
3141 /* If true, deferring might still be profitable. If false, transform all
3142 candidates and no longer defer. */
/* Transform all deferred FMA candidates and mark STATE as no longer
   deferring.  */
3150 cancel_fma_deferring (fma_deferring_state
*state
)
3152 if (!state
->m_deferring_p
)
3155 for (unsigned i
= 0; i
< state
->m_candidates
.length (); i
++)
3157 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3158 fprintf (dump_file
, "Generating deferred FMA\n");
3160 const fma_transformation_info
&fti
= state
->m_candidates
[i
];
3161 convert_mult_to_fma_1 (fti
.mul_result
, fti
.op1
, fti
.op2
);
3163 gimple_stmt_iterator gsi
= gsi_for_stmt (fti
.mul_stmt
);
3164 gsi_remove (&gsi
, true);
3165 release_defs (fti
.mul_stmt
);
3167 state
->m_deferring_p
= false;
3170 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3171 Otherwise return NULL. */
3174 result_of_phi (tree op
)
3176 if (TREE_CODE (op
) != SSA_NAME
)
3179 return dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (op
));
/* After processing statements of a BB and recording STATE, return true if the
   initial phi is fed by the last FMA candidate result or one such result from
   previously processed BBs marked in LAST_RESULT_SET.  */
3187 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3188 hash_set
<tree
> *last_result_set
)
3192 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3194 tree t
= USE_FROM_PTR (use
);
3195 if (t
== state
->m_last_result
3196 || last_result_set
->contains (t
))
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.
   If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
   on MUL_COND, otherwise it is unconditional.

   If STATE indicates that we are deferring FMA transformation, that means
   that we do not produce FMAs for basic blocks which look like:

     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
     accumulator_66 = _65 + accumulator_111;

   or its unrolled version, i.e. with several FMA candidates that feed the
   result of one into the addend of another.  Instead, we add them to a list
   in STATE and if we later discover an FMA candidate that is not part of such
   a chain, we go back and perform all deferred past candidates.  */
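/* Illustrative example (added; not from the original sources): the deferred
   case described above typically comes from a reduction loop such as

     double dot (const double *x, const double *y, int n)
     {
       double acc = 0.0;
       for (int i = 0; i < n; i++)
	 acc += x[i] * y[i];
       return acc;
     }

   where each candidate FMA would feed the addend of the next one through the
   accumulator PHI; forming FMAs there can lengthen the loop-carried
   dependence on targets where FMA latency exceeds that of an addition.  */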
3223 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3224 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
)
3226 tree mul_result
= gimple_get_lhs (mul_stmt
);
3227 tree type
= TREE_TYPE (mul_result
);
3228 gimple
*use_stmt
, *neguse_stmt
;
3229 use_operand_p use_p
;
3230 imm_use_iterator imm_iter
;
3232 if (FLOAT_TYPE_P (type
)
3233 && flag_fp_contract_mode
== FP_CONTRACT_OFF
)
3236 /* We don't want to do bitfield reduction ops. */
3237 if (INTEGRAL_TYPE_P (type
)
3238 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
3241 /* If the target doesn't support it, don't generate it. We assume that
3242 if fma isn't available then fms, fnma or fnms are not either. */
3243 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3244 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
3247 /* If the multiplication has zero uses, it is kept around probably because
3248 of -fnon-call-exceptions. Don't optimize it away in that case,
3250 if (has_zero_uses (mul_result
))
3254 = (state
->m_deferring_p
3255 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type
)),
3256 param_avoid_fma_max_bits
));
3257 bool defer
= check_defer
;
3258 bool seen_negate_p
= false;
3259 /* Make sure that the multiplication statement becomes dead after
3260 the transformation, thus that all uses are transformed to FMAs.
3261 This means we assume that an FMA operation has the same cost
3263 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3265 tree result
= mul_result
;
3266 bool negate_p
= false;
3268 use_stmt
= USE_STMT (use_p
);
3270 if (is_gimple_debug (use_stmt
))
      /* For now restrict this operation to single basic blocks.  In theory
3274 we would want to support sinking the multiplication in
3280 to form a fma in the then block and sink the multiplication to the
3282 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3285 /* A negate on the multiplication leads to FNMA. */
3286 if (is_gimple_assign (use_stmt
)
3287 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3292 /* If (due to earlier missed optimizations) we have two
3293 negates of the same value, treat them as equivalent
3294 to a single negate with multiple uses. */
3298 result
= gimple_assign_lhs (use_stmt
);
3300 /* Make sure the negate statement becomes dead with this
3301 single transformation. */
3302 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3303 &use_p
, &neguse_stmt
))
3306 /* Make sure the multiplication isn't also used on that stmt. */
3307 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3308 if (USE_FROM_PTR (usep
) == mul_result
)
3312 use_stmt
= neguse_stmt
;
3313 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3316 negate_p
= seen_negate_p
= true;
3319 tree cond
, else_value
, ops
[3];
3321 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3328 if (ops
[1] == result
)
3329 negate_p
= !negate_p
;
3334 /* FMA can only be formed from PLUS and MINUS. */
3338 if (mul_cond
&& cond
!= mul_cond
)
3343 if (cond
== result
|| else_value
== result
)
3345 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
, opt_type
))
	  /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
	     we'll visit later, we might be able to get a more profitable
	     match.
	     OTOH, if we don't, a negate / fma pair has likely lower latency
	     than a mult / subtract pair.  */
3354 if (code
== MINUS_EXPR
3357 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3358 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3359 && TREE_CODE (ops
[1]) == SSA_NAME
3360 && has_single_use (ops
[1]))
3362 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3363 if (is_gimple_assign (stmt2
)
3364 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
3368 /* We can't handle a * b + a * b. */
3369 if (ops
[0] == ops
[1])
3371 /* If deferring, make sure we are not looking at an instruction that
3372 wouldn't have existed if we were not. */
3373 if (state
->m_deferring_p
3374 && (state
->m_mul_result_set
.contains (ops
[0])
3375 || state
->m_mul_result_set
.contains (ops
[1])))
3380 tree use_lhs
= gimple_get_lhs (use_stmt
);
3381 if (state
->m_last_result
)
3383 if (ops
[1] == state
->m_last_result
3384 || ops
[0] == state
->m_last_result
)
3391 gcc_checking_assert (!state
->m_initial_phi
);
3393 if (ops
[0] == result
)
3394 phi
= result_of_phi (ops
[1]);
3397 gcc_assert (ops
[1] == result
);
3398 phi
= result_of_phi (ops
[0]);
3403 state
->m_initial_phi
= phi
;
3410 state
->m_last_result
= use_lhs
;
3411 check_defer
= false;
3416 /* While it is possible to validate whether or not the exact form that
3417 we've recognized is available in the backend, the assumption is that
3418 if the deferring logic above did not trigger, the transformation is
3419 never a loss. For instance, suppose the target only has the plain FMA
3420 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3421 MUL+SUB for FMA+NEG, which is still two operations. Consider
3422 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3423 form the two NEGs are independent and could be run in parallel. */
3428 fma_transformation_info fti
;
3429 fti
.mul_stmt
= mul_stmt
;
3430 fti
.mul_result
= mul_result
;
3433 state
->m_candidates
.safe_push (fti
);
3434 state
->m_mul_result_set
.add (mul_result
);
3436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3438 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3439 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3440 fprintf (dump_file
, "\n");
3447 if (state
->m_deferring_p
)
3448 cancel_fma_deferring (state
);
3449 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
3455 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3456 a check for non-zero like:
3457 _1 = x_4(D) * y_5(D);
3460 goto <bb 3>; [50.00%]
3462 goto <bb 4>; [50.00%]
3464 <bb 3> [local count: 536870913]:
3469 <bb 4> [local count: 1073741824]:
3470 # iftmp.0_3 = PHI <_10(3), 0(2)>
3471 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3472 optimize the x_4(D) != 0 condition to 1. */
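/* Illustrative example (added; not from the original sources): the guarded
   division shown above typically comes from a manual overflow check such as

     int f (unsigned x, unsigned y, unsigned *res)
     {
       unsigned r = x * y;
       *res = r;
       return x != 0 && r / x != y;	// overflow test
     }

   Once the multiplication and the division are rewritten to use
   .MUL_OVERFLOW, the x != 0 guard no longer protects anything and can be
   folded to a constant, which is what this function does.  */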
3475 maybe_optimize_guarding_check (vec
<gimple
*> &mul_stmts
, gimple
*cond_stmt
,
3476 gimple
*div_stmt
, bool *cfg_changed
)
3478 basic_block bb
= gimple_bb (cond_stmt
);
3479 if (gimple_bb (div_stmt
) != bb
|| !single_pred_p (bb
))
3481 edge pred_edge
= single_pred_edge (bb
);
3482 basic_block pred_bb
= pred_edge
->src
;
3483 if (EDGE_COUNT (pred_bb
->succs
) != 2)
3485 edge other_edge
= EDGE_SUCC (pred_bb
, EDGE_SUCC (pred_bb
, 0) == pred_edge
);
3486 edge other_succ_edge
= NULL
;
3487 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3489 if (EDGE_COUNT (bb
->succs
) != 2)
3491 other_succ_edge
= EDGE_SUCC (bb
, 0);
3492 if (gimple_cond_code (cond_stmt
) == NE_EXPR
)
3494 if (other_succ_edge
->flags
& EDGE_TRUE_VALUE
)
3495 other_succ_edge
= EDGE_SUCC (bb
, 1);
3497 else if (other_succ_edge
->flags
& EDGE_FALSE_VALUE
)
3498 other_succ_edge
= EDGE_SUCC (bb
, 0);
3499 if (other_edge
->dest
!= other_succ_edge
->dest
)
3502 else if (!single_succ_p (bb
) || other_edge
->dest
!= single_succ (bb
))
3504 gimple
*zero_cond
= last_stmt (pred_bb
);
3505 if (zero_cond
== NULL
3506 || gimple_code (zero_cond
) != GIMPLE_COND
3507 || (gimple_cond_code (zero_cond
)
3508 != ((pred_edge
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
))
3509 || !integer_zerop (gimple_cond_rhs (zero_cond
)))
3511 tree zero_cond_lhs
= gimple_cond_lhs (zero_cond
);
3512 if (TREE_CODE (zero_cond_lhs
) != SSA_NAME
)
3514 if (gimple_assign_rhs2 (div_stmt
) != zero_cond_lhs
)
3516 /* Allow the divisor to be result of a same precision cast
3517 from zero_cond_lhs. */
3518 tree rhs2
= gimple_assign_rhs2 (div_stmt
);
3519 if (TREE_CODE (rhs2
) != SSA_NAME
)
3521 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3522 if (!gimple_assign_cast_p (g
)
3523 || gimple_assign_rhs1 (g
) != gimple_cond_lhs (zero_cond
)
3524 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs
))
3525 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs
))
3526 != TYPE_PRECISION (TREE_TYPE (rhs2
))))
3529 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
3530 mul_stmts
.quick_push (div_stmt
);
3531 if (is_gimple_debug (gsi_stmt (gsi
)))
3532 gsi_next_nondebug (&gsi
);
3533 unsigned cast_count
= 0;
3534 while (gsi_stmt (gsi
) != cond_stmt
)
3536 /* If original mul_stmt has a single use, allow it in the same bb,
3537 we are looking then just at __builtin_mul_overflow_p.
3538 Though, in that case the original mul_stmt will be replaced
3539 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3543 FOR_EACH_VEC_ELT (mul_stmts
, i
, mul_stmt
)
3545 if (gsi_stmt (gsi
) == mul_stmt
)
3551 if (!ok
&& gimple_assign_cast_p (gsi_stmt (gsi
)) && ++cast_count
< 4)
3555 gsi_next_nondebug (&gsi
);
3557 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3559 basic_block succ_bb
= other_edge
->dest
;
3560 for (gphi_iterator gpi
= gsi_start_phis (succ_bb
); !gsi_end_p (gpi
);
3563 gphi
*phi
= gpi
.phi ();
3564 tree v1
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3565 tree v2
= gimple_phi_arg_def (phi
, other_succ_edge
->dest_idx
);
3566 if (!operand_equal_p (v1
, v2
, 0))
3572 tree lhs
= gimple_assign_lhs (cond_stmt
);
3573 if (!lhs
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
3575 gsi_next_nondebug (&gsi
);
3576 if (!gsi_end_p (gsi
))
3578 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3580 gimple
*cast_stmt
= gsi_stmt (gsi
);
3581 if (!gimple_assign_cast_p (cast_stmt
))
3583 tree new_lhs
= gimple_assign_lhs (cast_stmt
);
3584 gsi_next_nondebug (&gsi
);
3585 if (!gsi_end_p (gsi
)
3587 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs
))
3588 || TYPE_PRECISION (TREE_TYPE (new_lhs
)) <= 1)
3592 edge succ_edge
= single_succ_edge (bb
);
3593 basic_block succ_bb
= succ_edge
->dest
;
3594 gsi
= gsi_start_phis (succ_bb
);
3595 if (gsi_end_p (gsi
))
3597 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
3599 if (!gsi_end_p (gsi
))
3601 if (gimple_phi_arg_def (phi
, succ_edge
->dest_idx
) != lhs
)
3603 tree other_val
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3604 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3606 tree cond
= gimple_assign_rhs1 (cond_stmt
);
3607 if (TREE_CODE (cond
) == NE_EXPR
)
3609 if (!operand_equal_p (other_val
,
3610 gimple_assign_rhs3 (cond_stmt
), 0))
3613 else if (!operand_equal_p (other_val
,
3614 gimple_assign_rhs2 (cond_stmt
), 0))
3617 else if (gimple_assign_rhs_code (cond_stmt
) == NE_EXPR
)
3619 if (!integer_zerop (other_val
))
3622 else if (!integer_onep (other_val
))
3625 gcond
*zero_gcond
= as_a
<gcond
*> (zero_cond
);
3626 if (pred_edge
->flags
& EDGE_TRUE_VALUE
)
3627 gimple_cond_make_true (zero_gcond
);
3629 gimple_cond_make_false (zero_gcond
);
3630 update_stmt (zero_cond
);
3631 *cfg_changed
= true;
3634 /* Helper function for arith_overflow_check_p. Return true
3635 if VAL1 is equal to VAL2 cast to corresponding integral type
3636 with other signedness or vice versa. */
3639 arith_cast_equal_p (tree val1
, tree val2
)
3641 if (TREE_CODE (val1
) == INTEGER_CST
&& TREE_CODE (val2
) == INTEGER_CST
)
3642 return wi::eq_p (wi::to_wide (val1
), wi::to_wide (val2
));
3643 else if (TREE_CODE (val1
) != SSA_NAME
|| TREE_CODE (val2
) != SSA_NAME
)
3645 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1
))
3646 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1
)) == val2
)
3648 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2
))
3649 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2
)) == val1
)
3654 /* Helper function of match_arith_overflow. Return 1
3655 if USE_STMT is unsigned overflow check ovf != 0 for
3656 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3660 arith_overflow_check_p (gimple
*stmt
, gimple
*cast_stmt
, gimple
*&use_stmt
,
3661 tree maxval
, tree
*other
)
3663 enum tree_code ccode
= ERROR_MARK
;
3664 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3665 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3666 tree lhs
= gimple_assign_lhs (cast_stmt
? cast_stmt
: stmt
);
3667 tree rhs1
= gimple_assign_rhs1 (stmt
);
3668 tree rhs2
= gimple_assign_rhs2 (stmt
);
3669 tree multop
= NULL_TREE
, divlhs
= NULL_TREE
;
3670 gimple
*cur_use_stmt
= use_stmt
;
3672 if (code
== MULT_EXPR
)
3674 if (!is_gimple_assign (use_stmt
))
3676 if (gimple_assign_rhs_code (use_stmt
) != TRUNC_DIV_EXPR
)
3678 if (gimple_assign_rhs1 (use_stmt
) != lhs
)
3682 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs1
))
3684 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
))
3689 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
3691 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
, 0))
3695 if (stmt_ends_bb_p (use_stmt
))
3697 divlhs
= gimple_assign_lhs (use_stmt
);
3701 if (!single_imm_use (divlhs
, &use
, &cur_use_stmt
))
3704 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3706 ccode
= gimple_cond_code (cur_use_stmt
);
3707 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3708 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3710 else if (is_gimple_assign (cur_use_stmt
))
3712 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3714 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3715 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3716 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3718 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3720 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3721 if (COMPARISON_CLASS_P (cond
))
3723 ccode
= TREE_CODE (cond
);
3724 crhs1
= TREE_OPERAND (cond
, 0);
3725 crhs2
= TREE_OPERAND (cond
, 1);
3736 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
3745 /* r = a + b; r > maxval or r <= maxval */
3747 && TREE_CODE (crhs2
) == INTEGER_CST
3748 && tree_int_cst_equal (crhs2
, maxval
))
3749 return ccode
== GT_EXPR
? 1 : -1;
3752 /* r = a - b; r > a or r <= a
3753 r = a + b; a > r or a <= r or b > r or b <= r. */
3754 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3755 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3757 return ccode
== GT_EXPR
? 1 : -1;
3758 /* r = ~a; b > r or b <= r. */
3759 if (code
== BIT_NOT_EXPR
&& crhs2
== lhs
)
3763 return ccode
== GT_EXPR
? 1 : -1;
3770 /* r = a - b; a < r or a >= r
3771 r = a + b; r < a or r >= a or r < b or r >= b. */
3772 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3773 || (code
== PLUS_EXPR
&& crhs1
== lhs
3774 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3775 return ccode
== LT_EXPR
? 1 : -1;
3776 /* r = ~a; r < b or r >= b. */
3777 if (code
== BIT_NOT_EXPR
&& crhs1
== lhs
)
3781 return ccode
== LT_EXPR
? 1 : -1;
3786 /* r = a * b; _1 = r / a; _1 == b
3787 r = a * b; _1 = r / b; _1 == a
3788 r = a * b; _1 = r / a; _1 != b
3789 r = a * b; _1 = r / b; _1 != a. */
3790 if (code
== MULT_EXPR
)
3794 if ((crhs1
== divlhs
&& arith_cast_equal_p (crhs2
, multop
))
3795 || (crhs2
== divlhs
&& arith_cast_equal_p (crhs1
, multop
)))
3797 use_stmt
= cur_use_stmt
;
3798 return ccode
== NE_EXPR
? 1 : -1;
3801 else if ((crhs1
== divlhs
&& operand_equal_p (crhs2
, multop
, 0))
3802 || (crhs2
== divlhs
&& crhs1
== multop
))
3804 use_stmt
= cur_use_stmt
;
3805 return ccode
== NE_EXPR
? 1 : -1;
3815 /* Recognize for unsigned x
3818 where there are other uses of x and replace it with
3819 _7 = .SUB_OVERFLOW (y, z);
3820 x = REALPART_EXPR <_7>;
3821 _8 = IMAGPART_EXPR <_7>;
3823 and similarly for addition.
3830 where y and z have unsigned types with maximum max
3831 and there are other uses of x and all of those cast x
3832 back to that unsigned type and again replace it with
3833 _7 = .ADD_OVERFLOW (y, z);
3834 _9 = REALPART_EXPR <_7>;
3835 _8 = IMAGPART_EXPR <_7>;
3837 and replace (utype) x with _9.
3843 _7 = .ADD_OVERFLOW (y, z);
3844 _8 = IMAGPART_EXPR <_7>;
3850 goto <bb 3>; [50.00%]
3852 goto <bb 4>; [50.00%]
3854 <bb 3> [local count: 536870913]:
3859 <bb 4> [local count: 1073741824]:
3860 # iftmp.0_3 = PHI <_10(3), 0(2)>
3862 _7 = .MUL_OVERFLOW (x, y);
3863 z = IMAGPART_EXPR <_7>;
3864 _8 = IMAGPART_EXPR <_7>;
3866 iftmp.0_3 = (int) _9; */
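/* Illustrative example (added; not from the original sources): the simplest
   instance of the pattern above is an unsigned addition with a wrap-around
   check,

     unsigned f (unsigned a, unsigned b)
     {
       unsigned r = a + b;
       return r < a ? ~0u : r;	// saturate on overflow
     }

   where the r < a comparison is the overflow test; it is replaced by the
   IMAGPART_EXPR of a single .ADD_OVERFLOW (a, b) call when the target has an
   add-with-overflow pattern.  */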
match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
		      enum tree_code code, bool *cfg_changed)
3872 tree lhs
= gimple_assign_lhs (stmt
);
3873 tree type
= TREE_TYPE (lhs
);
3874 use_operand_p use_p
;
3875 imm_use_iterator iter
;
3876 bool use_seen
= false;
3877 bool ovf_use_seen
= false;
3879 gimple
*add_stmt
= NULL
;
3880 bool add_first
= false;
3881 gimple
*cond_stmt
= NULL
;
3882 gimple
*cast_stmt
= NULL
;
3883 tree cast_lhs
= NULL_TREE
;
3885 gcc_checking_assert (code
== PLUS_EXPR
3886 || code
== MINUS_EXPR
3887 || code
== MULT_EXPR
3888 || code
== BIT_NOT_EXPR
);
3889 if (!INTEGRAL_TYPE_P (type
)
3890 || !TYPE_UNSIGNED (type
)
3891 || has_zero_uses (lhs
)
3892 || (code
!= PLUS_EXPR
3893 && code
!= MULT_EXPR
3894 && optab_handler (code
== MINUS_EXPR
? usubv4_optab
: uaddv4_optab
,
3895 TYPE_MODE (type
)) == CODE_FOR_nothing
))
3898 tree rhs1
= gimple_assign_rhs1 (stmt
);
3899 tree rhs2
= gimple_assign_rhs2 (stmt
);
3900 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
3902 use_stmt
= USE_STMT (use_p
);
3903 if (is_gimple_debug (use_stmt
))
3906 tree other
= NULL_TREE
;
3907 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, NULL_TREE
, &other
))
3909 if (code
== BIT_NOT_EXPR
)
3912 if (TREE_CODE (other
) != SSA_NAME
)
3918 cond_stmt
= use_stmt
;
3920 ovf_use_seen
= true;
3925 if (code
== MULT_EXPR
3926 && cast_stmt
== NULL
3927 && gimple_assign_cast_p (use_stmt
))
3929 cast_lhs
= gimple_assign_lhs (use_stmt
);
3930 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
3931 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
3932 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
3933 == TYPE_PRECISION (TREE_TYPE (lhs
))))
3934 cast_stmt
= use_stmt
;
3936 cast_lhs
= NULL_TREE
;
3939 if (ovf_use_seen
&& use_seen
)
3944 && code
== MULT_EXPR
3947 if (TREE_CODE (rhs1
) != SSA_NAME
3948 || (TREE_CODE (rhs2
) != SSA_NAME
&& TREE_CODE (rhs2
) != INTEGER_CST
))
3950 FOR_EACH_IMM_USE_FAST (use_p
, iter
, cast_lhs
)
3952 use_stmt
= USE_STMT (use_p
);
3953 if (is_gimple_debug (use_stmt
))
3956 if (arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
3958 ovf_use_seen
= true;
3964 cast_lhs
= NULL_TREE
;
3967 tree maxval
= NULL_TREE
;
3969 || (code
!= MULT_EXPR
&& (code
== BIT_NOT_EXPR
? use_seen
: !use_seen
))
3970 || (code
== PLUS_EXPR
3971 && optab_handler (uaddv4_optab
,
3972 TYPE_MODE (type
)) == CODE_FOR_nothing
)
3973 || (code
== MULT_EXPR
3974 && optab_handler (cast_stmt
? mulv4_optab
: umulv4_optab
,
3975 TYPE_MODE (type
)) == CODE_FOR_nothing
))
3977 if (code
!= PLUS_EXPR
)
3979 if (TREE_CODE (rhs1
) != SSA_NAME
3980 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1
)))
3982 rhs1
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1
));
3983 tree type1
= TREE_TYPE (rhs1
);
3984 if (!INTEGRAL_TYPE_P (type1
)
3985 || !TYPE_UNSIGNED (type1
)
3986 || TYPE_PRECISION (type1
) >= TYPE_PRECISION (type
)
3987 || (TYPE_PRECISION (type1
)
3988 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1
))))
3990 if (TREE_CODE (rhs2
) == INTEGER_CST
)
3992 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2
),
3993 TYPE_PRECISION (type1
),
3996 rhs2
= fold_convert (type1
, rhs2
);
4000 if (TREE_CODE (rhs2
) != SSA_NAME
4001 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2
)))
4003 rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2
));
4004 tree type2
= TREE_TYPE (rhs2
);
4005 if (!INTEGRAL_TYPE_P (type2
)
4006 || !TYPE_UNSIGNED (type2
)
4007 || TYPE_PRECISION (type2
) >= TYPE_PRECISION (type
)
4008 || (TYPE_PRECISION (type2
)
4009 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2
))))
4012 if (TYPE_PRECISION (type1
) >= TYPE_PRECISION (TREE_TYPE (rhs2
)))
4015 type
= TREE_TYPE (rhs2
);
4017 if (TREE_CODE (type
) != INTEGER_TYPE
4018 || optab_handler (uaddv4_optab
,
4019 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4022 maxval
= wide_int_to_tree (type
, wi::max_value (TYPE_PRECISION (type
),
4024 ovf_use_seen
= false;
4026 basic_block use_bb
= NULL
;
4027 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4029 use_stmt
= USE_STMT (use_p
);
4030 if (is_gimple_debug (use_stmt
))
4033 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, maxval
, NULL
))
4035 ovf_use_seen
= true;
4036 use_bb
= gimple_bb (use_stmt
);
4040 if (!gimple_assign_cast_p (use_stmt
)
4041 || gimple_assign_rhs_code (use_stmt
) == VIEW_CONVERT_EXPR
)
4043 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4044 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs
))
4045 || (TYPE_PRECISION (TREE_TYPE (use_lhs
))
4046 > TYPE_PRECISION (type
)))
4053 if (!useless_type_conversion_p (type
, TREE_TYPE (rhs1
)))
4057 tree new_rhs1
= make_ssa_name (type
);
4058 gimple
*g
= gimple_build_assign (new_rhs1
, NOP_EXPR
, rhs1
);
4059 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4062 else if (!useless_type_conversion_p (type
, TREE_TYPE (rhs2
)))
4066 tree new_rhs2
= make_ssa_name (type
);
4067 gimple
*g
= gimple_build_assign (new_rhs2
, NOP_EXPR
, rhs2
);
4068 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4073 /* If there are no uses of the wider addition, check if
4074 forwprop has not created a narrower addition.
4075 Require it to be in the same bb as the overflow check. */
4076 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs1
)
4078 use_stmt
= USE_STMT (use_p
);
4079 if (is_gimple_debug (use_stmt
))
4082 if (use_stmt
== stmt
)
4085 if (!is_gimple_assign (use_stmt
)
4086 || gimple_bb (use_stmt
) != use_bb
4087 || gimple_assign_rhs_code (use_stmt
) != PLUS_EXPR
)
4090 if (gimple_assign_rhs1 (use_stmt
) == rhs1
)
4092 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt
),
4096 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
4098 if (gimple_assign_rhs1 (use_stmt
) != rhs2
)
4104 add_stmt
= use_stmt
;
4107 if (add_stmt
== NULL
)
4110 /* If stmt and add_stmt are in the same bb, we need to find out
4111 which one is earlier. If they are in different bbs, we've
4112 checked add_stmt is in the same bb as one of the uses of the
4113 stmt lhs, so stmt needs to dominate add_stmt too. */
4114 if (gimple_bb (stmt
) == gimple_bb (add_stmt
))
4116 gimple_stmt_iterator gsif
= *gsi
;
4117 gimple_stmt_iterator gsib
= *gsi
;
4119 /* Search both forward and backward from stmt and have a small
4121 for (i
= 0; i
< 128; i
++)
4123 if (!gsi_end_p (gsib
))
4125 gsi_prev_nondebug (&gsib
);
4126 if (gsi_stmt (gsib
) == add_stmt
)
4132 else if (gsi_end_p (gsif
))
4134 if (!gsi_end_p (gsif
))
4136 gsi_next_nondebug (&gsif
);
4137 if (gsi_stmt (gsif
) == add_stmt
)
4144 *gsi
= gsi_for_stmt (add_stmt
);
4149 if (code
== BIT_NOT_EXPR
)
4150 *gsi
= gsi_for_stmt (cond_stmt
);
4152 auto_vec
<gimple
*, 8> mul_stmts
;
4153 if (code
== MULT_EXPR
&& cast_stmt
)
4155 type
= TREE_TYPE (cast_lhs
);
4156 gimple
*g
= SSA_NAME_DEF_STMT (rhs1
);
4157 if (gimple_assign_cast_p (g
)
4158 && useless_type_conversion_p (type
,
4159 TREE_TYPE (gimple_assign_rhs1 (g
)))
4160 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4161 rhs1
= gimple_assign_rhs1 (g
);
4164 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs1
);
4165 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4166 rhs1
= gimple_assign_lhs (g
);
4167 mul_stmts
.quick_push (g
);
4169 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4170 rhs2
= fold_convert (type
, rhs2
);
4173 g
= SSA_NAME_DEF_STMT (rhs2
);
4174 if (gimple_assign_cast_p (g
)
4175 && useless_type_conversion_p (type
,
4176 TREE_TYPE (gimple_assign_rhs1 (g
)))
4177 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4178 rhs2
= gimple_assign_rhs1 (g
);
4181 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs2
);
4182 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4183 rhs2
= gimple_assign_lhs (g
);
4184 mul_stmts
.quick_push (g
);
4188 tree ctype
= build_complex_type (type
);
4189 gcall
*g
= gimple_build_call_internal (code
== MULT_EXPR
4191 : code
!= MINUS_EXPR
4192 ? IFN_ADD_OVERFLOW
: IFN_SUB_OVERFLOW
,
4194 tree ctmp
= make_ssa_name (ctype
);
4195 gimple_call_set_lhs (g
, ctmp
);
4196 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4197 tree new_lhs
= (maxval
|| cast_stmt
) ? make_ssa_name (type
) : lhs
;
4199 if (code
!= BIT_NOT_EXPR
)
4201 g2
= gimple_build_assign (new_lhs
, REALPART_EXPR
,
4202 build1 (REALPART_EXPR
, type
, ctmp
));
4203 if (maxval
|| cast_stmt
)
4205 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4207 *gsi
= gsi_for_stmt (stmt
);
4210 gsi_replace (gsi
, g2
, true);
4211 if (code
== MULT_EXPR
)
4213 mul_stmts
.quick_push (g
);
4214 mul_stmts
.quick_push (g2
);
4217 g2
= gimple_build_assign (lhs
, NOP_EXPR
, new_lhs
);
4218 gsi_replace (gsi
, g2
, true);
4219 mul_stmts
.quick_push (g2
);
  tree ovf = make_ssa_name (type);
  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
			    build1 (IMAGPART_EXPR, type, ctmp));
  if (code != BIT_NOT_EXPR)
    gsi_insert_after (gsi, g2, GSI_NEW_STMT);
  else
    gsi_insert_before (gsi, g2, GSI_SAME_STMT);
  if (code == MULT_EXPR)
    mul_stmts.quick_push (g2);
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
    {
      if (is_gimple_debug (use_stmt))
	continue;

      gimple *orig_use_stmt = use_stmt;
      int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
					    maxval, NULL);
      if (ovf_use == 0)
	{
	  gcc_assert (code != BIT_NOT_EXPR);
	  tree use_lhs = gimple_assign_lhs (use_stmt);
	  gimple_assign_set_rhs1 (use_stmt, new_lhs);
	  if (useless_type_conversion_p (TREE_TYPE (use_lhs),
					 TREE_TYPE (new_lhs)))
	    gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
	  update_stmt (use_stmt);
	  continue;
	}
      if (gimple_code (use_stmt) == GIMPLE_COND)
	{
	  gcond *cond_stmt = as_a <gcond *> (use_stmt);
	  gimple_cond_set_lhs (cond_stmt, ovf);
	  gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
	  gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
	}
      else
	{
	  gcc_checking_assert (is_gimple_assign (use_stmt));
	  if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
	    {
	      gimple_assign_set_rhs1 (use_stmt, ovf);
	      gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
	      gimple_assign_set_rhs_code (use_stmt,
					  ovf_use == 1 ? NE_EXPR : EQ_EXPR);
	    }
	  else
	    {
	      gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
				   == COND_EXPR);
	      tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
				  boolean_type_node, ovf,
				  build_int_cst (type, 0));
	      gimple_assign_set_rhs1 (use_stmt, cond);
	    }
	}
      update_stmt (use_stmt);
      if (code == MULT_EXPR && use_stmt != orig_use_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
	  maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
					 cfg_changed);
	  gsi_remove (&gsi2, true);
	  release_ssa_name (gimple_assign_lhs (orig_use_stmt));
	}
    }
  if (maxval)
    {
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      if (add_stmt)
	{
	  gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
					   new_lhs);
	  gsi2 = gsi_for_stmt (add_stmt);
	  gsi_replace (&gsi2, g, true);
	}
    }
  else if (code == BIT_NOT_EXPR)
    {
      *gsi = gsi_for_stmt (stmt);
      gsi_remove (gsi, true);
      release_ssa_name (lhs);
    }
  return true;
}
/* Return true if target has support for divmod.  */

static bool
target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
{
  /* If target supports hardware divmod insn, use it for divmod.  */
  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
    return true;

  /* Check if libfunc for divmod is available.  */
  rtx libfunc = optab_libfunc (divmod_optab, mode);
  if (libfunc != NULL_RTX)
    {
      /* If optab_handler exists for div_optab, perhaps in a wider mode,
	 we don't want to use the libfunc even if it exists for given mode.  */
      machine_mode div_mode;
      FOR_EACH_MODE_FROM (div_mode, mode)
	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
	  return false;

      return targetm.expand_divmod_libfunc != NULL;
    }

  return false;
}
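
/* As an example, a 32-bit target typically has no hardware divmod insn for
   DImode but libgcc provides __udivmoddi4; the check above only accepts such
   a libfunc when the target also implements the expand_divmod_libfunc hook
   and no plain division optab_handler exists in this or a wider mode, since
   a real division insn would make the extra libcall pointless.  */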
/* Check if stmt is candidate for divmod transform.  */

static bool
divmod_candidate_p (gassign *stmt)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  machine_mode mode = TYPE_MODE (type);
  optab divmod_optab, div_optab;

  if (TYPE_UNSIGNED (type))
    {
      divmod_optab = udivmod_optab;
      div_optab = udiv_optab;
    }
  else
    {
      divmod_optab = sdivmod_optab;
      div_optab = sdiv_optab;
    }

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  /* Disable the transform if either is a constant, since division-by-constant
     may have specialized expansion.  */
  if (CONSTANT_CLASS_P (op1))
    return false;

  if (CONSTANT_CLASS_P (op2))
    {
      if (integer_pow2p (op2))
	return false;

      if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
	  && TYPE_PRECISION (type) <= BITS_PER_WORD)
	return false;

      /* If the divisor is not power of 2 and the precision wider than
	 HWI, expand_divmod punts on that, so in that case it is better
	 to use divmod optab or libfunc.  Similarly if choose_multiplier
	 might need pre/post shifts of BITS_PER_WORD or more.  */
    }

  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
     expand using the [su]divv optabs.  */
  if (TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
    return false;

  return true;
}
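
/* A few illustrative cases for the predicate above:

     r = a % 16;    - power-of-two divisor, cheaper as a mask, rejected
     r = a / 10;    - constant divisor in a word-size type, left to the
		      multiplicative division expansion, rejected
     q = a / b;
     r = a % b;     - non-constant divisor, accepted whenever the target
		      has a usable divmod insn or libfunc as tested by
		      target_supports_divmod_p.  */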
/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR (complex_tmp);
   t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p ().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
static bool
convert_to_divmod (gassign *stmt)
{
  if (stmt_can_throw_internal (cfun, stmt)
      || !divmod_candidate_p (stmt))
    return false;

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  imm_use_iterator use_iter;
  gimple *use_stmt;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (stmt_can_throw_internal (cfun, use_stmt))
	    continue;

	  basic_block bb = gimple_bb (use_stmt);

	  if (bb == top_bb)
	    {
	      if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
		top_stmt = use_stmt;
	    }
	  else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	    {
	      top_bb = bb;
	      top_stmt = use_stmt;
	    }
	}
    }

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at least a single trunc_mod_expr stmt.  */

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    {
      if (is_gimple_assign (use_stmt)
	  && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	      || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	  && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	  && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
	{
	  if (use_stmt == top_stmt
	      || stmt_can_throw_internal (cfun, use_stmt)
	      || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
	    continue;

	  stmts.safe_push (use_stmt);
	  if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
	    div_seen = true;
	}
    }

  if (!div_seen)
    return false;
  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */

  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */

  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree new_rhs;

      switch (gimple_assign_rhs_code (use_stmt))
	{
	case TRUNC_DIV_EXPR:
	  new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
	  break;

	case TRUNC_MOD_EXPR:
	  new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
	  break;

	default:
	  gcc_unreachable ();
	}

      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
      update_stmt (use_stmt);
    }

  return true;
}
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   where appropriate.  */
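
/* As an illustrative example (assuming the target has a widening multiply
   pattern for the modes involved), in

     long long
     f (int a, int b)
     {
       return (long long) a * b;
     }

   both operands of the 64-bit MULT_EXPR are sign-extensions from int, so the
   multiplication can be rewritten as a WIDEN_MULT_EXPR of the two int
   operands, usually a single widening multiply instruction rather than two
   extensions plus a full 64-bit multiply.  */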
namespace {

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul
/* Walker class to perform the transformation in reverse dominance order.  */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */

  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  virtual void after_dom_children (basic_block);

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      break;

	    case PLUS_EXPR:
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      break;

	    case BIT_NOT_EXPR:
	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
		continue;
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a <gassign *> (stmt));
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  switch (gimple_call_combined_fn (stmt))
	    {
	    CASE_CFN_POW:
	      if (gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				 &dconst2)
		  && convert_mult_to_fma (stmt,
					  gimple_call_arg (stmt, 0),
					  gimple_call_arg (stmt, 0),
					  &fma_state))
		{
		  unlink_stmt_vdef (stmt);
		  if (gsi_remove (&gsi, true)
		      && gimple_purge_dead_eh_edges (bb))
		    *m_cfg_changed_p = true;
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_MUL:
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_LAST:
	      cancel_fma_deferring (&fma_state);
	      break;

	    default:
	      break;
	    }
	}
      gsi_next (&gsi);
    }

  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}
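
/* For the MULT_EXPR case dispatched in the walker above, the typical FMA
   opportunity (shown here only as an illustration) is

     tmp = a * b;
     res = tmp + c;

   which convert_mult_to_fma contracts into

     res = .FMA (a, b, c);

   provided the target has a fused multiply-add pattern for the mode and the
   deferring heuristic for accumulation chains in tight loops does not decide
   against it.  */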
unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  renumber_gimple_stmt_uids (cfun);

  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
			    widen_mul_stats.divmod_calls_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}