1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, with some experiments it was found out that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walk it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
89 #include "coretypes.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
102 #include "fold-const.h"
103 #include "gimple-iterator.h"
104 #include "gimple-fold.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
116 #include "targhooks.h"
118 #include "tree-ssa-math-opts.h"
121 /* This structure represents one basic block that either computes a
122 division, or is a common dominator for basic block that compute a
125 /* The basic block represented by this structure. */
126 basic_block bb
= basic_block();
128 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
130 tree recip_def
= tree();
132 /* If non-NULL, the SSA_NAME holding the definition for a squared
133 reciprocal inserted in BB. */
134 tree square_recip_def
= tree();
136 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
137 was inserted in BB. */
138 gimple
*recip_def_stmt
= nullptr;
140 /* Pointer to a list of "struct occurrence"s for blocks dominated
142 struct occurrence
*children
= nullptr;
144 /* Pointer to the next "struct occurrence"s in the list of blocks
145 sharing a common dominator. */
146 struct occurrence
*next
= nullptr;
148 /* The number of divisions that are in BB before compute_merit. The
149 number of divisions that are in BB or post-dominate it after
151 int num_divisions
= 0;
153 /* True if the basic block has a division, false if it is a common
154 dominator for basic blocks that do. If it is false and trapping
155 math is active, BB is not a candidate for inserting a reciprocal. */
156 bool bb_has_division
= false;
158 /* Construct a struct occurrence for basic block BB, and whose
159 children list is headed by CHILDREN. */
160 occurrence (basic_block bb
, struct occurrence
*children
)
161 : bb (bb
), children (children
)
166 /* Destroy a struct occurrence and remove it from its basic block. */
172 /* Allocate memory for a struct occurrence from OCC_POOL. */
173 static void* operator new (size_t);
175 /* Return memory for a struct occurrence to OCC_POOL. */
176 static void operator delete (void*, size_t);
181 /* Number of 1.0/X ops inserted. */
184 /* Number of 1.0/FUNC ops inserted. */
190 /* Number of cexpi calls inserted. */
193 /* Number of conversions removed. */
200 /* Number of widening multiplication ops inserted. */
201 int widen_mults_inserted
;
203 /* Number of integer multiply-and-accumulate ops inserted. */
206 /* Number of fp fused multiply-add ops inserted. */
209 /* Number of divmod calls inserted. */
210 int divmod_calls_inserted
;
212 /* Number of highpart multiplication ops inserted. */
213 int highpart_mults_inserted
;
216 /* The instance of "struct occurrence" representing the highest
217 interesting block in the dominator tree. */
218 static struct occurrence
*occ_head
;
220 /* Allocation pool for getting instances of "struct occurrence". */
221 static object_allocator
<occurrence
> *occ_pool
;
223 void* occurrence::operator new (size_t n
)
225 gcc_assert (n
== sizeof(occurrence
));
226 return occ_pool
->allocate_raw ();
229 void occurrence::operator delete (void *occ
, size_t n
)
231 gcc_assert (n
== sizeof(occurrence
));
232 occ_pool
->remove_raw (occ
);
235 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
236 list of "struct occurrence"s, one per basic block, having IDOM as
237 their common dominator.
239 We try to insert NEW_OCC as deep as possible in the tree, and we also
240 insert any other block that is a common dominator for BB and one
241 block already in the tree. */
244 insert_bb (struct occurrence
*new_occ
, basic_block idom
,
245 struct occurrence
**p_head
)
247 struct occurrence
*occ
, **p_occ
;
249 for (p_occ
= p_head
; (occ
= *p_occ
) != NULL
; )
251 basic_block bb
= new_occ
->bb
, occ_bb
= occ
->bb
;
252 basic_block dom
= nearest_common_dominator (CDI_DOMINATORS
, occ_bb
, bb
);
255 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
258 occ
->next
= new_occ
->children
;
259 new_occ
->children
= occ
;
261 /* Try the next block (it may as well be dominated by BB). */
264 else if (dom
== occ_bb
)
266 /* OCC_BB dominates BB. Tail recurse to look deeper. */
267 insert_bb (new_occ
, dom
, &occ
->children
);
271 else if (dom
!= idom
)
273 gcc_assert (!dom
->aux
);
275 /* There is a dominator between IDOM and BB, add it and make
276 two children out of NEW_OCC and OCC. First, remove OCC from
282 /* None of the previous blocks has DOM as a dominator: if we tail
283 recursed, we would reexamine them uselessly. Just switch BB with
284 DOM, and go on looking for blocks dominated by DOM. */
285 new_occ
= new occurrence (dom
, new_occ
);
290 /* Nothing special, go on with the next element. */
295 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
296 new_occ
->next
= *p_head
;
300 /* Register that we found a division in BB.
301 IMPORTANCE is a measure of how much weighting to give
302 that division. Use IMPORTANCE = 2 to register a single
303 division. If the division is going to be found multiple
304 times use 1 (as it is with squares). */
307 register_division_in (basic_block bb
, int importance
)
309 struct occurrence
*occ
;
311 occ
= (struct occurrence
*) bb
->aux
;
314 occ
= new occurrence (bb
, NULL
);
315 insert_bb (occ
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), &occ_head
);
318 occ
->bb_has_division
= true;
319 occ
->num_divisions
+= importance
;
323 /* Compute the number of divisions that postdominate each block in OCC and
327 compute_merit (struct occurrence
*occ
)
329 struct occurrence
*occ_child
;
330 basic_block dom
= occ
->bb
;
332 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
335 if (occ_child
->children
)
336 compute_merit (occ_child
);
339 bb
= single_noncomplex_succ (dom
);
343 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, occ_child
->bb
))
344 occ
->num_divisions
+= occ_child
->num_divisions
;
349 /* Return whether USE_STMT is a floating-point division by DEF. */
351 is_division_by (gimple
*use_stmt
, tree def
)
353 return is_gimple_assign (use_stmt
)
354 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
355 && gimple_assign_rhs2 (use_stmt
) == def
356 /* Do not recognize x / x as valid division, as we are getting
357 confused later by replacing all immediate uses x in such
359 && gimple_assign_rhs1 (use_stmt
) != def
360 && !stmt_can_throw_internal (cfun
, use_stmt
);
363 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
365 is_mult_by (gimple
*use_stmt
, tree def
, tree a
)
367 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
368 && gimple_assign_rhs_code (use_stmt
) == MULT_EXPR
)
370 tree op0
= gimple_assign_rhs1 (use_stmt
);
371 tree op1
= gimple_assign_rhs2 (use_stmt
);
373 return (op0
== def
&& op1
== a
)
374 || (op0
== a
&& op1
== def
);
379 /* Return whether USE_STMT is DEF * DEF. */
381 is_square_of (gimple
*use_stmt
, tree def
)
383 return is_mult_by (use_stmt
, def
, def
);
386 /* Return whether USE_STMT is a floating-point division by
389 is_division_by_square (gimple
*use_stmt
, tree def
)
391 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
392 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
393 && gimple_assign_rhs1 (use_stmt
) != gimple_assign_rhs2 (use_stmt
)
394 && !stmt_can_throw_internal (cfun
, use_stmt
))
396 tree denominator
= gimple_assign_rhs2 (use_stmt
);
397 if (TREE_CODE (denominator
) == SSA_NAME
)
398 return is_square_of (SSA_NAME_DEF_STMT (denominator
), def
);
403 /* Walk the subset of the dominator tree rooted at OCC, setting the
404 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
405 the given basic block. The field may be left NULL, of course,
406 if it is not possible or profitable to do the optimization.
408 DEF_BSI is an iterator pointing at the statement defining DEF.
409 If RECIP_DEF is set, a dominator already has a computation that can
412 If should_insert_square_recip is set, then this also inserts
413 the square of the reciprocal immediately after the definition
414 of the reciprocal. */
417 insert_reciprocals (gimple_stmt_iterator
*def_gsi
, struct occurrence
*occ
,
418 tree def
, tree recip_def
, tree square_recip_def
,
419 int should_insert_square_recip
, int threshold
)
422 gassign
*new_stmt
, *new_square_stmt
;
423 gimple_stmt_iterator gsi
;
424 struct occurrence
*occ_child
;
427 && (occ
->bb_has_division
|| !flag_trapping_math
)
428 /* Divide by two as all divisions are counted twice in
430 && occ
->num_divisions
/ 2 >= threshold
)
432 /* Make a variable with the replacement and substitute it. */
433 type
= TREE_TYPE (def
);
434 recip_def
= create_tmp_reg (type
, "reciptmp");
435 new_stmt
= gimple_build_assign (recip_def
, RDIV_EXPR
,
436 build_one_cst (type
), def
);
438 if (should_insert_square_recip
)
440 square_recip_def
= create_tmp_reg (type
, "powmult_reciptmp");
441 new_square_stmt
= gimple_build_assign (square_recip_def
, MULT_EXPR
,
442 recip_def
, recip_def
);
445 if (occ
->bb_has_division
)
447 /* Case 1: insert before an existing division. */
448 gsi
= gsi_after_labels (occ
->bb
);
449 while (!gsi_end_p (gsi
)
450 && (!is_division_by (gsi_stmt (gsi
), def
))
451 && (!is_division_by_square (gsi_stmt (gsi
), def
)))
454 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
455 if (should_insert_square_recip
)
456 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
458 else if (def_gsi
&& occ
->bb
== gsi_bb (*def_gsi
))
460 /* Case 2: insert right after the definition. Note that this will
461 never happen if the definition statement can throw, because in
462 that case the sole successor of the statement's basic block will
463 dominate all the uses as well. */
464 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
465 if (should_insert_square_recip
)
466 gsi_insert_after (def_gsi
, new_square_stmt
, GSI_NEW_STMT
);
470 /* Case 3: insert in a basic block not containing defs/uses. */
471 gsi
= gsi_after_labels (occ
->bb
);
472 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
473 if (should_insert_square_recip
)
474 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
477 reciprocal_stats
.rdivs_inserted
++;
479 occ
->recip_def_stmt
= new_stmt
;
482 occ
->recip_def
= recip_def
;
483 occ
->square_recip_def
= square_recip_def
;
484 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
485 insert_reciprocals (def_gsi
, occ_child
, def
, recip_def
,
486 square_recip_def
, should_insert_square_recip
,
490 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
491 Take as argument the use for (x * x). */
493 replace_reciprocal_squares (use_operand_p use_p
)
495 gimple
*use_stmt
= USE_STMT (use_p
);
496 basic_block bb
= gimple_bb (use_stmt
);
497 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
499 if (optimize_bb_for_speed_p (bb
) && occ
->square_recip_def
502 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
503 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
504 gimple_assign_set_rhs2 (use_stmt
, occ
->square_recip_def
);
505 SET_USE (use_p
, occ
->square_recip_def
);
506 fold_stmt_inplace (&gsi
);
507 update_stmt (use_stmt
);
512 /* Replace the division at USE_P with a multiplication by the reciprocal, if
516 replace_reciprocal (use_operand_p use_p
)
518 gimple
*use_stmt
= USE_STMT (use_p
);
519 basic_block bb
= gimple_bb (use_stmt
);
520 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
522 if (optimize_bb_for_speed_p (bb
)
523 && occ
->recip_def
&& use_stmt
!= occ
->recip_def_stmt
)
525 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
526 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
527 SET_USE (use_p
, occ
->recip_def
);
528 fold_stmt_inplace (&gsi
);
529 update_stmt (use_stmt
);
534 /* Free OCC and return one more "struct occurrence" to be freed. */
536 static struct occurrence
*
537 free_bb (struct occurrence
*occ
)
539 struct occurrence
*child
, *next
;
541 /* First get the two pointers hanging off OCC. */
543 child
= occ
->children
;
546 /* Now ensure that we don't recurse unless it is necessary. */
552 next
= free_bb (next
);
558 /* Transform sequences like
568 depending on the uses of x, r1, r2. This removes one multiplication and
569 allows the sqrt and division operations to execute in parallel.
570 DEF_GSI is the gsi of the initial division by sqrt that defines
571 DEF (x in the example above). */
574 optimize_recip_sqrt (gimple_stmt_iterator
*def_gsi
, tree def
)
577 imm_use_iterator use_iter
;
578 gimple
*stmt
= gsi_stmt (*def_gsi
);
580 tree orig_sqrt_ssa_name
= gimple_assign_rhs2 (stmt
);
581 tree div_rhs1
= gimple_assign_rhs1 (stmt
);
583 if (TREE_CODE (orig_sqrt_ssa_name
) != SSA_NAME
584 || TREE_CODE (div_rhs1
) != REAL_CST
585 || !real_equal (&TREE_REAL_CST (div_rhs1
), &dconst1
))
589 = dyn_cast
<gcall
*> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name
));
591 if (!sqrt_stmt
|| !gimple_call_lhs (sqrt_stmt
))
594 switch (gimple_call_combined_fn (sqrt_stmt
))
603 tree a
= gimple_call_arg (sqrt_stmt
, 0);
605 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
607 /* Statements that use x in x * x. */
608 auto_vec
<gimple
*> sqr_stmts
;
609 /* Statements that use x in a * x. */
610 auto_vec
<gimple
*> mult_stmts
;
611 bool has_other_use
= false;
612 bool mult_on_main_path
= false;
614 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, x
)
616 if (is_gimple_debug (use_stmt
))
618 if (is_square_of (use_stmt
, x
))
620 sqr_stmts
.safe_push (use_stmt
);
621 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
622 mult_on_main_path
= true;
624 else if (is_mult_by (use_stmt
, x
, a
))
626 mult_stmts
.safe_push (use_stmt
);
627 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
628 mult_on_main_path
= true;
631 has_other_use
= true;
634 /* In the x * x and a * x cases we just rewire stmt operands or
635 remove multiplications. In the has_other_use case we introduce
636 a multiplication so make sure we don't introduce a multiplication
637 on a path where there was none. */
638 if (has_other_use
&& !mult_on_main_path
)
641 if (sqr_stmts
.is_empty () && mult_stmts
.is_empty ())
644 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
645 to be able to compose it from the sqr and mult cases. */
646 if (has_other_use
&& (sqr_stmts
.is_empty () || mult_stmts
.is_empty ()))
651 fprintf (dump_file
, "Optimizing reciprocal sqrt multiplications of\n");
652 print_gimple_stmt (dump_file
, sqrt_stmt
, 0, TDF_NONE
);
653 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
654 fprintf (dump_file
, "\n");
657 bool delete_div
= !has_other_use
;
658 tree sqr_ssa_name
= NULL_TREE
;
659 if (!sqr_stmts
.is_empty ())
661 /* r1 = x * x. Transform the original
668 = make_temp_ssa_name (TREE_TYPE (a
), NULL
, "recip_sqrt_sqr");
672 fprintf (dump_file
, "Replacing original division\n");
673 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
674 fprintf (dump_file
, "with new division\n");
677 = gimple_build_assign (sqr_ssa_name
, gimple_assign_rhs_code (stmt
),
678 gimple_assign_rhs1 (stmt
), a
);
679 gsi_insert_before (def_gsi
, stmt
, GSI_SAME_STMT
);
680 gsi_remove (def_gsi
, true);
681 *def_gsi
= gsi_for_stmt (stmt
);
682 fold_stmt_inplace (def_gsi
);
686 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
691 FOR_EACH_VEC_ELT (sqr_stmts
, i
, sqr_stmt
)
693 gimple_stmt_iterator gsi2
= gsi_for_stmt (sqr_stmt
);
694 gimple_assign_set_rhs_from_tree (&gsi2
, sqr_ssa_name
);
695 update_stmt (sqr_stmt
);
698 if (!mult_stmts
.is_empty ())
700 /* r2 = a * x. Transform this into:
701 r2 = t (The original sqrt (a)). */
703 gimple
*mult_stmt
= NULL
;
704 FOR_EACH_VEC_ELT (mult_stmts
, i
, mult_stmt
)
706 gimple_stmt_iterator gsi2
= gsi_for_stmt (mult_stmt
);
710 fprintf (dump_file
, "Replacing squaring multiplication\n");
711 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
712 fprintf (dump_file
, "with assignment\n");
714 gimple_assign_set_rhs_from_tree (&gsi2
, orig_sqrt_ssa_name
);
715 fold_stmt_inplace (&gsi2
);
716 update_stmt (mult_stmt
);
718 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
724 /* Using the two temporaries tmp1, tmp2 from above
725 the original x is now:
727 gcc_assert (orig_sqrt_ssa_name
);
728 gcc_assert (sqr_ssa_name
);
731 = gimple_build_assign (x
, MULT_EXPR
,
732 orig_sqrt_ssa_name
, sqr_ssa_name
);
733 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
738 /* Remove the original division. */
739 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
740 gsi_remove (&gsi2
, true);
744 release_ssa_name (x
);
747 /* Look for floating-point divisions among DEF's uses, and try to
748 replace them by multiplications with the reciprocal. Add
749 as many statements computing the reciprocal as needed.
751 DEF must be a GIMPLE register of a floating-point type. */
754 execute_cse_reciprocals_1 (gimple_stmt_iterator
*def_gsi
, tree def
)
756 use_operand_p use_p
, square_use_p
;
757 imm_use_iterator use_iter
, square_use_iter
;
759 struct occurrence
*occ
;
762 int square_recip_count
= 0;
763 int sqrt_recip_count
= 0;
765 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def
)) && TREE_CODE (def
) == SSA_NAME
);
766 threshold
= targetm
.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def
)));
768 /* If DEF is a square (x * x), count the number of divisions by x.
769 If there are more divisions by x than by (DEF * DEF), prefer to optimize
770 the reciprocal of x instead of DEF. This improves cases like:
775 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
776 gimple
*def_stmt
= SSA_NAME_DEF_STMT (def
);
778 if (is_gimple_assign (def_stmt
)
779 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
780 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == SSA_NAME
781 && gimple_assign_rhs1 (def_stmt
) == gimple_assign_rhs2 (def_stmt
))
783 tree op0
= gimple_assign_rhs1 (def_stmt
);
785 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, op0
)
787 gimple
*use_stmt
= USE_STMT (use_p
);
788 if (is_division_by (use_stmt
, op0
))
793 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, def
)
795 gimple
*use_stmt
= USE_STMT (use_p
);
796 if (is_division_by (use_stmt
, def
))
798 register_division_in (gimple_bb (use_stmt
), 2);
802 if (is_square_of (use_stmt
, def
))
804 square_def
= gimple_assign_lhs (use_stmt
);
805 FOR_EACH_IMM_USE_FAST (square_use_p
, square_use_iter
, square_def
)
807 gimple
*square_use_stmt
= USE_STMT (square_use_p
);
808 if (is_division_by (square_use_stmt
, square_def
))
810 /* This is executed twice for each division by a square. */
811 register_division_in (gimple_bb (square_use_stmt
), 1);
812 square_recip_count
++;
818 /* Square reciprocals were counted twice above. */
819 square_recip_count
/= 2;
821 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
822 if (sqrt_recip_count
> square_recip_count
)
825 /* Do the expensive part only if we can hope to optimize something. */
826 if (count
+ square_recip_count
>= threshold
&& count
>= 1)
829 for (occ
= occ_head
; occ
; occ
= occ
->next
)
832 insert_reciprocals (def_gsi
, occ
, def
, NULL
, NULL
,
833 square_recip_count
, threshold
);
836 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, def
)
838 if (is_division_by (use_stmt
, def
))
840 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
841 replace_reciprocal (use_p
);
843 else if (square_recip_count
> 0 && is_square_of (use_stmt
, def
))
845 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
847 /* Find all uses of the square that are divisions and
848 * replace them by multiplications with the inverse. */
849 imm_use_iterator square_iterator
;
850 gimple
*powmult_use_stmt
= USE_STMT (use_p
);
851 tree powmult_def_name
= gimple_assign_lhs (powmult_use_stmt
);
853 FOR_EACH_IMM_USE_STMT (powmult_use_stmt
,
854 square_iterator
, powmult_def_name
)
855 FOR_EACH_IMM_USE_ON_STMT (square_use_p
, square_iterator
)
857 gimple
*powmult_use_stmt
= USE_STMT (square_use_p
);
858 if (is_division_by (powmult_use_stmt
, powmult_def_name
))
859 replace_reciprocal_squares (square_use_p
);
867 for (occ
= occ_head
; occ
; )
873 /* Return an internal function that implements the reciprocal of CALL,
874 or IFN_LAST if there is no such function that the target supports. */
877 internal_fn_reciprocal (gcall
*call
)
881 switch (gimple_call_combined_fn (call
))
892 tree_pair types
= direct_internal_fn_types (ifn
, call
);
893 if (!direct_internal_fn_supported_p (ifn
, types
, OPTIMIZE_FOR_SPEED
))
899 /* Go through all the floating-point SSA_NAMEs, and call
900 execute_cse_reciprocals_1 on each of them. */
903 const pass_data pass_data_cse_reciprocals
=
905 GIMPLE_PASS
, /* type */
907 OPTGROUP_NONE
, /* optinfo_flags */
908 TV_TREE_RECIP
, /* tv_id */
909 PROP_ssa
, /* properties_required */
910 0, /* properties_provided */
911 0, /* properties_destroyed */
912 0, /* todo_flags_start */
913 TODO_update_ssa
, /* todo_flags_finish */
916 class pass_cse_reciprocals
: public gimple_opt_pass
919 pass_cse_reciprocals (gcc::context
*ctxt
)
920 : gimple_opt_pass (pass_data_cse_reciprocals
, ctxt
)
923 /* opt_pass methods: */
924 bool gate (function
*) final override
926 return optimize
&& flag_reciprocal_math
;
928 unsigned int execute (function
*) final override
;
930 }; // class pass_cse_reciprocals
933 pass_cse_reciprocals::execute (function
*fun
)
938 occ_pool
= new object_allocator
<occurrence
> ("dominators for recip");
940 memset (&reciprocal_stats
, 0, sizeof (reciprocal_stats
));
941 calculate_dominance_info (CDI_DOMINATORS
);
942 calculate_dominance_info (CDI_POST_DOMINATORS
);
945 FOR_EACH_BB_FN (bb
, fun
)
946 gcc_assert (!bb
->aux
);
948 for (arg
= DECL_ARGUMENTS (fun
->decl
); arg
; arg
= DECL_CHAIN (arg
))
949 if (FLOAT_TYPE_P (TREE_TYPE (arg
))
950 && is_gimple_reg (arg
))
952 tree name
= ssa_default_def (fun
, arg
);
954 execute_cse_reciprocals_1 (NULL
, name
);
957 FOR_EACH_BB_FN (bb
, fun
)
961 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
964 gphi
*phi
= gsi
.phi ();
965 def
= PHI_RESULT (phi
);
966 if (! virtual_operand_p (def
)
967 && FLOAT_TYPE_P (TREE_TYPE (def
)))
968 execute_cse_reciprocals_1 (NULL
, def
);
971 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
974 gimple
*stmt
= gsi_stmt (gsi
);
976 if (gimple_has_lhs (stmt
)
977 && (def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
)) != NULL
978 && FLOAT_TYPE_P (TREE_TYPE (def
))
979 && TREE_CODE (def
) == SSA_NAME
)
981 execute_cse_reciprocals_1 (&gsi
, def
);
982 stmt
= gsi_stmt (gsi
);
983 if (flag_unsafe_math_optimizations
984 && is_gimple_assign (stmt
)
985 && gimple_assign_lhs (stmt
) == def
986 && !stmt_can_throw_internal (cfun
, stmt
)
987 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
988 optimize_recip_sqrt (&gsi
, def
);
992 if (optimize_bb_for_size_p (bb
))
995 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
996 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
999 gimple
*stmt
= gsi_stmt (gsi
);
1001 if (is_gimple_assign (stmt
)
1002 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
1004 tree arg1
= gimple_assign_rhs2 (stmt
);
1007 if (TREE_CODE (arg1
) != SSA_NAME
)
1010 stmt1
= SSA_NAME_DEF_STMT (arg1
);
1012 if (is_gimple_call (stmt1
)
1013 && gimple_call_lhs (stmt1
))
1016 imm_use_iterator ui
;
1017 use_operand_p use_p
;
1018 tree fndecl
= NULL_TREE
;
1020 gcall
*call
= as_a
<gcall
*> (stmt1
);
1021 internal_fn ifn
= internal_fn_reciprocal (call
);
1022 if (ifn
== IFN_LAST
)
1024 fndecl
= gimple_call_fndecl (call
);
1026 || !fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
1028 fndecl
= targetm
.builtin_reciprocal (fndecl
);
1033 /* Check that all uses of the SSA name are divisions,
1034 otherwise replacing the defining statement will do
1037 FOR_EACH_IMM_USE_FAST (use_p
, ui
, arg1
)
1039 gimple
*stmt2
= USE_STMT (use_p
);
1040 if (is_gimple_debug (stmt2
))
1042 if (!is_gimple_assign (stmt2
)
1043 || gimple_assign_rhs_code (stmt2
) != RDIV_EXPR
1044 || gimple_assign_rhs1 (stmt2
) == arg1
1045 || gimple_assign_rhs2 (stmt2
) != arg1
)
1054 gimple_replace_ssa_lhs (call
, arg1
);
1055 if (gimple_call_internal_p (call
) != (ifn
!= IFN_LAST
))
1057 auto_vec
<tree
, 4> args
;
1058 for (unsigned int i
= 0;
1059 i
< gimple_call_num_args (call
); i
++)
1060 args
.safe_push (gimple_call_arg (call
, i
));
1062 if (ifn
== IFN_LAST
)
1063 stmt2
= gimple_build_call_vec (fndecl
, args
);
1065 stmt2
= gimple_build_call_internal_vec (ifn
, args
);
1066 gimple_call_set_lhs (stmt2
, arg1
);
1067 gimple_move_vops (stmt2
, call
);
1068 gimple_call_set_nothrow (stmt2
,
1069 gimple_call_nothrow_p (call
));
1070 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
1071 gsi_replace (&gsi2
, stmt2
, true);
1075 if (ifn
== IFN_LAST
)
1076 gimple_call_set_fndecl (call
, fndecl
);
1078 gimple_call_set_internal_fn (call
, ifn
);
1081 reciprocal_stats
.rfuncs_inserted
++;
1083 FOR_EACH_IMM_USE_STMT (stmt
, ui
, arg1
)
1085 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1086 gimple_assign_set_rhs_code (stmt
, MULT_EXPR
);
1087 fold_stmt_inplace (&gsi
);
1095 statistics_counter_event (fun
, "reciprocal divs inserted",
1096 reciprocal_stats
.rdivs_inserted
);
1097 statistics_counter_event (fun
, "reciprocal functions inserted",
1098 reciprocal_stats
.rfuncs_inserted
);
1100 free_dominance_info (CDI_DOMINATORS
);
1101 free_dominance_info (CDI_POST_DOMINATORS
);
1109 make_pass_cse_reciprocals (gcc::context
*ctxt
)
1111 return new pass_cse_reciprocals (ctxt
);
1114 /* If NAME is the result of a type conversion, look for other
1115 equivalent dominating or dominated conversions, and replace all
1116 uses with the earliest dominating name, removing the redundant
1117 conversions. Return the prevailing name. */
1120 execute_cse_conv_1 (tree name
, bool *cfg_changed
)
1122 if (SSA_NAME_IS_DEFAULT_DEF (name
)
1123 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name
))
1126 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
1128 if (!gimple_assign_cast_p (def_stmt
))
1131 tree src
= gimple_assign_rhs1 (def_stmt
);
1133 if (TREE_CODE (src
) != SSA_NAME
)
1136 imm_use_iterator use_iter
;
1139 /* Find the earliest dominating def. */
1140 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1142 if (use_stmt
== def_stmt
1143 || !gimple_assign_cast_p (use_stmt
))
1146 tree lhs
= gimple_assign_lhs (use_stmt
);
1148 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1149 || (gimple_assign_rhs1 (use_stmt
)
1150 != gimple_assign_rhs1 (def_stmt
))
1151 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1155 if (gimple_bb (def_stmt
) == gimple_bb (use_stmt
))
1157 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1158 while (!gsi_end_p (gsi
) && gsi_stmt (gsi
) != def_stmt
)
1160 use_dominates
= !gsi_end_p (gsi
);
1162 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
),
1163 gimple_bb (def_stmt
)))
1164 use_dominates
= false;
1165 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (def_stmt
),
1166 gimple_bb (use_stmt
)))
1167 use_dominates
= true;
1173 std::swap (name
, lhs
);
1174 std::swap (def_stmt
, use_stmt
);
1178 /* Now go through all uses of SRC again, replacing the equivalent
1179 dominated conversions. We may replace defs that were not
1180 dominated by the then-prevailing defs when we first visited
1182 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1184 if (use_stmt
== def_stmt
1185 || !gimple_assign_cast_p (use_stmt
))
1188 tree lhs
= gimple_assign_lhs (use_stmt
);
1190 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1191 || (gimple_assign_rhs1 (use_stmt
)
1192 != gimple_assign_rhs1 (def_stmt
))
1193 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1196 basic_block use_bb
= gimple_bb (use_stmt
);
1197 if (gimple_bb (def_stmt
) == use_bb
1198 || dominated_by_p (CDI_DOMINATORS
, use_bb
, gimple_bb (def_stmt
)))
1200 sincos_stats
.conv_removed
++;
1202 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1203 replace_uses_by (lhs
, name
);
1204 if (gsi_remove (&gsi
, true)
1205 && gimple_purge_dead_eh_edges (use_bb
))
1206 *cfg_changed
= true;
1207 release_defs (use_stmt
);
1214 /* Records an occurrence at statement USE_STMT in the vector of trees
1215 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1216 is not yet initialized. Returns true if the occurrence was pushed on
1217 the vector. Adjusts *TOP_BB to be the basic block dominating all
1218 statements in the vector. */
1221 maybe_record_sincos (vec
<gimple
*> *stmts
,
1222 basic_block
*top_bb
, gimple
*use_stmt
)
1224 basic_block use_bb
= gimple_bb (use_stmt
);
1226 && (*top_bb
== use_bb
1227 || dominated_by_p (CDI_DOMINATORS
, use_bb
, *top_bb
)))
1228 stmts
->safe_push (use_stmt
);
1230 || dominated_by_p (CDI_DOMINATORS
, *top_bb
, use_bb
))
1232 stmts
->safe_push (use_stmt
);
1241 /* Look for sin, cos and cexpi calls with the same argument NAME and
1242 create a single call to cexpi CSEing the result in this case.
1243 We first walk over all immediate uses of the argument collecting
1244 statements that we can CSE in a vector and in a second pass replace
1245 the statement rhs with a REALPART or IMAGPART expression on the
1246 result of the cexpi call we insert before the use statement that
1247 dominates all other candidates. */
1250 execute_cse_sincos_1 (tree name
)
1252 gimple_stmt_iterator gsi
;
1253 imm_use_iterator use_iter
;
1254 tree fndecl
, res
, type
= NULL_TREE
;
1255 gimple
*def_stmt
, *use_stmt
, *stmt
;
1256 int seen_cos
= 0, seen_sin
= 0, seen_cexpi
= 0;
1257 auto_vec
<gimple
*> stmts
;
1258 basic_block top_bb
= NULL
;
1260 bool cfg_changed
= false;
1262 name
= execute_cse_conv_1 (name
, &cfg_changed
);
1264 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, name
)
1266 if (gimple_code (use_stmt
) != GIMPLE_CALL
1267 || !gimple_call_lhs (use_stmt
))
1270 switch (gimple_call_combined_fn (use_stmt
))
1273 seen_cos
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1277 seen_sin
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1281 seen_cexpi
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1288 tree t
= mathfn_built_in_type (gimple_call_combined_fn (use_stmt
));
1292 t
= TREE_TYPE (name
);
1294 /* This checks that NAME has the right type in the first round,
1295 and, in subsequent rounds, that the built_in type is the same
1296 type, or a compatible type. */
1297 if (type
!= t
&& !types_compatible_p (type
, t
))
1300 if (seen_cos
+ seen_sin
+ seen_cexpi
<= 1)
1303 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1304 the name def statement. */
1305 fndecl
= mathfn_built_in (type
, BUILT_IN_CEXPI
);
1308 stmt
= gimple_build_call (fndecl
, 1, name
);
1309 res
= make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl
)), stmt
, "sincostmp");
1310 gimple_call_set_lhs (stmt
, res
);
1312 def_stmt
= SSA_NAME_DEF_STMT (name
);
1313 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
1314 && gimple_code (def_stmt
) != GIMPLE_PHI
1315 && gimple_bb (def_stmt
) == top_bb
)
1317 gsi
= gsi_for_stmt (def_stmt
);
1318 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1322 gsi
= gsi_after_labels (top_bb
);
1323 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1325 sincos_stats
.inserted
++;
1327 /* And adjust the recorded old call sites. */
1328 for (i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
1332 switch (gimple_call_combined_fn (use_stmt
))
1335 rhs
= fold_build1 (REALPART_EXPR
, type
, res
);
1339 rhs
= fold_build1 (IMAGPART_EXPR
, type
, res
);
1350 /* Replace call with a copy. */
1351 stmt
= gimple_build_assign (gimple_call_lhs (use_stmt
), rhs
);
1353 gsi
= gsi_for_stmt (use_stmt
);
1354 gsi_replace (&gsi
, stmt
, true);
1355 if (gimple_purge_dead_eh_edges (gimple_bb (stmt
)))
1362 /* To evaluate powi(x,n), the floating point value x raised to the
1363 constant integer exponent n, we use a hybrid algorithm that
1364 combines the "window method" with look-up tables. For an
1365 introduction to exponentiation algorithms and "addition chains",
1366 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1367 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1368 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1369 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1371 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1372 multiplications to inline before calling the system library's pow
1373 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1374 so this default never requires calling pow, powf or powl. */
1376 #ifndef POWI_MAX_MULTS
1377 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1380 /* The size of the "optimal power tree" lookup table. All
1381 exponents less than this value are simply looked up in the
1382 powi_table below. This threshold is also used to size the
1383 cache of pseudo registers that hold intermediate results. */
1384 #define POWI_TABLE_SIZE 256
1386 /* The size, in bits of the window, used in the "window method"
1387 exponentiation algorithm. This is equivalent to a radix of
1388 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1389 #define POWI_WINDOW_SIZE 3
1391 /* The following table is an efficient representation of an
1392 "optimal power tree". For each value, i, the corresponding
1393 value, j, in the table states than an optimal evaluation
1394 sequence for calculating pow(x,i) can be found by evaluating
1395 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1396 100 integers is given in Knuth's "Seminumerical algorithms". */
1398 static const unsigned char powi_table
[POWI_TABLE_SIZE
] =
1400 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1401 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1402 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1403 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1404 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1405 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1406 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1407 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1408 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1409 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1410 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1411 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1412 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1413 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1414 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1415 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1416 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1417 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1418 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1419 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1420 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1421 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1422 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1423 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1424 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1425 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1426 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1427 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1428 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1429 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1430 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1431 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1435 /* Return the number of multiplications required to calculate
1436 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1437 subroutine of powi_cost. CACHE is an array indicating
1438 which exponents have already been calculated. */
1441 powi_lookup_cost (unsigned HOST_WIDE_INT n
, bool *cache
)
1443 /* If we've already calculated this exponent, then this evaluation
1444 doesn't require any additional multiplications. */
1449 return powi_lookup_cost (n
- powi_table
[n
], cache
)
1450 + powi_lookup_cost (powi_table
[n
], cache
) + 1;
1453 /* Return the number of multiplications required to calculate
1454 powi(x,n) for an arbitrary x, given the exponent N. This
1455 function needs to be kept in sync with powi_as_mults below. */
1458 powi_cost (HOST_WIDE_INT n
)
1460 bool cache
[POWI_TABLE_SIZE
];
1461 unsigned HOST_WIDE_INT digit
;
1462 unsigned HOST_WIDE_INT val
;
1468 /* Ignore the reciprocal when calculating the cost. */
1471 /* Initialize the exponent cache. */
1472 memset (cache
, 0, POWI_TABLE_SIZE
* sizeof (bool));
1477 while (val
>= POWI_TABLE_SIZE
)
1481 digit
= val
& ((1 << POWI_WINDOW_SIZE
) - 1);
1482 result
+= powi_lookup_cost (digit
, cache
)
1483 + POWI_WINDOW_SIZE
+ 1;
1484 val
>>= POWI_WINDOW_SIZE
;
1493 return result
+ powi_lookup_cost (val
, cache
);
1496 /* Recursive subroutine of powi_as_mults. This function takes the
1497 array, CACHE, of already calculated exponents and an exponent N and
1498 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1501 powi_as_mults_1 (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1502 unsigned HOST_WIDE_INT n
, tree
*cache
)
1504 tree op0
, op1
, ssa_target
;
1505 unsigned HOST_WIDE_INT digit
;
1508 if (n
< POWI_TABLE_SIZE
&& cache
[n
])
1511 ssa_target
= make_temp_ssa_name (type
, NULL
, "powmult");
1513 if (n
< POWI_TABLE_SIZE
)
1515 cache
[n
] = ssa_target
;
1516 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- powi_table
[n
], cache
);
1517 op1
= powi_as_mults_1 (gsi
, loc
, type
, powi_table
[n
], cache
);
1521 digit
= n
& ((1 << POWI_WINDOW_SIZE
) - 1);
1522 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- digit
, cache
);
1523 op1
= powi_as_mults_1 (gsi
, loc
, type
, digit
, cache
);
1527 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
>> 1, cache
);
1531 mult_stmt
= gimple_build_assign (ssa_target
, MULT_EXPR
, op0
, op1
);
1532 gimple_set_location (mult_stmt
, loc
);
1533 gsi_insert_before (gsi
, mult_stmt
, GSI_SAME_STMT
);
1538 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1539 This function needs to be kept in sync with powi_cost above. */
1542 powi_as_mults (gimple_stmt_iterator
*gsi
, location_t loc
,
1543 tree arg0
, HOST_WIDE_INT n
)
1545 tree cache
[POWI_TABLE_SIZE
], result
, type
= TREE_TYPE (arg0
);
1550 return build_one_cst (type
);
1552 memset (cache
, 0, sizeof (cache
));
1555 result
= powi_as_mults_1 (gsi
, loc
, type
, absu_hwi (n
), cache
);
1559 /* If the original exponent was negative, reciprocate the result. */
1560 target
= make_temp_ssa_name (type
, NULL
, "powmult");
1561 div_stmt
= gimple_build_assign (target
, RDIV_EXPR
,
1562 build_real (type
, dconst1
), result
);
1563 gimple_set_location (div_stmt
, loc
);
1564 gsi_insert_before (gsi
, div_stmt
, GSI_SAME_STMT
);
1569 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1570 location info LOC. If the arguments are appropriate, create an
1571 equivalent sequence of statements prior to GSI using an optimal
1572 number of multiplications, and return an expession holding the
1576 gimple_expand_builtin_powi (gimple_stmt_iterator
*gsi
, location_t loc
,
1577 tree arg0
, HOST_WIDE_INT n
)
1579 if ((n
>= -1 && n
<= 2)
1580 || (optimize_function_for_speed_p (cfun
)
1581 && powi_cost (n
) <= POWI_MAX_MULTS
))
1582 return powi_as_mults (gsi
, loc
, arg0
, n
);
1587 /* Build a gimple call statement that calls FN with argument ARG.
1588 Set the lhs of the call statement to a fresh SSA name. Insert the
1589 statement prior to GSI's current position, and return the fresh
1593 build_and_insert_call (gimple_stmt_iterator
*gsi
, location_t loc
,
1599 call_stmt
= gimple_build_call (fn
, 1, arg
);
1600 ssa_target
= make_temp_ssa_name (TREE_TYPE (arg
), NULL
, "powroot");
1601 gimple_set_lhs (call_stmt
, ssa_target
);
1602 gimple_set_location (call_stmt
, loc
);
1603 gsi_insert_before (gsi
, call_stmt
, GSI_SAME_STMT
);
1608 /* Build a gimple binary operation with the given CODE and arguments
1609 ARG0, ARG1, assigning the result to a new SSA name for variable
1610 TARGET. Insert the statement prior to GSI's current position, and
1611 return the fresh SSA name.*/
1614 build_and_insert_binop (gimple_stmt_iterator
*gsi
, location_t loc
,
1615 const char *name
, enum tree_code code
,
1616 tree arg0
, tree arg1
)
1618 tree result
= make_temp_ssa_name (TREE_TYPE (arg0
), NULL
, name
);
1619 gassign
*stmt
= gimple_build_assign (result
, code
, arg0
, arg1
);
1620 gimple_set_location (stmt
, loc
);
1621 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1625 /* Build a gimple reference operation with the given CODE and argument
1626 ARG, assigning the result to a new SSA name of TYPE with NAME.
1627 Insert the statement prior to GSI's current position, and return
1628 the fresh SSA name. */
1631 build_and_insert_ref (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1632 const char *name
, enum tree_code code
, tree arg0
)
1634 tree result
= make_temp_ssa_name (type
, NULL
, name
);
1635 gimple
*stmt
= gimple_build_assign (result
, build1 (code
, type
, arg0
));
1636 gimple_set_location (stmt
, loc
);
1637 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1641 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1642 prior to GSI's current position, and return the fresh SSA name. */
1645 build_and_insert_cast (gimple_stmt_iterator
*gsi
, location_t loc
,
1646 tree type
, tree val
)
1648 tree result
= make_ssa_name (type
);
1649 gassign
*stmt
= gimple_build_assign (result
, NOP_EXPR
, val
);
1650 gimple_set_location (stmt
, loc
);
1651 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1655 struct pow_synth_sqrt_info
1658 unsigned int deepest
;
1659 unsigned int num_mults
;
1662 /* Return true iff the real value C can be represented as a
1663 sum of powers of 0.5 up to N. That is:
1664 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1665 Record in INFO the various parameters of the synthesis algorithm such
1666 as the factors a[i], the maximum 0.5 power and the number of
1667 multiplications that will be required. */
1670 representable_as_half_series_p (REAL_VALUE_TYPE c
, unsigned n
,
1671 struct pow_synth_sqrt_info
*info
)
1673 REAL_VALUE_TYPE factor
= dconsthalf
;
1674 REAL_VALUE_TYPE remainder
= c
;
1677 info
->num_mults
= 0;
1678 memset (info
->factors
, 0, n
* sizeof (bool));
1680 for (unsigned i
= 0; i
< n
; i
++)
1682 REAL_VALUE_TYPE res
;
1684 /* If something inexact happened bail out now. */
1685 if (real_arithmetic (&res
, MINUS_EXPR
, &remainder
, &factor
))
1688 /* We have hit zero. The number is representable as a sum
1689 of powers of 0.5. */
1690 if (real_equal (&res
, &dconst0
))
1692 info
->factors
[i
] = true;
1693 info
->deepest
= i
+ 1;
1696 else if (!REAL_VALUE_NEGATIVE (res
))
1699 info
->factors
[i
] = true;
1703 info
->factors
[i
] = false;
1705 real_arithmetic (&factor
, MULT_EXPR
, &factor
, &dconsthalf
);
1710 /* Return the tree corresponding to FN being applied
1711 to ARG N times at GSI and LOC.
1712 Look up previous results from CACHE if need be.
1713 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1716 get_fn_chain (tree arg
, unsigned int n
, gimple_stmt_iterator
*gsi
,
1717 tree fn
, location_t loc
, tree
*cache
)
1719 tree res
= cache
[n
];
1722 tree prev
= get_fn_chain (arg
, n
- 1, gsi
, fn
, loc
, cache
);
1723 res
= build_and_insert_call (gsi
, loc
, fn
, prev
);
1730 /* Print to STREAM the repeated application of function FNAME to ARG
1731 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1735 print_nested_fn (FILE* stream
, const char *fname
, const char* arg
,
1739 fprintf (stream
, "%s", arg
);
1742 fprintf (stream
, "%s (", fname
);
1743 print_nested_fn (stream
, fname
, arg
, n
- 1);
1744 fprintf (stream
, ")");
1748 /* Print to STREAM the fractional sequence of sqrt chains
1749 applied to ARG, described by INFO. Used for the dump file. */
1752 dump_fractional_sqrt_sequence (FILE *stream
, const char *arg
,
1753 struct pow_synth_sqrt_info
*info
)
1755 for (unsigned int i
= 0; i
< info
->deepest
; i
++)
1757 bool is_set
= info
->factors
[i
];
1760 print_nested_fn (stream
, "sqrt", arg
, i
+ 1);
1761 if (i
!= info
->deepest
- 1)
1762 fprintf (stream
, " * ");
1767 /* Print to STREAM a representation of raising ARG to an integer
1768 power N. Used for the dump file. */
1771 dump_integer_part (FILE *stream
, const char* arg
, HOST_WIDE_INT n
)
1774 fprintf (stream
, "powi (%s, " HOST_WIDE_INT_PRINT_DEC
")", arg
, n
);
1776 fprintf (stream
, "%s", arg
);
1779 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1780 square roots. Place at GSI and LOC. Limit the maximum depth
1781 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1782 result of the expanded sequence or NULL_TREE if the expansion failed.
1784 This routine assumes that ARG1 is a real number with a fractional part
1785 (the integer exponent case will have been handled earlier in
1786 gimple_expand_builtin_pow).
1789 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1790 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1791 FRAC_PART == ARG1 - WHOLE_PART:
1792 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1793 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1794 if it can be expressed as such, that is if FRAC_PART satisfies:
1795 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1796 where integer a[i] is either 0 or 1.
1799 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1800 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1802 For ARG1 < 0.0 there are two approaches:
1803 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1804 is calculated as above.
1807 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1808 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1810 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1811 FRAC_PART := ARG1 - WHOLE_PART
1812 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1814 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1815 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1817 For ARG1 < 0.0 we choose between (A) and (B) depending on
1818 how many multiplications we'd have to do.
1819 So, for the example in (B): POW (x, -5.875), if we were to
1820 follow algorithm (A) we would produce:
1821 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1822 which contains more multiplications than approach (B).
1824 Hopefully, this approach will eliminate potentially expensive POW library
1825 calls when unsafe floating point math is enabled and allow the compiler to
1826 further optimise the multiplies, square roots and divides produced by this
1830 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1831 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1833 tree type
= TREE_TYPE (arg0
);
1834 machine_mode mode
= TYPE_MODE (type
);
1835 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1836 bool one_over
= true;
1841 if (TREE_CODE (arg1
) != REAL_CST
)
1844 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1846 gcc_assert (max_depth
> 0);
1847 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1849 struct pow_synth_sqrt_info synth_info
;
1850 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1851 synth_info
.deepest
= 0;
1852 synth_info
.num_mults
= 0;
1854 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1855 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1857 /* The whole and fractional parts of exp. */
1858 REAL_VALUE_TYPE whole_part
;
1859 REAL_VALUE_TYPE frac_part
;
1861 real_floor (&whole_part
, mode
, &exp
);
1862 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1865 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1866 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1870 real_ceil (&ceil_whole
, mode
, &exp
);
1871 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1874 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1877 /* Check whether it's more profitable to not use 1.0 / ... */
1880 struct pow_synth_sqrt_info alt_synth_info
;
1881 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1882 alt_synth_info
.deepest
= 0;
1883 alt_synth_info
.num_mults
= 0;
1885 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1887 && alt_synth_info
.deepest
<= synth_info
.deepest
1888 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1890 whole_part
= ceil_whole
;
1891 frac_part
= ceil_fract
;
1892 synth_info
.deepest
= alt_synth_info
.deepest
;
1893 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1894 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1895 (max_depth
+ 1) * sizeof (bool));
1900 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1901 REAL_VALUE_TYPE cint
;
1902 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1904 if (!real_identical (&whole_part
, &cint
))
1907 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1910 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1912 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1914 /* Calculate the integer part of the exponent. */
1917 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1926 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1927 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1933 fprintf (dump_file
, "1.0 / (");
1934 dump_integer_part (dump_file
, "x", n
);
1936 fprintf (dump_file
, " * ");
1937 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1938 fprintf (dump_file
, ")");
1942 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1943 fprintf (dump_file
, " / (");
1944 dump_integer_part (dump_file
, "x", n
);
1945 fprintf (dump_file
, ")");
1950 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1952 fprintf (dump_file
, " * ");
1953 dump_integer_part (dump_file
, "x", n
);
1956 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1960 tree fract_res
= NULL_TREE
;
1963 /* Calculate the fractional part of the exponent. */
1964 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1966 if (synth_info
.factors
[i
])
1968 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1971 fract_res
= sqrt_chain
;
1974 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1975 fract_res
, sqrt_chain
);
1979 tree res
= NULL_TREE
;
1986 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1987 fract_res
, integer_res
);
1991 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1992 build_real (type
, dconst1
), res
);
1996 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1997 fract_res
, integer_res
);
2001 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2002 fract_res
, integer_res
);
2006 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2007 with location info LOC. If possible, create an equivalent and
2008 less expensive sequence of statements prior to GSI, and return an
2009 expession holding the result. */
2012 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
2013 tree arg0
, tree arg1
)
2015 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
2016 REAL_VALUE_TYPE c2
, dconst3
;
2018 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
2020 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
2021 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
2023 dconst1_4
= dconst1
;
2024 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
2026 /* If the exponent isn't a constant, there's nothing of interest
2028 if (TREE_CODE (arg1
) != REAL_CST
)
2031 /* Don't perform the operation if flag_signaling_nans is on
2032 and the operand is a signaling NaN. */
2033 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
2034 && ((TREE_CODE (arg0
) == REAL_CST
2035 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
2036 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
2039 /* If the exponent is equivalent to an integer, expand to an optimal
2040 multiplication sequence when profitable. */
2041 c
= TREE_REAL_CST (arg1
);
2042 n
= real_to_integer (&c
);
2043 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2044 c_is_int
= real_identical (&c
, &cint
);
2047 && ((n
>= -1 && n
<= 2)
2048 || (flag_unsafe_math_optimizations
2050 && powi_cost (n
) <= POWI_MAX_MULTS
)))
2051 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
2053 /* Attempt various optimizations using sqrt and cbrt. */
2054 type
= TREE_TYPE (arg0
);
2055 mode
= TYPE_MODE (type
);
2056 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2058 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2059 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2062 && real_equal (&c
, &dconsthalf
)
2063 && !HONOR_SIGNED_ZEROS (mode
))
2064 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2066 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
2068 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2069 optimizations since 1./3. is not exactly representable. If x
2070 is negative and finite, the correct value of pow(x,1./3.) is
2071 a NaN with the "invalid" exception raised, because the value
2072 of 1./3. actually has an even denominator. The correct value
2073 of cbrt(x) is a negative real value. */
2074 cbrtfn
= mathfn_built_in (type
, BUILT_IN_CBRT
);
2075 dconst1_3
= real_value_truncate (mode
, dconst_third ());
2077 if (flag_unsafe_math_optimizations
2079 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2080 && real_equal (&c
, &dconst1_3
))
2081 return build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2083 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2084 if we don't have a hardware sqrt insn. */
2085 dconst1_6
= dconst1_3
;
2086 SET_REAL_EXP (&dconst1_6
, REAL_EXP (&dconst1_6
) - 1);
2088 if (flag_unsafe_math_optimizations
2091 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2094 && real_equal (&c
, &dconst1_6
))
2097 sqrt_arg0
= build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2100 return build_and_insert_call (gsi
, loc
, cbrtfn
, sqrt_arg0
);
2104 /* Attempt to expand the POW as a product of square root chains.
2105 Expand the 0.25 case even when otpimising for size. */
2106 if (flag_unsafe_math_optimizations
2109 && (speed_p
|| real_equal (&c
, &dconst1_4
))
2110 && !HONOR_SIGNED_ZEROS (mode
))
2112 unsigned int max_depth
= speed_p
2113 ? param_max_pow_sqrt_depth
2116 tree expand_with_sqrts
2117 = expand_pow_as_sqrts (gsi
, loc
, arg0
, arg1
, max_depth
);
2119 if (expand_with_sqrts
)
2120 return expand_with_sqrts
;
2123 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst2
);
2124 n
= real_to_integer (&c2
);
2125 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2126 c2_is_int
= real_identical (&c2
, &cint
);
2128 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2130 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2131 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2133 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2134 different from pow(x, 1./3.) due to rounding and behavior with
2135 negative x, we need to constrain this transformation to unsafe
2136 math and positive x or finite math. */
2137 real_from_integer (&dconst3
, VOIDmode
, 3, SIGNED
);
2138 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst3
);
2139 real_round (&c2
, mode
, &c2
);
2140 n
= real_to_integer (&c2
);
2141 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2142 real_arithmetic (&c2
, RDIV_EXPR
, &cint
, &dconst3
);
2143 real_convert (&c2
, mode
, &c2
);
2145 if (flag_unsafe_math_optimizations
2147 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2148 && real_identical (&c2
, &c
)
2150 && optimize_function_for_speed_p (cfun
)
2151 && powi_cost (n
/ 3) <= POWI_MAX_MULTS
)
2153 tree powi_x_ndiv3
= NULL_TREE
;
2155 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2156 possible or profitable, give up. Skip the degenerate case when
2157 abs(n) < 3, where the result is always 1. */
2158 if (absu_hwi (n
) >= 3)
2160 powi_x_ndiv3
= gimple_expand_builtin_powi (gsi
, loc
, arg0
,
2166 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2167 as that creates an unnecessary variable. Instead, just produce
2168 either cbrt(x) or cbrt(x) * cbrt(x). */
2169 cbrt_x
= build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2171 if (absu_hwi (n
) % 3 == 1)
2172 powi_cbrt_x
= cbrt_x
;
2174 powi_cbrt_x
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2177 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2178 if (absu_hwi (n
) < 3)
2179 result
= powi_cbrt_x
;
2181 result
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2182 powi_x_ndiv3
, powi_cbrt_x
);
2184 /* If n is negative, reciprocate the result. */
2186 result
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
2187 build_real (type
, dconst1
), result
);
2192 /* No optimizations succeeded. */
2196 /* ARG is the argument to a cabs builtin call in GSI with location info
2197 LOC. Create a sequence of statements prior to GSI that calculates
2198 sqrt(R*R + I*I), where R and I are the real and imaginary components
2199 of ARG, respectively. Return an expression holding the result. */
2202 gimple_expand_builtin_cabs (gimple_stmt_iterator
*gsi
, location_t loc
, tree arg
)
2204 tree real_part
, imag_part
, addend1
, addend2
, sum
, result
;
2205 tree type
= TREE_TYPE (TREE_TYPE (arg
));
2206 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2207 machine_mode mode
= TYPE_MODE (type
);
2209 if (!flag_unsafe_math_optimizations
2210 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi
)))
2212 || optab_handler (sqrt_optab
, mode
) == CODE_FOR_nothing
)
2215 real_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2216 REALPART_EXPR
, arg
);
2217 addend1
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2218 real_part
, real_part
);
2219 imag_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2220 IMAGPART_EXPR
, arg
);
2221 addend2
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2222 imag_part
, imag_part
);
2223 sum
= build_and_insert_binop (gsi
, loc
, "cabs", PLUS_EXPR
, addend1
, addend2
);
2224 result
= build_and_insert_call (gsi
, loc
, sqrtfn
, sum
);
2229 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2230 on the SSA_NAME argument of each of them. */
2234 const pass_data pass_data_cse_sincos
=
2236 GIMPLE_PASS
, /* type */
2237 "sincos", /* name */
2238 OPTGROUP_NONE
, /* optinfo_flags */
2239 TV_TREE_SINCOS
, /* tv_id */
2240 PROP_ssa
, /* properties_required */
2241 0, /* properties_provided */
2242 0, /* properties_destroyed */
2243 0, /* todo_flags_start */
2244 TODO_update_ssa
, /* todo_flags_finish */
2247 class pass_cse_sincos
: public gimple_opt_pass
2250 pass_cse_sincos (gcc::context
*ctxt
)
2251 : gimple_opt_pass (pass_data_cse_sincos
, ctxt
)
2254 /* opt_pass methods: */
2255 bool gate (function
*) final override
2260 unsigned int execute (function
*) final override
;
2262 }; // class pass_cse_sincos
2265 pass_cse_sincos::execute (function
*fun
)
2268 bool cfg_changed
= false;
2270 calculate_dominance_info (CDI_DOMINATORS
);
2271 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2273 FOR_EACH_BB_FN (bb
, fun
)
2275 gimple_stmt_iterator gsi
;
2277 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2279 gimple
*stmt
= gsi_stmt (gsi
);
2281 if (is_gimple_call (stmt
)
2282 && gimple_call_lhs (stmt
))
2285 switch (gimple_call_combined_fn (stmt
))
2290 arg
= gimple_call_arg (stmt
, 0);
2291 /* Make sure we have either sincos or cexp. */
2292 if (!targetm
.libc_has_function (function_c99_math_complex
,
2294 && !targetm
.libc_has_function (function_sincos
,
2298 if (TREE_CODE (arg
) == SSA_NAME
)
2299 cfg_changed
|= execute_cse_sincos_1 (arg
);
2308 statistics_counter_event (fun
, "sincos statements inserted",
2309 sincos_stats
.inserted
);
2310 statistics_counter_event (fun
, "conv statements removed",
2311 sincos_stats
.conv_removed
);
2313 return cfg_changed
? TODO_cleanup_cfg
: 0;
2319 make_pass_cse_sincos (gcc::context
*ctxt
)
2321 return new pass_cse_sincos (ctxt
);
2324 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2325 Also expand CABS. */
2328 const pass_data pass_data_expand_powcabs
=
2330 GIMPLE_PASS
, /* type */
2331 "powcabs", /* name */
2332 OPTGROUP_NONE
, /* optinfo_flags */
2333 TV_TREE_POWCABS
, /* tv_id */
2334 PROP_ssa
, /* properties_required */
2335 PROP_gimple_opt_math
, /* properties_provided */
2336 0, /* properties_destroyed */
2337 0, /* todo_flags_start */
2338 TODO_update_ssa
, /* todo_flags_finish */
2341 class pass_expand_powcabs
: public gimple_opt_pass
2344 pass_expand_powcabs (gcc::context
*ctxt
)
2345 : gimple_opt_pass (pass_data_expand_powcabs
, ctxt
)
2348 /* opt_pass methods: */
2349 bool gate (function
*) final override
2354 unsigned int execute (function
*) final override
;
2356 }; // class pass_expand_powcabs
2359 pass_expand_powcabs::execute (function
*fun
)
2362 bool cfg_changed
= false;
2364 calculate_dominance_info (CDI_DOMINATORS
);
2366 FOR_EACH_BB_FN (bb
, fun
)
2368 gimple_stmt_iterator gsi
;
2369 bool cleanup_eh
= false;
2371 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2373 gimple
*stmt
= gsi_stmt (gsi
);
2375 /* Only the last stmt in a bb could throw, no need to call
2376 gimple_purge_dead_eh_edges if we change something in the middle
2377 of a basic block. */
2380 if (is_gimple_call (stmt
)
2381 && gimple_call_lhs (stmt
))
2383 tree arg0
, arg1
, result
;
2387 switch (gimple_call_combined_fn (stmt
))
2390 arg0
= gimple_call_arg (stmt
, 0);
2391 arg1
= gimple_call_arg (stmt
, 1);
2393 loc
= gimple_location (stmt
);
2394 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2398 tree lhs
= gimple_get_lhs (stmt
);
2399 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2400 gimple_set_location (new_stmt
, loc
);
2401 unlink_stmt_vdef (stmt
);
2402 gsi_replace (&gsi
, new_stmt
, true);
2404 if (gimple_vdef (stmt
))
2405 release_ssa_name (gimple_vdef (stmt
));
2410 arg0
= gimple_call_arg (stmt
, 0);
2411 arg1
= gimple_call_arg (stmt
, 1);
2412 loc
= gimple_location (stmt
);
2414 if (real_minus_onep (arg0
))
2416 tree t0
, t1
, cond
, one
, minus_one
;
2419 t0
= TREE_TYPE (arg0
);
2420 t1
= TREE_TYPE (arg1
);
2421 one
= build_real (t0
, dconst1
);
2422 minus_one
= build_real (t0
, dconstm1
);
2424 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2425 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2426 arg1
, build_int_cst (t1
, 1));
2427 gimple_set_location (stmt
, loc
);
2428 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2430 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2431 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2433 gimple_set_location (stmt
, loc
);
2434 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2438 if (!tree_fits_shwi_p (arg1
))
2441 n
= tree_to_shwi (arg1
);
2442 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2447 tree lhs
= gimple_get_lhs (stmt
);
2448 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2449 gimple_set_location (new_stmt
, loc
);
2450 unlink_stmt_vdef (stmt
);
2451 gsi_replace (&gsi
, new_stmt
, true);
2453 if (gimple_vdef (stmt
))
2454 release_ssa_name (gimple_vdef (stmt
));
2459 arg0
= gimple_call_arg (stmt
, 0);
2460 loc
= gimple_location (stmt
);
2461 result
= gimple_expand_builtin_cabs (&gsi
, loc
, arg0
);
2465 tree lhs
= gimple_get_lhs (stmt
);
2466 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2467 gimple_set_location (new_stmt
, loc
);
2468 unlink_stmt_vdef (stmt
);
2469 gsi_replace (&gsi
, new_stmt
, true);
2471 if (gimple_vdef (stmt
))
2472 release_ssa_name (gimple_vdef (stmt
));
2481 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2484 return cfg_changed
? TODO_cleanup_cfg
: 0;
2490 make_pass_expand_powcabs (gcc::context
*ctxt
)
2492 return new pass_expand_powcabs (ctxt
);
2495 /* Return true if stmt is a type conversion operation that can be stripped
2496 when used in a widening multiply operation. */
2498 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2500 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2502 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2507 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2510 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2512 /* If the type of OP has the same precision as the result, then
2513 we can strip this conversion. The multiply operation will be
2514 selected to create the correct extension as a by-product. */
2515 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
2518 /* We can also strip a conversion if it preserves the signed-ness of
2519 the operation and doesn't narrow the range. */
2520 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
2522 /* If the inner-most type is unsigned, then we can strip any
2523 intermediate widening operation. If it's signed, then the
2524 intermediate widening operation must also be signed. */
2525 if ((TYPE_UNSIGNED (inner_op_type
)
2526 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2527 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2533 return rhs_code
== FIXED_CONVERT_EXPR
;
2536 /* Return true if RHS is a suitable operand for a widening multiplication,
2537 assuming a target type of TYPE.
2538 There are two cases:
2540 - RHS makes some value at least twice as wide. Store that value
2541 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2543 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2544 but leave *TYPE_OUT untouched. */
2547 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2553 if (TREE_CODE (rhs
) == SSA_NAME
)
2555 /* Use tree_non_zero_bits to see if this operand is zero_extended
2556 for unsigned widening multiplications or non-negative for
2557 signed widening multiplications. */
2558 if (TREE_CODE (type
) == INTEGER_TYPE
2559 && (TYPE_PRECISION (type
) & 1) == 0
2560 && int_mode_for_size (TYPE_PRECISION (type
) / 2, 1).exists ())
2562 unsigned int prec
= TYPE_PRECISION (type
);
2563 unsigned int hprec
= prec
/ 2;
2564 wide_int bits
= wide_int::from (tree_nonzero_bits (rhs
), prec
,
2565 TYPE_SIGN (TREE_TYPE (rhs
)));
2566 if (TYPE_UNSIGNED (type
)
2567 && wi::bit_and (bits
, wi::mask (hprec
, true, prec
)) == 0)
2569 *type_out
= build_nonstandard_integer_type (hprec
, true);
2570 /* X & MODE_MASK can be simplified to (T)X. */
2571 stmt
= SSA_NAME_DEF_STMT (rhs
);
2572 if (is_gimple_assign (stmt
)
2573 && gimple_assign_rhs_code (stmt
) == BIT_AND_EXPR
2574 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == INTEGER_CST
2575 && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt
)),
2576 prec
, TYPE_SIGN (TREE_TYPE (rhs
)))
2577 == wi::mask (hprec
, false, prec
))
2578 *new_rhs_out
= gimple_assign_rhs1 (stmt
);
2583 else if (!TYPE_UNSIGNED (type
)
2584 && wi::bit_and (bits
, wi::mask (hprec
- 1, true, prec
)) == 0)
2586 *type_out
= build_nonstandard_integer_type (hprec
, false);
2592 stmt
= SSA_NAME_DEF_STMT (rhs
);
2593 if (is_gimple_assign (stmt
))
2596 if (widening_mult_conversion_strippable_p (type
, stmt
))
2598 rhs1
= gimple_assign_rhs1 (stmt
);
2600 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2602 *new_rhs_out
= rhs1
;
2613 type1
= TREE_TYPE (rhs1
);
2615 if (TREE_CODE (type1
) != TREE_CODE (type
)
2616 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2619 *new_rhs_out
= rhs1
;
2624 if (TREE_CODE (rhs
) == INTEGER_CST
)
2634 /* Return true if STMT performs a widening multiplication, assuming the
2635 output type is TYPE. If so, store the unwidened types of the operands
2636 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2637 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2638 and *TYPE2_OUT would give the operands of the multiplication. */
2641 is_widening_mult_p (gimple
*stmt
,
2642 tree
*type1_out
, tree
*rhs1_out
,
2643 tree
*type2_out
, tree
*rhs2_out
)
2645 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2647 if (TREE_CODE (type
) == INTEGER_TYPE
)
2649 if (TYPE_OVERFLOW_TRAPS (type
))
2652 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2655 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2659 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2663 if (*type1_out
== NULL
)
2665 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2667 *type1_out
= *type2_out
;
2670 if (*type2_out
== NULL
)
2672 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2674 *type2_out
= *type1_out
;
2677 /* Ensure that the larger of the two operands comes first. */
2678 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2680 std::swap (*type1_out
, *type2_out
);
2681 std::swap (*rhs1_out
, *rhs2_out
);
2687 /* Check to see if the CALL statement is an invocation of copysign
2688 with 1. being the first argument. */
2690 is_copysign_call_with_1 (gimple
*call
)
2692 gcall
*c
= dyn_cast
<gcall
*> (call
);
2696 enum combined_fn code
= gimple_call_combined_fn (c
);
2698 if (code
== CFN_LAST
)
2701 if (builtin_fn_p (code
))
2703 switch (as_builtin_fn (code
))
2705 CASE_FLT_FN (BUILT_IN_COPYSIGN
):
2706 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN
):
2707 return real_onep (gimple_call_arg (c
, 0));
2713 if (internal_fn_p (code
))
2715 switch (as_internal_fn (code
))
2718 return real_onep (gimple_call_arg (c
, 0));
2727 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2728 This only happens when the xorsign optab is defined, if the
2729 pattern is not a xorsign pattern or if expansion fails FALSE is
2730 returned, otherwise TRUE is returned. */
2732 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2734 tree treeop0
, treeop1
, lhs
, type
;
2735 location_t loc
= gimple_location (stmt
);
2736 lhs
= gimple_assign_lhs (stmt
);
2737 treeop0
= gimple_assign_rhs1 (stmt
);
2738 treeop1
= gimple_assign_rhs2 (stmt
);
2739 type
= TREE_TYPE (lhs
);
2740 machine_mode mode
= TYPE_MODE (type
);
2742 if (HONOR_SNANS (type
))
2745 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2747 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2748 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2750 call0
= SSA_NAME_DEF_STMT (treeop1
);
2751 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2756 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2759 gcall
*c
= as_a
<gcall
*> (call0
);
2760 treeop0
= gimple_call_arg (c
, 1);
2763 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2764 gimple_set_lhs (call_stmt
, lhs
);
2765 gimple_set_location (call_stmt
, loc
);
2766 gsi_replace (gsi
, call_stmt
, true);
2773 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2774 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2775 value is true iff we converted the statement. */
2778 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2780 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2781 enum insn_code handler
;
2782 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2784 int actual_precision
;
2785 location_t loc
= gimple_location (stmt
);
2786 bool from_unsigned1
, from_unsigned2
;
2788 lhs
= gimple_assign_lhs (stmt
);
2789 type
= TREE_TYPE (lhs
);
2790 if (TREE_CODE (type
) != INTEGER_TYPE
)
2793 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
2796 /* if any one of rhs1 and rhs2 is subject to abnormal coalescing,
2797 avoid the tranform. */
2798 if ((TREE_CODE (rhs1
) == SSA_NAME
2799 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
2800 || (TREE_CODE (rhs2
) == SSA_NAME
2801 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
)))
2804 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2805 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2806 if (to_mode
== from_mode
)
2809 from_unsigned1
= TYPE_UNSIGNED (type1
);
2810 from_unsigned2
= TYPE_UNSIGNED (type2
);
2812 if (from_unsigned1
&& from_unsigned2
)
2813 op
= umul_widen_optab
;
2814 else if (!from_unsigned1
&& !from_unsigned2
)
2815 op
= smul_widen_optab
;
2817 op
= usmul_widen_optab
;
2819 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2822 if (handler
== CODE_FOR_nothing
)
2824 if (op
!= smul_widen_optab
)
2826 /* We can use a signed multiply with unsigned types as long as
2827 there is a wider mode to use, or it is the smaller of the two
2828 types that is unsigned. Note that type1 >= type2, always. */
2829 if ((TYPE_UNSIGNED (type1
)
2830 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2831 || (TYPE_UNSIGNED (type2
)
2832 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2834 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2835 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2839 op
= smul_widen_optab
;
2840 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2844 if (handler
== CODE_FOR_nothing
)
2847 from_unsigned1
= from_unsigned2
= false;
2851 /* Expand can synthesize smul_widen_optab if the target
2852 supports umul_widen_optab. */
2853 op
= umul_widen_optab
;
2854 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2857 if (handler
== CODE_FOR_nothing
)
2862 /* Ensure that the inputs to the handler are in the correct precison
2863 for the opcode. This will be the full mode size. */
2864 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2865 if (2 * actual_precision
> TYPE_PRECISION (type
))
2867 if (actual_precision
!= TYPE_PRECISION (type1
)
2868 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2869 type1
= build_nonstandard_integer_type (actual_precision
, from_unsigned1
);
2870 if (!useless_type_conversion_p (type1
, TREE_TYPE (rhs1
)))
2872 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2873 rhs1
= fold_convert (type1
, rhs1
);
2875 rhs1
= build_and_insert_cast (gsi
, loc
, type1
, rhs1
);
2877 if (actual_precision
!= TYPE_PRECISION (type2
)
2878 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2879 type2
= build_nonstandard_integer_type (actual_precision
, from_unsigned2
);
2880 if (!useless_type_conversion_p (type2
, TREE_TYPE (rhs2
)))
2882 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2883 rhs2
= fold_convert (type2
, rhs2
);
2885 rhs2
= build_and_insert_cast (gsi
, loc
, type2
, rhs2
);
2888 gimple_assign_set_rhs1 (stmt
, rhs1
);
2889 gimple_assign_set_rhs2 (stmt
, rhs2
);
2890 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2892 widen_mul_stats
.widen_mults_inserted
++;
2896 /* Process a single gimple statement STMT, which is found at the
2897 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2898 rhs (given by CODE), and try to convert it into a
2899 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2900 is true iff we converted the statement. */
2903 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2904 enum tree_code code
)
2906 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2907 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2908 tree type
, type1
, type2
, optype
;
2909 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2910 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2912 enum tree_code wmult_code
;
2913 enum insn_code handler
;
2914 scalar_mode to_mode
, from_mode
, actual_mode
;
2915 location_t loc
= gimple_location (stmt
);
2916 int actual_precision
;
2917 bool from_unsigned1
, from_unsigned2
;
2919 lhs
= gimple_assign_lhs (stmt
);
2920 type
= TREE_TYPE (lhs
);
2921 if (TREE_CODE (type
) != INTEGER_TYPE
2922 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2925 if (code
== MINUS_EXPR
)
2926 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2928 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2930 rhs1
= gimple_assign_rhs1 (stmt
);
2931 rhs2
= gimple_assign_rhs2 (stmt
);
2933 if (TREE_CODE (rhs1
) == SSA_NAME
)
2935 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2936 if (is_gimple_assign (rhs1_stmt
))
2937 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2940 if (TREE_CODE (rhs2
) == SSA_NAME
)
2942 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2943 if (is_gimple_assign (rhs2_stmt
))
2944 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
2947 /* Allow for one conversion statement between the multiply
2948 and addition/subtraction statement. If there are more than
2949 one conversions then we assume they would invalidate this
2950 transformation. If that's not the case then they should have
2951 been folded before now. */
2952 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2954 conv1_stmt
= rhs1_stmt
;
2955 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2956 if (TREE_CODE (rhs1
) == SSA_NAME
)
2958 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2959 if (is_gimple_assign (rhs1_stmt
))
2960 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2965 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2967 conv2_stmt
= rhs2_stmt
;
2968 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2969 if (TREE_CODE (rhs2
) == SSA_NAME
)
2971 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2972 if (is_gimple_assign (rhs2_stmt
))
2973 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
2979 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2980 is_widening_mult_p, but we still need the rhs returns.
2982 It might also appear that it would be sufficient to use the existing
2983 operands of the widening multiply, but that would limit the choice of
2984 multiply-and-accumulate instructions.
2986 If the widened-multiplication result has more than one uses, it is
2987 probably wiser not to do the conversion. Also restrict this operation
2988 to single basic block to avoid moving the multiply to a different block
2989 with a higher execution frequency. */
2990 if (code
== PLUS_EXPR
2991 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2993 if (!has_single_use (rhs1
)
2994 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2995 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2996 &type2
, &mult_rhs2
))
2999 conv_stmt
= conv1_stmt
;
3001 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
3003 if (!has_single_use (rhs2
)
3004 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
3005 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
3006 &type2
, &mult_rhs2
))
3009 conv_stmt
= conv2_stmt
;
3014 to_mode
= SCALAR_TYPE_MODE (type
);
3015 from_mode
= SCALAR_TYPE_MODE (type1
);
3016 if (to_mode
== from_mode
)
3019 from_unsigned1
= TYPE_UNSIGNED (type1
);
3020 from_unsigned2
= TYPE_UNSIGNED (type2
);
3023 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3024 if (from_unsigned1
!= from_unsigned2
)
3026 if (!INTEGRAL_TYPE_P (type
))
3028 /* We can use a signed multiply with unsigned types as long as
3029 there is a wider mode to use, or it is the smaller of the two
3030 types that is unsigned. Note that type1 >= type2, always. */
3032 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
3034 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
3036 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
3037 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
3041 from_unsigned1
= from_unsigned2
= false;
3042 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
3046 /* If there was a conversion between the multiply and addition
3047 then we need to make sure it fits a multiply-and-accumulate.
3048 The should be a single mode change which does not change the
3052 /* We use the original, unmodified data types for this. */
3053 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
3054 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
3055 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
3056 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
3058 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
3060 /* Conversion is a truncate. */
3061 if (TYPE_PRECISION (to_type
) < data_size
)
3064 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
3066 /* Conversion is an extend. Check it's the right sort. */
3067 if (TYPE_UNSIGNED (from_type
) != is_unsigned
3068 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
3071 /* else convert is a no-op for our purposes. */
3074 /* Verify that the machine can perform a widening multiply
3075 accumulate in this mode/signedness combination, otherwise
3076 this transformation is likely to pessimize code. */
3077 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
3078 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
3079 from_mode
, &actual_mode
);
3081 if (handler
== CODE_FOR_nothing
)
3084 /* Ensure that the inputs to the handler are in the correct precison
3085 for the opcode. This will be the full mode size. */
3086 actual_precision
= GET_MODE_PRECISION (actual_mode
);
3087 if (actual_precision
!= TYPE_PRECISION (type1
)
3088 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
3089 type1
= build_nonstandard_integer_type (actual_precision
, from_unsigned1
);
3090 if (!useless_type_conversion_p (type1
, TREE_TYPE (mult_rhs1
)))
3092 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
3093 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
3095 mult_rhs1
= build_and_insert_cast (gsi
, loc
, type1
, mult_rhs1
);
3097 if (actual_precision
!= TYPE_PRECISION (type2
)
3098 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
3099 type2
= build_nonstandard_integer_type (actual_precision
, from_unsigned2
);
3100 if (!useless_type_conversion_p (type2
, TREE_TYPE (mult_rhs2
)))
3102 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
3103 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
3105 mult_rhs2
= build_and_insert_cast (gsi
, loc
, type2
, mult_rhs2
);
3108 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
3109 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
3111 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
3113 update_stmt (gsi_stmt (*gsi
));
3114 widen_mul_stats
.maccs_inserted
++;
3118 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3119 OP2 and which we know is used in statements that can be, together with the
3120 multiplication, converted to FMAs, perform the transformation. */
3123 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
3125 tree type
= TREE_TYPE (mul_result
);
3127 imm_use_iterator imm_iter
;
3130 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
3132 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
3133 tree addop
, mulop1
= op1
, result
= mul_result
;
3134 bool negate_p
= false;
3135 gimple_seq seq
= NULL
;
3137 if (is_gimple_debug (use_stmt
))
3140 if (is_gimple_assign (use_stmt
)
3141 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3143 result
= gimple_assign_lhs (use_stmt
);
3144 use_operand_p use_p
;
3145 gimple
*neguse_stmt
;
3146 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
3147 gsi_remove (&gsi
, true);
3148 release_defs (use_stmt
);
3150 use_stmt
= neguse_stmt
;
3151 gsi
= gsi_for_stmt (use_stmt
);
3155 tree cond
, else_value
, ops
[3], len
, bias
;
3157 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
3161 addop
= ops
[0] == result
? ops
[1] : ops
[0];
3163 if (code
== MINUS_EXPR
)
3165 if (ops
[0] == result
)
3166 /* a * b - c -> a * b + (-c) */
3167 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
3169 /* a - b * c -> (-b) * c + a */
3170 negate_p
= !negate_p
;
3174 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
3177 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3181 = gimple_build_call_internal (IFN_COND_LEN_FMA
, 7, cond
, mulop1
, op2
,
3182 addop
, else_value
, len
, bias
);
3184 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
3185 op2
, addop
, else_value
);
3187 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
3188 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
3189 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
3191 gsi_replace (&gsi
, fma_stmt
, true);
3192 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3193 regardless of where the negation occurs. */
3194 gimple
*orig_stmt
= gsi_stmt (gsi
);
3195 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3197 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
3199 update_stmt (gsi_stmt (gsi
));
3202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3204 fprintf (dump_file
, "Generated FMA ");
3205 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3206 fprintf (dump_file
, "\n");
3209 /* If the FMA result is negated in a single use, fold the negation
3211 orig_stmt
= gsi_stmt (gsi
);
3212 use_operand_p use_p
;
3214 if (is_gimple_call (orig_stmt
)
3215 && gimple_call_internal_p (orig_stmt
)
3216 && gimple_call_lhs (orig_stmt
)
3217 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
3218 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
3219 && is_gimple_assign (neg_stmt
)
3220 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
3221 && !stmt_could_throw_p (cfun
, neg_stmt
))
3223 gsi
= gsi_for_stmt (neg_stmt
);
3224 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3226 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
3228 update_stmt (gsi_stmt (gsi
));
3229 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3231 fprintf (dump_file
, "Folded FMA negation ");
3232 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3233 fprintf (dump_file
, "\n");
3238 widen_mul_stats
.fmas_inserted
++;
3242 /* Data necessary to perform the actual transformation from a multiplication
3243 and an addition to an FMA after decision is taken it should be done and to
3244 then delete the multiplication statement from the function IL. */
3246 struct fma_transformation_info
3254 /* Structure containing the current state of FMA deferring, i.e. whether we are
3255 deferring, whether to continue deferring, and all data necessary to come
3256 back and perform all deferred transformations. */
3258 class fma_deferring_state
3261 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3262 do any deferring. */
3264 fma_deferring_state (bool perform_deferring
)
3265 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3266 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
3268 /* List of FMA candidates for which we the transformation has been determined
3269 possible but we at this point in BB analysis we do not consider them
3271 auto_vec
<fma_transformation_info
, 8> m_candidates
;
3273 /* Set of results of multiplication that are part of an already deferred FMA
3275 hash_set
<tree
> m_mul_result_set
;
3277 /* The PHI that supposedly feeds back result of a FMA to another over loop
3279 gphi
*m_initial_phi
;
3281 /* Result of the last produced FMA candidate or NULL if there has not been
3285 /* If true, deferring might still be profitable. If false, transform all
3286 candidates and no longer defer. */
3290 /* Transform all deferred FMA candidates and mark STATE as no longer
3294 cancel_fma_deferring (fma_deferring_state
*state
)
3296 if (!state
->m_deferring_p
)
3299 for (unsigned i
= 0; i
< state
->m_candidates
.length (); i
++)
3301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3302 fprintf (dump_file
, "Generating deferred FMA\n");
3304 const fma_transformation_info
&fti
= state
->m_candidates
[i
];
3305 convert_mult_to_fma_1 (fti
.mul_result
, fti
.op1
, fti
.op2
);
3307 gimple_stmt_iterator gsi
= gsi_for_stmt (fti
.mul_stmt
);
3308 gsi_remove (&gsi
, true);
3309 release_defs (fti
.mul_stmt
);
3311 state
->m_deferring_p
= false;
3314 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3315 Otherwise return NULL. */
3318 result_of_phi (tree op
)
3320 if (TREE_CODE (op
) != SSA_NAME
)
3323 return dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (op
));
3326 /* After processing statements of a BB and recording STATE, return true if the
3327 initial phi is fed by the last FMA candidate result ore one such result from
3328 previously processed BBs marked in LAST_RESULT_SET. */
3331 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3332 hash_set
<tree
> *last_result_set
)
3336 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3338 tree t
= USE_FROM_PTR (use
);
3339 if (t
== state
->m_last_result
3340 || last_result_set
->contains (t
))
3347 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3348 with uses in additions and subtractions to form fused multiply-add
3349 operations. Returns true if successful and MUL_STMT should be removed.
3350 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3351 on MUL_COND, otherwise it is unconditional.
3353 If STATE indicates that we are deferring FMA transformation, that means
3354 that we do not produce FMAs for basic blocks which look like:
3357 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3359 accumulator_66 = _65 + accumulator_111;
3361 or its unrolled version, i.e. with several FMA candidates that feed result
3362 of one into the addend of another. Instead, we add them to a list in STATE
3363 and if we later discover an FMA candidate that is not part of such a chain,
3364 we go back and perform all deferred past candidates. */
3367 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3368 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
,
3369 tree mul_len
= NULL_TREE
, tree mul_bias
= NULL_TREE
)
3371 tree mul_result
= gimple_get_lhs (mul_stmt
);
3372 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3373 if the statement was left just for the side-effects. */
3376 tree type
= TREE_TYPE (mul_result
);
3377 gimple
*use_stmt
, *neguse_stmt
;
3378 use_operand_p use_p
;
3379 imm_use_iterator imm_iter
;
3381 if (FLOAT_TYPE_P (type
)
3382 && flag_fp_contract_mode
!= FP_CONTRACT_FAST
)
3385 /* We don't want to do bitfield reduction ops. */
3386 if (INTEGRAL_TYPE_P (type
)
3387 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
3390 /* If the target doesn't support it, don't generate it. We assume that
3391 if fma isn't available then fms, fnma or fnms are not either. */
3392 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3393 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
3396 /* If the multiplication has zero uses, it is kept around probably because
3397 of -fnon-call-exceptions. Don't optimize it away in that case,
3399 if (has_zero_uses (mul_result
))
3403 = (state
->m_deferring_p
3404 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type
)),
3405 param_avoid_fma_max_bits
));
3406 bool defer
= check_defer
;
3407 bool seen_negate_p
= false;
3409 /* There is no numerical difference between fused and unfused integer FMAs,
3410 and the assumption below that FMA is as cheap as addition is unlikely
3411 to be true, especially if the multiplication occurs multiple times on
3412 the same chain. E.g., for something like:
3414 (((a * b) + c) >> 1) + (a * b)
3416 we do not want to duplicate the a * b into two additions, not least
3417 because the result is not a natural FMA chain. */
3418 if (ANY_INTEGRAL_TYPE_P (type
)
3419 && !has_single_use (mul_result
))
3422 if (!dbg_cnt (form_fma
))
3425 /* Make sure that the multiplication statement becomes dead after
3426 the transformation, thus that all uses are transformed to FMAs.
3427 This means we assume that an FMA operation has the same cost
3429 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3431 tree result
= mul_result
;
3432 bool negate_p
= false;
3434 use_stmt
= USE_STMT (use_p
);
3436 if (is_gimple_debug (use_stmt
))
3439 /* For now restrict this operations to single basic blocks. In theory
3440 we would want to support sinking the multiplication in
3446 to form a fma in the then block and sink the multiplication to the
3448 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3451 /* A negate on the multiplication leads to FNMA. */
3452 if (is_gimple_assign (use_stmt
)
3453 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3458 /* If (due to earlier missed optimizations) we have two
3459 negates of the same value, treat them as equivalent
3460 to a single negate with multiple uses. */
3464 result
= gimple_assign_lhs (use_stmt
);
3466 /* Make sure the negate statement becomes dead with this
3467 single transformation. */
3468 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3469 &use_p
, &neguse_stmt
))
3472 /* Make sure the multiplication isn't also used on that stmt. */
3473 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3474 if (USE_FROM_PTR (usep
) == mul_result
)
3478 use_stmt
= neguse_stmt
;
3479 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3482 negate_p
= seen_negate_p
= true;
3485 tree cond
, else_value
, ops
[3], len
, bias
;
3487 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3488 &else_value
, &len
, &bias
))
3494 if (ops
[1] == result
)
3495 negate_p
= !negate_p
;
3500 /* FMA can only be formed from PLUS and MINUS. */
3506 /* For COND_LEN_* operations, we may have dummpy mask which is
3507 the all true mask. Such TREE type may be mul_cond != cond
3508 but we still consider they are equal. */
3509 if (mul_cond
&& cond
!= mul_cond
3510 && !(integer_truep (mul_cond
) && integer_truep (cond
)))
3513 if (else_value
== result
)
3516 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA
, type
,
3522 poly_int64 mul_value
, value
;
3523 if (poly_int_tree_p (mul_len
, &mul_value
)
3524 && poly_int_tree_p (len
, &value
)
3525 && maybe_ne (mul_value
, value
))
3527 else if (mul_len
!= len
)
3530 if (wi::to_widest (mul_bias
) != wi::to_widest (bias
))
3536 if (mul_cond
&& cond
!= mul_cond
)
3541 if (cond
== result
|| else_value
== result
)
3543 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
,
3549 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3550 we'll visit later, we might be able to get a more profitable
3552 OTOH, if we don't, a negate / fma pair has likely lower latency
3553 that a mult / subtract pair. */
3554 if (code
== MINUS_EXPR
3557 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3558 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3559 && TREE_CODE (ops
[1]) == SSA_NAME
3560 && has_single_use (ops
[1]))
3562 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3563 if (is_gimple_assign (stmt2
)
3564 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
3568 /* We can't handle a * b + a * b. */
3569 if (ops
[0] == ops
[1])
3571 /* If deferring, make sure we are not looking at an instruction that
3572 wouldn't have existed if we were not. */
3573 if (state
->m_deferring_p
3574 && (state
->m_mul_result_set
.contains (ops
[0])
3575 || state
->m_mul_result_set
.contains (ops
[1])))
3580 tree use_lhs
= gimple_get_lhs (use_stmt
);
3581 if (state
->m_last_result
)
3583 if (ops
[1] == state
->m_last_result
3584 || ops
[0] == state
->m_last_result
)
3591 gcc_checking_assert (!state
->m_initial_phi
);
3593 if (ops
[0] == result
)
3594 phi
= result_of_phi (ops
[1]);
3597 gcc_assert (ops
[1] == result
);
3598 phi
= result_of_phi (ops
[0]);
3603 state
->m_initial_phi
= phi
;
3610 state
->m_last_result
= use_lhs
;
3611 check_defer
= false;
3616 /* While it is possible to validate whether or not the exact form that
3617 we've recognized is available in the backend, the assumption is that
3618 if the deferring logic above did not trigger, the transformation is
3619 never a loss. For instance, suppose the target only has the plain FMA
3620 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3621 MUL+SUB for FMA+NEG, which is still two operations. Consider
3622 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3623 form the two NEGs are independent and could be run in parallel. */
3628 fma_transformation_info fti
;
3629 fti
.mul_stmt
= mul_stmt
;
3630 fti
.mul_result
= mul_result
;
3633 state
->m_candidates
.safe_push (fti
);
3634 state
->m_mul_result_set
.add (mul_result
);
3636 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3638 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3639 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3640 fprintf (dump_file
, "\n");
3647 if (state
->m_deferring_p
)
3648 cancel_fma_deferring (state
);
3649 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
3655 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3656 a check for non-zero like:
3657 _1 = x_4(D) * y_5(D);
3660 goto <bb 3>; [50.00%]
3662 goto <bb 4>; [50.00%]
3664 <bb 3> [local count: 536870913]:
3669 <bb 4> [local count: 1073741824]:
3670 # iftmp.0_3 = PHI <_10(3), 0(2)>
3671 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3672 optimize the x_4(D) != 0 condition to 1. */
3675 maybe_optimize_guarding_check (vec
<gimple
*> &mul_stmts
, gimple
*cond_stmt
,
3676 gimple
*div_stmt
, bool *cfg_changed
)
3678 basic_block bb
= gimple_bb (cond_stmt
);
3679 if (gimple_bb (div_stmt
) != bb
|| !single_pred_p (bb
))
3681 edge pred_edge
= single_pred_edge (bb
);
3682 basic_block pred_bb
= pred_edge
->src
;
3683 if (EDGE_COUNT (pred_bb
->succs
) != 2)
3685 edge other_edge
= EDGE_SUCC (pred_bb
, EDGE_SUCC (pred_bb
, 0) == pred_edge
);
3686 edge other_succ_edge
= NULL
;
3687 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3689 if (EDGE_COUNT (bb
->succs
) != 2)
3691 other_succ_edge
= EDGE_SUCC (bb
, 0);
3692 if (gimple_cond_code (cond_stmt
) == NE_EXPR
)
3694 if (other_succ_edge
->flags
& EDGE_TRUE_VALUE
)
3695 other_succ_edge
= EDGE_SUCC (bb
, 1);
3697 else if (other_succ_edge
->flags
& EDGE_FALSE_VALUE
)
3698 other_succ_edge
= EDGE_SUCC (bb
, 0);
3699 if (other_edge
->dest
!= other_succ_edge
->dest
)
3702 else if (!single_succ_p (bb
) || other_edge
->dest
!= single_succ (bb
))
3704 gcond
*zero_cond
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (pred_bb
));
3705 if (zero_cond
== NULL
3706 || (gimple_cond_code (zero_cond
)
3707 != ((pred_edge
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
))
3708 || !integer_zerop (gimple_cond_rhs (zero_cond
)))
3710 tree zero_cond_lhs
= gimple_cond_lhs (zero_cond
);
3711 if (TREE_CODE (zero_cond_lhs
) != SSA_NAME
)
3713 if (gimple_assign_rhs2 (div_stmt
) != zero_cond_lhs
)
3715 /* Allow the divisor to be result of a same precision cast
3716 from zero_cond_lhs. */
3717 tree rhs2
= gimple_assign_rhs2 (div_stmt
);
3718 if (TREE_CODE (rhs2
) != SSA_NAME
)
3720 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3721 if (!gimple_assign_cast_p (g
)
3722 || gimple_assign_rhs1 (g
) != gimple_cond_lhs (zero_cond
)
3723 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs
))
3724 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs
))
3725 != TYPE_PRECISION (TREE_TYPE (rhs2
))))
3728 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
3729 mul_stmts
.quick_push (div_stmt
);
3730 if (is_gimple_debug (gsi_stmt (gsi
)))
3731 gsi_next_nondebug (&gsi
);
3732 unsigned cast_count
= 0;
3733 while (gsi_stmt (gsi
) != cond_stmt
)
3735 /* If original mul_stmt has a single use, allow it in the same bb,
3736 we are looking then just at __builtin_mul_overflow_p.
3737 Though, in that case the original mul_stmt will be replaced
3738 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3742 FOR_EACH_VEC_ELT (mul_stmts
, i
, mul_stmt
)
3744 if (gsi_stmt (gsi
) == mul_stmt
)
3750 if (!ok
&& gimple_assign_cast_p (gsi_stmt (gsi
)) && ++cast_count
< 4)
3754 gsi_next_nondebug (&gsi
);
3756 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3758 basic_block succ_bb
= other_edge
->dest
;
3759 for (gphi_iterator gpi
= gsi_start_phis (succ_bb
); !gsi_end_p (gpi
);
3762 gphi
*phi
= gpi
.phi ();
3763 tree v1
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3764 tree v2
= gimple_phi_arg_def (phi
, other_succ_edge
->dest_idx
);
3765 if (!operand_equal_p (v1
, v2
, 0))
3771 tree lhs
= gimple_assign_lhs (cond_stmt
);
3772 if (!lhs
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
3774 gsi_next_nondebug (&gsi
);
3775 if (!gsi_end_p (gsi
))
3777 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3779 gimple
*cast_stmt
= gsi_stmt (gsi
);
3780 if (!gimple_assign_cast_p (cast_stmt
))
3782 tree new_lhs
= gimple_assign_lhs (cast_stmt
);
3783 gsi_next_nondebug (&gsi
);
3784 if (!gsi_end_p (gsi
)
3786 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs
))
3787 || TYPE_PRECISION (TREE_TYPE (new_lhs
)) <= 1)
3791 edge succ_edge
= single_succ_edge (bb
);
3792 basic_block succ_bb
= succ_edge
->dest
;
3793 gsi
= gsi_start_phis (succ_bb
);
3794 if (gsi_end_p (gsi
))
3796 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
3798 if (!gsi_end_p (gsi
))
3800 if (gimple_phi_arg_def (phi
, succ_edge
->dest_idx
) != lhs
)
3802 tree other_val
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3803 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3805 tree cond
= gimple_assign_rhs1 (cond_stmt
);
3806 if (TREE_CODE (cond
) == NE_EXPR
)
3808 if (!operand_equal_p (other_val
,
3809 gimple_assign_rhs3 (cond_stmt
), 0))
3812 else if (!operand_equal_p (other_val
,
3813 gimple_assign_rhs2 (cond_stmt
), 0))
3816 else if (gimple_assign_rhs_code (cond_stmt
) == NE_EXPR
)
3818 if (!integer_zerop (other_val
))
3821 else if (!integer_onep (other_val
))
3824 if (pred_edge
->flags
& EDGE_TRUE_VALUE
)
3825 gimple_cond_make_true (zero_cond
);
3827 gimple_cond_make_false (zero_cond
);
3828 update_stmt (zero_cond
);
3829 *cfg_changed
= true;
3832 /* Helper function for arith_overflow_check_p. Return true
3833 if VAL1 is equal to VAL2 cast to corresponding integral type
3834 with other signedness or vice versa. */
3837 arith_cast_equal_p (tree val1
, tree val2
)
3839 if (TREE_CODE (val1
) == INTEGER_CST
&& TREE_CODE (val2
) == INTEGER_CST
)
3840 return wi::eq_p (wi::to_wide (val1
), wi::to_wide (val2
));
3841 else if (TREE_CODE (val1
) != SSA_NAME
|| TREE_CODE (val2
) != SSA_NAME
)
3843 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1
))
3844 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1
)) == val2
)
3846 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2
))
3847 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2
)) == val1
)
3852 /* Helper function of match_arith_overflow. Return 1
3853 if USE_STMT is unsigned overflow check ovf != 0 for
3854 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3858 arith_overflow_check_p (gimple
*stmt
, gimple
*cast_stmt
, gimple
*&use_stmt
,
3859 tree maxval
, tree
*other
)
3861 enum tree_code ccode
= ERROR_MARK
;
3862 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3863 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3864 tree lhs
= gimple_assign_lhs (cast_stmt
? cast_stmt
: stmt
);
3865 tree rhs1
= gimple_assign_rhs1 (stmt
);
3866 tree rhs2
= gimple_assign_rhs2 (stmt
);
3867 tree multop
= NULL_TREE
, divlhs
= NULL_TREE
;
3868 gimple
*cur_use_stmt
= use_stmt
;
3870 if (code
== MULT_EXPR
)
3872 if (!is_gimple_assign (use_stmt
))
3874 if (gimple_assign_rhs_code (use_stmt
) != TRUNC_DIV_EXPR
)
3876 if (gimple_assign_rhs1 (use_stmt
) != lhs
)
3880 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs1
))
3882 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
))
3887 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
3889 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
, 0))
3893 if (stmt_ends_bb_p (use_stmt
))
3895 divlhs
= gimple_assign_lhs (use_stmt
);
3899 if (!single_imm_use (divlhs
, &use
, &cur_use_stmt
))
3901 if (cast_stmt
&& gimple_assign_cast_p (cur_use_stmt
))
3903 tree cast_lhs
= gimple_assign_lhs (cur_use_stmt
);
3904 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
3905 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
3906 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
3907 == TYPE_PRECISION (TREE_TYPE (divlhs
)))
3908 && single_imm_use (cast_lhs
, &use
, &cur_use_stmt
))
3917 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3919 ccode
= gimple_cond_code (cur_use_stmt
);
3920 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3921 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3923 else if (is_gimple_assign (cur_use_stmt
))
3925 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3927 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3928 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3929 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3931 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3933 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3934 if (COMPARISON_CLASS_P (cond
))
3936 ccode
= TREE_CODE (cond
);
3937 crhs1
= TREE_OPERAND (cond
, 0);
3938 crhs2
= TREE_OPERAND (cond
, 1);
3949 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
3958 /* r = a + b; r > maxval or r <= maxval */
3960 && TREE_CODE (crhs2
) == INTEGER_CST
3961 && tree_int_cst_equal (crhs2
, maxval
))
3962 return ccode
== GT_EXPR
? 1 : -1;
3965 /* r = a - b; r > a or r <= a
3966 r = a + b; a > r or a <= r or b > r or b <= r. */
3967 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3968 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3970 return ccode
== GT_EXPR
? 1 : -1;
3971 /* r = ~a; b > r or b <= r. */
3972 if (code
== BIT_NOT_EXPR
&& crhs2
== lhs
)
3976 return ccode
== GT_EXPR
? 1 : -1;
3983 /* r = a - b; a < r or a >= r
3984 r = a + b; r < a or r >= a or r < b or r >= b. */
3985 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3986 || (code
== PLUS_EXPR
&& crhs1
== lhs
3987 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3988 return ccode
== LT_EXPR
? 1 : -1;
3989 /* r = ~a; r < b or r >= b. */
3990 if (code
== BIT_NOT_EXPR
&& crhs1
== lhs
)
3994 return ccode
== LT_EXPR
? 1 : -1;
3999 /* r = a * b; _1 = r / a; _1 == b
4000 r = a * b; _1 = r / b; _1 == a
4001 r = a * b; _1 = r / a; _1 != b
4002 r = a * b; _1 = r / b; _1 != a. */
4003 if (code
== MULT_EXPR
)
4007 if ((crhs1
== divlhs
&& arith_cast_equal_p (crhs2
, multop
))
4008 || (crhs2
== divlhs
&& arith_cast_equal_p (crhs1
, multop
)))
4010 use_stmt
= cur_use_stmt
;
4011 return ccode
== NE_EXPR
? 1 : -1;
4014 else if ((crhs1
== divlhs
&& operand_equal_p (crhs2
, multop
, 0))
4015 || (crhs2
== divlhs
&& crhs1
== multop
))
4017 use_stmt
= cur_use_stmt
;
4018 return ccode
== NE_EXPR
? 1 : -1;
4028 /* Recognize for unsigned x
4031 where there are other uses of x and replace it with
4032 _7 = .SUB_OVERFLOW (y, z);
4033 x = REALPART_EXPR <_7>;
4034 _8 = IMAGPART_EXPR <_7>;
4036 and similarly for addition.
4043 where y and z have unsigned types with maximum max
4044 and there are other uses of x and all of those cast x
4045 back to that unsigned type and again replace it with
4046 _7 = .ADD_OVERFLOW (y, z);
4047 _9 = REALPART_EXPR <_7>;
4048 _8 = IMAGPART_EXPR <_7>;
4050 and replace (utype) x with _9.
4056 _7 = .ADD_OVERFLOW (y, z);
4057 _8 = IMAGPART_EXPR <_7>;
4063 goto <bb 3>; [50.00%]
4065 goto <bb 4>; [50.00%]
4067 <bb 3> [local count: 536870913]:
4072 <bb 4> [local count: 1073741824]:
4073 # iftmp.0_3 = PHI <_10(3), 0(2)>
4075 _7 = .MUL_OVERFLOW (x, y);
4076 z = IMAGPART_EXPR <_7>;
4077 _8 = IMAGPART_EXPR <_7>;
4079 iftmp.0_3 = (int) _9; */
4082 match_arith_overflow (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
4083 enum tree_code code
, bool *cfg_changed
)
4085 tree lhs
= gimple_assign_lhs (stmt
);
4086 tree type
= TREE_TYPE (lhs
);
4087 use_operand_p use_p
;
4088 imm_use_iterator iter
;
4089 bool use_seen
= false;
4090 bool ovf_use_seen
= false;
4092 gimple
*add_stmt
= NULL
;
4093 bool add_first
= false;
4094 gimple
*cond_stmt
= NULL
;
4095 gimple
*cast_stmt
= NULL
;
4096 tree cast_lhs
= NULL_TREE
;
4098 gcc_checking_assert (code
== PLUS_EXPR
4099 || code
== MINUS_EXPR
4100 || code
== MULT_EXPR
4101 || code
== BIT_NOT_EXPR
);
4102 if (!INTEGRAL_TYPE_P (type
)
4103 || !TYPE_UNSIGNED (type
)
4104 || has_zero_uses (lhs
)
4105 || (code
!= PLUS_EXPR
4106 && code
!= MULT_EXPR
4107 && optab_handler (code
== MINUS_EXPR
? usubv4_optab
: uaddv4_optab
,
4108 TYPE_MODE (type
)) == CODE_FOR_nothing
))
4111 tree rhs1
= gimple_assign_rhs1 (stmt
);
4112 tree rhs2
= gimple_assign_rhs2 (stmt
);
4113 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4115 use_stmt
= USE_STMT (use_p
);
4116 if (is_gimple_debug (use_stmt
))
4119 tree other
= NULL_TREE
;
4120 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, NULL_TREE
, &other
))
4122 if (code
== BIT_NOT_EXPR
)
4125 if (TREE_CODE (other
) != SSA_NAME
)
4131 cond_stmt
= use_stmt
;
4133 ovf_use_seen
= true;
4138 if (code
== MULT_EXPR
4139 && cast_stmt
== NULL
4140 && gimple_assign_cast_p (use_stmt
))
4142 cast_lhs
= gimple_assign_lhs (use_stmt
);
4143 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
4144 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
4145 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
4146 == TYPE_PRECISION (TREE_TYPE (lhs
))))
4147 cast_stmt
= use_stmt
;
4149 cast_lhs
= NULL_TREE
;
4152 if (ovf_use_seen
&& use_seen
)
4157 && code
== MULT_EXPR
4160 if (TREE_CODE (rhs1
) != SSA_NAME
4161 || (TREE_CODE (rhs2
) != SSA_NAME
&& TREE_CODE (rhs2
) != INTEGER_CST
))
4163 FOR_EACH_IMM_USE_FAST (use_p
, iter
, cast_lhs
)
4165 use_stmt
= USE_STMT (use_p
);
4166 if (is_gimple_debug (use_stmt
))
4169 if (arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
4171 ovf_use_seen
= true;
4177 cast_lhs
= NULL_TREE
;
4180 tree maxval
= NULL_TREE
;
4182 || (code
!= MULT_EXPR
&& (code
== BIT_NOT_EXPR
? use_seen
: !use_seen
))
4183 || (code
== PLUS_EXPR
4184 && optab_handler (uaddv4_optab
,
4185 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4186 || (code
== MULT_EXPR
4187 && optab_handler (cast_stmt
? mulv4_optab
: umulv4_optab
,
4188 TYPE_MODE (type
)) == CODE_FOR_nothing
4191 || !can_mult_highpart_p (TYPE_MODE (type
), true))))
4193 if (code
!= PLUS_EXPR
)
4195 if (TREE_CODE (rhs1
) != SSA_NAME
4196 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1
)))
4198 rhs1
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1
));
4199 tree type1
= TREE_TYPE (rhs1
);
4200 if (!INTEGRAL_TYPE_P (type1
)
4201 || !TYPE_UNSIGNED (type1
)
4202 || TYPE_PRECISION (type1
) >= TYPE_PRECISION (type
)
4203 || (TYPE_PRECISION (type1
)
4204 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1
))))
4206 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4208 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2
),
4209 TYPE_PRECISION (type1
),
4212 rhs2
= fold_convert (type1
, rhs2
);
4216 if (TREE_CODE (rhs2
) != SSA_NAME
4217 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2
)))
4219 rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2
));
4220 tree type2
= TREE_TYPE (rhs2
);
4221 if (!INTEGRAL_TYPE_P (type2
)
4222 || !TYPE_UNSIGNED (type2
)
4223 || TYPE_PRECISION (type2
) >= TYPE_PRECISION (type
)
4224 || (TYPE_PRECISION (type2
)
4225 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2
))))
4228 if (TYPE_PRECISION (type1
) >= TYPE_PRECISION (TREE_TYPE (rhs2
)))
4231 type
= TREE_TYPE (rhs2
);
4233 if (TREE_CODE (type
) != INTEGER_TYPE
4234 || optab_handler (uaddv4_optab
,
4235 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4238 maxval
= wide_int_to_tree (type
, wi::max_value (TYPE_PRECISION (type
),
4240 ovf_use_seen
= false;
4242 basic_block use_bb
= NULL
;
4243 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4245 use_stmt
= USE_STMT (use_p
);
4246 if (is_gimple_debug (use_stmt
))
4249 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, maxval
, NULL
))
4251 ovf_use_seen
= true;
4252 use_bb
= gimple_bb (use_stmt
);
4256 if (!gimple_assign_cast_p (use_stmt
)
4257 || gimple_assign_rhs_code (use_stmt
) == VIEW_CONVERT_EXPR
)
4259 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4260 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs
))
4261 || (TYPE_PRECISION (TREE_TYPE (use_lhs
))
4262 > TYPE_PRECISION (type
)))
4269 if (!useless_type_conversion_p (type
, TREE_TYPE (rhs1
)))
4273 tree new_rhs1
= make_ssa_name (type
);
4274 gimple
*g
= gimple_build_assign (new_rhs1
, NOP_EXPR
, rhs1
);
4275 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4278 else if (!useless_type_conversion_p (type
, TREE_TYPE (rhs2
)))
4282 tree new_rhs2
= make_ssa_name (type
);
4283 gimple
*g
= gimple_build_assign (new_rhs2
, NOP_EXPR
, rhs2
);
4284 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4289 /* If there are no uses of the wider addition, check if
4290 forwprop has not created a narrower addition.
4291 Require it to be in the same bb as the overflow check. */
4292 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs1
)
4294 use_stmt
= USE_STMT (use_p
);
4295 if (is_gimple_debug (use_stmt
))
4298 if (use_stmt
== stmt
)
4301 if (!is_gimple_assign (use_stmt
)
4302 || gimple_bb (use_stmt
) != use_bb
4303 || gimple_assign_rhs_code (use_stmt
) != PLUS_EXPR
)
4306 if (gimple_assign_rhs1 (use_stmt
) == rhs1
)
4308 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt
),
4312 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
4314 if (gimple_assign_rhs1 (use_stmt
) != rhs2
)
4320 add_stmt
= use_stmt
;
4323 if (add_stmt
== NULL
)
4326 /* If stmt and add_stmt are in the same bb, we need to find out
4327 which one is earlier. If they are in different bbs, we've
4328 checked add_stmt is in the same bb as one of the uses of the
4329 stmt lhs, so stmt needs to dominate add_stmt too. */
4330 if (gimple_bb (stmt
) == gimple_bb (add_stmt
))
4332 gimple_stmt_iterator gsif
= *gsi
;
4333 gimple_stmt_iterator gsib
= *gsi
;
4335 /* Search both forward and backward from stmt and have a small
4337 for (i
= 0; i
< 128; i
++)
4339 if (!gsi_end_p (gsib
))
4341 gsi_prev_nondebug (&gsib
);
4342 if (gsi_stmt (gsib
) == add_stmt
)
4348 else if (gsi_end_p (gsif
))
4350 if (!gsi_end_p (gsif
))
4352 gsi_next_nondebug (&gsif
);
4353 if (gsi_stmt (gsif
) == add_stmt
)
4360 *gsi
= gsi_for_stmt (add_stmt
);
4365 if (code
== BIT_NOT_EXPR
)
4366 *gsi
= gsi_for_stmt (cond_stmt
);
4368 auto_vec
<gimple
*, 8> mul_stmts
;
4369 if (code
== MULT_EXPR
&& cast_stmt
)
4371 type
= TREE_TYPE (cast_lhs
);
4372 gimple
*g
= SSA_NAME_DEF_STMT (rhs1
);
4373 if (gimple_assign_cast_p (g
)
4374 && useless_type_conversion_p (type
,
4375 TREE_TYPE (gimple_assign_rhs1 (g
)))
4376 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4377 rhs1
= gimple_assign_rhs1 (g
);
4380 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs1
);
4381 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4382 rhs1
= gimple_assign_lhs (g
);
4383 mul_stmts
.quick_push (g
);
4385 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4386 rhs2
= fold_convert (type
, rhs2
);
4389 g
= SSA_NAME_DEF_STMT (rhs2
);
4390 if (gimple_assign_cast_p (g
)
4391 && useless_type_conversion_p (type
,
4392 TREE_TYPE (gimple_assign_rhs1 (g
)))
4393 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4394 rhs2
= gimple_assign_rhs1 (g
);
4397 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs2
);
4398 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4399 rhs2
= gimple_assign_lhs (g
);
4400 mul_stmts
.quick_push (g
);
4404 tree ctype
= build_complex_type (type
);
4405 gcall
*g
= gimple_build_call_internal (code
== MULT_EXPR
4407 : code
!= MINUS_EXPR
4408 ? IFN_ADD_OVERFLOW
: IFN_SUB_OVERFLOW
,
4410 tree ctmp
= make_ssa_name (ctype
);
4411 gimple_call_set_lhs (g
, ctmp
);
4412 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4413 tree new_lhs
= (maxval
|| cast_stmt
) ? make_ssa_name (type
) : lhs
;
4415 if (code
!= BIT_NOT_EXPR
)
4417 g2
= gimple_build_assign (new_lhs
, REALPART_EXPR
,
4418 build1 (REALPART_EXPR
, type
, ctmp
));
4419 if (maxval
|| cast_stmt
)
4421 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4423 *gsi
= gsi_for_stmt (stmt
);
4426 gsi_replace (gsi
, g2
, true);
4427 if (code
== MULT_EXPR
)
4429 mul_stmts
.quick_push (g
);
4430 mul_stmts
.quick_push (g2
);
4433 g2
= gimple_build_assign (lhs
, NOP_EXPR
, new_lhs
);
4434 gsi_replace (gsi
, g2
, true);
4435 mul_stmts
.quick_push (g2
);
4439 tree ovf
= make_ssa_name (type
);
4440 g2
= gimple_build_assign (ovf
, IMAGPART_EXPR
,
4441 build1 (IMAGPART_EXPR
, type
, ctmp
));
4442 if (code
!= BIT_NOT_EXPR
)
4443 gsi_insert_after (gsi
, g2
, GSI_NEW_STMT
);
4445 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4446 if (code
== MULT_EXPR
)
4447 mul_stmts
.quick_push (g2
);
4449 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, cast_lhs
? cast_lhs
: lhs
)
4451 if (is_gimple_debug (use_stmt
))
4454 gimple
*orig_use_stmt
= use_stmt
;
4455 int ovf_use
= arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
4459 gcc_assert (code
!= BIT_NOT_EXPR
);
4462 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4463 gimple_assign_set_rhs1 (use_stmt
, new_lhs
);
4464 if (useless_type_conversion_p (TREE_TYPE (use_lhs
),
4465 TREE_TYPE (new_lhs
)))
4466 gimple_assign_set_rhs_code (use_stmt
, SSA_NAME
);
4467 update_stmt (use_stmt
);
4471 if (gimple_code (use_stmt
) == GIMPLE_COND
)
4473 gcond
*cond_stmt
= as_a
<gcond
*> (use_stmt
);
4474 gimple_cond_set_lhs (cond_stmt
, ovf
);
4475 gimple_cond_set_rhs (cond_stmt
, build_int_cst (type
, 0));
4476 gimple_cond_set_code (cond_stmt
, ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
4480 gcc_checking_assert (is_gimple_assign (use_stmt
));
4481 if (gimple_assign_rhs_class (use_stmt
) == GIMPLE_BINARY_RHS
)
4483 gimple_assign_set_rhs1 (use_stmt
, ovf
);
4484 gimple_assign_set_rhs2 (use_stmt
, build_int_cst (type
, 0));
4485 gimple_assign_set_rhs_code (use_stmt
,
4486 ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
4490 gcc_checking_assert (gimple_assign_rhs_code (use_stmt
)
4492 tree cond
= build2 (ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
,
4493 boolean_type_node
, ovf
,
4494 build_int_cst (type
, 0));
4495 gimple_assign_set_rhs1 (use_stmt
, cond
);
4498 update_stmt (use_stmt
);
4499 if (code
== MULT_EXPR
&& use_stmt
!= orig_use_stmt
)
4501 gimple_stmt_iterator gsi2
= gsi_for_stmt (orig_use_stmt
);
4502 maybe_optimize_guarding_check (mul_stmts
, use_stmt
, orig_use_stmt
,
4506 if (single_imm_use (gimple_assign_lhs (orig_use_stmt
), &use
,
4508 && gimple_assign_cast_p (cast_stmt
))
4510 gimple_stmt_iterator gsi3
= gsi_for_stmt (cast_stmt
);
4511 gsi_remove (&gsi3
, true);
4512 release_ssa_name (gimple_assign_lhs (cast_stmt
));
4514 gsi_remove (&gsi2
, true);
4515 release_ssa_name (gimple_assign_lhs (orig_use_stmt
));
4520 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
4521 gsi_remove (&gsi2
, true);
4524 gimple
*g
= gimple_build_assign (gimple_assign_lhs (add_stmt
),
4526 gsi2
= gsi_for_stmt (add_stmt
);
4527 gsi_replace (&gsi2
, g
, true);
4530 else if (code
== BIT_NOT_EXPR
)
4532 *gsi
= gsi_for_stmt (stmt
);
4533 gsi_remove (gsi
, true);
4534 release_ssa_name (lhs
);
4540 /* Helper of match_uaddc_usubc. Look through an integral cast
4541 which should preserve [0, 1] range value (unless source has
4542 1-bit signed type) and the cast has single use. */
4545 uaddc_cast (gimple
*g
)
4547 if (!gimple_assign_cast_p (g
))
4549 tree op
= gimple_assign_rhs1 (g
);
4550 if (TREE_CODE (op
) == SSA_NAME
4551 && INTEGRAL_TYPE_P (TREE_TYPE (op
))
4552 && (TYPE_PRECISION (TREE_TYPE (op
)) > 1
4553 || TYPE_UNSIGNED (TREE_TYPE (op
)))
4554 && has_single_use (gimple_assign_lhs (g
)))
4555 return SSA_NAME_DEF_STMT (op
);
4559 /* Helper of match_uaddc_usubc. Look through a NE_EXPR
4560 comparison with 0 which also preserves [0, 1] value range. */
4563 uaddc_ne0 (gimple
*g
)
4565 if (is_gimple_assign (g
)
4566 && gimple_assign_rhs_code (g
) == NE_EXPR
4567 && integer_zerop (gimple_assign_rhs2 (g
))
4568 && TREE_CODE (gimple_assign_rhs1 (g
)) == SSA_NAME
4569 && has_single_use (gimple_assign_lhs (g
)))
4570 return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g
));
4574 /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
4578 uaddc_is_cplxpart (gimple
*g
, tree_code part
)
4580 return (is_gimple_assign (g
)
4581 && gimple_assign_rhs_code (g
) == part
4582 && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g
), 0)) == SSA_NAME
);
4585 /* Try to match e.g.
4586 _29 = .ADD_OVERFLOW (_3, _4);
4587 _30 = REALPART_EXPR <_29>;
4588 _31 = IMAGPART_EXPR <_29>;
4589 _32 = .ADD_OVERFLOW (_30, _38);
4590 _33 = REALPART_EXPR <_32>;
4591 _34 = IMAGPART_EXPR <_32>;
4594 _36 = .UADDC (_3, _4, _38);
4595 _33 = REALPART_EXPR <_36>;
4596 _35 = IMAGPART_EXPR <_36>;
4598 _22 = .SUB_OVERFLOW (_6, _5);
4599 _23 = REALPART_EXPR <_22>;
4600 _24 = IMAGPART_EXPR <_22>;
4601 _25 = .SUB_OVERFLOW (_23, _37);
4602 _26 = REALPART_EXPR <_25>;
4603 _27 = IMAGPART_EXPR <_25>;
4606 _29 = .USUBC (_6, _5, _37);
4607 _26 = REALPART_EXPR <_29>;
4608 _288 = IMAGPART_EXPR <_29>;
4609 provided _38 or _37 above have [0, 1] range
4610 and _3, _4 and _30 or _6, _5 and _23 are unsigned
4611 integral types with the same precision. Whether + or | or ^ is
4612 used on the IMAGPART_EXPR results doesn't matter, with one of
4613 added or subtracted operands in [0, 1] range at most one
4614 .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow. */
4617 match_uaddc_usubc (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree_code code
)
4620 rhs
[0] = gimple_assign_rhs1 (stmt
);
4621 rhs
[1] = gimple_assign_rhs2 (stmt
);
4624 tree type
= TREE_TYPE (rhs
[0]);
4625 if (!INTEGRAL_TYPE_P (type
) || !TYPE_UNSIGNED (type
))
4628 auto_vec
<gimple
*, 2> temp_stmts
;
4629 if (code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
)
4631 /* If overflow flag is ignored on the MSB limb, we can end up with
4632 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
4633 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
4634 thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
4635 the limb below the MSB, but also create another .UADDC/.USUBC call
4638 First look through assignments with the same rhs code as CODE,
4639 with the exception that subtraction of a constant is canonicalized
4640 into addition of its negation. rhs[0] will be minuend for
4641 subtractions and one of addends for addition, all other assigned
4642 rhs[i] operands will be subtrahends or other addends. */
4643 while (TREE_CODE (rhs
[0]) == SSA_NAME
&& !rhs
[3])
4645 gimple
*g
= SSA_NAME_DEF_STMT (rhs
[0]);
4646 if (has_single_use (rhs
[0])
4647 && is_gimple_assign (g
)
4648 && (gimple_assign_rhs_code (g
) == code
4649 || (code
== MINUS_EXPR
4650 && gimple_assign_rhs_code (g
) == PLUS_EXPR
4651 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
)))
4653 tree r2
= gimple_assign_rhs2 (g
);
4654 if (gimple_assign_rhs_code (g
) != code
)
4656 r2
= const_unop (NEGATE_EXPR
, TREE_TYPE (r2
), r2
);
4660 rhs
[0] = gimple_assign_rhs1 (g
);
4661 tree
&r
= rhs
[2] ? rhs
[3] : rhs
[2];
4663 temp_stmts
.quick_push (g
);
4668 for (int i
= 1; i
<= 2; ++i
)
4669 while (rhs
[i
] && TREE_CODE (rhs
[i
]) == SSA_NAME
&& !rhs
[3])
4671 gimple
*g
= SSA_NAME_DEF_STMT (rhs
[i
]);
4672 if (has_single_use (rhs
[i
])
4673 && is_gimple_assign (g
)
4674 && gimple_assign_rhs_code (g
) == PLUS_EXPR
)
4676 rhs
[i
] = gimple_assign_rhs1 (g
);
4678 rhs
[3] = gimple_assign_rhs2 (g
);
4680 rhs
[2] = gimple_assign_rhs2 (g
);
4681 temp_stmts
.quick_push (g
);
4686 /* If there are just 3 addends or one minuend and two subtrahends,
4687 check for UADDC or USUBC being pattern recognized earlier.
4688 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
4689 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
4691 if (rhs
[2] && !rhs
[3])
4693 for (int i
= (code
== MINUS_EXPR
? 1 : 0); i
< 3; ++i
)
4694 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4696 gimple
*im
= uaddc_cast (SSA_NAME_DEF_STMT (rhs
[i
]));
4697 im
= uaddc_ne0 (im
);
4698 if (uaddc_is_cplxpart (im
, IMAGPART_EXPR
))
4700 /* We found one of the 3 addends or 2 subtrahends to be
4701 __imag__ of something, verify it is .UADDC/.USUBC. */
4702 tree rhs1
= gimple_assign_rhs1 (im
);
4703 gimple
*ovf
= SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1
, 0));
4704 tree ovf_lhs
= NULL_TREE
;
4705 tree ovf_arg1
= NULL_TREE
, ovf_arg2
= NULL_TREE
;
4706 if (gimple_call_internal_p (ovf
, code
== PLUS_EXPR
4708 : IFN_SUB_OVERFLOW
))
4710 /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
4711 This is for the case of 2 chained .UADDC/.USUBC,
4712 where the first one uses 0 carry-in and the second
4713 one ignores the carry-out.
4715 _16 = .ADD_OVERFLOW (_1, _2);
4716 _17 = REALPART_EXPR <_16>;
4717 _18 = IMAGPART_EXPR <_16>;
4720 where the first 3 statements come from the lower
4721 limb addition and the last 2 from the higher limb
4722 which ignores carry-out. */
4723 ovf_lhs
= gimple_call_lhs (ovf
);
4724 tree ovf_lhs_type
= TREE_TYPE (TREE_TYPE (ovf_lhs
));
4725 ovf_arg1
= gimple_call_arg (ovf
, 0);
4726 ovf_arg2
= gimple_call_arg (ovf
, 1);
4727 /* In that case we need to punt if the types don't
4729 if (!types_compatible_p (type
, ovf_lhs_type
)
4730 || !types_compatible_p (type
, TREE_TYPE (ovf_arg1
))
4731 || !types_compatible_p (type
,
4732 TREE_TYPE (ovf_arg2
)))
4733 ovf_lhs
= NULL_TREE
;
4736 for (int i
= (code
== PLUS_EXPR
? 1 : 0);
4739 tree r
= gimple_call_arg (ovf
, i
);
4740 if (TREE_CODE (r
) != SSA_NAME
)
4742 if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r
),
4745 /* Punt if one of the args which isn't
4746 subtracted isn't __real__; that could
4747 then prevent better match later.
4749 _3 = .ADD_OVERFLOW (_1, _2);
4750 _4 = REALPART_EXPR <_3>;
4751 _5 = IMAGPART_EXPR <_3>;
4752 _7 = .ADD_OVERFLOW (_4, _6);
4753 _8 = REALPART_EXPR <_7>;
4754 _9 = IMAGPART_EXPR <_7>;
4758 We want to match this when called on
4759 the last stmt as a pair of .UADDC calls,
4760 but without this check we could turn
4761 that prematurely on _13 = _12 + _9;
4762 stmt into .UADDC with 0 carry-in just
4763 on the second .ADD_OVERFLOW call and
4764 another replacing the _12 and _13
4766 ovf_lhs
= NULL_TREE
;
4773 use_operand_p use_p
;
4774 imm_use_iterator iter
;
4775 tree re_lhs
= NULL_TREE
;
4776 FOR_EACH_IMM_USE_FAST (use_p
, iter
, ovf_lhs
)
4778 gimple
*use_stmt
= USE_STMT (use_p
);
4779 if (is_gimple_debug (use_stmt
))
4783 if (!uaddc_is_cplxpart (use_stmt
,
4786 ovf_lhs
= NULL_TREE
;
4789 re_lhs
= gimple_assign_lhs (use_stmt
);
4791 if (ovf_lhs
&& re_lhs
)
4793 FOR_EACH_IMM_USE_FAST (use_p
, iter
, re_lhs
)
4795 gimple
*use_stmt
= USE_STMT (use_p
);
4796 if (is_gimple_debug (use_stmt
))
4799 = gimple_call_internal_fn (ovf
);
4800 /* Punt if the __real__ of lhs is used
4801 in the same .*_OVERFLOW call.
4803 _3 = .ADD_OVERFLOW (_1, _2);
4804 _4 = REALPART_EXPR <_3>;
4805 _5 = IMAGPART_EXPR <_3>;
4806 _7 = .ADD_OVERFLOW (_4, _6);
4807 _8 = REALPART_EXPR <_7>;
4808 _9 = IMAGPART_EXPR <_7>;
4812 We want to match this when called on
4813 the last stmt as a pair of .UADDC calls,
4814 but without this check we could turn
4815 that prematurely on _13 = _12 + _5;
4816 stmt into .UADDC with 0 carry-in just
4817 on the first .ADD_OVERFLOW call and
4818 another replacing the _12 and _13
4820 if (gimple_call_internal_p (use_stmt
, ifn
))
4822 ovf_lhs
= NULL_TREE
;
4830 || gimple_call_internal_p (ovf
,
4832 ? IFN_UADDC
: IFN_USUBC
))
4833 && (optab_handler (code
== PLUS_EXPR
4834 ? uaddc5_optab
: usubc5_optab
,
4836 != CODE_FOR_nothing
))
4838 /* And in that case build another .UADDC/.USUBC
4839 call for the most significand limb addition.
4840 Overflow bit is ignored here. */
4842 std::swap (rhs
[i
], rhs
[2]);
4844 = gimple_build_call_internal (code
== PLUS_EXPR
4849 tree nlhs
= make_ssa_name (build_complex_type (type
));
4850 gimple_call_set_lhs (g
, nlhs
);
4851 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4852 tree ilhs
= gimple_assign_lhs (stmt
);
4853 g
= gimple_build_assign (ilhs
, REALPART_EXPR
,
4854 build1 (REALPART_EXPR
,
4857 gsi_replace (gsi
, g
, true);
4858 /* And if it is initialized from result of __imag__
4859 of .{ADD,SUB}_OVERFLOW call, replace that
4860 call with .U{ADD,SUB}C call with the same arguments,
4861 just 0 added as third argument. This isn't strictly
4862 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
4863 produce the same result, but may result in better
4864 generated code on some targets where the backend can
4865 better prepare in how the result will be used. */
4868 tree zero
= build_zero_cst (type
);
4869 g
= gimple_build_call_internal (code
== PLUS_EXPR
4874 gimple_call_set_lhs (g
, ovf_lhs
);
4875 gimple_stmt_iterator gsi2
= gsi_for_stmt (ovf
);
4876 gsi_replace (&gsi2
, g
, true);
4884 if (code
== MINUS_EXPR
&& !rhs
[2])
4886 if (code
== MINUS_EXPR
)
4887 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
4888 So, for MINUS_EXPR swap the single added rhs operand (others are
4889 subtracted) to rhs[3]. */
4890 std::swap (rhs
[0], rhs
[3]);
4892 /* Walk from both operands of STMT (for +/- even sometimes from
4893 all the 4 addends or 3 subtrahends), see through casts and != 0
4894 statements which would preserve [0, 1] range of values and
4895 check which is initialized from __imag__. */
4896 gimple
*im1
= NULL
, *im2
= NULL
;
4897 for (int i
= 0; i
< (code
== MINUS_EXPR
? 3 : 4); i
++)
4898 if (rhs
[i
] && TREE_CODE (rhs
[i
]) == SSA_NAME
)
4900 gimple
*im
= uaddc_cast (SSA_NAME_DEF_STMT (rhs
[i
]));
4901 im
= uaddc_ne0 (im
);
4902 if (uaddc_is_cplxpart (im
, IMAGPART_EXPR
))
4908 std::swap (rhs
[0], rhs
[i
]);
4914 std::swap (rhs
[1], rhs
[i
]);
4919 /* If we don't find at least two, punt. */
4922 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
4923 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
4924 uaddc5/usubc5 named pattern for the corresponding mode. */
4926 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1
), 0));
4928 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2
), 0));
4930 if (!is_gimple_call (ovf1
)
4931 || !gimple_call_internal_p (ovf1
)
4932 || ((ifn
= gimple_call_internal_fn (ovf1
)) != IFN_ADD_OVERFLOW
4933 && ifn
!= IFN_SUB_OVERFLOW
)
4934 || !gimple_call_internal_p (ovf2
, ifn
)
4935 || optab_handler (ifn
== IFN_ADD_OVERFLOW
? uaddc5_optab
: usubc5_optab
,
4936 TYPE_MODE (type
)) == CODE_FOR_nothing
4938 && optab_handler (code
== PLUS_EXPR
? uaddc5_optab
: usubc5_optab
,
4939 TYPE_MODE (type
)) == CODE_FOR_nothing
))
4941 tree arg1
, arg2
, arg3
= NULL_TREE
;
4942 gimple
*re1
= NULL
, *re2
= NULL
;
4943 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
4944 should be initialized from __real__ of the other of the two calls.
4945 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
4947 for (int i
= (ifn
== IFN_ADD_OVERFLOW
? 1 : 0); i
>= 0; --i
)
4948 for (gimple
*ovf
= ovf1
; ovf
; ovf
= (ovf
== ovf1
? ovf2
: NULL
))
4950 tree arg
= gimple_call_arg (ovf
, i
);
4951 if (TREE_CODE (arg
) != SSA_NAME
)
4953 re1
= SSA_NAME_DEF_STMT (arg
);
4954 if (uaddc_is_cplxpart (re1
, REALPART_EXPR
)
4955 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1
), 0))
4956 == (ovf
== ovf1
? ovf2
: ovf1
)))
4960 /* Make sure ovf2 is the .*_OVERFLOW call with argument
4961 initialized from __real__ of ovf1. */
4962 std::swap (rhs
[0], rhs
[1]);
4963 std::swap (im1
, im2
);
4964 std::swap (ovf1
, ovf2
);
4966 arg3
= gimple_call_arg (ovf
, 1 - i
);
4973 arg1
= gimple_call_arg (ovf1
, 0);
4974 arg2
= gimple_call_arg (ovf1
, 1);
4975 if (!types_compatible_p (type
, TREE_TYPE (arg1
)))
4977 int kind
[2] = { 0, 0 };
4978 tree arg_im
[2] = { NULL_TREE
, NULL_TREE
};
4979 /* At least one of arg2 and arg3 should have type compatible
4980 with arg1/rhs[0], and the other one should have value in [0, 1]
4981 range. If both are in [0, 1] range and type compatible with
4982 arg1/rhs[0], try harder to find after looking through casts,
4983 != 0 comparisons which one is initialized to __imag__ of
4984 .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
4985 for (int i
= 0; i
< 2; ++i
)
4987 tree arg
= i
== 0 ? arg2
: arg3
;
4988 if (types_compatible_p (type
, TREE_TYPE (arg
)))
4990 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg
))
4991 || (TYPE_PRECISION (TREE_TYPE (arg
)) == 1
4992 && !TYPE_UNSIGNED (TREE_TYPE (arg
))))
4994 if (tree_zero_one_valued_p (arg
))
4996 if (TREE_CODE (arg
) == SSA_NAME
)
4998 gimple
*g
= SSA_NAME_DEF_STMT (arg
);
4999 if (gimple_assign_cast_p (g
))
5001 tree op
= gimple_assign_rhs1 (g
);
5002 if (TREE_CODE (op
) == SSA_NAME
5003 && INTEGRAL_TYPE_P (TREE_TYPE (op
)))
5004 g
= SSA_NAME_DEF_STMT (op
);
5007 if (!uaddc_is_cplxpart (g
, IMAGPART_EXPR
))
5009 arg_im
[i
] = gimple_assign_lhs (g
);
5010 g
= SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g
), 0));
5011 if (!is_gimple_call (g
) || !gimple_call_internal_p (g
))
5013 switch (gimple_call_internal_fn (g
))
5015 case IFN_ADD_OVERFLOW
:
5016 case IFN_SUB_OVERFLOW
:
5026 /* Make arg2 the one with compatible type and arg3 the one
5027 with [0, 1] range. If both is true for both operands,
5028 prefer as arg3 result of __imag__ of some ifn. */
5029 if ((kind
[0] & 1) == 0 || ((kind
[1] & 1) != 0 && kind
[0] > kind
[1]))
5031 std::swap (arg2
, arg3
);
5032 std::swap (kind
[0], kind
[1]);
5033 std::swap (arg_im
[0], arg_im
[1]);
5035 if ((kind
[0] & 1) == 0 || (kind
[1] & 6) == 0)
5037 if (!has_single_use (gimple_assign_lhs (im1
))
5038 || !has_single_use (gimple_assign_lhs (im2
))
5039 || !has_single_use (gimple_assign_lhs (re1
))
5040 || num_imm_uses (gimple_call_lhs (ovf1
)) != 2)
5042 /* Check that ovf2's result is used in __real__ and set re2
5043 to that statement. */
5044 use_operand_p use_p
;
5045 imm_use_iterator iter
;
5046 tree lhs
= gimple_call_lhs (ovf2
);
5047 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
5049 gimple
*use_stmt
= USE_STMT (use_p
);
5050 if (is_gimple_debug (use_stmt
))
5052 if (use_stmt
== im2
)
5056 if (!uaddc_is_cplxpart (use_stmt
, REALPART_EXPR
))
5060 /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5061 gimple_stmt_iterator gsi2
= gsi_for_stmt (ovf2
);
5063 if ((kind
[1] & 4) != 0 && types_compatible_p (type
, TREE_TYPE (arg_im
[1])))
5065 if ((kind
[1] & 1) == 0)
5067 if (TREE_CODE (arg3
) == INTEGER_CST
)
5068 arg3
= fold_convert (type
, arg3
);
5071 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, arg3
);
5072 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5073 arg3
= gimple_assign_lhs (g
);
5076 g
= gimple_build_call_internal (ifn
== IFN_ADD_OVERFLOW
5077 ? IFN_UADDC
: IFN_USUBC
,
5078 3, arg1
, arg2
, arg3
);
5079 tree nlhs
= make_ssa_name (TREE_TYPE (lhs
));
5080 gimple_call_set_lhs (g
, nlhs
);
5081 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5082 /* In the case where stmt is | or ^ of two overflow flags
5083 or addition of those, replace stmt with __imag__ of the above
5084 added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5085 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5086 tree ilhs
= rhs
[2] ? make_ssa_name (type
) : gimple_assign_lhs (stmt
);
5087 g
= gimple_build_assign (ilhs
, IMAGPART_EXPR
,
5088 build1 (IMAGPART_EXPR
, TREE_TYPE (ilhs
), nlhs
));
5091 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5092 /* Remove some further statements which can't be kept in the IL because
5093 they can use SSA_NAMEs whose setter is going to be removed too. */
5094 for (gimple
*g2
: temp_stmts
)
5096 gsi2
= gsi_for_stmt (g2
);
5097 gsi_remove (&gsi2
, true);
5102 gsi_replace (gsi
, g
, true);
5103 /* Remove some statements which can't be kept in the IL because they
5104 use SSA_NAME whose setter is going to be removed too. */
5106 for (int i
= 0; i
< 2; i
++)
5107 if (rhs1
== gimple_assign_lhs (im2
))
5111 g
= SSA_NAME_DEF_STMT (rhs1
);
5112 rhs1
= gimple_assign_rhs1 (g
);
5113 gsi2
= gsi_for_stmt (g
);
5114 gsi_remove (&gsi2
, true);
5117 gcc_checking_assert (rhs1
== gimple_assign_lhs (im2
));
5118 gsi2
= gsi_for_stmt (im2
);
5119 gsi_remove (&gsi2
, true);
5121 /* Replace the re2 statement with __real__ of the newly added
5122 .UADDC/.USUBC call. */
5125 gsi2
= gsi_for_stmt (re2
);
5126 tree rlhs
= gimple_assign_lhs (re2
);
5127 g
= gimple_build_assign (rlhs
, REALPART_EXPR
,
5128 build1 (REALPART_EXPR
, TREE_TYPE (rlhs
), nlhs
));
5129 gsi_replace (&gsi2
, g
, true);
5133 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5134 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5135 replace stmt with __real__ of another .UADDC/.USUBC call which
5136 handles the most significant limb. Overflow flag from this is
5138 g
= gimple_build_call_internal (code
== PLUS_EXPR
5139 ? IFN_UADDC
: IFN_USUBC
,
5140 3, rhs
[3], rhs
[2], ilhs
);
5141 nlhs
= make_ssa_name (TREE_TYPE (lhs
));
5142 gimple_call_set_lhs (g
, nlhs
);
5143 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5144 ilhs
= gimple_assign_lhs (stmt
);
5145 g
= gimple_build_assign (ilhs
, REALPART_EXPR
,
5146 build1 (REALPART_EXPR
, TREE_TYPE (ilhs
), nlhs
));
5147 gsi_replace (gsi
, g
, true);
5149 if (TREE_CODE (arg3
) == SSA_NAME
)
5151 /* When pattern recognizing the second least significant limb
5152 above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5153 check if the [0, 1] range argument (i.e. carry in) isn't the
5154 result of another .{ADD,SUB}_OVERFLOW call (one handling the
5155 least significant limb). Again look through casts and != 0. */
5156 gimple
*im3
= SSA_NAME_DEF_STMT (arg3
);
5157 for (int i
= 0; i
< 2; ++i
)
5159 gimple
*im4
= uaddc_cast (im3
);
5165 im3
= uaddc_ne0 (im3
);
5166 if (uaddc_is_cplxpart (im3
, IMAGPART_EXPR
))
5169 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3
), 0));
5170 if (gimple_call_internal_p (ovf3
, ifn
))
5172 lhs
= gimple_call_lhs (ovf3
);
5173 arg1
= gimple_call_arg (ovf3
, 0);
5174 arg2
= gimple_call_arg (ovf3
, 1);
5175 if (types_compatible_p (type
, TREE_TYPE (TREE_TYPE (lhs
)))
5176 && types_compatible_p (type
, TREE_TYPE (arg1
))
5177 && types_compatible_p (type
, TREE_TYPE (arg2
)))
5179 /* And if it is initialized from result of __imag__
5180 of .{ADD,SUB}_OVERFLOW call, replace that
5181 call with .U{ADD,SUB}C call with the same arguments,
5182 just 0 added as third argument. This isn't strictly
5183 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5184 produce the same result, but may result in better
5185 generated code on some targets where the backend can
5186 better prepare in how the result will be used. */
5187 g
= gimple_build_call_internal (ifn
== IFN_ADD_OVERFLOW
5188 ? IFN_UADDC
: IFN_USUBC
,
5190 build_zero_cst (type
));
5191 gimple_call_set_lhs (g
, lhs
);
5192 gsi2
= gsi_for_stmt (ovf3
);
5193 gsi_replace (&gsi2
, g
, true);
5201 /* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
5202 (x & (x - 1)) > x - 1 or (x & (x - 1)) <= x - 1 if .POPCOUNT
5203 isn't a direct optab. */
5206 match_single_bit_test (gimple_stmt_iterator
*gsi
, gimple
*stmt
)
5209 enum tree_code code
;
5210 if (gimple_code (stmt
) == GIMPLE_COND
)
5212 clhs
= gimple_cond_lhs (stmt
);
5213 crhs
= gimple_cond_rhs (stmt
);
5214 code
= gimple_cond_code (stmt
);
5218 clhs
= gimple_assign_rhs1 (stmt
);
5219 crhs
= gimple_assign_rhs2 (stmt
);
5220 code
= gimple_assign_rhs_code (stmt
);
5222 if (code
!= EQ_EXPR
&& code
!= NE_EXPR
)
5224 if (TREE_CODE (clhs
) != SSA_NAME
|| !integer_onep (crhs
))
5226 gimple
*call
= SSA_NAME_DEF_STMT (clhs
);
5227 combined_fn cfn
= gimple_call_combined_fn (call
);
5235 if (!has_single_use (clhs
))
5237 tree arg
= gimple_call_arg (call
, 0);
5238 tree type
= TREE_TYPE (arg
);
5239 if (!INTEGRAL_TYPE_P (type
))
5241 bool nonzero_arg
= tree_expr_nonzero_p (arg
);
5242 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, type
, OPTIMIZE_FOR_BOTH
))
5244 /* Tell expand_POPCOUNT the popcount result is only used in equality
5245 comparison with one, so that it can decide based on rtx costs. */
5246 gimple
*g
= gimple_build_call_internal (IFN_POPCOUNT
, 2, arg
,
5247 nonzero_arg
? integer_zero_node
5248 : integer_one_node
);
5249 gimple_call_set_lhs (g
, gimple_call_lhs (call
));
5250 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
5251 gsi_replace (&gsi2
, g
, true);
5254 tree argm1
= make_ssa_name (type
);
5255 gimple
*g
= gimple_build_assign (argm1
, PLUS_EXPR
, arg
,
5256 build_int_cst (type
, -1));
5257 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5258 g
= gimple_build_assign (make_ssa_name (type
),
5259 nonzero_arg
? BIT_AND_EXPR
: BIT_XOR_EXPR
,
5261 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5265 argm1
= build_zero_cst (type
);
5269 cmpcode
= code
== EQ_EXPR
? GT_EXPR
: LE_EXPR
;
5270 if (gcond
*cond
= dyn_cast
<gcond
*> (stmt
))
5272 gimple_cond_set_lhs (cond
, gimple_assign_lhs (g
));
5273 gimple_cond_set_rhs (cond
, argm1
);
5274 gimple_cond_set_code (cond
, cmpcode
);
5278 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (g
));
5279 gimple_assign_set_rhs2 (stmt
, argm1
);
5280 gimple_assign_set_rhs_code (stmt
, cmpcode
);
5283 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
5284 gsi_remove (&gsi2
, true);
5285 release_defs (call
);
5288 /* Return true if target has support for divmod. */
5291 target_supports_divmod_p (optab divmod_optab
, optab div_optab
, machine_mode mode
)
5293 /* If target supports hardware divmod insn, use it for divmod. */
5294 if (optab_handler (divmod_optab
, mode
) != CODE_FOR_nothing
)
5297 /* Check if libfunc for divmod is available. */
5298 rtx libfunc
= optab_libfunc (divmod_optab
, mode
);
5299 if (libfunc
!= NULL_RTX
)
5301 /* If optab_handler exists for div_optab, perhaps in a wider mode,
5302 we don't want to use the libfunc even if it exists for given mode. */
5303 machine_mode div_mode
;
5304 FOR_EACH_MODE_FROM (div_mode
, mode
)
5305 if (optab_handler (div_optab
, div_mode
) != CODE_FOR_nothing
)
5308 return targetm
.expand_divmod_libfunc
!= NULL
;
5314 /* Check if stmt is candidate for divmod transform. */
5317 divmod_candidate_p (gassign
*stmt
)
5319 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
5320 machine_mode mode
= TYPE_MODE (type
);
5321 optab divmod_optab
, div_optab
;
5323 if (TYPE_UNSIGNED (type
))
5325 divmod_optab
= udivmod_optab
;
5326 div_optab
= udiv_optab
;
5330 divmod_optab
= sdivmod_optab
;
5331 div_optab
= sdiv_optab
;
5334 tree op1
= gimple_assign_rhs1 (stmt
);
5335 tree op2
= gimple_assign_rhs2 (stmt
);
5337 /* Disable the transform if either is a constant, since division-by-constant
5338 may have specialized expansion. */
5339 if (CONSTANT_CLASS_P (op1
))
5342 if (CONSTANT_CLASS_P (op2
))
5344 if (integer_pow2p (op2
))
5347 if (element_precision (type
) <= HOST_BITS_PER_WIDE_INT
5348 && element_precision (type
) <= BITS_PER_WORD
)
5351 /* If the divisor is not power of 2 and the precision wider than
5352 HWI, expand_divmod punts on that, so in that case it is better
5353 to use divmod optab or libfunc. Similarly if choose_multiplier
5354 might need pre/post shifts of BITS_PER_WORD or more. */
5357 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5358 expand using the [su]divv optabs. */
5359 if (TYPE_OVERFLOW_TRAPS (type
))
5362 if (!target_supports_divmod_p (divmod_optab
, div_optab
, mode
))
5368 /* This function looks for:
5369 t1 = a TRUNC_DIV_EXPR b;
5370 t2 = a TRUNC_MOD_EXPR b;
5371 and transforms it to the following sequence:
5372 complex_tmp = DIVMOD (a, b);
5373 t1 = REALPART_EXPR(a);
5374 t2 = IMAGPART_EXPR(b);
5375 For conditions enabling the transform see divmod_candidate_p().
5377 The pass has three parts:
5378 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5379 other trunc_div_expr and trunc_mod_expr stmts.
5380 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5382 3) Insert DIVMOD call just before top_stmt and update entries in
5383 stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
5384 IMAGPART_EXPR for mod). */
5387 convert_to_divmod (gassign
*stmt
)
5389 if (stmt_can_throw_internal (cfun
, stmt
)
5390 || !divmod_candidate_p (stmt
))
5393 tree op1
= gimple_assign_rhs1 (stmt
);
5394 tree op2
= gimple_assign_rhs2 (stmt
);
5396 imm_use_iterator use_iter
;
5398 auto_vec
<gimple
*> stmts
;
5400 gimple
*top_stmt
= stmt
;
5401 basic_block top_bb
= gimple_bb (stmt
);
5403 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
5404 at-least stmt and possibly other trunc_div/trunc_mod stmts
5405 having same operands as stmt. */
5407 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, op1
)
5409 if (is_gimple_assign (use_stmt
)
5410 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
5411 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
5412 && operand_equal_p (op1
, gimple_assign_rhs1 (use_stmt
), 0)
5413 && operand_equal_p (op2
, gimple_assign_rhs2 (use_stmt
), 0))
5415 if (stmt_can_throw_internal (cfun
, use_stmt
))
5418 basic_block bb
= gimple_bb (use_stmt
);
5422 if (gimple_uid (use_stmt
) < gimple_uid (top_stmt
))
5423 top_stmt
= use_stmt
;
5425 else if (dominated_by_p (CDI_DOMINATORS
, top_bb
, bb
))
5428 top_stmt
= use_stmt
;
5433 tree top_op1
= gimple_assign_rhs1 (top_stmt
);
5434 tree top_op2
= gimple_assign_rhs2 (top_stmt
);
5436 stmts
.safe_push (top_stmt
);
5437 bool div_seen
= (gimple_assign_rhs_code (top_stmt
) == TRUNC_DIV_EXPR
);
5439 /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
5440 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
5441 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5442 2nd loop ends up adding at-least single trunc_mod_expr stmt. */
5444 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, top_op1
)
5446 if (is_gimple_assign (use_stmt
)
5447 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
5448 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
5449 && operand_equal_p (top_op1
, gimple_assign_rhs1 (use_stmt
), 0)
5450 && operand_equal_p (top_op2
, gimple_assign_rhs2 (use_stmt
), 0))
5452 if (use_stmt
== top_stmt
5453 || stmt_can_throw_internal (cfun
, use_stmt
)
5454 || !dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
), top_bb
))
5457 stmts
.safe_push (use_stmt
);
5458 if (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
)
5466 /* Part 3: Create libcall to internal fn DIVMOD:
5467 divmod_tmp = DIVMOD (op1, op2). */
5469 gcall
*call_stmt
= gimple_build_call_internal (IFN_DIVMOD
, 2, op1
, op2
);
5470 tree res
= make_temp_ssa_name (build_complex_type (TREE_TYPE (op1
)),
5471 call_stmt
, "divmod_tmp");
5472 gimple_call_set_lhs (call_stmt
, res
);
5473 /* We rejected throwing statements above. */
5474 gimple_call_set_nothrow (call_stmt
, true);
5476 /* Insert the call before top_stmt. */
5477 gimple_stmt_iterator top_stmt_gsi
= gsi_for_stmt (top_stmt
);
5478 gsi_insert_before (&top_stmt_gsi
, call_stmt
, GSI_SAME_STMT
);
5480 widen_mul_stats
.divmod_calls_inserted
++;
5482 /* Update all statements in stmts vector:
5483 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5484 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5486 for (unsigned i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
5490 switch (gimple_assign_rhs_code (use_stmt
))
5492 case TRUNC_DIV_EXPR
:
5493 new_rhs
= fold_build1 (REALPART_EXPR
, TREE_TYPE (op1
), res
);
5496 case TRUNC_MOD_EXPR
:
5497 new_rhs
= fold_build1 (IMAGPART_EXPR
, TREE_TYPE (op1
), res
);
5504 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
5505 gimple_assign_set_rhs_from_tree (&gsi
, new_rhs
);
5506 update_stmt (use_stmt
);
5512 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
5513 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
5514 value is true iff we converted the statement. */
5517 convert_mult_to_highpart (gassign
*stmt
, gimple_stmt_iterator
*gsi
)
5519 tree lhs
= gimple_assign_lhs (stmt
);
5520 tree stype
= TREE_TYPE (lhs
);
5521 tree sarg0
= gimple_assign_rhs1 (stmt
);
5522 tree sarg1
= gimple_assign_rhs2 (stmt
);
5524 if (TREE_CODE (stype
) != INTEGER_TYPE
5525 || TREE_CODE (sarg1
) != INTEGER_CST
5526 || TREE_CODE (sarg0
) != SSA_NAME
5527 || !tree_fits_uhwi_p (sarg1
)
5528 || !has_single_use (sarg0
))
5531 gassign
*def
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (sarg0
));
5535 enum tree_code mcode
= gimple_assign_rhs_code (def
);
5536 if (mcode
== NOP_EXPR
)
5538 tree tmp
= gimple_assign_rhs1 (def
);
5539 if (TREE_CODE (tmp
) != SSA_NAME
|| !has_single_use (tmp
))
5541 def
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (tmp
));
5544 mcode
= gimple_assign_rhs_code (def
);
5547 if (mcode
!= WIDEN_MULT_EXPR
5548 || gimple_bb (def
) != gimple_bb (stmt
))
5550 tree mtype
= TREE_TYPE (gimple_assign_lhs (def
));
5551 if (TREE_CODE (mtype
) != INTEGER_TYPE
5552 || TYPE_PRECISION (mtype
) != TYPE_PRECISION (stype
))
5555 tree mop1
= gimple_assign_rhs1 (def
);
5556 tree mop2
= gimple_assign_rhs2 (def
);
5557 tree optype
= TREE_TYPE (mop1
);
5558 bool unsignedp
= TYPE_UNSIGNED (optype
);
5559 unsigned int prec
= TYPE_PRECISION (optype
);
5561 if (unsignedp
!= TYPE_UNSIGNED (mtype
)
5562 || TYPE_PRECISION (mtype
) != 2 * prec
)
5565 unsigned HOST_WIDE_INT bits
= tree_to_uhwi (sarg1
);
5566 if (bits
< prec
|| bits
>= 2 * prec
)
5569 /* For the time being, require operands to have the same sign. */
5570 if (unsignedp
!= TYPE_UNSIGNED (TREE_TYPE (mop2
)))
5573 machine_mode mode
= TYPE_MODE (optype
);
5574 optab tab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
5575 if (optab_handler (tab
, mode
) == CODE_FOR_nothing
)
5578 location_t loc
= gimple_location (stmt
);
5579 tree highpart1
= build_and_insert_binop (gsi
, loc
, "highparttmp",
5580 MULT_HIGHPART_EXPR
, mop1
, mop2
);
5581 tree highpart2
= highpart1
;
5582 tree ntype
= optype
;
5584 if (TYPE_UNSIGNED (stype
) != TYPE_UNSIGNED (optype
))
5586 ntype
= TYPE_UNSIGNED (stype
) ? unsigned_type_for (optype
)
5587 : signed_type_for (optype
);
5588 highpart2
= build_and_insert_cast (gsi
, loc
, ntype
, highpart1
);
5591 highpart2
= build_and_insert_binop (gsi
, loc
, "highparttmp",
5592 RSHIFT_EXPR
, highpart2
,
5593 build_int_cst (ntype
, bits
- prec
));
5595 gassign
*new_stmt
= gimple_build_assign (lhs
, NOP_EXPR
, highpart2
);
5596 gsi_replace (gsi
, new_stmt
, true);
5598 widen_mul_stats
.highpart_mults_inserted
++;
5602 /* If target has spaceship<MODE>3 expander, pattern recognize
5603 <bb 2> [local count: 1073741824]:
5604 if (a_2(D) == b_3(D))
5605 goto <bb 6>; [34.00%]
5607 goto <bb 3>; [66.00%]
5609 <bb 3> [local count: 708669601]:
5610 if (a_2(D) < b_3(D))
5611 goto <bb 6>; [1.04%]
5613 goto <bb 4>; [98.96%]
5615 <bb 4> [local count: 701299439]:
5616 if (a_2(D) > b_3(D))
5617 goto <bb 5>; [48.89%]
5619 goto <bb 6>; [51.11%]
5621 <bb 5> [local count: 342865295]:
5623 <bb 6> [local count: 1073741824]:
5625 <bb 2> [local count: 1073741824]:
5626 _1 = .SPACESHIP (a_2(D), b_3(D));
5628 goto <bb 6>; [34.00%]
5630 goto <bb 3>; [66.00%]
5632 <bb 3> [local count: 708669601]:
5634 goto <bb 6>; [1.04%]
5636 goto <bb 4>; [98.96%]
5638 <bb 4> [local count: 701299439]:
5640 goto <bb 5>; [48.89%]
5642 goto <bb 6>; [51.11%]
5644 <bb 5> [local count: 342865295]:
5646 <bb 6> [local count: 1073741824]:
5647 so that the backend can emit optimal comparison and
5648 conditional jump sequence. */
5651 optimize_spaceship (gcond
*stmt
)
5653 enum tree_code code
= gimple_cond_code (stmt
);
5654 if (code
!= EQ_EXPR
&& code
!= NE_EXPR
)
5656 tree arg1
= gimple_cond_lhs (stmt
);
5657 tree arg2
= gimple_cond_rhs (stmt
);
5658 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1
))
5659 || optab_handler (spaceship_optab
,
5660 TYPE_MODE (TREE_TYPE (arg1
))) == CODE_FOR_nothing
5661 || operand_equal_p (arg1
, arg2
, 0))
5664 basic_block bb0
= gimple_bb (stmt
), bb1
, bb2
= NULL
;
5665 edge em1
= NULL
, e1
= NULL
, e2
= NULL
;
5666 bb1
= EDGE_SUCC (bb0
, 1)->dest
;
5667 if (((EDGE_SUCC (bb0
, 0)->flags
& EDGE_TRUE_VALUE
) != 0) ^ (code
== EQ_EXPR
))
5668 bb1
= EDGE_SUCC (bb0
, 0)->dest
;
5670 gcond
*g
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb1
));
5672 || !single_pred_p (bb1
)
5673 || (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
5674 ? !operand_equal_p (gimple_cond_rhs (g
), arg2
, 0)
5675 : (!operand_equal_p (gimple_cond_lhs (g
), arg2
, 0)
5676 || !operand_equal_p (gimple_cond_rhs (g
), arg1
, 0)))
5677 || !cond_only_block_p (bb1
))
5680 enum tree_code ccode
= (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
5681 ? LT_EXPR
: GT_EXPR
);
5682 switch (gimple_cond_code (g
))
5689 ccode
= ccode
== LT_EXPR
? GT_EXPR
: LT_EXPR
;
5695 for (int i
= 0; i
< 2; ++i
)
5697 /* With NaNs, </<=/>/>= are false, so we need to look for the
5698 third comparison on the false edge from whatever non-equality
5699 comparison the second comparison is. */
5700 if (HONOR_NANS (TREE_TYPE (arg1
))
5701 && (EDGE_SUCC (bb1
, i
)->flags
& EDGE_TRUE_VALUE
) != 0)
5704 bb2
= EDGE_SUCC (bb1
, i
)->dest
;
5705 g
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb2
));
5707 || !single_pred_p (bb2
)
5708 || (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
5709 ? !operand_equal_p (gimple_cond_rhs (g
), arg2
, 0)
5710 : (!operand_equal_p (gimple_cond_lhs (g
), arg2
, 0)
5711 || !operand_equal_p (gimple_cond_rhs (g
), arg1
, 0)))
5712 || !cond_only_block_p (bb2
)
5713 || EDGE_SUCC (bb2
, 0)->dest
== EDGE_SUCC (bb2
, 1)->dest
)
5716 enum tree_code ccode2
5717 = (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0) ? LT_EXPR
: GT_EXPR
);
5718 switch (gimple_cond_code (g
))
5725 ccode2
= ccode2
== LT_EXPR
? GT_EXPR
: LT_EXPR
;
5730 if (HONOR_NANS (TREE_TYPE (arg1
)) && ccode
== ccode2
)
5733 if ((ccode
== LT_EXPR
)
5734 ^ ((EDGE_SUCC (bb1
, i
)->flags
& EDGE_TRUE_VALUE
) != 0))
5736 em1
= EDGE_SUCC (bb1
, 1 - i
);
5737 e1
= EDGE_SUCC (bb2
, 0);
5738 e2
= EDGE_SUCC (bb2
, 1);
5739 if ((ccode2
== LT_EXPR
) ^ ((e1
->flags
& EDGE_TRUE_VALUE
) == 0))
5744 e1
= EDGE_SUCC (bb1
, 1 - i
);
5745 em1
= EDGE_SUCC (bb2
, 0);
5746 e2
= EDGE_SUCC (bb2
, 1);
5747 if ((ccode2
!= LT_EXPR
) ^ ((em1
->flags
& EDGE_TRUE_VALUE
) == 0))
5748 std::swap (em1
, e2
);
5755 if ((ccode
== LT_EXPR
)
5756 ^ ((EDGE_SUCC (bb1
, 0)->flags
& EDGE_TRUE_VALUE
) != 0))
5758 em1
= EDGE_SUCC (bb1
, 1);
5759 e1
= EDGE_SUCC (bb1
, 0);
5760 e2
= (e1
->flags
& EDGE_TRUE_VALUE
) ? em1
: e1
;
5764 em1
= EDGE_SUCC (bb1
, 0);
5765 e1
= EDGE_SUCC (bb1
, 1);
5766 e2
= (e1
->flags
& EDGE_TRUE_VALUE
) ? em1
: e1
;
5770 gcall
*gc
= gimple_build_call_internal (IFN_SPACESHIP
, 2, arg1
, arg2
);
5771 tree lhs
= make_ssa_name (integer_type_node
);
5772 gimple_call_set_lhs (gc
, lhs
);
5773 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5774 gsi_insert_before (&gsi
, gc
, GSI_SAME_STMT
);
5776 gimple_cond_set_lhs (stmt
, lhs
);
5777 gimple_cond_set_rhs (stmt
, integer_zero_node
);
5780 gcond
*cond
= as_a
<gcond
*> (*gsi_last_bb (bb1
));
5781 gimple_cond_set_lhs (cond
, lhs
);
5782 if (em1
->src
== bb1
&& e2
!= em1
)
5784 gimple_cond_set_rhs (cond
, integer_minus_one_node
);
5785 gimple_cond_set_code (cond
, (em1
->flags
& EDGE_TRUE_VALUE
)
5786 ? EQ_EXPR
: NE_EXPR
);
5790 gcc_assert (e1
->src
== bb1
&& e2
!= e1
);
5791 gimple_cond_set_rhs (cond
, integer_one_node
);
5792 gimple_cond_set_code (cond
, (e1
->flags
& EDGE_TRUE_VALUE
)
5793 ? EQ_EXPR
: NE_EXPR
);
5797 if (e2
!= e1
&& e2
!= em1
)
5799 cond
= as_a
<gcond
*> (*gsi_last_bb (bb2
));
5800 gimple_cond_set_lhs (cond
, lhs
);
5801 if (em1
->src
== bb2
)
5802 gimple_cond_set_rhs (cond
, integer_minus_one_node
);
5805 gcc_assert (e1
->src
== bb2
);
5806 gimple_cond_set_rhs (cond
, integer_one_node
);
5808 gimple_cond_set_code (cond
,
5809 (e2
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
);
5813 wide_int wm1
= wi::minus_one (TYPE_PRECISION (integer_type_node
));
5814 wide_int w2
= wi::two (TYPE_PRECISION (integer_type_node
));
5815 value_range
vr (TREE_TYPE (lhs
), wm1
, w2
);
5816 set_range_info (lhs
, vr
);
5820 /* Find integer multiplications where the operands are extended from
5821 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
5822 or MULT_HIGHPART_EXPR where appropriate. */
5826 const pass_data pass_data_optimize_widening_mul
=
5828 GIMPLE_PASS
, /* type */
5829 "widening_mul", /* name */
5830 OPTGROUP_NONE
, /* optinfo_flags */
5831 TV_TREE_WIDEN_MUL
, /* tv_id */
5832 PROP_ssa
, /* properties_required */
5833 0, /* properties_provided */
5834 0, /* properties_destroyed */
5835 0, /* todo_flags_start */
5836 TODO_update_ssa
, /* todo_flags_finish */
5839 class pass_optimize_widening_mul
: public gimple_opt_pass
5842 pass_optimize_widening_mul (gcc::context
*ctxt
)
5843 : gimple_opt_pass (pass_data_optimize_widening_mul
, ctxt
)
5846 /* opt_pass methods: */
5847 bool gate (function
*) final override
5849 return flag_expensive_optimizations
&& optimize
;
5852 unsigned int execute (function
*) final override
;
5854 }; // class pass_optimize_widening_mul
5856 /* Walker class to perform the transformation in reverse dominance order. */
5858 class math_opts_dom_walker
: public dom_walker
5861 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
5862 if walking modidifes the CFG. */
5864 math_opts_dom_walker (bool *cfg_changed_p
)
5865 : dom_walker (CDI_DOMINATORS
), m_last_result_set (),
5866 m_cfg_changed_p (cfg_changed_p
) {}
5868 /* The actual actions performed in the walk. */
5870 void after_dom_children (basic_block
) final override
;
5872 /* Set of results of chains of multiply and add statement combinations that
5873 were not transformed into FMAs because of active deferring. */
5874 hash_set
<tree
> m_last_result_set
;
5876 /* Pointer to a flag of the user that needs to be set if CFG has been
5878 bool *m_cfg_changed_p
;
5882 math_opts_dom_walker::after_dom_children (basic_block bb
)
5884 gimple_stmt_iterator gsi
;
5886 fma_deferring_state
fma_state (param_avoid_fma_max_bits
> 0);
5888 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);)
5890 gimple
*stmt
= gsi_stmt (gsi
);
5891 enum tree_code code
;
5893 if (is_gimple_assign (stmt
))
5895 code
= gimple_assign_rhs_code (stmt
);
5899 if (!convert_mult_to_widen (stmt
, &gsi
)
5900 && !convert_expand_mult_copysign (stmt
, &gsi
)
5901 && convert_mult_to_fma (stmt
,
5902 gimple_assign_rhs1 (stmt
),
5903 gimple_assign_rhs2 (stmt
),
5906 gsi_remove (&gsi
, true);
5907 release_defs (stmt
);
5910 match_arith_overflow (&gsi
, stmt
, code
, m_cfg_changed_p
);
5915 if (!convert_plusminus_to_widen (&gsi
, stmt
, code
))
5917 match_arith_overflow (&gsi
, stmt
, code
, m_cfg_changed_p
);
5918 if (gsi_stmt (gsi
) == stmt
)
5919 match_uaddc_usubc (&gsi
, stmt
, code
);
5924 if (match_arith_overflow (&gsi
, stmt
, code
, m_cfg_changed_p
))
5928 case TRUNC_MOD_EXPR
:
5929 convert_to_divmod (as_a
<gassign
*> (stmt
));
5933 convert_mult_to_highpart (as_a
<gassign
*> (stmt
), &gsi
);
5938 match_uaddc_usubc (&gsi
, stmt
, code
);
5943 match_single_bit_test (&gsi
, stmt
);
5949 else if (is_gimple_call (stmt
))
5951 switch (gimple_call_combined_fn (stmt
))
5954 if (gimple_call_lhs (stmt
)
5955 && TREE_CODE (gimple_call_arg (stmt
, 1)) == REAL_CST
5956 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt
, 1)),
5958 && convert_mult_to_fma (stmt
,
5959 gimple_call_arg (stmt
, 0),
5960 gimple_call_arg (stmt
, 0),
5963 unlink_stmt_vdef (stmt
);
5964 if (gsi_remove (&gsi
, true)
5965 && gimple_purge_dead_eh_edges (bb
))
5966 *m_cfg_changed_p
= true;
5967 release_defs (stmt
);
5973 if (convert_mult_to_fma (stmt
,
5974 gimple_call_arg (stmt
, 1),
5975 gimple_call_arg (stmt
, 2),
5977 gimple_call_arg (stmt
, 0)))
5980 gsi_remove (&gsi
, true);
5981 release_defs (stmt
);
5986 case CFN_COND_LEN_MUL
:
5987 if (convert_mult_to_fma (stmt
,
5988 gimple_call_arg (stmt
, 1),
5989 gimple_call_arg (stmt
, 2),
5991 gimple_call_arg (stmt
, 0),
5992 gimple_call_arg (stmt
, 4),
5993 gimple_call_arg (stmt
, 5)))
5996 gsi_remove (&gsi
, true);
5997 release_defs (stmt
);
6003 cancel_fma_deferring (&fma_state
);
6010 else if (gimple_code (stmt
) == GIMPLE_COND
)
6012 match_single_bit_test (&gsi
, stmt
);
6013 optimize_spaceship (as_a
<gcond
*> (stmt
));
6017 if (fma_state
.m_deferring_p
6018 && fma_state
.m_initial_phi
)
6020 gcc_checking_assert (fma_state
.m_last_result
);
6021 if (!last_fma_candidate_feeds_initial_phi (&fma_state
,
6022 &m_last_result_set
))
6023 cancel_fma_deferring (&fma_state
);
6025 m_last_result_set
.add (fma_state
.m_last_result
);
6031 pass_optimize_widening_mul::execute (function
*fun
)
6033 bool cfg_changed
= false;
6035 memset (&widen_mul_stats
, 0, sizeof (widen_mul_stats
));
6036 calculate_dominance_info (CDI_DOMINATORS
);
6037 renumber_gimple_stmt_uids (cfun
);
6039 math_opts_dom_walker (&cfg_changed
).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6041 statistics_counter_event (fun
, "widening multiplications inserted",
6042 widen_mul_stats
.widen_mults_inserted
);
6043 statistics_counter_event (fun
, "widening maccs inserted",
6044 widen_mul_stats
.maccs_inserted
);
6045 statistics_counter_event (fun
, "fused multiply-adds inserted",
6046 widen_mul_stats
.fmas_inserted
);
6047 statistics_counter_event (fun
, "divmod calls inserted",
6048 widen_mul_stats
.divmod_calls_inserted
);
6049 statistics_counter_event (fun
, "highpart multiplications inserted",
6050 widen_mul_stats
.highpart_mults_inserted
);
6052 return cfg_changed
? TODO_cleanup_cfg
: 0;
6058 make_pass_optimize_widening_mul (gcc::context
*ctxt
)
6060 return new pass_optimize_widening_mul (ctxt
);