gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, namely in blocks that are common dominators
55 of those that have the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
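/* A rough illustrative sketch (hypothetical blocks, assuming N == 2 and
   non-trapping math): if BB2 contains a = x / d and a later block BB3,
   which post-dominates BB2, contains b = y / d, then two divisions by d
   post-dominate BB2.  Its merit reaches N, so the pass emits
   t = 1.0 / d in BB2 ahead of the first division and rewrites the uses
   into a = x * t and b = y * t.  With trapping math BB2 is still a
   valid insertion point, because it already contains a division.  */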
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
143 /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
157 /* Construct a struct occurrence for basic block BB, whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
162 bb->aux = this;
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
168 bb->aux = nullptr;
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
192 /* Number of conversions removed. */
193 int conv_removed;
195 } sincos_stats;
197 static struct
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
222 void* occurrence::operator new (size_t n)
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
228 void occurrence::operator delete (void *occ, size_t n)
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
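/* An illustrative walk through the cases below (hypothetical blocks):
   suppose the list under IDOM already holds an occurrence for BB4 and
   we insert NEW_OCC for BB6.  If BB6 dominates BB4, BB4's occurrence
   becomes a child of NEW_OCC.  If BB4 dominates BB6, we recurse into
   BB4's children.  If their nearest common dominator is some BB2
   distinct from both and from IDOM, a fresh occurrence is created for
   BB2, BB4 and BB6 become its children, and the scan continues with
   BB2 in place of BB6 so that further blocks dominated by BB2 are
   collected as well.  */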
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
246 struct occurrence *occ, **p_occ;
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
260 /* Try the next block (it may as well be dominated by BB). */
263 else if (dom == occ_bb)
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
270 else if (dom != idom)
272 gcc_assert (!dom->aux);
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
287 else
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
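/* Worked example of the weighting: a plain division a / x is registered
   once with IMPORTANCE 2, contributing 2 to num_divisions.  A division
   a / (x * x) is reached twice by the counting loop in
   execute_cse_reciprocals_1 (once per use of x in the square) and is
   registered twice with IMPORTANCE 1, again contributing 2 in total.
   insert_reciprocals later halves num_divisions to recover the real
   division count before comparing it against the target threshold.  */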
305 static inline void
306 register_division_in (basic_block bb, int importance)
308 struct occurrence *occ;
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
325 static void
326 compute_merit (struct occurrence *occ)
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses of x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
375 return 0;
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
382 return is_mult_by (use_stmt, def, def);
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399 return 0;
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
407 DEF_GSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
437 if (should_insert_square_recip)
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
444 if (occ->bb_has_division)
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 else
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 reciprocal_stats.rdivs_inserted++;
478 occ->recip_def_stmt = new_stmt;
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
533 /* Free OCC and return one more "struct occurrence" to be freed. */
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
538 struct occurrence *child, *next;
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
550 while (next)
551 next = free_bb (next);
553 return child;
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
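/* The identities behind the rewrite, spelled out (valid only under
   -funsafe-math-optimizations, ignoring rounding and special values):
   with t = sqrt (a) and x = 1.0 / t,
     r1 = x * x = 1.0 / (sqrt (a) * sqrt (a)) = 1.0 / a
     r2 = a * x = a / sqrt (a)                = sqrt (a) = t
   so r1 becomes a division that no longer waits for the sqrt result,
   r2 is simply the sqrt itself, and x, if still needed elsewhere, is
   recovered as r1 * r2 = 1.0 / sqrt (a).  */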
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
593 switch (gimple_call_combined_fn (sqrt_stmt))
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
599 default:
600 return;
602 tree a = gimple_call_arg (sqrt_stmt, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
623 else if (is_mult_by (use_stmt, x, a))
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
629 else
630 has_other_use = true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
648 if (dump_file)
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
669 if (dump_file)
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
697 if (!mult_stmts.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707 if (dump_file)
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
721 if (has_other_use)
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
735 else if (delete_div)
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
742 else
743 release_ssa_name (x);
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
750 DEF must be a GIMPLE register of a floating-point type. */
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
782 tree op0 = gimple_assign_rhs1 (def_stmt);
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
801 if (is_square_of (use_stmt, def))
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
837 if (is_division_by (use_stmt, def))
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
869 occ_head = NULL;
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
878 internal_fn ifn;
880 switch (gimple_call_combined_fn (call))
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
887 default:
888 return IFN_LAST;
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
895 return ifn;
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
902 const pass_data pass_data_cse_reciprocals =
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
915 class pass_cse_reciprocals : public gimple_opt_pass
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 /* opt_pass methods: */
923 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924 virtual unsigned int execute (function *);
926 }; // class pass_cse_reciprocals
928 unsigned int
929 pass_cse_reciprocals::execute (function *fun)
931 basic_block bb;
932 tree arg;
934 occ_pool = new object_allocator<occurrence> ("dominators for recip");
936 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937 calculate_dominance_info (CDI_DOMINATORS);
938 calculate_dominance_info (CDI_POST_DOMINATORS);
940 if (flag_checking)
941 FOR_EACH_BB_FN (bb, fun)
942 gcc_assert (!bb->aux);
944 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945 if (FLOAT_TYPE_P (TREE_TYPE (arg))
946 && is_gimple_reg (arg))
948 tree name = ssa_default_def (fun, arg);
949 if (name)
950 execute_cse_reciprocals_1 (NULL, name);
953 FOR_EACH_BB_FN (bb, fun)
955 tree def;
957 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958 gsi_next (&gsi))
960 gphi *phi = gsi.phi ();
961 def = PHI_RESULT (phi);
962 if (! virtual_operand_p (def)
963 && FLOAT_TYPE_P (TREE_TYPE (def)))
964 execute_cse_reciprocals_1 (NULL, def);
967 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968 gsi_next (&gsi))
970 gimple *stmt = gsi_stmt (gsi);
972 if (gimple_has_lhs (stmt)
973 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974 && FLOAT_TYPE_P (TREE_TYPE (def))
975 && TREE_CODE (def) == SSA_NAME)
977 execute_cse_reciprocals_1 (&gsi, def);
978 stmt = gsi_stmt (gsi);
979 if (flag_unsafe_math_optimizations
980 && is_gimple_assign (stmt)
981 && gimple_assign_lhs (stmt) == def
982 && !stmt_can_throw_internal (cfun, stmt)
983 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984 optimize_recip_sqrt (&gsi, def);
988 if (optimize_bb_for_size_p (bb))
989 continue;
991 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
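/* For example (sketch): given t = sqrt (b); x = a / t; where every use
   of t is such a division, the call is rewritten to compute a
   reciprocal square root t = rsqrt (b) instead, either through the
   internal function IFN_RSQRT or through a target builtin from
   targetm.builtin_reciprocal, and the division becomes the
   multiplication x = a * t.  */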
992 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993 gsi_next (&gsi))
995 gimple *stmt = gsi_stmt (gsi);
997 if (is_gimple_assign (stmt)
998 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1000 tree arg1 = gimple_assign_rhs2 (stmt);
1001 gimple *stmt1;
1003 if (TREE_CODE (arg1) != SSA_NAME)
1004 continue;
1006 stmt1 = SSA_NAME_DEF_STMT (arg1);
1008 if (is_gimple_call (stmt1)
1009 && gimple_call_lhs (stmt1))
1011 bool fail;
1012 imm_use_iterator ui;
1013 use_operand_p use_p;
1014 tree fndecl = NULL_TREE;
1016 gcall *call = as_a <gcall *> (stmt1);
1017 internal_fn ifn = internal_fn_reciprocal (call);
1018 if (ifn == IFN_LAST)
1020 fndecl = gimple_call_fndecl (call);
1021 if (!fndecl
1022 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023 continue;
1024 fndecl = targetm.builtin_reciprocal (fndecl);
1025 if (!fndecl)
1026 continue;
1029 /* Check that all uses of the SSA name are divisions,
1030 otherwise replacing the defining statement will do
1031 the wrong thing. */
1032 fail = false;
1033 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1035 gimple *stmt2 = USE_STMT (use_p);
1036 if (is_gimple_debug (stmt2))
1037 continue;
1038 if (!is_gimple_assign (stmt2)
1039 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040 || gimple_assign_rhs1 (stmt2) == arg1
1041 || gimple_assign_rhs2 (stmt2) != arg1)
1043 fail = true;
1044 break;
1047 if (fail)
1048 continue;
1050 gimple_replace_ssa_lhs (call, arg1);
1051 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1053 auto_vec<tree, 4> args;
1054 for (unsigned int i = 0;
1055 i < gimple_call_num_args (call); i++)
1056 args.safe_push (gimple_call_arg (call, i));
1057 gcall *stmt2;
1058 if (ifn == IFN_LAST)
1059 stmt2 = gimple_build_call_vec (fndecl, args);
1060 else
1061 stmt2 = gimple_build_call_internal_vec (ifn, args);
1062 gimple_call_set_lhs (stmt2, arg1);
1063 gimple_move_vops (stmt2, call);
1064 gimple_call_set_nothrow (stmt2,
1065 gimple_call_nothrow_p (call));
1066 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067 gsi_replace (&gsi2, stmt2, true);
1069 else
1071 if (ifn == IFN_LAST)
1072 gimple_call_set_fndecl (call, fndecl);
1073 else
1074 gimple_call_set_internal_fn (call, ifn);
1075 update_stmt (call);
1077 reciprocal_stats.rfuncs_inserted++;
1079 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1081 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083 fold_stmt_inplace (&gsi);
1084 update_stmt (stmt);
1091 statistics_counter_event (fun, "reciprocal divs inserted",
1092 reciprocal_stats.rdivs_inserted);
1093 statistics_counter_event (fun, "reciprocal functions inserted",
1094 reciprocal_stats.rfuncs_inserted);
1096 free_dominance_info (CDI_DOMINATORS);
1097 free_dominance_info (CDI_POST_DOMINATORS);
1098 delete occ_pool;
1099 return 0;
1102 } // anon namespace
1104 gimple_opt_pass *
1105 make_pass_cse_reciprocals (gcc::context *ctxt)
1107 return new pass_cse_reciprocals (ctxt);
1110 /* If NAME is the result of a type conversion, look for other
1111 equivalent dominating or dominated conversions, and replace all
1112 uses with the earliest dominating name, removing the redundant
1113 conversions. Return the prevailing name. */
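/* For example (hypothetical SSA names): if _1 = (double) s_5 appears in
   a block dominating _2 = (double) s_5, all uses of _2 are rewritten to
   use _1, the second conversion is removed, and _1 is returned as the
   prevailing name.  */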
1115 static tree
1116 execute_cse_conv_1 (tree name)
1118 if (SSA_NAME_IS_DEFAULT_DEF (name)
1119 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120 return name;
1122 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1124 if (!gimple_assign_cast_p (def_stmt))
1125 return name;
1127 tree src = gimple_assign_rhs1 (def_stmt);
1129 if (TREE_CODE (src) != SSA_NAME)
1130 return name;
1132 imm_use_iterator use_iter;
1133 gimple *use_stmt;
1135 /* Find the earliest dominating def. */
1136 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1138 if (use_stmt == def_stmt
1139 || !gimple_assign_cast_p (use_stmt))
1140 continue;
1142 tree lhs = gimple_assign_lhs (use_stmt);
1144 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145 || (gimple_assign_rhs1 (use_stmt)
1146 != gimple_assign_rhs1 (def_stmt))
1147 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148 continue;
1150 bool use_dominates;
1151 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1153 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155 gsi_next (&gsi);
1156 use_dominates = !gsi_end_p (gsi);
1158 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159 gimple_bb (def_stmt)))
1160 use_dominates = false;
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162 gimple_bb (use_stmt)))
1163 use_dominates = true;
1164 else
1165 continue;
1167 if (use_dominates)
1169 std::swap (name, lhs);
1170 std::swap (def_stmt, use_stmt);
1174 /* Now go through all uses of SRC again, replacing the equivalent
1175 dominated conversions. We may replace defs that were not
1176 dominated by the then-prevailing defs when we first visited
1177 them. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1180 if (use_stmt == def_stmt
1181 || !gimple_assign_cast_p (use_stmt))
1182 continue;
1184 tree lhs = gimple_assign_lhs (use_stmt);
1186 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187 || (gimple_assign_rhs1 (use_stmt)
1188 != gimple_assign_rhs1 (def_stmt))
1189 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190 continue;
1192 if (gimple_bb (def_stmt) == gimple_bb (use_stmt)
1193 || dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1194 gimple_bb (def_stmt)))
1196 sincos_stats.conv_removed++;
1198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199 replace_uses_by (lhs, name);
1200 gsi_remove (&gsi, true);
1204 return name;
1207 /* Records an occurrence at statement USE_STMT in the vector STMTS
1208 if it is dominated by *TOP_BB, dominates it, or *TOP_BB is not yet
1209 initialized. Returns true if the occurrence was pushed on
1210 the vector. Adjusts *TOP_BB to be the basic block dominating all
1211 statements in the vector. */
1213 static bool
1214 maybe_record_sincos (vec<gimple *> *stmts,
1215 basic_block *top_bb, gimple *use_stmt)
1217 basic_block use_bb = gimple_bb (use_stmt);
1218 if (*top_bb
1219 && (*top_bb == use_bb
1220 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1221 stmts->safe_push (use_stmt);
1222 else if (!*top_bb
1223 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1225 stmts->safe_push (use_stmt);
1226 *top_bb = use_bb;
1228 else
1229 return false;
1231 return true;
1234 /* Look for sin, cos and cexpi calls with the same argument NAME and
1235 create a single call to cexpi CSEing the result in this case.
1236 We first walk over all immediate uses of the argument collecting
1237 statements that we can CSE in a vector and in a second pass replace
1238 the statement rhs with a REALPART or IMAGPART expression on the
1239 result of the cexpi call we insert before the use statement that
1240 dominates all other candidates. */
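/* For example (hypothetical SSA names):
     c_2 = __builtin_cos (x_1);
     s_3 = __builtin_sin (x_1);
   becomes
     cs_4 = __builtin_cexpi (x_1);
     c_2 = REALPART_EXPR <cs_4>;
     s_3 = IMAGPART_EXPR <cs_4>;
   with the cexpi call inserted in the block that dominates both of the
   original calls.  */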
1242 static bool
1243 execute_cse_sincos_1 (tree name)
1245 gimple_stmt_iterator gsi;
1246 imm_use_iterator use_iter;
1247 tree fndecl, res, type = NULL_TREE;
1248 gimple *def_stmt, *use_stmt, *stmt;
1249 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1250 auto_vec<gimple *> stmts;
1251 basic_block top_bb = NULL;
1252 int i;
1253 bool cfg_changed = false;
1255 name = execute_cse_conv_1 (name);
1257 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1259 if (gimple_code (use_stmt) != GIMPLE_CALL
1260 || !gimple_call_lhs (use_stmt))
1261 continue;
1263 switch (gimple_call_combined_fn (use_stmt))
1265 CASE_CFN_COS:
1266 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1267 break;
1269 CASE_CFN_SIN:
1270 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1271 break;
1273 CASE_CFN_CEXPI:
1274 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1275 break;
1277 default:;
1278 continue;
1281 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1282 if (!type)
1284 type = t;
1285 t = TREE_TYPE (name);
1287 /* This checks that NAME has the right type in the first round,
1288 and, in subsequent rounds, that the built_in type is the same
1289 type, or a compatible type. */
1290 if (type != t && !types_compatible_p (type, t))
1291 return false;
1293 if (seen_cos + seen_sin + seen_cexpi <= 1)
1294 return false;
1296 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1297 the name def statement. */
1298 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1299 if (!fndecl)
1300 return false;
1301 stmt = gimple_build_call (fndecl, 1, name);
1302 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1303 gimple_call_set_lhs (stmt, res);
1305 def_stmt = SSA_NAME_DEF_STMT (name);
1306 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1307 && gimple_code (def_stmt) != GIMPLE_PHI
1308 && gimple_bb (def_stmt) == top_bb)
1310 gsi = gsi_for_stmt (def_stmt);
1311 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1313 else
1315 gsi = gsi_after_labels (top_bb);
1316 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1318 sincos_stats.inserted++;
1320 /* And adjust the recorded old call sites. */
1321 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1323 tree rhs = NULL;
1325 switch (gimple_call_combined_fn (use_stmt))
1327 CASE_CFN_COS:
1328 rhs = fold_build1 (REALPART_EXPR, type, res);
1329 break;
1331 CASE_CFN_SIN:
1332 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1333 break;
1335 CASE_CFN_CEXPI:
1336 rhs = res;
1337 break;
1339 default:;
1340 gcc_unreachable ();
1343 /* Replace call with a copy. */
1344 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1346 gsi = gsi_for_stmt (use_stmt);
1347 gsi_replace (&gsi, stmt, true);
1348 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1349 cfg_changed = true;
1352 return cfg_changed;
1355 /* To evaluate powi(x,n), the floating point value x raised to the
1356 constant integer exponent n, we use a hybrid algorithm that
1357 combines the "window method" with look-up tables. For an
1358 introduction to exponentiation algorithms and "addition chains",
1359 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1360 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1361 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1362 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1364 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1365 multiplications to inline before calling the system library's pow
1366 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1367 so this default never requires calling pow, powf or powl. */
1369 #ifndef POWI_MAX_MULTS
1370 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1371 #endif
1373 /* The size of the "optimal power tree" lookup table. All
1374 exponents less than this value are simply looked up in the
1375 powi_table below. This threshold is also used to size the
1376 cache of pseudo registers that hold intermediate results. */
1377 #define POWI_TABLE_SIZE 256
1379 /* The size, in bits of the window, used in the "window method"
1380 exponentiation algorithm. This is equivalent to a radix of
1381 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1382 #define POWI_WINDOW_SIZE 3
1384 /* The following table is an efficient representation of an
1385 "optimal power tree". For each value, i, the corresponding
1386 value, j, in the table states that an optimal evaluation
1387 sequence for calculating pow(x,i) can be found by evaluating
1388 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1389 100 integers is given in Knuth's "Seminumerical algorithms". */
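/* Reading the table below, for example: powi_table[15] == 9, so x**15
   is evaluated as x**9 * x**6; powi_table[9] == 6 and powi_table[6] == 3
   give x**9 == x**6 * x**3 and x**6 == x**3 * x**3; with
   x**3 == x**2 * x and x**2 == x * x the whole chain takes only five
   multiplications.  */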
1391 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1393 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1394 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1395 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1396 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1397 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1398 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1399 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1400 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1401 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1402 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1403 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1404 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1405 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1406 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1407 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1408 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1409 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1410 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1411 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1412 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1413 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1414 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1415 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1416 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1417 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1418 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1419 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1420 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1421 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1422 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1423 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1424 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1428 /* Return the number of multiplications required to calculate
1429 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1430 subroutine of powi_cost. CACHE is an array indicating
1431 which exponents have already been calculated. */
1433 static int
1434 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1436 /* If we've already calculated this exponent, then this evaluation
1437 doesn't require any additional multiplications. */
1438 if (cache[n])
1439 return 0;
1441 cache[n] = true;
1442 return powi_lookup_cost (n - powi_table[n], cache)
1443 + powi_lookup_cost (powi_table[n], cache) + 1;
1446 /* Return the number of multiplications required to calculate
1447 powi(x,n) for an arbitrary x, given the exponent N. This
1448 function needs to be kept in sync with powi_as_mults below. */
1450 static int
1451 powi_cost (HOST_WIDE_INT n)
1453 bool cache[POWI_TABLE_SIZE];
1454 unsigned HOST_WIDE_INT digit;
1455 unsigned HOST_WIDE_INT val;
1456 int result;
1458 if (n == 0)
1459 return 0;
1461 /* Ignore the reciprocal when calculating the cost. */
1462 val = (n < 0) ? -n : n;
1464 /* Initialize the exponent cache. */
1465 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1466 cache[1] = true;
1468 result = 0;
1470 while (val >= POWI_TABLE_SIZE)
1472 if (val & 1)
1474 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1475 result += powi_lookup_cost (digit, cache)
1476 + POWI_WINDOW_SIZE + 1;
1477 val >>= POWI_WINDOW_SIZE;
1479 else
1481 val >>= 1;
1482 result++;
1486 return result + powi_lookup_cost (val, cache);
1489 /* Recursive subroutine of powi_as_mults. This function takes the
1490 array, CACHE, of already calculated exponents and an exponent N and
1491 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1493 static tree
1494 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1495 HOST_WIDE_INT n, tree *cache)
1497 tree op0, op1, ssa_target;
1498 unsigned HOST_WIDE_INT digit;
1499 gassign *mult_stmt;
1501 if (n < POWI_TABLE_SIZE && cache[n])
1502 return cache[n];
1504 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1506 if (n < POWI_TABLE_SIZE)
1508 cache[n] = ssa_target;
1509 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1510 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1512 else if (n & 1)
1514 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1515 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1516 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1518 else
1520 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1521 op1 = op0;
1524 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1525 gimple_set_location (mult_stmt, loc);
1526 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1528 return ssa_target;
1531 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1532 This function needs to be kept in sync with powi_cost above. */
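/* For instance, with ARG0 = x and N = -3 this emits roughly
   (hypothetical temporary names)
     powmult_1 = x * x;
     powmult_2 = x * powmult_1;
     powmult_3 = 1.0 / powmult_2;
   and returns powmult_3: two multiplications, matching powi_cost (-3),
   plus the final reciprocal which powi_cost deliberately ignores.  */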
1534 tree
1535 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1536 tree arg0, HOST_WIDE_INT n)
1538 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1539 gassign *div_stmt;
1540 tree target;
1542 if (n == 0)
1543 return build_one_cst (type);
1545 memset (cache, 0, sizeof (cache));
1546 cache[1] = arg0;
1548 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1549 if (n >= 0)
1550 return result;
1552 /* If the original exponent was negative, reciprocate the result. */
1553 target = make_temp_ssa_name (type, NULL, "powmult");
1554 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1555 build_real (type, dconst1), result);
1556 gimple_set_location (div_stmt, loc);
1557 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1559 return target;
1562 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1563 location info LOC. If the arguments are appropriate, create an
1564 equivalent sequence of statements prior to GSI using an optimal
1565 number of multiplications, and return an expression holding the
1566 result. */
1568 static tree
1569 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1570 tree arg0, HOST_WIDE_INT n)
1572 /* Avoid largest negative number. */
1573 if (n != -n
1574 && ((n >= -1 && n <= 2)
1575 || (optimize_function_for_speed_p (cfun)
1576 && powi_cost (n) <= POWI_MAX_MULTS)))
1577 return powi_as_mults (gsi, loc, arg0, n);
1579 return NULL_TREE;
1582 /* Build a gimple call statement that calls FN with argument ARG.
1583 Set the lhs of the call statement to a fresh SSA name. Insert the
1584 statement prior to GSI's current position, and return the fresh
1585 SSA name. */
1587 static tree
1588 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1589 tree fn, tree arg)
1591 gcall *call_stmt;
1592 tree ssa_target;
1594 call_stmt = gimple_build_call (fn, 1, arg);
1595 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1596 gimple_set_lhs (call_stmt, ssa_target);
1597 gimple_set_location (call_stmt, loc);
1598 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1600 return ssa_target;
1603 /* Build a gimple binary operation with the given CODE and arguments
1604 ARG0, ARG1, assigning the result to a new SSA name built from
1605 NAME. Insert the statement prior to GSI's current position, and
1606 return the fresh SSA name. */
1608 static tree
1609 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1610 const char *name, enum tree_code code,
1611 tree arg0, tree arg1)
1613 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1614 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1615 gimple_set_location (stmt, loc);
1616 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1617 return result;
1620 /* Build a gimple reference operation with the given CODE and argument
1621 ARG, assigning the result to a new SSA name of TYPE with NAME.
1622 Insert the statement prior to GSI's current position, and return
1623 the fresh SSA name. */
1625 static inline tree
1626 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1627 const char *name, enum tree_code code, tree arg0)
1629 tree result = make_temp_ssa_name (type, NULL, name);
1630 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1631 gimple_set_location (stmt, loc);
1632 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1633 return result;
1636 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1637 prior to GSI's current position, and return the fresh SSA name. */
1639 static tree
1640 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1641 tree type, tree val)
1643 tree result = make_ssa_name (type);
1644 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1645 gimple_set_location (stmt, loc);
1646 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1647 return result;
1650 struct pow_synth_sqrt_info
1652 bool *factors;
1653 unsigned int deepest;
1654 unsigned int num_mults;
1657 /* Return true iff the real value C can be represented as a
1658 sum of powers of 0.5 up to N. That is:
1659 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1660 Record in INFO the various parameters of the synthesis algorithm such
1661 as the factors a[i], the maximum 0.5 power and the number of
1662 multiplications that will be required. */
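/* Worked example: for C == 0.625 and N >= 3 the loop below finds
   0.625 == 0.5 + 0.125 == 0.5**1 + 0.5**3, so INFO->factors becomes
   { true, false, true }, INFO->deepest == 3 and INFO->num_mults == 1,
   i.e. one multiply to combine sqrt (x) with sqrt (sqrt (sqrt (x))).  */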
1664 bool
1665 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1666 struct pow_synth_sqrt_info *info)
1668 REAL_VALUE_TYPE factor = dconsthalf;
1669 REAL_VALUE_TYPE remainder = c;
1671 info->deepest = 0;
1672 info->num_mults = 0;
1673 memset (info->factors, 0, n * sizeof (bool));
1675 for (unsigned i = 0; i < n; i++)
1677 REAL_VALUE_TYPE res;
1679 /* If something inexact happened bail out now. */
1680 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1681 return false;
1683 /* We have hit zero. The number is representable as a sum
1684 of powers of 0.5. */
1685 if (real_equal (&res, &dconst0))
1687 info->factors[i] = true;
1688 info->deepest = i + 1;
1689 return true;
1691 else if (!REAL_VALUE_NEGATIVE (res))
1693 remainder = res;
1694 info->factors[i] = true;
1695 info->num_mults++;
1697 else
1698 info->factors[i] = false;
1700 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1702 return false;
1705 /* Return the tree corresponding to FN being applied
1706 to ARG N times at GSI and LOC.
1707 Look up previous results from CACHE if need be.
1708 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1710 static tree
1711 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1712 tree fn, location_t loc, tree *cache)
1714 tree res = cache[n];
1715 if (!res)
1717 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1718 res = build_and_insert_call (gsi, loc, fn, prev);
1719 cache[n] = res;
1722 return res;
1725 /* Print to STREAM the repeated application of function FNAME to ARG
1726 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1727 "foo (foo (x))". */
1729 static void
1730 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1731 unsigned int n)
1733 if (n == 0)
1734 fprintf (stream, "%s", arg);
1735 else
1737 fprintf (stream, "%s (", fname);
1738 print_nested_fn (stream, fname, arg, n - 1);
1739 fprintf (stream, ")");
1743 /* Print to STREAM the fractional sequence of sqrt chains
1744 applied to ARG, described by INFO. Used for the dump file. */
1746 static void
1747 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1748 struct pow_synth_sqrt_info *info)
1750 for (unsigned int i = 0; i < info->deepest; i++)
1752 bool is_set = info->factors[i];
1753 if (is_set)
1755 print_nested_fn (stream, "sqrt", arg, i + 1);
1756 if (i != info->deepest - 1)
1757 fprintf (stream, " * ");
1762 /* Print to STREAM a representation of raising ARG to an integer
1763 power N. Used for the dump file. */
1765 static void
1766 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1768 if (n > 1)
1769 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1770 else if (n == 1)
1771 fprintf (stream, "%s", arg);
1774 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1775 square roots. Place at GSI and LOC. Limit the maximum depth
1776 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1777 result of the expanded sequence or NULL_TREE if the expansion failed.
1779 This routine assumes that ARG1 is a real number with a fractional part
1780 (the integer exponent case will have been handled earlier in
1781 gimple_expand_builtin_pow).
1783 For ARG1 > 0.0:
1784 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1785 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1786 FRAC_PART == ARG1 - WHOLE_PART:
1787 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1788 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1789 if it can be expressed as such, that is if FRAC_PART satisfies:
1790 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1791 where integer a[i] is either 0 or 1.
1793 Example:
1794 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1795 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1797 For ARG1 < 0.0 there are two approaches:
1798 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1799 is calculated as above.
1801 Example:
1802 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1803 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1805 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1806 FRAC_PART := ARG1 - WHOLE_PART
1807 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1808 Example:
1809 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1810 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1812 For ARG1 < 0.0 we choose between (A) and (B) depending on
1813 how many multiplications we'd have to do.
1814 So, for the example in (B): POW (x, -5.875), if we were to
1815 follow algorithm (A) we would produce:
1816 1.0 / (POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))))
1817 which contains more multiplications than approach (B).
1819 Hopefully, this approach will eliminate potentially expensive POW library
1820 calls when unsafe floating point math is enabled and allow the compiler to
1821 further optimise the multiplies, square roots and divides produced by this
1822 function. */
1824 static tree
1825 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1826 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1828 tree type = TREE_TYPE (arg0);
1829 machine_mode mode = TYPE_MODE (type);
1830 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1831 bool one_over = true;
1833 if (!sqrtfn)
1834 return NULL_TREE;
1836 if (TREE_CODE (arg1) != REAL_CST)
1837 return NULL_TREE;
1839 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1841 gcc_assert (max_depth > 0);
1842 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1844 struct pow_synth_sqrt_info synth_info;
1845 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1846 synth_info.deepest = 0;
1847 synth_info.num_mults = 0;
1849 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1850 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1852 /* The whole and fractional parts of exp. */
1853 REAL_VALUE_TYPE whole_part;
1854 REAL_VALUE_TYPE frac_part;
1856 real_floor (&whole_part, mode, &exp);
1857 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1860 REAL_VALUE_TYPE ceil_whole = dconst0;
1861 REAL_VALUE_TYPE ceil_fract = dconst0;
1863 if (neg_exp)
1865 real_ceil (&ceil_whole, mode, &exp);
1866 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1869 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1870 return NULL_TREE;
1872 /* Check whether it's more profitable to not use 1.0 / ... */
1873 if (neg_exp)
1875 struct pow_synth_sqrt_info alt_synth_info;
1876 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1877 alt_synth_info.deepest = 0;
1878 alt_synth_info.num_mults = 0;
1880 if (representable_as_half_series_p (ceil_fract, max_depth,
1881 &alt_synth_info)
1882 && alt_synth_info.deepest <= synth_info.deepest
1883 && alt_synth_info.num_mults < synth_info.num_mults)
1885 whole_part = ceil_whole;
1886 frac_part = ceil_fract;
1887 synth_info.deepest = alt_synth_info.deepest;
1888 synth_info.num_mults = alt_synth_info.num_mults;
1889 memcpy (synth_info.factors, alt_synth_info.factors,
1890 (max_depth + 1) * sizeof (bool));
1891 one_over = false;
1895 HOST_WIDE_INT n = real_to_integer (&whole_part);
1896 REAL_VALUE_TYPE cint;
1897 real_from_integer (&cint, VOIDmode, n, SIGNED);
1899 if (!real_identical (&whole_part, &cint))
1900 return NULL_TREE;
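/* Give up if open-coding the integer part plus the sqrt chain would exceed
   the multiplication budget used for powi expansion.  */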
1902 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1903 return NULL_TREE;
1905 memset (cache, 0, (max_depth + 1) * sizeof (tree));
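/* x**0 is 1.0 and x**1 is x itself, so only N > 1 needs a powi expansion.  */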
1907 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1909 /* Calculate the integer part of the exponent. */
1910 if (n > 1)
1912 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1913 if (!integer_res)
1914 return NULL_TREE;
1917 if (dump_file)
1919 char string[64];
1921 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1922 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1924 if (neg_exp)
1926 if (one_over)
1928 fprintf (dump_file, "1.0 / (");
1929 dump_integer_part (dump_file, "x", n);
1930 if (n > 0)
1931 fprintf (dump_file, " * ");
1932 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1933 fprintf (dump_file, ")");
1935 else
1937 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1938 fprintf (dump_file, " / (");
1939 dump_integer_part (dump_file, "x", n);
1940 fprintf (dump_file, ")");
1943 else
1945 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1946 if (n > 0)
1947 fprintf (dump_file, " * ");
1948 dump_integer_part (dump_file, "x", n);
1951 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1955 tree fract_res = NULL_TREE;
1956 cache[0] = arg0;
1958 /* Calculate the fractional part of the exponent. */
1959 for (unsigned i = 0; i < synth_info.deepest; i++)
1961 if (synth_info.factors[i])
1963 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1965 if (!fract_res)
1966 fract_res = sqrt_chain;
1968 else
1969 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1970 fract_res, sqrt_chain);
1974 tree res = NULL_TREE;
1976 if (neg_exp)
1978 if (one_over)
1980 if (n > 0)
1981 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1982 fract_res, integer_res);
1983 else
1984 res = fract_res;
1986 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1987 build_real (type, dconst1), res);
1989 else
1991 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1992 fract_res, integer_res);
1995 else
1996 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1997 fract_res, integer_res);
1998 return res;
2001 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2002 with location info LOC. If possible, create an equivalent and
2003 less expensive sequence of statements prior to GSI, and return an
2004 expression holding the result. */
2006 static tree
2007 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2008 tree arg0, tree arg1)
2010 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2011 REAL_VALUE_TYPE c2, dconst3;
2012 HOST_WIDE_INT n;
2013 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2014 machine_mode mode;
2015 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2016 bool hw_sqrt_exists, c_is_int, c2_is_int;
2018 dconst1_4 = dconst1;
2019 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
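/* dconst1_4 is now 0.25: start from 1.0 and decrement the binary exponent
   twice.  */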
2021 /* If the exponent isn't a constant, there's nothing of interest
2022 to be done. */
2023 if (TREE_CODE (arg1) != REAL_CST)
2024 return NULL_TREE;
2026 /* Don't perform the operation if flag_signaling_nans is on
2027 and the operand is a signaling NaN. */
2028 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2029 && ((TREE_CODE (arg0) == REAL_CST
2030 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2031 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2032 return NULL_TREE;
2034 /* If the exponent is equivalent to an integer, expand to an optimal
2035 multiplication sequence when profitable. */
2036 c = TREE_REAL_CST (arg1);
2037 n = real_to_integer (&c);
2038 real_from_integer (&cint, VOIDmode, n, SIGNED);
2039 c_is_int = real_identical (&c, &cint);
2041 if (c_is_int
2042 && ((n >= -1 && n <= 2)
2043 || (flag_unsafe_math_optimizations
2044 && speed_p
2045 && powi_cost (n) <= POWI_MAX_MULTS)))
2046 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2048 /* Attempt various optimizations using sqrt and cbrt. */
2049 type = TREE_TYPE (arg0);
2050 mode = TYPE_MODE (type);
2051 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2053 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2054 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2055 sqrt(-0) = -0. */
2056 if (sqrtfn
2057 && real_equal (&c, &dconsthalf)
2058 && !HONOR_SIGNED_ZEROS (mode))
2059 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2061 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2063 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2064 optimizations since 1./3. is not exactly representable. If x
2065 is negative and finite, the correct value of pow(x,1./3.) is
2066 a NaN with the "invalid" exception raised, because the value
2067 of 1./3. actually has an even denominator. The correct value
2068 of cbrt(x) is a negative real value. */
2069 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2070 dconst1_3 = real_value_truncate (mode, dconst_third ());
2072 if (flag_unsafe_math_optimizations
2073 && cbrtfn
2074 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2075 && real_equal (&c, &dconst1_3))
2076 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2078 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2079 if we don't have a hardware sqrt insn. */
2080 dconst1_6 = dconst1_3;
2081 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
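/* dconst1_6 is now 1./6.: halve 1./3. by decrementing its binary exponent.  */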
2083 if (flag_unsafe_math_optimizations
2084 && sqrtfn
2085 && cbrtfn
2086 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2087 && speed_p
2088 && hw_sqrt_exists
2089 && real_equal (&c, &dconst1_6))
2091 /* sqrt(x) */
2092 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2094 /* cbrt(sqrt(x)) */
2095 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2099 /* Attempt to expand the POW as a product of square root chains.
2100 Expand the 0.25 case even when optimising for size. */
2101 if (flag_unsafe_math_optimizations
2102 && sqrtfn
2103 && hw_sqrt_exists
2104 && (speed_p || real_equal (&c, &dconst1_4))
2105 && !HONOR_SIGNED_ZEROS (mode))
2107 unsigned int max_depth = speed_p
2108 ? param_max_pow_sqrt_depth
2109 : 2;
2111 tree expand_with_sqrts
2112 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2114 if (expand_with_sqrts)
2115 return expand_with_sqrts;
2118 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2119 n = real_to_integer (&c2);
2120 real_from_integer (&cint, VOIDmode, n, SIGNED);
2121 c2_is_int = real_identical (&c2, &cint);
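/* c2 holds 2*c here; c2_is_int therefore records whether the exponent is an
   integral multiple of 0.5 (e.g. 2.5), a case excluded from the cbrt-based
   expansion below.  */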
2123 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2125 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2126 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2128 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2129 different from pow(x, 1./3.) due to rounding and behavior with
2130 negative x, we need to constrain this transformation to unsafe
2131 math and positive x or finite math. */
2132 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2133 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2134 real_round (&c2, mode, &c2);
2135 n = real_to_integer (&c2);
2136 real_from_integer (&cint, VOIDmode, n, SIGNED);
2137 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2138 real_convert (&c2, mode, &c2);
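/* At this point n == round (3*c) and c2 == n/3 converted back to MODE, so
   real_identical (&c2, &c) below checks that the exponent is exactly a third
   of an integer in this mode.  */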
2140 if (flag_unsafe_math_optimizations
2141 && cbrtfn
2142 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2143 && real_identical (&c2, &c)
2144 && !c2_is_int
2145 && optimize_function_for_speed_p (cfun)
2146 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2148 tree powi_x_ndiv3 = NULL_TREE;
2150 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2151 possible or profitable, give up. Skip the degenerate case when
2152 abs(n) < 3, where the result is always 1. */
2153 if (absu_hwi (n) >= 3)
2155 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2156 abs_hwi (n / 3));
2157 if (!powi_x_ndiv3)
2158 return NULL_TREE;
2161 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2162 as that creates an unnecessary variable. Instead, just produce
2163 either cbrt(x) or cbrt(x) * cbrt(x). */
2164 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2166 if (absu_hwi (n) % 3 == 1)
2167 powi_cbrt_x = cbrt_x;
2168 else
2169 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2170 cbrt_x, cbrt_x);
2172 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2173 if (absu_hwi (n) < 3)
2174 result = powi_cbrt_x;
2175 else
2176 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2177 powi_x_ndiv3, powi_cbrt_x);
2179 /* If n is negative, reciprocate the result. */
2180 if (n < 0)
2181 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2182 build_real (type, dconst1), result);
2184 return result;
2187 /* No optimizations succeeded. */
2188 return NULL_TREE;
2191 /* ARG is the argument to a cabs builtin call in GSI with location info
2192 LOC. Create a sequence of statements prior to GSI that calculates
2193 sqrt(R*R + I*I), where R and I are the real and imaginary components
2194 of ARG, respectively. Return an expression holding the result. */
2196 static tree
2197 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2199 tree real_part, imag_part, addend1, addend2, sum, result;
2200 tree type = TREE_TYPE (TREE_TYPE (arg));
2201 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2202 machine_mode mode = TYPE_MODE (type);
2204 if (!flag_unsafe_math_optimizations
2205 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2206 || !sqrtfn
2207 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2208 return NULL_TREE;
2210 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2211 REALPART_EXPR, arg);
2212 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2213 real_part, real_part);
2214 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2215 IMAGPART_EXPR, arg);
2216 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2217 imag_part, imag_part);
2218 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2219 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2221 return result;
2224 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2225 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2226 an optimal number of multiplies, when n is a constant. */
2228 namespace {
2230 const pass_data pass_data_cse_sincos =
2232 GIMPLE_PASS, /* type */
2233 "sincos", /* name */
2234 OPTGROUP_NONE, /* optinfo_flags */
2235 TV_TREE_SINCOS, /* tv_id */
2236 PROP_ssa, /* properties_required */
2237 PROP_gimple_opt_math, /* properties_provided */
2238 0, /* properties_destroyed */
2239 0, /* todo_flags_start */
2240 TODO_update_ssa, /* todo_flags_finish */
2243 class pass_cse_sincos : public gimple_opt_pass
2245 public:
2246 pass_cse_sincos (gcc::context *ctxt)
2247 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2250 /* opt_pass methods: */
2251 virtual bool gate (function *)
2253 /* We no longer require either sincos or cexp, since powi expansion
2254 piggybacks on this pass. */
2255 return optimize;
2258 virtual unsigned int execute (function *);
2260 }; // class pass_cse_sincos
2262 unsigned int
2263 pass_cse_sincos::execute (function *fun)
2265 basic_block bb;
2266 bool cfg_changed = false;
2268 calculate_dominance_info (CDI_DOMINATORS);
2269 memset (&sincos_stats, 0, sizeof (sincos_stats));
2271 FOR_EACH_BB_FN (bb, fun)
2273 gimple_stmt_iterator gsi;
2274 bool cleanup_eh = false;
2276 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2278 gimple *stmt = gsi_stmt (gsi);
2280 /* Only the last stmt in a bb could throw, no need to call
2281 gimple_purge_dead_eh_edges if we change something in the middle
2282 of a basic block. */
2283 cleanup_eh = false;
2285 if (is_gimple_call (stmt)
2286 && gimple_call_lhs (stmt))
2288 tree arg, arg0, arg1, result;
2289 HOST_WIDE_INT n;
2290 location_t loc;
2292 switch (gimple_call_combined_fn (stmt))
2294 CASE_CFN_COS:
2295 CASE_CFN_SIN:
2296 CASE_CFN_CEXPI:
2297 arg = gimple_call_arg (stmt, 0);
2298 /* Make sure we have either sincos or cexp. */
2299 if (!targetm.libc_has_function (function_c99_math_complex,
2300 TREE_TYPE (arg))
2301 && !targetm.libc_has_function (function_sincos,
2302 TREE_TYPE (arg)))
2303 break;
2305 if (TREE_CODE (arg) == SSA_NAME)
2306 cfg_changed |= execute_cse_sincos_1 (arg);
2307 break;
2309 CASE_CFN_POW:
2310 arg0 = gimple_call_arg (stmt, 0);
2311 arg1 = gimple_call_arg (stmt, 1);
2313 loc = gimple_location (stmt);
2314 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2316 if (result)
2318 tree lhs = gimple_get_lhs (stmt);
2319 gassign *new_stmt = gimple_build_assign (lhs, result);
2320 gimple_set_location (new_stmt, loc);
2321 unlink_stmt_vdef (stmt);
2322 gsi_replace (&gsi, new_stmt, true);
2323 cleanup_eh = true;
2324 if (gimple_vdef (stmt))
2325 release_ssa_name (gimple_vdef (stmt));
2327 break;
2329 CASE_CFN_POWI:
2330 arg0 = gimple_call_arg (stmt, 0);
2331 arg1 = gimple_call_arg (stmt, 1);
2332 loc = gimple_location (stmt);
2334 if (real_minus_onep (arg0))
2336 tree t0, t1, cond, one, minus_one;
2337 gassign *stmt;
2339 t0 = TREE_TYPE (arg0);
2340 t1 = TREE_TYPE (arg1);
2341 one = build_real (t0, dconst1);
2342 minus_one = build_real (t0, dconstm1);
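/* powi (-1, n) is 1.0 for even N and -1.0 for odd N, so test the low bit
   of the exponent and select between the two constants.  */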
2344 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2345 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2346 arg1, build_int_cst (t1, 1));
2347 gimple_set_location (stmt, loc);
2348 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2350 result = make_temp_ssa_name (t0, NULL, "powi");
2351 stmt = gimple_build_assign (result, COND_EXPR, cond,
2352 minus_one, one);
2353 gimple_set_location (stmt, loc);
2354 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2356 else
2358 if (!tree_fits_shwi_p (arg1))
2359 break;
2361 n = tree_to_shwi (arg1);
2362 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2365 if (result)
2367 tree lhs = gimple_get_lhs (stmt);
2368 gassign *new_stmt = gimple_build_assign (lhs, result);
2369 gimple_set_location (new_stmt, loc);
2370 unlink_stmt_vdef (stmt);
2371 gsi_replace (&gsi, new_stmt, true);
2372 cleanup_eh = true;
2373 if (gimple_vdef (stmt))
2374 release_ssa_name (gimple_vdef (stmt));
2376 break;
2378 CASE_CFN_CABS:
2379 arg0 = gimple_call_arg (stmt, 0);
2380 loc = gimple_location (stmt);
2381 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2383 if (result)
2385 tree lhs = gimple_get_lhs (stmt);
2386 gassign *new_stmt = gimple_build_assign (lhs, result);
2387 gimple_set_location (new_stmt, loc);
2388 unlink_stmt_vdef (stmt);
2389 gsi_replace (&gsi, new_stmt, true);
2390 cleanup_eh = true;
2391 if (gimple_vdef (stmt))
2392 release_ssa_name (gimple_vdef (stmt));
2394 break;
2396 default:;
2400 if (cleanup_eh)
2401 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2404 statistics_counter_event (fun, "sincos statements inserted",
2405 sincos_stats.inserted);
2406 statistics_counter_event (fun, "conv statements removed",
2407 sincos_stats.conv_removed);
2409 return cfg_changed ? TODO_cleanup_cfg : 0;
2412 } // anon namespace
2414 gimple_opt_pass *
2415 make_pass_cse_sincos (gcc::context *ctxt)
2417 return new pass_cse_sincos (ctxt);
2420 /* Return true if stmt is a type conversion operation that can be stripped
2421 when used in a widening multiply operation. */
2422 static bool
2423 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2425 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2427 if (TREE_CODE (result_type) == INTEGER_TYPE)
2429 tree op_type;
2430 tree inner_op_type;
2432 if (!CONVERT_EXPR_CODE_P (rhs_code))
2433 return false;
2435 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2437 /* If the type of OP has the same precision as the result, then
2438 we can strip this conversion. The multiply operation will be
2439 selected to create the correct extension as a by-product. */
2440 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2441 return true;
2443 /* We can also strip a conversion if it preserves the signed-ness of
2444 the operation and doesn't narrow the range. */
2445 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2447 /* If the inner-most type is unsigned, then we can strip any
2448 intermediate widening operation. If it's signed, then the
2449 intermediate widening operation must also be signed. */
2450 if ((TYPE_UNSIGNED (inner_op_type)
2451 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2452 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2453 return true;
2455 return false;
2458 return rhs_code == FIXED_CONVERT_EXPR;
2461 /* Return true if RHS is a suitable operand for a widening multiplication,
2462 assuming a target type of TYPE.
2463 There are two cases:
2465 - RHS makes some value at least twice as wide. Store that value
2466 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2468 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2469 but leave *TYPE_OUT untouched. */
2471 static bool
2472 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2473 tree *new_rhs_out)
2475 gimple *stmt;
2476 tree type1, rhs1;
2478 if (TREE_CODE (rhs) == SSA_NAME)
2480 stmt = SSA_NAME_DEF_STMT (rhs);
2481 if (is_gimple_assign (stmt))
2483 if (! widening_mult_conversion_strippable_p (type, stmt))
2484 rhs1 = rhs;
2485 else
2487 rhs1 = gimple_assign_rhs1 (stmt);
2489 if (TREE_CODE (rhs1) == INTEGER_CST)
2491 *new_rhs_out = rhs1;
2492 *type_out = NULL;
2493 return true;
2497 else
2498 rhs1 = rhs;
2500 type1 = TREE_TYPE (rhs1);
2502 if (TREE_CODE (type1) != TREE_CODE (type)
2503 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2504 return false;
2506 *new_rhs_out = rhs1;
2507 *type_out = type1;
2508 return true;
2511 if (TREE_CODE (rhs) == INTEGER_CST)
2513 *new_rhs_out = rhs;
2514 *type_out = NULL;
2515 return true;
2518 return false;
2521 /* Return true if STMT performs a widening multiplication, assuming the
2522 output type is TYPE. If so, store the unwidened types of the operands
2523 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2524 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2525 and *TYPE2_OUT would give the operands of the multiplication. */
2527 static bool
2528 is_widening_mult_p (gimple *stmt,
2529 tree *type1_out, tree *rhs1_out,
2530 tree *type2_out, tree *rhs2_out)
2532 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2534 if (TREE_CODE (type) == INTEGER_TYPE)
2536 if (TYPE_OVERFLOW_TRAPS (type))
2537 return false;
2539 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2540 return false;
2542 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2543 rhs1_out))
2544 return false;
2546 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2547 rhs2_out))
2548 return false;
2550 if (*type1_out == NULL)
2552 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2553 return false;
2554 *type1_out = *type2_out;
2557 if (*type2_out == NULL)
2559 if (!int_fits_type_p (*rhs2_out, *type1_out))
2560 return false;
2561 *type2_out = *type1_out;
2564 /* Ensure that the larger of the two operands comes first. */
2565 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2567 std::swap (*type1_out, *type2_out);
2568 std::swap (*rhs1_out, *rhs2_out);
2571 return true;
2574 /* Check to see if the CALL statement is an invocation of copysign
2575 with 1.0 as its first argument. */
2576 static bool
2577 is_copysign_call_with_1 (gimple *call)
2579 gcall *c = dyn_cast <gcall *> (call);
2580 if (! c)
2581 return false;
2583 enum combined_fn code = gimple_call_combined_fn (c);
2585 if (code == CFN_LAST)
2586 return false;
2588 if (builtin_fn_p (code))
2590 switch (as_builtin_fn (code))
2592 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2593 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2594 return real_onep (gimple_call_arg (c, 0));
2595 default:
2596 return false;
2600 if (internal_fn_p (code))
2602 switch (as_internal_fn (code))
2604 case IFN_COPYSIGN:
2605 return real_onep (gimple_call_arg (c, 0));
2606 default:
2607 return false;
2611 return false;
2614 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2615 This only happens when the xorsign optab is defined. If the
2616 pattern is not an xorsign pattern or if expansion fails, FALSE is
2617 returned, otherwise TRUE is returned. */
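/* copysign (1, y) is +-1.0 carrying only the sign of y, so x * copysign (1, y)
   just gives x with its sign xor-ed with the sign of y; xorsign computes that
   directly on the sign bit.  */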
2618 static bool
2619 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2621 tree treeop0, treeop1, lhs, type;
2622 location_t loc = gimple_location (stmt);
2623 lhs = gimple_assign_lhs (stmt);
2624 treeop0 = gimple_assign_rhs1 (stmt);
2625 treeop1 = gimple_assign_rhs2 (stmt);
2626 type = TREE_TYPE (lhs);
2627 machine_mode mode = TYPE_MODE (type);
2629 if (HONOR_SNANS (type))
2630 return false;
2632 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2634 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2635 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2637 call0 = SSA_NAME_DEF_STMT (treeop1);
2638 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2639 return false;
2641 treeop1 = treeop0;
2643 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2644 return false;
2646 gcall *c = as_a<gcall*> (call0);
2647 treeop0 = gimple_call_arg (c, 1);
2649 gcall *call_stmt
2650 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2651 gimple_set_lhs (call_stmt, lhs);
2652 gimple_set_location (call_stmt, loc);
2653 gsi_replace (gsi, call_stmt, true);
2654 return true;
2657 return false;
2660 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2661 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2662 value is true iff we converted the statement. */
2664 static bool
2665 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2667 tree lhs, rhs1, rhs2, type, type1, type2;
2668 enum insn_code handler;
2669 scalar_int_mode to_mode, from_mode, actual_mode;
2670 optab op;
2671 int actual_precision;
2672 location_t loc = gimple_location (stmt);
2673 bool from_unsigned1, from_unsigned2;
2675 lhs = gimple_assign_lhs (stmt);
2676 type = TREE_TYPE (lhs);
2677 if (TREE_CODE (type) != INTEGER_TYPE)
2678 return false;
2680 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2681 return false;
2683 to_mode = SCALAR_INT_TYPE_MODE (type);
2684 from_mode = SCALAR_INT_TYPE_MODE (type1);
2685 if (to_mode == from_mode)
2686 return false;
2688 from_unsigned1 = TYPE_UNSIGNED (type1);
2689 from_unsigned2 = TYPE_UNSIGNED (type2);
2691 if (from_unsigned1 && from_unsigned2)
2692 op = umul_widen_optab;
2693 else if (!from_unsigned1 && !from_unsigned2)
2694 op = smul_widen_optab;
2695 else
2696 op = usmul_widen_optab;
2698 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2699 &actual_mode);
2701 if (handler == CODE_FOR_nothing)
2703 if (op != smul_widen_optab)
2705 /* We can use a signed multiply with unsigned types as long as
2706 there is a wider mode to use, or it is the smaller of the two
2707 types that is unsigned. Note that type1 >= type2, always. */
2708 if ((TYPE_UNSIGNED (type1)
2709 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2710 || (TYPE_UNSIGNED (type2)
2711 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2713 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2714 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2715 return false;
2718 op = smul_widen_optab;
2719 handler = find_widening_optab_handler_and_mode (op, to_mode,
2720 from_mode,
2721 &actual_mode);
2723 if (handler == CODE_FOR_nothing)
2724 return false;
2726 from_unsigned1 = from_unsigned2 = false;
2728 else
2730 /* Expand can synthesize smul_widen_optab if the target
2731 supports umul_widen_optab. */
2732 op = umul_widen_optab;
2733 handler = find_widening_optab_handler_and_mode (op, to_mode,
2734 from_mode,
2735 &actual_mode);
2736 if (handler == CODE_FOR_nothing)
2737 return false;
2741 /* Ensure that the inputs to the handler are in the correct precision
2742 for the opcode. This will be the full mode size. */
2743 actual_precision = GET_MODE_PRECISION (actual_mode);
2744 if (2 * actual_precision > TYPE_PRECISION (type))
2745 return false;
2746 if (actual_precision != TYPE_PRECISION (type1)
2747 || from_unsigned1 != TYPE_UNSIGNED (type1))
2748 rhs1 = build_and_insert_cast (gsi, loc,
2749 build_nonstandard_integer_type
2750 (actual_precision, from_unsigned1), rhs1);
2751 if (actual_precision != TYPE_PRECISION (type2)
2752 || from_unsigned2 != TYPE_UNSIGNED (type2))
2753 rhs2 = build_and_insert_cast (gsi, loc,
2754 build_nonstandard_integer_type
2755 (actual_precision, from_unsigned2), rhs2);
2757 /* Handle constants. */
2758 if (TREE_CODE (rhs1) == INTEGER_CST)
2759 rhs1 = fold_convert (type1, rhs1);
2760 if (TREE_CODE (rhs2) == INTEGER_CST)
2761 rhs2 = fold_convert (type2, rhs2);
2763 gimple_assign_set_rhs1 (stmt, rhs1);
2764 gimple_assign_set_rhs2 (stmt, rhs2);
2765 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2766 update_stmt (stmt);
2767 widen_mul_stats.widen_mults_inserted++;
2768 return true;
2771 /* Process a single gimple statement STMT, which is found at the
2772 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2773 rhs (given by CODE), and try to convert it into a
2774 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2775 is true iff we converted the statement. */
2777 static bool
2778 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2779 enum tree_code code)
2781 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2782 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2783 tree type, type1, type2, optype;
2784 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2785 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2786 optab this_optab;
2787 enum tree_code wmult_code;
2788 enum insn_code handler;
2789 scalar_mode to_mode, from_mode, actual_mode;
2790 location_t loc = gimple_location (stmt);
2791 int actual_precision;
2792 bool from_unsigned1, from_unsigned2;
2794 lhs = gimple_assign_lhs (stmt);
2795 type = TREE_TYPE (lhs);
2796 if (TREE_CODE (type) != INTEGER_TYPE
2797 && TREE_CODE (type) != FIXED_POINT_TYPE)
2798 return false;
2800 if (code == MINUS_EXPR)
2801 wmult_code = WIDEN_MULT_MINUS_EXPR;
2802 else
2803 wmult_code = WIDEN_MULT_PLUS_EXPR;
2805 rhs1 = gimple_assign_rhs1 (stmt);
2806 rhs2 = gimple_assign_rhs2 (stmt);
2808 if (TREE_CODE (rhs1) == SSA_NAME)
2810 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2811 if (is_gimple_assign (rhs1_stmt))
2812 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2815 if (TREE_CODE (rhs2) == SSA_NAME)
2817 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2818 if (is_gimple_assign (rhs2_stmt))
2819 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2822 /* Allow for one conversion statement between the multiply
2823 and addition/subtraction statement. If there is more than
2824 one conversion then we assume it would invalidate this
2825 transformation. If that's not the case then it should have
2826 been folded before now. */
2827 if (CONVERT_EXPR_CODE_P (rhs1_code))
2829 conv1_stmt = rhs1_stmt;
2830 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2831 if (TREE_CODE (rhs1) == SSA_NAME)
2833 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2834 if (is_gimple_assign (rhs1_stmt))
2835 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2837 else
2838 return false;
2840 if (CONVERT_EXPR_CODE_P (rhs2_code))
2842 conv2_stmt = rhs2_stmt;
2843 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2844 if (TREE_CODE (rhs2) == SSA_NAME)
2846 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2847 if (is_gimple_assign (rhs2_stmt))
2848 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2850 else
2851 return false;
2854 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2855 is_widening_mult_p, but we still need the rhs values it returns.
2857 It might also appear that it would be sufficient to use the existing
2858 operands of the widening multiply, but that would limit the choice of
2859 multiply-and-accumulate instructions.
2861 If the widened-multiplication result has more than one use, it is
2862 probably wiser not to do the conversion. Also restrict this operation
2863 to a single basic block to avoid moving the multiply to a different block
2864 with a higher execution frequency. */
2865 if (code == PLUS_EXPR
2866 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2868 if (!has_single_use (rhs1)
2869 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2870 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2871 &type2, &mult_rhs2))
2872 return false;
2873 add_rhs = rhs2;
2874 conv_stmt = conv1_stmt;
2876 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2878 if (!has_single_use (rhs2)
2879 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2880 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2881 &type2, &mult_rhs2))
2882 return false;
2883 add_rhs = rhs1;
2884 conv_stmt = conv2_stmt;
2886 else
2887 return false;
2889 to_mode = SCALAR_TYPE_MODE (type);
2890 from_mode = SCALAR_TYPE_MODE (type1);
2891 if (to_mode == from_mode)
2892 return false;
2894 from_unsigned1 = TYPE_UNSIGNED (type1);
2895 from_unsigned2 = TYPE_UNSIGNED (type2);
2896 optype = type1;
2898 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2899 if (from_unsigned1 != from_unsigned2)
2901 if (!INTEGRAL_TYPE_P (type))
2902 return false;
2903 /* We can use a signed multiply with unsigned types as long as
2904 there is a wider mode to use, or it is the smaller of the two
2905 types that is unsigned. Note that type1 >= type2, always. */
2906 if ((from_unsigned1
2907 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2908 || (from_unsigned2
2909 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2911 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2912 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2913 return false;
2916 from_unsigned1 = from_unsigned2 = false;
2917 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2918 false);
2921 /* If there was a conversion between the multiply and addition
2922 then we need to make sure it fits a multiply-and-accumulate.
2923 There should be a single mode change which does not change the
2924 value. */
2925 if (conv_stmt)
2927 /* We use the original, unmodified data types for this. */
2928 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2929 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2930 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2931 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2933 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2935 /* Conversion is a truncate. */
2936 if (TYPE_PRECISION (to_type) < data_size)
2937 return false;
2939 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2941 /* Conversion is an extend. Check it's the right sort. */
2942 if (TYPE_UNSIGNED (from_type) != is_unsigned
2943 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2944 return false;
2946 /* else convert is a no-op for our purposes. */
2949 /* Verify that the machine can perform a widening multiply
2950 accumulate in this mode/signedness combination, otherwise
2951 this transformation is likely to pessimize code. */
2952 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2953 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2954 from_mode, &actual_mode);
2956 if (handler == CODE_FOR_nothing)
2957 return false;
2959 /* Ensure that the inputs to the handler are in the correct precision
2960 for the opcode. This will be the full mode size. */
2961 actual_precision = GET_MODE_PRECISION (actual_mode);
2962 if (actual_precision != TYPE_PRECISION (type1)
2963 || from_unsigned1 != TYPE_UNSIGNED (type1))
2964 mult_rhs1 = build_and_insert_cast (gsi, loc,
2965 build_nonstandard_integer_type
2966 (actual_precision, from_unsigned1),
2967 mult_rhs1);
2968 if (actual_precision != TYPE_PRECISION (type2)
2969 || from_unsigned2 != TYPE_UNSIGNED (type2))
2970 mult_rhs2 = build_and_insert_cast (gsi, loc,
2971 build_nonstandard_integer_type
2972 (actual_precision, from_unsigned2),
2973 mult_rhs2);
2975 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2976 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2978 /* Handle constants. */
2979 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2980 mult_rhs1 = fold_convert (type1, mult_rhs1);
2981 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2982 mult_rhs2 = fold_convert (type2, mult_rhs2);
2984 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2985 add_rhs);
2986 update_stmt (gsi_stmt (*gsi));
2987 widen_mul_stats.maccs_inserted++;
2988 return true;
2991 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2992 OP2 and which we know is used in statements that can be, together with the
2993 multiplication, converted to FMAs, perform the transformation. */
2995 static void
2996 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2998 tree type = TREE_TYPE (mul_result);
2999 gimple *use_stmt;
3000 imm_use_iterator imm_iter;
3001 gcall *fma_stmt;
3003 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3005 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3006 tree addop, mulop1 = op1, result = mul_result;
3007 bool negate_p = false;
3008 gimple_seq seq = NULL;
3010 if (is_gimple_debug (use_stmt))
3011 continue;
3013 if (is_gimple_assign (use_stmt)
3014 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3016 result = gimple_assign_lhs (use_stmt);
3017 use_operand_p use_p;
3018 gimple *neguse_stmt;
3019 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3020 gsi_remove (&gsi, true);
3021 release_defs (use_stmt);
3023 use_stmt = neguse_stmt;
3024 gsi = gsi_for_stmt (use_stmt);
3025 negate_p = true;
3028 tree cond, else_value, ops[3];
3029 tree_code code;
3030 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3031 ops, &else_value))
3032 gcc_unreachable ();
3033 addop = ops[0] == result ? ops[1] : ops[0];
3035 if (code == MINUS_EXPR)
3037 if (ops[0] == result)
3038 /* a * b - c -> a * b + (-c) */
3039 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3040 else
3041 /* a - b * c -> (-b) * c + a */
3042 negate_p = !negate_p;
3045 if (negate_p)
3046 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3048 if (seq)
3049 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3051 if (cond)
3052 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3053 op2, addop, else_value);
3054 else
3055 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3056 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3057 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3058 use_stmt));
3059 gsi_replace (&gsi, fma_stmt, true);
3060 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3061 regardless of where the negation occurs. */
3062 gimple *orig_stmt = gsi_stmt (gsi);
3063 if (fold_stmt (&gsi, follow_all_ssa_edges))
3065 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3066 gcc_unreachable ();
3067 update_stmt (gsi_stmt (gsi));
3070 if (dump_file && (dump_flags & TDF_DETAILS))
3072 fprintf (dump_file, "Generated FMA ");
3073 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3074 fprintf (dump_file, "\n");
3077 /* If the FMA result is negated in a single use, fold the negation
3078 too. */
3079 orig_stmt = gsi_stmt (gsi);
3080 use_operand_p use_p;
3081 gimple *neg_stmt;
3082 if (is_gimple_call (orig_stmt)
3083 && gimple_call_internal_p (orig_stmt)
3084 && gimple_call_lhs (orig_stmt)
3085 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3086 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3087 && is_gimple_assign (neg_stmt)
3088 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3089 && !stmt_could_throw_p (cfun, neg_stmt))
3091 gsi = gsi_for_stmt (neg_stmt);
3092 if (fold_stmt (&gsi, follow_all_ssa_edges))
3094 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3095 gcc_unreachable ();
3096 update_stmt (gsi_stmt (gsi));
3097 if (dump_file && (dump_flags & TDF_DETAILS))
3099 fprintf (dump_file, "Folded FMA negation ");
3100 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3101 fprintf (dump_file, "\n");
3106 widen_mul_stats.fmas_inserted++;
3110 /* Data necessary to perform the actual transformation from a multiplication
3111 and an addition to an FMA once the decision to do so has been taken, and to
3112 then delete the multiplication statement from the function IL. */
3114 struct fma_transformation_info
3116 gimple *mul_stmt;
3117 tree mul_result;
3118 tree op1;
3119 tree op2;
3122 /* Structure containing the current state of FMA deferring, i.e. whether we are
3123 deferring, whether to continue deferring, and all data necessary to come
3124 back and perform all deferred transformations. */
3126 class fma_deferring_state
3128 public:
3129 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3130 do any deferring. */
3132 fma_deferring_state (bool perform_deferring)
3133 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3134 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3136 /* List of FMA candidates for which the transformation has been determined
3137 possible but which, at this point in the BB analysis, we do not consider
3138 beneficial. */
3139 auto_vec<fma_transformation_info, 8> m_candidates;
3141 /* Set of results of multiplication that are part of an already deferred FMA
3142 candidates. */
3143 hash_set<tree> m_mul_result_set;
3145 /* The PHI that supposedly feeds back the result of an FMA to another over the
3146 loop boundary. */
3147 gphi *m_initial_phi;
3149 /* Result of the last produced FMA candidate or NULL if there has not been
3150 one. */
3151 tree m_last_result;
3153 /* If true, deferring might still be profitable. If false, transform all
3154 candidates and no longer defer. */
3155 bool m_deferring_p;
3158 /* Transform all deferred FMA candidates and mark STATE as no longer
3159 deferring. */
3161 static void
3162 cancel_fma_deferring (fma_deferring_state *state)
3164 if (!state->m_deferring_p)
3165 return;
3167 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3169 if (dump_file && (dump_flags & TDF_DETAILS))
3170 fprintf (dump_file, "Generating deferred FMA\n");
3172 const fma_transformation_info &fti = state->m_candidates[i];
3173 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3175 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3176 gsi_remove (&gsi, true);
3177 release_defs (fti.mul_stmt);
3179 state->m_deferring_p = false;
3182 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3183 Otherwise return NULL. */
3185 static gphi *
3186 result_of_phi (tree op)
3188 if (TREE_CODE (op) != SSA_NAME)
3189 return NULL;
3191 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3194 /* After processing statements of a BB and recording STATE, return true if the
3195 initial phi is fed by the last FMA candidate result or one such result from
3196 previously processed BBs marked in LAST_RESULT_SET. */
3198 static bool
3199 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3200 hash_set<tree> *last_result_set)
3202 ssa_op_iter iter;
3203 use_operand_p use;
3204 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3206 tree t = USE_FROM_PTR (use);
3207 if (t == state->m_last_result
3208 || last_result_set->contains (t))
3209 return true;
3212 return false;
3215 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3216 with uses in additions and subtractions to form fused multiply-add
3217 operations. Returns true if successful and MUL_STMT should be removed.
3218 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3219 on MUL_COND, otherwise it is unconditional.
3221 If STATE indicates that we are deferring FMA transformation, that means
3222 that we do not produce FMAs for basic blocks which look like:
3224 <bb 6>
3225 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3226 _65 = _14 * _16;
3227 accumulator_66 = _65 + accumulator_111;
3229 or its unrolled version, i.e. with several FMA candidates that feed the result
3230 of one into the addend of another. Instead, we add them to a list in STATE
3231 and if we later discover an FMA candidate that is not part of such a chain,
3232 we go back and perform all deferred past candidates. */
3234 static bool
3235 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3236 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3238 tree mul_result = gimple_get_lhs (mul_stmt);
3239 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3240 if the statement was left just for the side-effects. */
3241 if (!mul_result)
3242 return false;
3243 tree type = TREE_TYPE (mul_result);
3244 gimple *use_stmt, *neguse_stmt;
3245 use_operand_p use_p;
3246 imm_use_iterator imm_iter;
3248 if (FLOAT_TYPE_P (type)
3249 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3250 return false;
3252 /* We don't want to do bitfield reduction ops. */
3253 if (INTEGRAL_TYPE_P (type)
3254 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3255 return false;
3257 /* If the target doesn't support it, don't generate it. We assume that
3258 if fma isn't available then fms, fnma or fnms are not either. */
3259 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3260 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3261 return false;
3263 /* If the multiplication has zero uses, it is kept around probably because
3264 of -fnon-call-exceptions. Don't optimize it away in that case,
3265 it is DCE's job. */
3266 if (has_zero_uses (mul_result))
3267 return false;
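/* Deferring is only considered for types no wider than the
   avoid-fma-max-bits parameter.  */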
3269 bool check_defer
3270 = (state->m_deferring_p
3271 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3272 param_avoid_fma_max_bits));
3273 bool defer = check_defer;
3274 bool seen_negate_p = false;
3275 /* Make sure that the multiplication statement becomes dead after
3276 the transformation, thus that all uses are transformed to FMAs.
3277 This means we assume that an FMA operation has the same cost
3278 as an addition. */
3279 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3281 tree result = mul_result;
3282 bool negate_p = false;
3284 use_stmt = USE_STMT (use_p);
3286 if (is_gimple_debug (use_stmt))
3287 continue;
3289 /* For now restrict these operations to single basic blocks. In theory
3290 we would want to support sinking the multiplication in
3291 m = a*b;
3292 if ()
3293 ma = m + c;
3294 else
3295 d = m;
3296 to form an fma in the then block and sink the multiplication to the
3297 else block. */
3298 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3299 return false;
3301 /* A negate on the multiplication leads to FNMA. */
3302 if (is_gimple_assign (use_stmt)
3303 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3305 ssa_op_iter iter;
3306 use_operand_p usep;
3308 /* If (due to earlier missed optimizations) we have two
3309 negates of the same value, treat them as equivalent
3310 to a single negate with multiple uses. */
3311 if (seen_negate_p)
3312 return false;
3314 result = gimple_assign_lhs (use_stmt);
3316 /* Make sure the negate statement becomes dead with this
3317 single transformation. */
3318 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3319 &use_p, &neguse_stmt))
3320 return false;
3322 /* Make sure the multiplication isn't also used on that stmt. */
3323 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3324 if (USE_FROM_PTR (usep) == mul_result)
3325 return false;
3327 /* Re-validate. */
3328 use_stmt = neguse_stmt;
3329 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3330 return false;
3332 negate_p = seen_negate_p = true;
3335 tree cond, else_value, ops[3];
3336 tree_code code;
3337 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3338 &else_value))
3339 return false;
3341 switch (code)
3343 case MINUS_EXPR:
3344 if (ops[1] == result)
3345 negate_p = !negate_p;
3346 break;
3347 case PLUS_EXPR:
3348 break;
3349 default:
3350 /* FMA can only be formed from PLUS and MINUS. */
3351 return false;
3354 if (mul_cond && cond != mul_cond)
3355 return false;
3357 if (cond)
3359 if (cond == result || else_value == result)
3360 return false;
3361 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3362 return false;
3365 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3366 we'll visit later, we might be able to get a more profitable
3367 match with fnma.
3368 OTOH, if we don't, a negate / fma pair has likely lower latency
3369 than a mult / subtract pair. */
3370 if (code == MINUS_EXPR
3371 && !negate_p
3372 && ops[0] == result
3373 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3374 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3375 && TREE_CODE (ops[1]) == SSA_NAME
3376 && has_single_use (ops[1]))
3378 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3379 if (is_gimple_assign (stmt2)
3380 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3381 return false;
3384 /* We can't handle a * b + a * b. */
3385 if (ops[0] == ops[1])
3386 return false;
3387 /* If deferring, make sure we are not looking at an instruction that
3388 wouldn't have existed if we were not. */
3389 if (state->m_deferring_p
3390 && (state->m_mul_result_set.contains (ops[0])
3391 || state->m_mul_result_set.contains (ops[1])))
3392 return false;
3394 if (check_defer)
3396 tree use_lhs = gimple_get_lhs (use_stmt);
3397 if (state->m_last_result)
3399 if (ops[1] == state->m_last_result
3400 || ops[0] == state->m_last_result)
3401 defer = true;
3402 else
3403 defer = false;
3405 else
3407 gcc_checking_assert (!state->m_initial_phi);
3408 gphi *phi;
3409 if (ops[0] == result)
3410 phi = result_of_phi (ops[1]);
3411 else
3413 gcc_assert (ops[1] == result);
3414 phi = result_of_phi (ops[0]);
3417 if (phi)
3419 state->m_initial_phi = phi;
3420 defer = true;
3422 else
3423 defer = false;
3426 state->m_last_result = use_lhs;
3427 check_defer = false;
3429 else
3430 defer = false;
3432 /* While it is possible to validate whether or not the exact form that
3433 we've recognized is available in the backend, the assumption is that
3434 if the deferring logic above did not trigger, the transformation is
3435 never a loss. For instance, suppose the target only has the plain FMA
3436 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3437 MUL+SUB for FMA+NEG, which is still two operations. Consider
3438 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3439 form the two NEGs are independent and could be run in parallel. */
3442 if (defer)
3444 fma_transformation_info fti;
3445 fti.mul_stmt = mul_stmt;
3446 fti.mul_result = mul_result;
3447 fti.op1 = op1;
3448 fti.op2 = op2;
3449 state->m_candidates.safe_push (fti);
3450 state->m_mul_result_set.add (mul_result);
3452 if (dump_file && (dump_flags & TDF_DETAILS))
3454 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3455 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3456 fprintf (dump_file, "\n");
3459 return false;
3461 else
3463 if (state->m_deferring_p)
3464 cancel_fma_deferring (state);
3465 convert_mult_to_fma_1 (mul_result, op1, op2);
3466 return true;
3471 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3472 a check for non-zero like:
3473 _1 = x_4(D) * y_5(D);
3474 *res_7(D) = _1;
3475 if (x_4(D) != 0)
3476 goto <bb 3>; [50.00%]
3477 else
3478 goto <bb 4>; [50.00%]
3480 <bb 3> [local count: 536870913]:
3481 _2 = _1 / x_4(D);
3482 _9 = _2 != y_5(D);
3483 _10 = (int) _9;
3485 <bb 4> [local count: 1073741824]:
3486 # iftmp.0_3 = PHI <_10(3), 0(2)>
3487 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3488 optimize the x_4(D) != 0 condition to 1. */
3490 static void
3491 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3492 gimple *div_stmt, bool *cfg_changed)
3494 basic_block bb = gimple_bb (cond_stmt);
3495 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3496 return;
3497 edge pred_edge = single_pred_edge (bb);
3498 basic_block pred_bb = pred_edge->src;
3499 if (EDGE_COUNT (pred_bb->succs) != 2)
3500 return;
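/* other_edge is the successor edge of PRED_BB that is not PRED_EDGE.  */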
3501 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3502 edge other_succ_edge = NULL;
3503 if (gimple_code (cond_stmt) == GIMPLE_COND)
3505 if (EDGE_COUNT (bb->succs) != 2)
3506 return;
3507 other_succ_edge = EDGE_SUCC (bb, 0);
3508 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3510 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3511 other_succ_edge = EDGE_SUCC (bb, 1);
3513 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3514 other_succ_edge = EDGE_SUCC (bb, 0);
3515 if (other_edge->dest != other_succ_edge->dest)
3516 return;
3518 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3519 return;
3520 gimple *zero_cond = last_stmt (pred_bb);
3521 if (zero_cond == NULL
3522 || gimple_code (zero_cond) != GIMPLE_COND
3523 || (gimple_cond_code (zero_cond)
3524 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3525 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3526 return;
3527 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3528 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3529 return;
3530 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3532 /* Allow the divisor to be the result of a same precision cast
3533 from zero_cond_lhs. */
3534 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3535 if (TREE_CODE (rhs2) != SSA_NAME)
3536 return;
3537 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3538 if (!gimple_assign_cast_p (g)
3539 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3540 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3541 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3542 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3543 return;
3545 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3546 mul_stmts.quick_push (div_stmt);
3547 if (is_gimple_debug (gsi_stmt (gsi)))
3548 gsi_next_nondebug (&gsi);
3549 unsigned cast_count = 0;
3550 while (gsi_stmt (gsi) != cond_stmt)
3552 /* If the original mul_stmt has a single use, allow it in the same bb;
3553 we are then looking just at __builtin_mul_overflow_p.
3554 Though, in that case the original mul_stmt will be replaced
3555 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3556 gimple *mul_stmt;
3557 unsigned int i;
3558 bool ok = false;
3559 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3561 if (gsi_stmt (gsi) == mul_stmt)
3563 ok = true;
3564 break;
3567 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3568 ok = true;
3569 if (!ok)
3570 return;
3571 gsi_next_nondebug (&gsi);
3573 if (gimple_code (cond_stmt) == GIMPLE_COND)
3575 basic_block succ_bb = other_edge->dest;
3576 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3577 gsi_next (&gpi))
3579 gphi *phi = gpi.phi ();
3580 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3581 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3582 if (!operand_equal_p (v1, v2, 0))
3583 return;
3586 else
3588 tree lhs = gimple_assign_lhs (cond_stmt);
3589 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3590 return;
3591 gsi_next_nondebug (&gsi);
3592 if (!gsi_end_p (gsi))
3594 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3595 return;
3596 gimple *cast_stmt = gsi_stmt (gsi);
3597 if (!gimple_assign_cast_p (cast_stmt))
3598 return;
3599 tree new_lhs = gimple_assign_lhs (cast_stmt);
3600 gsi_next_nondebug (&gsi);
3601 if (!gsi_end_p (gsi)
3602 || !new_lhs
3603 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3604 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3605 return;
3606 lhs = new_lhs;
3608 edge succ_edge = single_succ_edge (bb);
3609 basic_block succ_bb = succ_edge->dest;
3610 gsi = gsi_start_phis (succ_bb);
3611 if (gsi_end_p (gsi))
3612 return;
3613 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3614 gsi_next (&gsi);
3615 if (!gsi_end_p (gsi))
3616 return;
3617 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3618 return;
3619 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3620 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3622 tree cond = gimple_assign_rhs1 (cond_stmt);
3623 if (TREE_CODE (cond) == NE_EXPR)
3625 if (!operand_equal_p (other_val,
3626 gimple_assign_rhs3 (cond_stmt), 0))
3627 return;
3629 else if (!operand_equal_p (other_val,
3630 gimple_assign_rhs2 (cond_stmt), 0))
3631 return;
3633 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3635 if (!integer_zerop (other_val))
3636 return;
3638 else if (!integer_onep (other_val))
3639 return;
3641 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3642 if (pred_edge->flags & EDGE_TRUE_VALUE)
3643 gimple_cond_make_true (zero_gcond);
3644 else
3645 gimple_cond_make_false (zero_gcond);
3646 update_stmt (zero_cond);
3647 *cfg_changed = true;
3650 /* Helper function for arith_overflow_check_p. Return true
3651 if VAL1 is equal to VAL2 cast to the corresponding integral type
3652 with the other signedness, or vice versa. */
3654 static bool
3655 arith_cast_equal_p (tree val1, tree val2)
3657 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3658 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3659 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3660 return false;
3661 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3662 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3663 return true;
3664 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3665 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3666 return true;
3667 return false;
3670 /* Helper function of match_arith_overflow. Return 1
3671 if USE_STMT is unsigned overflow check ovf != 0 for
3672 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3673 and 0 otherwise. */
3675 static int
3676 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3677 tree maxval, tree *other)
3679 enum tree_code ccode = ERROR_MARK;
3680 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3681 enum tree_code code = gimple_assign_rhs_code (stmt);
3682 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3683 tree rhs1 = gimple_assign_rhs1 (stmt);
3684 tree rhs2 = gimple_assign_rhs2 (stmt);
3685 tree multop = NULL_TREE, divlhs = NULL_TREE;
3686 gimple *cur_use_stmt = use_stmt;
3688 if (code == MULT_EXPR)
3690 if (!is_gimple_assign (use_stmt))
3691 return 0;
3692 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3693 return 0;
3694 if (gimple_assign_rhs1 (use_stmt) != lhs)
3695 return 0;
3696 if (cast_stmt)
3698 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3699 multop = rhs2;
3700 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3701 multop = rhs1;
3702 else
3703 return 0;
3705 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3706 multop = rhs2;
3707 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3708 multop = rhs1;
3709 else
3710 return 0;
3711 if (stmt_ends_bb_p (use_stmt))
3712 return 0;
3713 divlhs = gimple_assign_lhs (use_stmt);
3714 if (!divlhs)
3715 return 0;
3716 use_operand_p use;
3717 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3718 return 0;
3720 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3722 ccode = gimple_cond_code (cur_use_stmt);
3723 crhs1 = gimple_cond_lhs (cur_use_stmt);
3724 crhs2 = gimple_cond_rhs (cur_use_stmt);
3726 else if (is_gimple_assign (cur_use_stmt))
3728 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3730 ccode = gimple_assign_rhs_code (cur_use_stmt);
3731 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3732 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3734 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3736 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3737 if (COMPARISON_CLASS_P (cond))
3739 ccode = TREE_CODE (cond);
3740 crhs1 = TREE_OPERAND (cond, 0);
3741 crhs2 = TREE_OPERAND (cond, 1);
3743 else
3744 return 0;
3746 else
3747 return 0;
3749 else
3750 return 0;
3752 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3753 return 0;
3755 switch (ccode)
3757 case GT_EXPR:
3758 case LE_EXPR:
3759 if (maxval)
3761 /* r = a + b; r > maxval or r <= maxval */
3762 if (crhs1 == lhs
3763 && TREE_CODE (crhs2) == INTEGER_CST
3764 && tree_int_cst_equal (crhs2, maxval))
3765 return ccode == GT_EXPR ? 1 : -1;
3766 break;
3768 /* r = a - b; r > a or r <= a
3769 r = a + b; a > r or a <= r or b > r or b <= r. */
3770 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3771 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3772 && crhs2 == lhs))
3773 return ccode == GT_EXPR ? 1 : -1;
3774 /* r = ~a; b > r or b <= r. */
3775 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3777 if (other)
3778 *other = crhs1;
3779 return ccode == GT_EXPR ? 1 : -1;
3781 break;
3782 case LT_EXPR:
3783 case GE_EXPR:
3784 if (maxval)
3785 break;
3786 /* r = a - b; a < r or a >= r
3787 r = a + b; r < a or r >= a or r < b or r >= b. */
3788 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3789 || (code == PLUS_EXPR && crhs1 == lhs
3790 && (crhs2 == rhs1 || crhs2 == rhs2)))
3791 return ccode == LT_EXPR ? 1 : -1;
3792 /* r = ~a; r < b or r >= b. */
3793 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3795 if (other)
3796 *other = crhs2;
3797 return ccode == LT_EXPR ? 1 : -1;
3799 break;
3800 case EQ_EXPR:
3801 case NE_EXPR:
3802 /* r = a * b; _1 = r / a; _1 == b
3803 r = a * b; _1 = r / b; _1 == a
3804 r = a * b; _1 = r / a; _1 != b
3805 r = a * b; _1 = r / b; _1 != a. */
3806 if (code == MULT_EXPR)
3808 if (cast_stmt)
3810 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3811 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3813 use_stmt = cur_use_stmt;
3814 return ccode == NE_EXPR ? 1 : -1;
3817 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3818 || (crhs2 == divlhs && crhs1 == multop))
3820 use_stmt = cur_use_stmt;
3821 return ccode == NE_EXPR ? 1 : -1;
3824 break;
3825 default:
3826 break;
3828 return 0;
3831 /* Recognize for unsigned x
3832 x = y - z;
3833 if (x > y)
3834 where there are other uses of x and replace it with
3835 _7 = .SUB_OVERFLOW (y, z);
3836 x = REALPART_EXPR <_7>;
3837 _8 = IMAGPART_EXPR <_7>;
3838 if (_8)
3839 and similarly for addition.
3841 Also recognize:
3842 yc = (type) y;
3843 zc = (type) z;
3844 x = yc + zc;
3845 if (x > max)
3846 where y and z have unsigned types with maximum max
3847 and there are other uses of x and all of those cast x
3848 back to that unsigned type and again replace it with
3849 _7 = .ADD_OVERFLOW (y, z);
3850 _9 = REALPART_EXPR <_7>;
3851 _8 = IMAGPART_EXPR <_7>;
3852 if (_8)
3853 and replace (utype) x with _9.
3855 Also recognize:
3856 x = ~z;
3857 if (y > x)
3858 and replace it with
3859 _7 = .ADD_OVERFLOW (y, z);
3860 _8 = IMAGPART_EXPR <_7>;
3861 if (_8)
3863 And also recognize:
3864 z = x * y;
3865 if (x != 0)
3866 goto <bb 3>; [50.00%]
3867 else
3868 goto <bb 4>; [50.00%]
3870 <bb 3> [local count: 536870913]:
3871 _2 = z / x;
3872 _9 = _2 != y;
3873 _10 = (int) _9;
3875 <bb 4> [local count: 1073741824]:
3876 # iftmp.0_3 = PHI <_10(3), 0(2)>
3877 and replace it with
3878 _7 = .MUL_OVERFLOW (x, y);
3879 z = REALPART_EXPR <_7>;
3880 _8 = IMAGPART_EXPR <_7>;
3881 _9 = _8 != 0;
3882 iftmp.0_3 = (int) _9; */
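/* A minimal C-level sketch, assuming unsigned arithmetic and illustrative
   names only, of source code that produces the first pattern above:

     unsigned
     sub_sat (unsigned y, unsigned z)
     {
       unsigned x = y - z;
       if (x > y)		/* wrap-around check on the subtraction  */
	 x = 0;
       return x;
     }

   After the transform the comparison reads the IMAGPART_EXPR (the overflow
   flag) of the .SUB_OVERFLOW result instead of re-comparing the difference
   against Y, which typically lets the target use its borrow flag directly.  */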
3884 static bool
3885 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3886 enum tree_code code, bool *cfg_changed)
3888 tree lhs = gimple_assign_lhs (stmt);
3889 tree type = TREE_TYPE (lhs);
3890 use_operand_p use_p;
3891 imm_use_iterator iter;
3892 bool use_seen = false;
3893 bool ovf_use_seen = false;
3894 gimple *use_stmt;
3895 gimple *add_stmt = NULL;
3896 bool add_first = false;
3897 gimple *cond_stmt = NULL;
3898 gimple *cast_stmt = NULL;
3899 tree cast_lhs = NULL_TREE;
3901 gcc_checking_assert (code == PLUS_EXPR
3902 || code == MINUS_EXPR
3903 || code == MULT_EXPR
3904 || code == BIT_NOT_EXPR);
3905 if (!INTEGRAL_TYPE_P (type)
3906 || !TYPE_UNSIGNED (type)
3907 || has_zero_uses (lhs)
3908 || (code != PLUS_EXPR
3909 && code != MULT_EXPR
3910 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3911 TYPE_MODE (type)) == CODE_FOR_nothing))
3912 return false;
3914 tree rhs1 = gimple_assign_rhs1 (stmt);
3915 tree rhs2 = gimple_assign_rhs2 (stmt);
3916 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3918 use_stmt = USE_STMT (use_p);
3919 if (is_gimple_debug (use_stmt))
3920 continue;
3922 tree other = NULL_TREE;
3923 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3925 if (code == BIT_NOT_EXPR)
3927 gcc_assert (other);
3928 if (TREE_CODE (other) != SSA_NAME)
3929 return false;
3930 if (rhs2 == NULL)
3931 rhs2 = other;
3932 else
3933 return false;
3934 cond_stmt = use_stmt;
3936 ovf_use_seen = true;
3938 else
3940 use_seen = true;
3941 if (code == MULT_EXPR
3942 && cast_stmt == NULL
3943 && gimple_assign_cast_p (use_stmt))
3945 cast_lhs = gimple_assign_lhs (use_stmt);
3946 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3947 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3948 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3949 == TYPE_PRECISION (TREE_TYPE (lhs))))
3950 cast_stmt = use_stmt;
3951 else
3952 cast_lhs = NULL_TREE;
3955 if (ovf_use_seen && use_seen)
3956 break;
3959 if (!ovf_use_seen
3960 && code == MULT_EXPR
3961 && cast_stmt)
3963 if (TREE_CODE (rhs1) != SSA_NAME
3964 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3965 return false;
3966 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3968 use_stmt = USE_STMT (use_p);
3969 if (is_gimple_debug (use_stmt))
3970 continue;
3972 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3973 NULL_TREE, NULL))
3974 ovf_use_seen = true;
3977 else
3979 cast_stmt = NULL;
3980 cast_lhs = NULL_TREE;
3983 tree maxval = NULL_TREE;
3984 if (!ovf_use_seen
3985 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3986 || (code == PLUS_EXPR
3987 && optab_handler (uaddv4_optab,
3988 TYPE_MODE (type)) == CODE_FOR_nothing)
3989 || (code == MULT_EXPR
3990 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3991 TYPE_MODE (type)) == CODE_FOR_nothing))
3993 if (code != PLUS_EXPR)
3994 return false;
3995 if (TREE_CODE (rhs1) != SSA_NAME
3996 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3997 return false;
3998 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
3999 tree type1 = TREE_TYPE (rhs1);
4000 if (!INTEGRAL_TYPE_P (type1)
4001 || !TYPE_UNSIGNED (type1)
4002 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4003 || (TYPE_PRECISION (type1)
4004 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4005 return false;
4006 if (TREE_CODE (rhs2) == INTEGER_CST)
4008 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4009 TYPE_PRECISION (type1),
4010 UNSIGNED), 0))
4011 return false;
4012 rhs2 = fold_convert (type1, rhs2);
4014 else
4016 if (TREE_CODE (rhs2) != SSA_NAME
4017 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4018 return false;
4019 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4020 tree type2 = TREE_TYPE (rhs2);
4021 if (!INTEGRAL_TYPE_P (type2)
4022 || !TYPE_UNSIGNED (type2)
4023 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4024 || (TYPE_PRECISION (type2)
4025 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4026 return false;
4028 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4029 type = type1;
4030 else
4031 type = TREE_TYPE (rhs2);
4033 if (TREE_CODE (type) != INTEGER_TYPE
4034 || optab_handler (uaddv4_optab,
4035 TYPE_MODE (type)) == CODE_FOR_nothing)
4036 return false;
4038 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4039 UNSIGNED));
4040 ovf_use_seen = false;
4041 use_seen = false;
4042 basic_block use_bb = NULL;
4043 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4045 use_stmt = USE_STMT (use_p);
4046 if (is_gimple_debug (use_stmt))
4047 continue;
4049 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4051 ovf_use_seen = true;
4052 use_bb = gimple_bb (use_stmt);
4054 else
4056 if (!gimple_assign_cast_p (use_stmt)
4057 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4058 return false;
4059 tree use_lhs = gimple_assign_lhs (use_stmt);
4060 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4061 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4062 > TYPE_PRECISION (type)))
4063 return false;
4064 use_seen = true;
4067 if (!ovf_use_seen)
4068 return false;
4069 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4071 if (!use_seen)
4072 return false;
4073 tree new_rhs1 = make_ssa_name (type);
4074 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4075 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4076 rhs1 = new_rhs1;
4078 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4080 if (!use_seen)
4081 return false;
4082 tree new_rhs2 = make_ssa_name (type);
4083 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4084 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4085 rhs2 = new_rhs2;
4087 else if (!use_seen)
4089 /* If there are no uses of the wider addition, check whether
4090    forwprop has created a narrower addition instead.
4091    Require it to be in the same bb as the overflow check.  */
4092 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4094 use_stmt = USE_STMT (use_p);
4095 if (is_gimple_debug (use_stmt))
4096 continue;
4098 if (use_stmt == stmt)
4099 continue;
4101 if (!is_gimple_assign (use_stmt)
4102 || gimple_bb (use_stmt) != use_bb
4103 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4104 continue;
4106 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4108 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4109 rhs2, 0))
4110 continue;
4112 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4114 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4115 continue;
4117 else
4118 continue;
4120 add_stmt = use_stmt;
4121 break;
4123 if (add_stmt == NULL)
4124 return false;
4126 /* If stmt and add_stmt are in the same bb, we need to find out
4127 which one is earlier. If they are in different bbs, we've
4128 checked add_stmt is in the same bb as one of the uses of the
4129 stmt lhs, so stmt needs to dominate add_stmt too. */
4130 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4132 gimple_stmt_iterator gsif = *gsi;
4133 gimple_stmt_iterator gsib = *gsi;
4134 int i;
4135 /* Search both forward and backward from stmt, with a small
4136    upper bound.  */
4137 for (i = 0; i < 128; i++)
4139 if (!gsi_end_p (gsib))
4141 gsi_prev_nondebug (&gsib);
4142 if (gsi_stmt (gsib) == add_stmt)
4144 add_first = true;
4145 break;
4148 else if (gsi_end_p (gsif))
4149 break;
4150 if (!gsi_end_p (gsif))
4152 gsi_next_nondebug (&gsif);
4153 if (gsi_stmt (gsif) == add_stmt)
4154 break;
4157 if (i == 128)
4158 return false;
4159 if (add_first)
4160 *gsi = gsi_for_stmt (add_stmt);
4165 if (code == BIT_NOT_EXPR)
4166 *gsi = gsi_for_stmt (cond_stmt);
4168 auto_vec<gimple *, 8> mul_stmts;
4169 if (code == MULT_EXPR && cast_stmt)
4171 type = TREE_TYPE (cast_lhs);
4172 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4173 if (gimple_assign_cast_p (g)
4174 && useless_type_conversion_p (type,
4175 TREE_TYPE (gimple_assign_rhs1 (g)))
4176 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4177 rhs1 = gimple_assign_rhs1 (g);
4178 else
4180 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4181 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4182 rhs1 = gimple_assign_lhs (g);
4183 mul_stmts.quick_push (g);
4185 if (TREE_CODE (rhs2) == INTEGER_CST)
4186 rhs2 = fold_convert (type, rhs2);
4187 else
4189 g = SSA_NAME_DEF_STMT (rhs2);
4190 if (gimple_assign_cast_p (g)
4191 && useless_type_conversion_p (type,
4192 TREE_TYPE (gimple_assign_rhs1 (g)))
4193 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4194 rhs2 = gimple_assign_rhs1 (g);
4195 else
4197 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4198 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4199 rhs2 = gimple_assign_lhs (g);
4200 mul_stmts.quick_push (g);
4204 tree ctype = build_complex_type (type);
4205 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4206 ? IFN_MUL_OVERFLOW
4207 : code != MINUS_EXPR
4208 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4209 2, rhs1, rhs2);
4210 tree ctmp = make_ssa_name (ctype);
4211 gimple_call_set_lhs (g, ctmp);
4212 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4213 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4214 gassign *g2;
4215 if (code != BIT_NOT_EXPR)
4217 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4218 build1 (REALPART_EXPR, type, ctmp));
4219 if (maxval || cast_stmt)
4221 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4222 if (add_first)
4223 *gsi = gsi_for_stmt (stmt);
4225 else
4226 gsi_replace (gsi, g2, true);
4227 if (code == MULT_EXPR)
4229 mul_stmts.quick_push (g);
4230 mul_stmts.quick_push (g2);
4231 if (cast_stmt)
4233 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4234 gsi_replace (gsi, g2, true);
4235 mul_stmts.quick_push (g2);
4239 tree ovf = make_ssa_name (type);
4240 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4241 build1 (IMAGPART_EXPR, type, ctmp));
4242 if (code != BIT_NOT_EXPR)
4243 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4244 else
4245 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4246 if (code == MULT_EXPR)
4247 mul_stmts.quick_push (g2);
4249 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4251 if (is_gimple_debug (use_stmt))
4252 continue;
4254 gimple *orig_use_stmt = use_stmt;
4255 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4256 maxval, NULL);
4257 if (ovf_use == 0)
4259 gcc_assert (code != BIT_NOT_EXPR);
4260 if (maxval)
4262 tree use_lhs = gimple_assign_lhs (use_stmt);
4263 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4264 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4265 TREE_TYPE (new_lhs)))
4266 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4267 update_stmt (use_stmt);
4269 continue;
4271 if (gimple_code (use_stmt) == GIMPLE_COND)
4273 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4274 gimple_cond_set_lhs (cond_stmt, ovf);
4275 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4276 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4278 else
4280 gcc_checking_assert (is_gimple_assign (use_stmt));
4281 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4283 gimple_assign_set_rhs1 (use_stmt, ovf);
4284 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4285 gimple_assign_set_rhs_code (use_stmt,
4286 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4288 else
4290 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4291 == COND_EXPR);
4292 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4293 boolean_type_node, ovf,
4294 build_int_cst (type, 0));
4295 gimple_assign_set_rhs1 (use_stmt, cond);
4298 update_stmt (use_stmt);
4299 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4301 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4302 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4303 cfg_changed);
4304 gsi_remove (&gsi2, true);
4305 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4308 if (maxval)
4310 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4311 gsi_remove (&gsi2, true);
4312 if (add_stmt)
4314 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4315 new_lhs);
4316 gsi2 = gsi_for_stmt (add_stmt);
4317 gsi_replace (&gsi2, g, true);
4320 else if (code == BIT_NOT_EXPR)
4322 *gsi = gsi_for_stmt (stmt);
4323 gsi_remove (gsi, true);
4324 release_ssa_name (lhs);
4325 return true;
4327 return false;
4330 /* Return true if target has support for divmod. */
4332 static bool
4333 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4335 /* If target supports hardware divmod insn, use it for divmod. */
4336 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4337 return true;
4339 /* Check if libfunc for divmod is available. */
4340 rtx libfunc = optab_libfunc (divmod_optab, mode);
4341 if (libfunc != NULL_RTX)
3343 /* If an optab_handler exists for div_optab, perhaps in a wider mode,
3344    we don't want to use the libfunc even if it exists for the given mode.  */
4345 machine_mode div_mode;
4346 FOR_EACH_MODE_FROM (div_mode, mode)
4347 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4348 return false;
4350 return targetm.expand_divmod_libfunc != NULL;
4353 return false;
4356 /* Check if stmt is candidate for divmod transform. */
4358 static bool
4359 divmod_candidate_p (gassign *stmt)
4361 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4362 machine_mode mode = TYPE_MODE (type);
4363 optab divmod_optab, div_optab;
4365 if (TYPE_UNSIGNED (type))
4367 divmod_optab = udivmod_optab;
4368 div_optab = udiv_optab;
4370 else
4372 divmod_optab = sdivmod_optab;
4373 div_optab = sdiv_optab;
4376 tree op1 = gimple_assign_rhs1 (stmt);
4377 tree op2 = gimple_assign_rhs2 (stmt);
4379 /* Disable the transform if either is a constant, since division-by-constant
4380 may have specialized expansion. */
4381 if (CONSTANT_CLASS_P (op1))
4382 return false;
4384 if (CONSTANT_CLASS_P (op2))
4386 if (integer_pow2p (op2))
4387 return false;
4389 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4390 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4391 return false;
4393 /* If the divisor is not power of 2 and the precision wider than
4394 HWI, expand_divmod punts on that, so in that case it is better
4395 to use divmod optab or libfunc. Similarly if choose_multiplier
4396 might need pre/post shifts of BITS_PER_WORD or more. */
4399 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4400 expand using the [su]divv optabs. */
4401 if (TYPE_OVERFLOW_TRAPS (type))
4402 return false;
4404 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4405 return false;
4407 return true;
4410 /* This function looks for:
4411 t1 = a TRUNC_DIV_EXPR b;
4412 t2 = a TRUNC_MOD_EXPR b;
4413 and transforms it to the following sequence:
4414 complex_tmp = DIVMOD (a, b);
4415 t1 = REALPART_EXPR(complex_tmp);
4416 t2 = IMAGPART_EXPR(complex_tmp);
4417 For conditions enabling the transform, see divmod_candidate_p().
4419 The pass has three parts:
4420 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4421 other trunc_div_expr and trunc_mod_expr stmts.
4422 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4423 to stmts vector.
4424 3) Insert DIVMOD call just before top_stmt and update entries in
4425 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4426 IMAGPART_EXPR for mod). */
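/* A minimal C-level sketch of code that benefits from this transform,
   assuming a non-constant divisor; names are illustrative only:

     void
     split (unsigned a, unsigned b, unsigned *q, unsigned *r)
     {
       *q = a / b;	/* TRUNC_DIV_EXPR  */
       *r = a % b;	/* TRUNC_MOD_EXPR  */
     }

   Both statements end up reading the single DIVMOD result, so targets
   whose division instruction (or libcall) produces quotient and remainder
   together only pay for one division.  */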
4428 static bool
4429 convert_to_divmod (gassign *stmt)
4431 if (stmt_can_throw_internal (cfun, stmt)
4432 || !divmod_candidate_p (stmt))
4433 return false;
4435 tree op1 = gimple_assign_rhs1 (stmt);
4436 tree op2 = gimple_assign_rhs2 (stmt);
4438 imm_use_iterator use_iter;
4439 gimple *use_stmt;
4440 auto_vec<gimple *> stmts;
4442 gimple *top_stmt = stmt;
4443 basic_block top_bb = gimple_bb (stmt);
4445 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4446    at least stmt and possibly other trunc_div/trunc_mod stmts
4447    having the same operands as stmt.  */
4449 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4451 if (is_gimple_assign (use_stmt)
4452 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4453 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4454 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4455 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4457 if (stmt_can_throw_internal (cfun, use_stmt))
4458 continue;
4460 basic_block bb = gimple_bb (use_stmt);
4462 if (bb == top_bb)
4464 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4465 top_stmt = use_stmt;
4467 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4469 top_bb = bb;
4470 top_stmt = use_stmt;
4475 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4476 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4478 stmts.safe_push (top_stmt);
4479 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4481 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4482    to the stmts vector.  The 2nd loop will always add stmt to the stmts
4483    vector, since gimple_bb (top_stmt) dominates gimple_bb (stmt), so it
4484    ends up adding at least one trunc_mod_expr stmt.  */
4486 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4488 if (is_gimple_assign (use_stmt)
4489 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4490 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4491 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4492 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4494 if (use_stmt == top_stmt
4495 || stmt_can_throw_internal (cfun, use_stmt)
4496 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4497 continue;
4499 stmts.safe_push (use_stmt);
4500 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4501 div_seen = true;
4505 if (!div_seen)
4506 return false;
4508 /* Part 3: Create libcall to internal fn DIVMOD:
4509 divmod_tmp = DIVMOD (op1, op2). */
4511 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4512 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4513 call_stmt, "divmod_tmp");
4514 gimple_call_set_lhs (call_stmt, res);
4515 /* We rejected throwing statements above. */
4516 gimple_call_set_nothrow (call_stmt, true);
4518 /* Insert the call before top_stmt. */
4519 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4520 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4522 widen_mul_stats.divmod_calls_inserted++;
4524 /* Update all statements in stmts vector:
4525 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4526 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4528 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4530 tree new_rhs;
4532 switch (gimple_assign_rhs_code (use_stmt))
4534 case TRUNC_DIV_EXPR:
4535 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4536 break;
4538 case TRUNC_MOD_EXPR:
4539 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4540 break;
4542 default:
4543 gcc_unreachable ();
4546 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4547 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4548 update_stmt (use_stmt);
4551 return true;
4554 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4555 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
4556 value is true iff we converted the statement. */
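/* A minimal C-level sketch of the shape this matches, assuming a target
   with a 32x32->64 widening multiply and a highpart-multiply pattern;
   names and widths are illustrative only:

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   The widened multiply followed by a right shift by at least the operand
   precision becomes a single MULT_HIGHPART_EXPR when the target provides
   a [us]mul_highpart pattern.  */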
4558 static bool
4559 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4561 tree lhs = gimple_assign_lhs (stmt);
4562 tree stype = TREE_TYPE (lhs);
4563 tree sarg0 = gimple_assign_rhs1 (stmt);
4564 tree sarg1 = gimple_assign_rhs2 (stmt);
4566 if (TREE_CODE (stype) != INTEGER_TYPE
4567 || TREE_CODE (sarg1) != INTEGER_CST
4568 || TREE_CODE (sarg0) != SSA_NAME
4569 || !tree_fits_uhwi_p (sarg1)
4570 || !has_single_use (sarg0))
4571 return false;
4573 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4574 if (!def)
4575 return false;
4577 enum tree_code mcode = gimple_assign_rhs_code (def);
4578 if (mcode == NOP_EXPR)
4580 tree tmp = gimple_assign_rhs1 (def);
4581 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4582 return false;
4583 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4584 if (!def)
4585 return false;
4586 mcode = gimple_assign_rhs_code (def);
4589 if (mcode != WIDEN_MULT_EXPR
4590 || gimple_bb (def) != gimple_bb (stmt))
4591 return false;
4592 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4593 if (TREE_CODE (mtype) != INTEGER_TYPE
4594 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4595 return false;
4597 tree mop1 = gimple_assign_rhs1 (def);
4598 tree mop2 = gimple_assign_rhs2 (def);
4599 tree optype = TREE_TYPE (mop1);
4600 bool unsignedp = TYPE_UNSIGNED (optype);
4601 unsigned int prec = TYPE_PRECISION (optype);
4603 if (unsignedp != TYPE_UNSIGNED (mtype)
4604 || TYPE_PRECISION (mtype) != 2 * prec)
4605 return false;
4607 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4608 if (bits < prec || bits >= 2 * prec)
4609 return false;
4611 machine_mode mode = TYPE_MODE (optype);
4612 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4613 if (optab_handler (tab, mode) == CODE_FOR_nothing)
4614 return false;
4616 location_t loc = gimple_location (stmt);
4617 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4618 MULT_HIGHPART_EXPR, mop1, mop2);
4619 tree highpart2 = highpart1;
4620 tree ntype = optype;
4622 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4624 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4625 : signed_type_for (optype);
4626 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4628 if (bits > prec)
4629 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4630 RSHIFT_EXPR, highpart2,
4631 build_int_cst (ntype, bits - prec));
4633 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4634 gsi_replace (gsi, new_stmt, true);
4636 widen_mul_stats.highpart_mults_inserted++;
4637 return true;
4641 /* Find integer multiplications where the operands are extended from
4642 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4643 or MULT_HIGHPART_EXPR where appropriate. */
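/* A minimal C-level sketch of a widening multiply this pass targets,
   assuming 32-bit int and 64-bit long long; names are illustrative only:

     long long
     widen (int a, int b)
     {
       return (long long) a * (long long) b;
     }

   Rather than extending both operands to 64 bits and performing a full
   64x64 multiply, the MULT_EXPR is rewritten as a WIDEN_MULT_EXPR so the
   target's 32x32->64 multiply instruction can be used.  */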
4645 namespace {
4647 const pass_data pass_data_optimize_widening_mul =
4649 GIMPLE_PASS, /* type */
4650 "widening_mul", /* name */
4651 OPTGROUP_NONE, /* optinfo_flags */
4652 TV_TREE_WIDEN_MUL, /* tv_id */
4653 PROP_ssa, /* properties_required */
4654 0, /* properties_provided */
4655 0, /* properties_destroyed */
4656 0, /* todo_flags_start */
4657 TODO_update_ssa, /* todo_flags_finish */
4660 class pass_optimize_widening_mul : public gimple_opt_pass
4662 public:
4663 pass_optimize_widening_mul (gcc::context *ctxt)
4664 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4667 /* opt_pass methods: */
4668 virtual bool gate (function *)
4670 return flag_expensive_optimizations && optimize;
4673 virtual unsigned int execute (function *);
4675 }; // class pass_optimize_widening_mul
4677 /* Walker class to perform the transformation in reverse dominance order. */
4679 class math_opts_dom_walker : public dom_walker
4681 public:
4682 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4683 if walking modifies the CFG.  */
4685 math_opts_dom_walker (bool *cfg_changed_p)
4686 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4687 m_cfg_changed_p (cfg_changed_p) {}
4689 /* The actual actions performed in the walk. */
4691 virtual void after_dom_children (basic_block);
4693 /* Set of results of chains of multiply and add statement combinations that
4694 were not transformed into FMAs because of active deferring. */
4695 hash_set<tree> m_last_result_set;
4697 /* Pointer to a flag of the user that needs to be set if CFG has been
4698 modified. */
4699 bool *m_cfg_changed_p;
4702 void
4703 math_opts_dom_walker::after_dom_children (basic_block bb)
4705 gimple_stmt_iterator gsi;
4707 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4709 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4711 gimple *stmt = gsi_stmt (gsi);
4712 enum tree_code code;
4714 if (is_gimple_assign (stmt))
4716 code = gimple_assign_rhs_code (stmt);
4717 switch (code)
4719 case MULT_EXPR:
4720 if (!convert_mult_to_widen (stmt, &gsi)
4721 && !convert_expand_mult_copysign (stmt, &gsi)
4722 && convert_mult_to_fma (stmt,
4723 gimple_assign_rhs1 (stmt),
4724 gimple_assign_rhs2 (stmt),
4725 &fma_state))
4727 gsi_remove (&gsi, true);
4728 release_defs (stmt);
4729 continue;
4731 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4732 break;
4734 case PLUS_EXPR:
4735 case MINUS_EXPR:
4736 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4737 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4738 break;
4740 case BIT_NOT_EXPR:
4741 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4742 continue;
4743 break;
4745 case TRUNC_MOD_EXPR:
4746 convert_to_divmod (as_a<gassign *> (stmt));
4747 break;
4749 case RSHIFT_EXPR:
4750 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4751 break;
4753 default:;
4756 else if (is_gimple_call (stmt))
4758 switch (gimple_call_combined_fn (stmt))
4760 CASE_CFN_POW:
4761 if (gimple_call_lhs (stmt)
4762 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4763 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4764 &dconst2)
4765 && convert_mult_to_fma (stmt,
4766 gimple_call_arg (stmt, 0),
4767 gimple_call_arg (stmt, 0),
4768 &fma_state))
4770 unlink_stmt_vdef (stmt);
4771 if (gsi_remove (&gsi, true)
4772 && gimple_purge_dead_eh_edges (bb))
4773 *m_cfg_changed_p = true;
4774 release_defs (stmt);
4775 continue;
4777 break;
4779 case CFN_COND_MUL:
4780 if (convert_mult_to_fma (stmt,
4781 gimple_call_arg (stmt, 1),
4782 gimple_call_arg (stmt, 2),
4783 &fma_state,
4784 gimple_call_arg (stmt, 0)))
4787 gsi_remove (&gsi, true);
4788 release_defs (stmt);
4789 continue;
4791 break;
4793 case CFN_LAST:
4794 cancel_fma_deferring (&fma_state);
4795 break;
4797 default:
4798 break;
4801 gsi_next (&gsi);
4803 if (fma_state.m_deferring_p
4804 && fma_state.m_initial_phi)
4806 gcc_checking_assert (fma_state.m_last_result);
4807 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
4808 &m_last_result_set))
4809 cancel_fma_deferring (&fma_state);
4810 else
4811 m_last_result_set.add (fma_state.m_last_result);
4816 unsigned int
4817 pass_optimize_widening_mul::execute (function *fun)
4819 bool cfg_changed = false;
4821 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
4822 calculate_dominance_info (CDI_DOMINATORS);
4823 renumber_gimple_stmt_uids (cfun);
4825 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4827 statistics_counter_event (fun, "widening multiplications inserted",
4828 widen_mul_stats.widen_mults_inserted);
4829 statistics_counter_event (fun, "widening maccs inserted",
4830 widen_mul_stats.maccs_inserted);
4831 statistics_counter_event (fun, "fused multiply-adds inserted",
4832 widen_mul_stats.fmas_inserted);
4833 statistics_counter_event (fun, "divmod calls inserted",
4834 widen_mul_stats.divmod_calls_inserted);
4835 statistics_counter_event (fun, "highpart multiplications inserted",
4836 widen_mul_stats.highpart_mults_inserted);
4838 return cfg_changed ? TODO_cleanup_cfg : 0;
4841 } // anon namespace
4843 gimple_opt_pass *
4844 make_pass_optimize_widening_mul (gcc::context *ctxt)
4846 return new pass_optimize_widening_mul (ctxt);