gcc/tree-ssa-math-opts.cc
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* The first mini-pass in this file tries to CSE reciprocal
21    operations.  These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43    First of all, experiments showed that the transformation is not always
44    useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53    contain one.  (If divisions don't trap, we can instead insert divisions
54    elsewhere, in blocks that are common dominators of those containing
55    the division.)
57    We really don't want to compute the reciprocal unless a division will
58    be found.  To do this, we won't insert the division in a basic block
59    that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63    and walks it twice.  The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
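/* A concrete illustration of the post-dominance requirement (hypothetical
   example, assuming a target whose threshold N is 2):

     x = a / d;                          x = a / d;
     if (p)                      -->     if (p)        (unchanged)
       y = b / d;                          y = b / d;

   The guarded division does not post-dominate the block containing the
   first one, so no candidate insertion point has N divisions
   post-dominating it and no reciprocal is introduced; computing 1.0 / d
   would be wasted work on the path where P is false.  If the second
   division were unconditional, both divisions would post-dominate the
   start of the block and the reciprocal would be inserted there, as in
   the modulus example above.  */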
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
120 /* This structure represents one basic block that either computes a
121    division, or is a common dominator for basic blocks that compute a
122 division. */
123 struct occurrence {
124 /* The basic block represented by this structure. */
125 basic_block bb = basic_block();
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
128 inserted in BB. */
129 tree recip_def = tree();
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def = tree();
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple *recip_def_stmt = nullptr;
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
140 by BB. */
141 struct occurrence *children = nullptr;
143   /* Pointer to the next "struct occurrence" in the list of blocks
144 sharing a common dominator. */
145 struct occurrence *next = nullptr;
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
149 compute_merit. */
150 int num_divisions = 0;
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division = false;
157 /* Construct a struct occurrence for basic block BB, and whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb, struct occurrence *children)
160 : bb (bb), children (children)
162 bb->aux = this;
165 /* Destroy a struct occurrence and remove it from its basic block. */
166 ~occurrence ()
168 bb->aux = nullptr;
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
178 static struct
180 /* Number of 1.0/X ops inserted. */
181 int rdivs_inserted;
183 /* Number of 1.0/FUNC ops inserted. */
184 int rfuncs_inserted;
185 } reciprocal_stats;
187 static struct
189 /* Number of cexpi calls inserted. */
190 int inserted;
192 /* Number of conversions removed. */
193 int conv_removed;
195 } sincos_stats;
197 static struct
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted;
202 /* Number of integer multiply-and-accumulate ops inserted. */
203 int maccs_inserted;
205 /* Number of fp fused multiply-add ops inserted. */
206 int fmas_inserted;
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted;
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted;
213 } widen_mul_stats;
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence *occ_head;
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator<occurrence> *occ_pool;
222 void* occurrence::operator new (size_t n)
224 gcc_assert (n == sizeof(occurrence));
225 return occ_pool->allocate_raw ();
228 void occurrence::operator delete (void *occ, size_t n)
230 gcc_assert (n == sizeof(occurrence));
231 occ_pool->remove_raw (occ);
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
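/* For instance (hypothetical block numbers): if divisions by the same
   value are found in two blocks B5 and B7, neither of which dominates the
   other, and their nearest common dominator is B2, the constructed subset
   of the dominator tree is

        B2          <- added even though it contains no division
       /  \
     B5    B7       <- the blocks containing the divisions

   and compute_merit / insert_reciprocals below then decide whether B2 is
   a profitable place to insert the reciprocal.  */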
242 static void
243 insert_bb (struct occurrence *new_occ, basic_block idom,
244 struct occurrence **p_head)
246 struct occurrence *occ, **p_occ;
248 for (p_occ = p_head; (occ = *p_occ) != NULL; )
250 basic_block bb = new_occ->bb, occ_bb = occ->bb;
251 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
252 if (dom == bb)
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
255 from its list. */
256 *p_occ = occ->next;
257 occ->next = new_occ->children;
258 new_occ->children = occ;
260 /* Try the next block (it may as well be dominated by BB). */
263 else if (dom == occ_bb)
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ, dom, &occ->children);
267 return;
270 else if (dom != idom)
272 gcc_assert (!dom->aux);
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
276 its list. */
277 *p_occ = occ->next;
278 new_occ->next = occ;
279 occ->next = NULL;
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ = new occurrence (dom, new_occ);
287 else
289 /* Nothing special, go on with the next element. */
290 p_occ = &occ->next;
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ->next = *p_head;
296 *p_head = new_occ;
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
305 static inline void
306 register_division_in (basic_block bb, int importance)
308 struct occurrence *occ;
310 occ = (struct occurrence *) bb->aux;
311 if (!occ)
313 occ = new occurrence (bb, NULL);
314 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
317 occ->bb_has_division = true;
318 occ->num_divisions += importance;
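/* To illustrate the IMPORTANCE convention (hypothetical example):

     x2 = x * x;
     r  = a / x2;

   While walking the immediate uses of X, the statement X2 = X * X is
   visited twice (it contains two uses of X), so the division R = A / X2 is
   registered twice with IMPORTANCE 1, whereas a direct division A / X is
   registered once with IMPORTANCE 2.  Either way a division contributes 2
   to NUM_DIVISIONS, which is why insert_reciprocals divides the count by
   two before comparing it against the target threshold.  */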
322 /* Compute the number of divisions that postdominate each block in OCC and
323 its children. */
325 static void
326 compute_merit (struct occurrence *occ)
328 struct occurrence *occ_child;
329 basic_block dom = occ->bb;
331 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
333 basic_block bb;
334 if (occ_child->children)
335 compute_merit (occ_child);
337 if (flag_exceptions)
338 bb = single_noncomplex_succ (dom);
339 else
340 bb = dom;
342 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
343 occ->num_divisions += occ_child->num_divisions;
348 /* Return whether USE_STMT is a floating-point division by DEF. */
349 static inline bool
350 is_division_by (gimple *use_stmt, tree def)
352 return is_gimple_assign (use_stmt)
353 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 	    confused later by replacing all immediate uses of x in such
357 a stmt. */
358 && gimple_assign_rhs1 (use_stmt) != def
359 && !stmt_can_throw_internal (cfun, use_stmt);
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
363 static inline bool
364 is_mult_by (gimple *use_stmt, tree def, tree a)
366 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
369 tree op0 = gimple_assign_rhs1 (use_stmt);
370 tree op1 = gimple_assign_rhs2 (use_stmt);
372 return (op0 == def && op1 == a)
373 || (op0 == a && op1 == def);
375   return false;
378 /* Return whether USE_STMT is DEF * DEF. */
379 static inline bool
380 is_square_of (gimple *use_stmt, tree def)
382 return is_mult_by (use_stmt, def, def);
385 /* Return whether USE_STMT is a floating-point division by
386 DEF * DEF. */
387 static inline bool
388 is_division_by_square (gimple *use_stmt, tree def)
390 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
393 && !stmt_can_throw_internal (cfun, use_stmt))
395 tree denominator = gimple_assign_rhs2 (use_stmt);
396 if (TREE_CODE (denominator) == SSA_NAME)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
399   return false;
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
407 DEF_BSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
409 be used.
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
415 static void
416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
417 tree def, tree recip_def, tree square_recip_def,
418 int should_insert_square_recip, int threshold)
420 tree type;
421 gassign *new_stmt, *new_square_stmt;
422 gimple_stmt_iterator gsi;
423 struct occurrence *occ_child;
425 if (!recip_def
426 && (occ->bb_has_division || !flag_trapping_math)
427 /* Divide by two as all divisions are counted twice in
428 the costing loop. */
429 && occ->num_divisions / 2 >= threshold)
431 /* Make a variable with the replacement and substitute it. */
432 type = TREE_TYPE (def);
433 recip_def = create_tmp_reg (type, "reciptmp");
434 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
435 build_one_cst (type), def);
437 if (should_insert_square_recip)
439 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
440 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
441 recip_def, recip_def);
444 if (occ->bb_has_division)
446 /* Case 1: insert before an existing division. */
447 gsi = gsi_after_labels (occ->bb);
448 while (!gsi_end_p (gsi)
449 && (!is_division_by (gsi_stmt (gsi), def))
450 && (!is_division_by_square (gsi_stmt (gsi), def)))
451 gsi_next (&gsi);
453 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
454 if (should_insert_square_recip)
455 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
457 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
464 if (should_insert_square_recip)
465 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
467 else
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi = gsi_after_labels (occ->bb);
471 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
472 if (should_insert_square_recip)
473 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
476 reciprocal_stats.rdivs_inserted++;
478 occ->recip_def_stmt = new_stmt;
481 occ->recip_def = recip_def;
482 occ->square_recip_def = square_recip_def;
483 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
484 insert_reciprocals (def_gsi, occ_child, def, recip_def,
485 square_recip_def, should_insert_square_recip,
486 threshold);
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
491 static inline void
492 replace_reciprocal_squares (use_operand_p use_p)
494 gimple *use_stmt = USE_STMT (use_p);
495 basic_block bb = gimple_bb (use_stmt);
496 struct occurrence *occ = (struct occurrence *) bb->aux;
498 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
499 && occ->recip_def)
501 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
502 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
503 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
504 SET_USE (use_p, occ->square_recip_def);
505 fold_stmt_inplace (&gsi);
506 update_stmt (use_stmt);
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
512 possible. */
514 static inline void
515 replace_reciprocal (use_operand_p use_p)
517 gimple *use_stmt = USE_STMT (use_p);
518 basic_block bb = gimple_bb (use_stmt);
519 struct occurrence *occ = (struct occurrence *) bb->aux;
521 if (optimize_bb_for_speed_p (bb)
522 && occ->recip_def && use_stmt != occ->recip_def_stmt)
524 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
525 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
526 SET_USE (use_p, occ->recip_def);
527 fold_stmt_inplace (&gsi);
528 update_stmt (use_stmt);
533 /* Free OCC and return one more "struct occurrence" to be freed. */
535 static struct occurrence *
536 free_bb (struct occurrence *occ)
538 struct occurrence *child, *next;
540 /* First get the two pointers hanging off OCC. */
541 next = occ->next;
542 child = occ->children;
543 delete occ;
545 /* Now ensure that we don't recurse unless it is necessary. */
546 if (!child)
547 return next;
548 else
550 while (next)
551 next = free_bb (next);
553 return child;
557 /* Transform sequences like
558 t = sqrt (a)
559 x = 1.0 / t;
560 r1 = x * x;
561 r2 = a * x;
562 into:
563 t = sqrt (a)
564 r1 = 1.0 / a;
565 r2 = t;
566 x = r1 * r2;
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
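/* The transformation relies on the identities (for a > 0, and valid only
   under -funsafe-math-optimizations, which gates this function):

     r1 = x * x == 1.0 / a
     r2 = a * x == sqrt (a) == t
     r1 * r2    == (1.0 / a) * sqrt (a) == 1.0 / sqrt (a) == x

   the last of which is what allows any remaining uses of X to be fed from
   the product of the two new values.  */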
572 static void
573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
575 gimple *use_stmt;
576 imm_use_iterator use_iter;
577 gimple *stmt = gsi_stmt (*def_gsi);
578 tree x = def;
579 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
580 tree div_rhs1 = gimple_assign_rhs1 (stmt);
582 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
583 || TREE_CODE (div_rhs1) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
585 return;
587 gcall *sqrt_stmt
588 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
590 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
591 return;
593 switch (gimple_call_combined_fn (sqrt_stmt))
595 CASE_CFN_SQRT:
596 CASE_CFN_SQRT_FN:
597 break;
599 default:
600 return;
602 tree a = gimple_call_arg (sqrt_stmt, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec<gimple *> sqr_stmts;
608 /* Statements that use x in a * x. */
609 auto_vec<gimple *> mult_stmts;
610 bool has_other_use = false;
611 bool mult_on_main_path = false;
613 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
615 if (is_gimple_debug (use_stmt))
616 continue;
617 if (is_square_of (use_stmt, x))
619 sqr_stmts.safe_push (use_stmt);
620 if (gimple_bb (use_stmt) == gimple_bb (stmt))
621 mult_on_main_path = true;
623 else if (is_mult_by (use_stmt, x, a))
625 mult_stmts.safe_push (use_stmt);
626 if (gimple_bb (use_stmt) == gimple_bb (stmt))
627 mult_on_main_path = true;
629 else
630 has_other_use = true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use && !mult_on_main_path)
638 return;
640 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
641 return;
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
646 return;
648 if (dump_file)
650 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
652 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
653 fprintf (dump_file, "\n");
656 bool delete_div = !has_other_use;
657 tree sqr_ssa_name = NULL_TREE;
658 if (!sqr_stmts.is_empty ())
660 /* r1 = x * x. Transform the original
661 x = 1.0 / t
662 into
663 tmp1 = 1.0 / a
664 r1 = tmp1. */
666 sqr_ssa_name
667 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
669 if (dump_file)
671 fprintf (dump_file, "Replacing original division\n");
672 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
673 fprintf (dump_file, "with new division\n");
675 stmt
676 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
677 gimple_assign_rhs1 (stmt), a);
678 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
679 gsi_remove (def_gsi, true);
680 *def_gsi = gsi_for_stmt (stmt);
681 fold_stmt_inplace (def_gsi);
682 update_stmt (stmt);
684 if (dump_file)
685 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
687 delete_div = false;
688 gimple *sqr_stmt;
689 unsigned int i;
690 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
692 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
693 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
694 update_stmt (sqr_stmt);
697 if (!mult_stmts.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
701 unsigned int i;
702 gimple *mult_stmt = NULL;
703 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
705 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
707 if (dump_file)
709 fprintf (dump_file, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
711 fprintf (dump_file, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
714 fold_stmt_inplace (&gsi2);
715 update_stmt (mult_stmt);
716 if (dump_file)
717 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
721 if (has_other_use)
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
725 x = tmp1 * tmp2. */
726 gcc_assert (orig_sqrt_ssa_name);
727 gcc_assert (sqr_ssa_name);
729 gimple *new_stmt
730 = gimple_build_assign (x, MULT_EXPR,
731 orig_sqrt_ssa_name, sqr_ssa_name);
732 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
733 update_stmt (stmt);
735 else if (delete_div)
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
739 gsi_remove (&gsi2, true);
740 release_defs (stmt);
742 else
743 release_ssa_name (x);
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
750 DEF must be a GIMPLE register of a floating-point type. */
752 static void
753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
755 use_operand_p use_p, square_use_p;
756 imm_use_iterator use_iter, square_use_iter;
757 tree square_def;
758 struct occurrence *occ;
759 int count = 0;
760 int threshold;
761 int square_recip_count = 0;
762 int sqrt_recip_count = 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
765 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
770 def = x * x
771 t0 = a / def
772 t1 = b / def
773 t2 = c / x
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
777 if (is_gimple_assign (def_stmt)
778 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
782 tree op0 = gimple_assign_rhs1 (def_stmt);
784 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
786 gimple *use_stmt = USE_STMT (use_p);
787 if (is_division_by (use_stmt, op0))
788 sqrt_recip_count++;
792 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
794 gimple *use_stmt = USE_STMT (use_p);
795 if (is_division_by (use_stmt, def))
797 register_division_in (gimple_bb (use_stmt), 2);
798 count++;
801 if (is_square_of (use_stmt, def))
803 square_def = gimple_assign_lhs (use_stmt);
804 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
806 gimple *square_use_stmt = USE_STMT (square_use_p);
807 if (is_division_by (square_use_stmt, square_def))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt), 1);
811 square_recip_count++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count /= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count > square_recip_count)
822 goto out;
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count + square_recip_count >= threshold && count >= 1)
827 gimple *use_stmt;
828 for (occ = occ_head; occ; occ = occ->next)
830 compute_merit (occ);
831 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
832 square_recip_count, threshold);
835 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
837 if (is_division_by (use_stmt, def))
839 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
840 replace_reciprocal (use_p);
842 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
844 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator;
849 gimple *powmult_use_stmt = USE_STMT (use_p);
850 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
853 square_iterator, powmult_def_name)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
856 gimple *powmult_use_stmt = USE_STMT (square_use_p);
857 if (is_division_by (powmult_use_stmt, powmult_def_name))
858 replace_reciprocal_squares (square_use_p);
865 out:
866 for (occ = occ_head; occ; )
867 occ = free_bb (occ);
869 occ_head = NULL;
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
875 internal_fn
876 internal_fn_reciprocal (gcall *call)
878 internal_fn ifn;
880 switch (gimple_call_combined_fn (call))
882 CASE_CFN_SQRT:
883 CASE_CFN_SQRT_FN:
884 ifn = IFN_RSQRT;
885 break;
887 default:
888 return IFN_LAST;
891 tree_pair types = direct_internal_fn_types (ifn, call);
892 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
893 return IFN_LAST;
895 return ifn;
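/* For example (illustrative only), on a target that provides IFN_RSQRT for
   the mode of B, the caller below rewrites

     t = sqrtf (b);                  t = .RSQRT (b);
     x = a / t;              -->     x = a * t;

   and it only does so after checking that every use of T is a division
   by T, since the defining statement of T is replaced.  */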
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
900 namespace {
902 const pass_data pass_data_cse_reciprocals =
904 GIMPLE_PASS, /* type */
905 "recip", /* name */
906 OPTGROUP_NONE, /* optinfo_flags */
907 TV_TREE_RECIP, /* tv_id */
908 PROP_ssa, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa, /* todo_flags_finish */
915 class pass_cse_reciprocals : public gimple_opt_pass
917 public:
918 pass_cse_reciprocals (gcc::context *ctxt)
919 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
922 /* opt_pass methods: */
923 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
924 virtual unsigned int execute (function *);
926 }; // class pass_cse_reciprocals
928 unsigned int
929 pass_cse_reciprocals::execute (function *fun)
931 basic_block bb;
932 tree arg;
934 occ_pool = new object_allocator<occurrence> ("dominators for recip");
936 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
937 calculate_dominance_info (CDI_DOMINATORS);
938 calculate_dominance_info (CDI_POST_DOMINATORS);
940 if (flag_checking)
941 FOR_EACH_BB_FN (bb, fun)
942 gcc_assert (!bb->aux);
944 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
945 if (FLOAT_TYPE_P (TREE_TYPE (arg))
946 && is_gimple_reg (arg))
948 tree name = ssa_default_def (fun, arg);
949 if (name)
950 execute_cse_reciprocals_1 (NULL, name);
953 FOR_EACH_BB_FN (bb, fun)
955 tree def;
957 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
958 gsi_next (&gsi))
960 gphi *phi = gsi.phi ();
961 def = PHI_RESULT (phi);
962 if (! virtual_operand_p (def)
963 && FLOAT_TYPE_P (TREE_TYPE (def)))
964 execute_cse_reciprocals_1 (NULL, def);
967 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
968 gsi_next (&gsi))
970 gimple *stmt = gsi_stmt (gsi);
972 if (gimple_has_lhs (stmt)
973 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
974 && FLOAT_TYPE_P (TREE_TYPE (def))
975 && TREE_CODE (def) == SSA_NAME)
977 execute_cse_reciprocals_1 (&gsi, def);
978 stmt = gsi_stmt (gsi);
979 if (flag_unsafe_math_optimizations
980 && is_gimple_assign (stmt)
981 && gimple_assign_lhs (stmt) == def
982 && !stmt_can_throw_internal (cfun, stmt)
983 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
984 optimize_recip_sqrt (&gsi, def);
988 if (optimize_bb_for_size_p (bb))
989 continue;
991 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
992 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
993 gsi_next (&gsi))
995 gimple *stmt = gsi_stmt (gsi);
997 if (is_gimple_assign (stmt)
998 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1000 tree arg1 = gimple_assign_rhs2 (stmt);
1001 gimple *stmt1;
1003 if (TREE_CODE (arg1) != SSA_NAME)
1004 continue;
1006 stmt1 = SSA_NAME_DEF_STMT (arg1);
1008 if (is_gimple_call (stmt1)
1009 && gimple_call_lhs (stmt1))
1011 bool fail;
1012 imm_use_iterator ui;
1013 use_operand_p use_p;
1014 tree fndecl = NULL_TREE;
1016 gcall *call = as_a <gcall *> (stmt1);
1017 internal_fn ifn = internal_fn_reciprocal (call);
1018 if (ifn == IFN_LAST)
1020 fndecl = gimple_call_fndecl (call);
1021 if (!fndecl
1022 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1023 continue;
1024 fndecl = targetm.builtin_reciprocal (fndecl);
1025 if (!fndecl)
1026 continue;
1029 /* Check that all uses of the SSA name are divisions,
1030 otherwise replacing the defining statement will do
1031 the wrong thing. */
1032 fail = false;
1033 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1035 gimple *stmt2 = USE_STMT (use_p);
1036 if (is_gimple_debug (stmt2))
1037 continue;
1038 if (!is_gimple_assign (stmt2)
1039 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1040 || gimple_assign_rhs1 (stmt2) == arg1
1041 || gimple_assign_rhs2 (stmt2) != arg1)
1043 fail = true;
1044 break;
1047 if (fail)
1048 continue;
1050 gimple_replace_ssa_lhs (call, arg1);
1051 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1053 auto_vec<tree, 4> args;
1054 for (unsigned int i = 0;
1055 i < gimple_call_num_args (call); i++)
1056 args.safe_push (gimple_call_arg (call, i));
1057 gcall *stmt2;
1058 if (ifn == IFN_LAST)
1059 stmt2 = gimple_build_call_vec (fndecl, args);
1060 else
1061 stmt2 = gimple_build_call_internal_vec (ifn, args);
1062 gimple_call_set_lhs (stmt2, arg1);
1063 gimple_move_vops (stmt2, call);
1064 gimple_call_set_nothrow (stmt2,
1065 gimple_call_nothrow_p (call));
1066 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1067 gsi_replace (&gsi2, stmt2, true);
1069 else
1071 if (ifn == IFN_LAST)
1072 gimple_call_set_fndecl (call, fndecl);
1073 else
1074 gimple_call_set_internal_fn (call, ifn);
1075 update_stmt (call);
1077 reciprocal_stats.rfuncs_inserted++;
1079 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1081 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1082 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1083 fold_stmt_inplace (&gsi);
1084 update_stmt (stmt);
1091 statistics_counter_event (fun, "reciprocal divs inserted",
1092 reciprocal_stats.rdivs_inserted);
1093 statistics_counter_event (fun, "reciprocal functions inserted",
1094 reciprocal_stats.rfuncs_inserted);
1096 free_dominance_info (CDI_DOMINATORS);
1097 free_dominance_info (CDI_POST_DOMINATORS);
1098 delete occ_pool;
1099 return 0;
1102 } // anon namespace
1104 gimple_opt_pass *
1105 make_pass_cse_reciprocals (gcc::context *ctxt)
1107 return new pass_cse_reciprocals (ctxt);
1110 /* If NAME is the result of a type conversion, look for other
1111 equivalent dominating or dominated conversions, and replace all
1112 uses with the earliest dominating name, removing the redundant
1113 conversions. Return the prevailing name. */
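/* For example (illustrative SSA names):

     y_2 = (float) x_1;              y_2 = (float) x_1;
     ...                     -->     ...
     z_5 = (float) x_1;              (uses of z_5 replaced by y_2,
                                      z_5's definition removed)

   provided the definition of y_2 dominates the definition of z_5 and the
   two conversions have compatible result types.  */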
1115 static tree
1116 execute_cse_conv_1 (tree name, bool *cfg_changed)
1118 if (SSA_NAME_IS_DEFAULT_DEF (name)
1119 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1120 return name;
1122 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1124 if (!gimple_assign_cast_p (def_stmt))
1125 return name;
1127 tree src = gimple_assign_rhs1 (def_stmt);
1129 if (TREE_CODE (src) != SSA_NAME)
1130 return name;
1132 imm_use_iterator use_iter;
1133 gimple *use_stmt;
1135 /* Find the earliest dominating def. */
1136 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1138 if (use_stmt == def_stmt
1139 || !gimple_assign_cast_p (use_stmt))
1140 continue;
1142 tree lhs = gimple_assign_lhs (use_stmt);
1144 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1145 || (gimple_assign_rhs1 (use_stmt)
1146 != gimple_assign_rhs1 (def_stmt))
1147 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1148 continue;
1150 bool use_dominates;
1151 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1153 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1154 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1155 gsi_next (&gsi);
1156 use_dominates = !gsi_end_p (gsi);
1158 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1159 gimple_bb (def_stmt)))
1160 use_dominates = false;
1161 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1162 gimple_bb (use_stmt)))
1163 use_dominates = true;
1164 else
1165 continue;
1167 if (use_dominates)
1169 std::swap (name, lhs);
1170 std::swap (def_stmt, use_stmt);
1174 /* Now go through all uses of SRC again, replacing the equivalent
1175 dominated conversions. We may replace defs that were not
1176 dominated by the then-prevailing defs when we first visited
1177 them. */
1178 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1180 if (use_stmt == def_stmt
1181 || !gimple_assign_cast_p (use_stmt))
1182 continue;
1184 tree lhs = gimple_assign_lhs (use_stmt);
1186 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1187 || (gimple_assign_rhs1 (use_stmt)
1188 != gimple_assign_rhs1 (def_stmt))
1189 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1190 continue;
1192 basic_block use_bb = gimple_bb (use_stmt);
1193 if (gimple_bb (def_stmt) == use_bb
1194 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1196 sincos_stats.conv_removed++;
1198 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1199 replace_uses_by (lhs, name);
1200 if (gsi_remove (&gsi, true)
1201 && gimple_purge_dead_eh_edges (use_bb))
1202 *cfg_changed = true;
1203 release_defs (use_stmt);
1207 return name;
1210 /* Records an occurrence at statement USE_STMT in the vector of statements
1211    STMTS if it is dominated by *TOP_BB, dominates it, or if *TOP_BB
1212    is not yet initialized.  Returns true if the occurrence was pushed on
1213 the vector. Adjusts *TOP_BB to be the basic block dominating all
1214 statements in the vector. */
1216 static bool
1217 maybe_record_sincos (vec<gimple *> *stmts,
1218 basic_block *top_bb, gimple *use_stmt)
1220 basic_block use_bb = gimple_bb (use_stmt);
1221 if (*top_bb
1222 && (*top_bb == use_bb
1223 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1224 stmts->safe_push (use_stmt);
1225 else if (!*top_bb
1226 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1228 stmts->safe_push (use_stmt);
1229 *top_bb = use_bb;
1231 else
1232 return false;
1234 return true;
1237 /* Look for sin, cos and cexpi calls with the same argument NAME and
1238 create a single call to cexpi CSEing the result in this case.
1239 We first walk over all immediate uses of the argument collecting
1240 statements that we can CSE in a vector and in a second pass replace
1241 the statement rhs with a REALPART or IMAGPART expression on the
1242 result of the cexpi call we insert before the use statement that
1243 dominates all other candidates. */
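/* For example (source-level sketch, illustrative only):

     s = sinf (x);                  _Complex float t = __builtin_cexpif (x);
     c = cosf (x);          -->     s = __imag__ t;
                                    c = __real__ t;

   assuming a cexpi built-in is available for the type; the transformation
   is only applied when at least two of sin, cos and cexpi are seen for the
   same argument.  */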
1245 static bool
1246 execute_cse_sincos_1 (tree name)
1248 gimple_stmt_iterator gsi;
1249 imm_use_iterator use_iter;
1250 tree fndecl, res, type = NULL_TREE;
1251 gimple *def_stmt, *use_stmt, *stmt;
1252 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1253 auto_vec<gimple *> stmts;
1254 basic_block top_bb = NULL;
1255 int i;
1256 bool cfg_changed = false;
1258 name = execute_cse_conv_1 (name, &cfg_changed);
1260 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1262 if (gimple_code (use_stmt) != GIMPLE_CALL
1263 || !gimple_call_lhs (use_stmt))
1264 continue;
1266 switch (gimple_call_combined_fn (use_stmt))
1268 CASE_CFN_COS:
1269 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1270 break;
1272 CASE_CFN_SIN:
1273 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274 break;
1276 CASE_CFN_CEXPI:
1277 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278 break;
1280 default:;
1281 continue;
1284 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1285 if (!type)
1287 type = t;
1288 t = TREE_TYPE (name);
1290 /* This checks that NAME has the right type in the first round,
1291 and, in subsequent rounds, that the built_in type is the same
1292 type, or a compatible type. */
1293 if (type != t && !types_compatible_p (type, t))
1294 return false;
1296 if (seen_cos + seen_sin + seen_cexpi <= 1)
1297 return false;
1299 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1300 the name def statement. */
1301 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1302 if (!fndecl)
1303 return false;
1304 stmt = gimple_build_call (fndecl, 1, name);
1305 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1306 gimple_call_set_lhs (stmt, res);
1308 def_stmt = SSA_NAME_DEF_STMT (name);
1309 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1310 && gimple_code (def_stmt) != GIMPLE_PHI
1311 && gimple_bb (def_stmt) == top_bb)
1313 gsi = gsi_for_stmt (def_stmt);
1314 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1316 else
1318 gsi = gsi_after_labels (top_bb);
1319 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1321 sincos_stats.inserted++;
1323 /* And adjust the recorded old call sites. */
1324 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1326 tree rhs = NULL;
1328 switch (gimple_call_combined_fn (use_stmt))
1330 CASE_CFN_COS:
1331 rhs = fold_build1 (REALPART_EXPR, type, res);
1332 break;
1334 CASE_CFN_SIN:
1335 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1336 break;
1338 CASE_CFN_CEXPI:
1339 rhs = res;
1340 break;
1342 default:;
1343 gcc_unreachable ();
1346 /* Replace call with a copy. */
1347 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1349 gsi = gsi_for_stmt (use_stmt);
1350 gsi_replace (&gsi, stmt, true);
1351 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1352 cfg_changed = true;
1355 return cfg_changed;
1358 /* To evaluate powi(x,n), the floating point value x raised to the
1359 constant integer exponent n, we use a hybrid algorithm that
1360 combines the "window method" with look-up tables. For an
1361 introduction to exponentiation algorithms and "addition chains",
1362 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1363 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1364 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1365 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1367 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1368 multiplications to inline before calling the system library's pow
1369 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1370 so this default never requires calling pow, powf or powl. */
1372 #ifndef POWI_MAX_MULTS
1373 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1374 #endif
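/* With a 64-bit HOST_WIDE_INT the default above is 2 * 64 - 2 == 126, so
   when optimizing for speed the test powi_cost (n) <= POWI_MAX_MULTS in
   gimple_expand_builtin_powi always succeeds and the expansion is never
   rejected on cost grounds.  */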
1376 /* The size of the "optimal power tree" lookup table. All
1377 exponents less than this value are simply looked up in the
1378 powi_table below. This threshold is also used to size the
1379 cache of pseudo registers that hold intermediate results. */
1380 #define POWI_TABLE_SIZE 256
1382 /* The size, in bits of the window, used in the "window method"
1383 exponentiation algorithm. This is equivalent to a radix of
1384 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1385 #define POWI_WINDOW_SIZE 3
1387 /* The following table is an efficient representation of an
1388 "optimal power tree". For each value, i, the corresponding
1389    value, j, in the table states that an optimal evaluation
1390 sequence for calculating pow(x,i) can be found by evaluating
1391 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1392 100 integers is given in Knuth's "Seminumerical algorithms". */
1394 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1396 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1397 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1398 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1399 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1400 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1401 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1402 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1403 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1404 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1405 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1406 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1407 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1408 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1409 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1410 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1411 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1412 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1413 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1414 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1415 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1416 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1417 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1418 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1419 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1420 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1421 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1422 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1423 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1424 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1425 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1426 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1427 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
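/* For example, powi_table[5] == 3, so x**5 is computed as x**3 * x**2;
   recursing, x**3 == x**2 * x and x**2 == x * x.  powi_as_mults_1 below
   caches the intermediate powers, so the whole expansion is

     t2 = x * x;
     t3 = x * t2;
     t5 = t2 * t3;

   i.e. three multiplications, which is also what powi_cost returns for
   n == 5.  */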
1431 /* Return the number of multiplications required to calculate
1432 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1433 subroutine of powi_cost. CACHE is an array indicating
1434 which exponents have already been calculated. */
1436 static int
1437 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1439 /* If we've already calculated this exponent, then this evaluation
1440 doesn't require any additional multiplications. */
1441 if (cache[n])
1442 return 0;
1444 cache[n] = true;
1445 return powi_lookup_cost (n - powi_table[n], cache)
1446 + powi_lookup_cost (powi_table[n], cache) + 1;
1449 /* Return the number of multiplications required to calculate
1450 powi(x,n) for an arbitrary x, given the exponent N. This
1451 function needs to be kept in sync with powi_as_mults below. */
1453 static int
1454 powi_cost (HOST_WIDE_INT n)
1456 bool cache[POWI_TABLE_SIZE];
1457 unsigned HOST_WIDE_INT digit;
1458 unsigned HOST_WIDE_INT val;
1459 int result;
1461 if (n == 0)
1462 return 0;
1464 /* Ignore the reciprocal when calculating the cost. */
1465 val = absu_hwi (n);
1467 /* Initialize the exponent cache. */
1468 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1469 cache[1] = true;
1471 result = 0;
1473 while (val >= POWI_TABLE_SIZE)
1475 if (val & 1)
1477 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1478 result += powi_lookup_cost (digit, cache)
1479 + POWI_WINDOW_SIZE + 1;
1480 val >>= POWI_WINDOW_SIZE;
1482 else
1484 val >>= 1;
1485 result++;
1489 return result + powi_lookup_cost (val, cache);
1492 /* Recursive subroutine of powi_as_mults. This function takes the
1493 array, CACHE, of already calculated exponents and an exponent N and
1494 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1496 static tree
1497 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1498 unsigned HOST_WIDE_INT n, tree *cache)
1500 tree op0, op1, ssa_target;
1501 unsigned HOST_WIDE_INT digit;
1502 gassign *mult_stmt;
1504 if (n < POWI_TABLE_SIZE && cache[n])
1505 return cache[n];
1507 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1509 if (n < POWI_TABLE_SIZE)
1511 cache[n] = ssa_target;
1512 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1513 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1515 else if (n & 1)
1517 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1518 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1519 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1521 else
1523 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1524 op1 = op0;
1527 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1528 gimple_set_location (mult_stmt, loc);
1529 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1531 return ssa_target;
1534 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1535 This function needs to be kept in sync with powi_cost above. */
1537 tree
1538 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1539 tree arg0, HOST_WIDE_INT n)
1541 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1542 gassign *div_stmt;
1543 tree target;
1545 if (n == 0)
1546 return build_one_cst (type);
1548 memset (cache, 0, sizeof (cache));
1549 cache[1] = arg0;
1551 result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1552 if (n >= 0)
1553 return result;
1555 /* If the original exponent was negative, reciprocate the result. */
1556 target = make_temp_ssa_name (type, NULL, "powmult");
1557 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1558 build_real (type, dconst1), result);
1559 gimple_set_location (div_stmt, loc);
1560 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1562 return target;
1565 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1566 location info LOC. If the arguments are appropriate, create an
1567 equivalent sequence of statements prior to GSI using an optimal
1568    number of multiplications, and return an expression holding the
1569 result. */
1571 static tree
1572 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1573 tree arg0, HOST_WIDE_INT n)
1575 if ((n >= -1 && n <= 2)
1576 || (optimize_function_for_speed_p (cfun)
1577 && powi_cost (n) <= POWI_MAX_MULTS))
1578 return powi_as_mults (gsi, loc, arg0, n);
1580 return NULL_TREE;
1583 /* Build a gimple call statement that calls FN with argument ARG.
1584 Set the lhs of the call statement to a fresh SSA name. Insert the
1585 statement prior to GSI's current position, and return the fresh
1586 SSA name. */
1588 static tree
1589 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1590 tree fn, tree arg)
1592 gcall *call_stmt;
1593 tree ssa_target;
1595 call_stmt = gimple_build_call (fn, 1, arg);
1596 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1597 gimple_set_lhs (call_stmt, ssa_target);
1598 gimple_set_location (call_stmt, loc);
1599 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1601 return ssa_target;
1604 /* Build a gimple binary operation with the given CODE and arguments
1605 ARG0, ARG1, assigning the result to a new SSA name for variable
1606 TARGET. Insert the statement prior to GSI's current position, and
1607    return the fresh SSA name.  */
1609 static tree
1610 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1611 const char *name, enum tree_code code,
1612 tree arg0, tree arg1)
1614 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1615 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1616 gimple_set_location (stmt, loc);
1617 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1618 return result;
1621 /* Build a gimple reference operation with the given CODE and argument
1622 ARG, assigning the result to a new SSA name of TYPE with NAME.
1623 Insert the statement prior to GSI's current position, and return
1624 the fresh SSA name. */
1626 static inline tree
1627 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1628 const char *name, enum tree_code code, tree arg0)
1630 tree result = make_temp_ssa_name (type, NULL, name);
1631 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1632 gimple_set_location (stmt, loc);
1633 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1634 return result;
1637 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1638 prior to GSI's current position, and return the fresh SSA name. */
1640 static tree
1641 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1642 tree type, tree val)
1644 tree result = make_ssa_name (type);
1645 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1646 gimple_set_location (stmt, loc);
1647 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1648 return result;
1651 struct pow_synth_sqrt_info
1653 bool *factors;
1654 unsigned int deepest;
1655 unsigned int num_mults;
1658 /* Return true iff the real value C can be represented as a
1659 sum of powers of 0.5 up to N. That is:
1660 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1661 Record in INFO the various parameters of the synthesis algorithm such
1662 as the factors a[i], the maximum 0.5 power and the number of
1663 multiplications that will be required. */
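/* For example (illustrative): C == 0.625 == 0.5 + 0.125 yields
   INFO->factors == { true, false, true }, INFO->deepest == 3 and
   INFO->num_mults == 1, corresponding to the product
   sqrt (x) * sqrt (sqrt (sqrt (x))) built by expand_pow_as_sqrts below.  */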
1665 bool
1666 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1667 struct pow_synth_sqrt_info *info)
1669 REAL_VALUE_TYPE factor = dconsthalf;
1670 REAL_VALUE_TYPE remainder = c;
1672 info->deepest = 0;
1673 info->num_mults = 0;
1674 memset (info->factors, 0, n * sizeof (bool));
1676 for (unsigned i = 0; i < n; i++)
1678 REAL_VALUE_TYPE res;
1680 /* If something inexact happened bail out now. */
1681 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1682 return false;
1684 /* We have hit zero. The number is representable as a sum
1685 of powers of 0.5. */
1686 if (real_equal (&res, &dconst0))
1688 info->factors[i] = true;
1689 info->deepest = i + 1;
1690 return true;
1692 else if (!REAL_VALUE_NEGATIVE (res))
1694 remainder = res;
1695 info->factors[i] = true;
1696 info->num_mults++;
1698 else
1699 info->factors[i] = false;
1701 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1703 return false;
1706 /* Return the tree corresponding to FN being applied
1707 to ARG N times at GSI and LOC.
1708 Look up previous results from CACHE if need be.
1709 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1711 static tree
1712 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1713 tree fn, location_t loc, tree *cache)
1715 tree res = cache[n];
1716 if (!res)
1718 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1719 res = build_and_insert_call (gsi, loc, fn, prev);
1720 cache[n] = res;
1723 return res;
1726 /* Print to STREAM the repeated application of function FNAME to ARG
1727 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1728 "foo (foo (x))". */
1730 static void
1731 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1732 unsigned int n)
1734 if (n == 0)
1735 fprintf (stream, "%s", arg);
1736 else
1738 fprintf (stream, "%s (", fname);
1739 print_nested_fn (stream, fname, arg, n - 1);
1740 fprintf (stream, ")");
1744 /* Print to STREAM the fractional sequence of sqrt chains
1745 applied to ARG, described by INFO. Used for the dump file. */
1747 static void
1748 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1749 struct pow_synth_sqrt_info *info)
1751 for (unsigned int i = 0; i < info->deepest; i++)
1753 bool is_set = info->factors[i];
1754 if (is_set)
1756 print_nested_fn (stream, "sqrt", arg, i + 1);
1757 if (i != info->deepest - 1)
1758 fprintf (stream, " * ");
1763 /* Print to STREAM a representation of raising ARG to an integer
1764 power N. Used for the dump file. */
1766 static void
1767 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1769 if (n > 1)
1770 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1771 else if (n == 1)
1772 fprintf (stream, "%s", arg);
1775 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1776 square roots. Place at GSI and LOC. Limit the maximum depth
1777 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1778 result of the expanded sequence or NULL_TREE if the expansion failed.
1780 This routine assumes that ARG1 is a real number with a fractional part
1781 (the integer exponent case will have been handled earlier in
1782 gimple_expand_builtin_pow).
1784 For ARG1 > 0.0:
1785 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1786 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1787 FRAC_PART == ARG1 - WHOLE_PART:
1788 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1789 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1790 if it can be expressed as such, that is if FRAC_PART satisfies:
1791 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1792 where integer a[i] is either 0 or 1.
1794 Example:
1795 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1796 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1798 For ARG1 < 0.0 there are two approaches:
1799 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1800 is calculated as above.
1802 Example:
1803 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1804 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1806 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1807 FRAC_PART := ARG1 - WHOLE_PART
1808 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1809 Example:
1810 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1811 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1813 For ARG1 < 0.0 we choose between (A) and (B) depending on
1814 how many multiplications we'd have to do.
1815 So, for the example in (B): POW (x, -5.875), if we were to
1816 follow algorithm (A) we would produce:
1817 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1818 which contains more multiplications than approach (B).
1820 Hopefully, this approach will eliminate potentially expensive POW library
1821 calls when unsafe floating point math is enabled and allow the compiler to
1822 further optimise the multiplies, square roots and divides produced by this
1823 function. */
1825 static tree
1826 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1827 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1829 tree type = TREE_TYPE (arg0);
1830 machine_mode mode = TYPE_MODE (type);
1831 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1832 bool one_over = true;
1834 if (!sqrtfn)
1835 return NULL_TREE;
1837 if (TREE_CODE (arg1) != REAL_CST)
1838 return NULL_TREE;
1840 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1842 gcc_assert (max_depth > 0);
1843 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1845 struct pow_synth_sqrt_info synth_info;
1846 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1847 synth_info.deepest = 0;
1848 synth_info.num_mults = 0;
1850 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1851 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1853 /* The whole and fractional parts of exp. */
1854 REAL_VALUE_TYPE whole_part;
1855 REAL_VALUE_TYPE frac_part;
1857 real_floor (&whole_part, mode, &exp);
1858 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1861 REAL_VALUE_TYPE ceil_whole = dconst0;
1862 REAL_VALUE_TYPE ceil_fract = dconst0;
1864 if (neg_exp)
1866 real_ceil (&ceil_whole, mode, &exp);
1867 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1870 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1871 return NULL_TREE;
1873 /* Check whether it's more profitable to not use 1.0 / ... */
1874 if (neg_exp)
1876 struct pow_synth_sqrt_info alt_synth_info;
1877 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1878 alt_synth_info.deepest = 0;
1879 alt_synth_info.num_mults = 0;
1881 if (representable_as_half_series_p (ceil_fract, max_depth,
1882 &alt_synth_info)
1883 && alt_synth_info.deepest <= synth_info.deepest
1884 && alt_synth_info.num_mults < synth_info.num_mults)
1886 whole_part = ceil_whole;
1887 frac_part = ceil_fract;
1888 synth_info.deepest = alt_synth_info.deepest;
1889 synth_info.num_mults = alt_synth_info.num_mults;
1890 memcpy (synth_info.factors, alt_synth_info.factors,
1891 (max_depth + 1) * sizeof (bool));
1892 one_over = false;
1896 HOST_WIDE_INT n = real_to_integer (&whole_part);
1897 REAL_VALUE_TYPE cint;
1898 real_from_integer (&cint, VOIDmode, n, SIGNED);
1900 if (!real_identical (&whole_part, &cint))
1901 return NULL_TREE;
1903 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1904 return NULL_TREE;
1906 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1908 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1910 /* Calculate the integer part of the exponent. */
1911 if (n > 1)
1913 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1914 if (!integer_res)
1915 return NULL_TREE;
1918 if (dump_file)
1920 char string[64];
1922 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1923 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1925 if (neg_exp)
1927 if (one_over)
1929 fprintf (dump_file, "1.0 / (");
1930 dump_integer_part (dump_file, "x", n);
1931 if (n > 0)
1932 fprintf (dump_file, " * ");
1933 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1934 fprintf (dump_file, ")");
1936 else
1938 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1939 fprintf (dump_file, " / (");
1940 dump_integer_part (dump_file, "x", n);
1941 fprintf (dump_file, ")");
1944 else
1946 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1947 if (n > 0)
1948 fprintf (dump_file, " * ");
1949 dump_integer_part (dump_file, "x", n);
1952 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1956 tree fract_res = NULL_TREE;
1957 cache[0] = arg0;
1959 /* Calculate the fractional part of the exponent. */
1960 for (unsigned i = 0; i < synth_info.deepest; i++)
1962 if (synth_info.factors[i])
1964 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1966 if (!fract_res)
1967 fract_res = sqrt_chain;
1969 else
1970 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1971 fract_res, sqrt_chain);
1975 tree res = NULL_TREE;
1977 if (neg_exp)
1979 if (one_over)
1981 if (n > 0)
1982 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1983 fract_res, integer_res);
1984 else
1985 res = fract_res;
1987 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1988 build_real (type, dconst1), res);
1990 else
1992 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1993 fract_res, integer_res);
1996 else
1997 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1998 fract_res, integer_res);
1999 return res;
2002 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2003 with location info LOC. If possible, create an equivalent and
2004 less expensive sequence of statements prior to GSI, and return an
2005 expression holding the result. */
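/* A minimal sketch of typical inputs (hypothetical user code; the actual
   replacement depends on the exponent value, the active flags and the
   target):

     double f (double x) { return __builtin_pow (x, 2.0); }   // -> x * x
     double g (double x) { return __builtin_pow (x, 0.5); }   // -> sqrt (x),
                                  // if signed zeros need not be honored
  */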
2007 static tree
2008 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2009 tree arg0, tree arg1)
2011 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2012 REAL_VALUE_TYPE c2, dconst3;
2013 HOST_WIDE_INT n;
2014 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2015 machine_mode mode;
2016 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2017 bool hw_sqrt_exists, c_is_int, c2_is_int;
2019 dconst1_4 = dconst1;
2020 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2022 /* If the exponent isn't a constant, there's nothing of interest
2023 to be done. */
2024 if (TREE_CODE (arg1) != REAL_CST)
2025 return NULL_TREE;
2027 /* Don't perform the operation if flag_signaling_nans is on
2028 and the operand is a signaling NaN. */
2029 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2030 && ((TREE_CODE (arg0) == REAL_CST
2031 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2032 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2033 return NULL_TREE;
2035 /* If the exponent is equivalent to an integer, expand to an optimal
2036 multiplication sequence when profitable. */
2037 c = TREE_REAL_CST (arg1);
2038 n = real_to_integer (&c);
2039 real_from_integer (&cint, VOIDmode, n, SIGNED);
2040 c_is_int = real_identical (&c, &cint);
2042 if (c_is_int
2043 && ((n >= -1 && n <= 2)
2044 || (flag_unsafe_math_optimizations
2045 && speed_p
2046 && powi_cost (n) <= POWI_MAX_MULTS)))
2047 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2049 /* Attempt various optimizations using sqrt and cbrt. */
2050 type = TREE_TYPE (arg0);
2051 mode = TYPE_MODE (type);
2052 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2054 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2055 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2056 sqrt(-0) = -0. */
2057 if (sqrtfn
2058 && real_equal (&c, &dconsthalf)
2059 && !HONOR_SIGNED_ZEROS (mode))
2060 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2062 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2064 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2065 optimizations since 1./3. is not exactly representable. If x
2066 is negative and finite, the correct value of pow(x,1./3.) is
2067 a NaN with the "invalid" exception raised, because the value
2068 of 1./3. actually has an even denominator. The correct value
2069 of cbrt(x) is a negative real value. */
2070 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2071 dconst1_3 = real_value_truncate (mode, dconst_third ());
2073 if (flag_unsafe_math_optimizations
2074 && cbrtfn
2075 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2076 && real_equal (&c, &dconst1_3))
2077 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2079 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2080 if we don't have a hardware sqrt insn. */
2081 dconst1_6 = dconst1_3;
2082 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2084 if (flag_unsafe_math_optimizations
2085 && sqrtfn
2086 && cbrtfn
2087 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2088 && speed_p
2089 && hw_sqrt_exists
2090 && real_equal (&c, &dconst1_6))
2092 /* sqrt(x) */
2093 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2095 /* cbrt(sqrt(x)) */
2096 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2100 /* Attempt to expand the POW as a product of square root chains.
2101 Expand the 0.25 case even when optimising for size. */
2102 if (flag_unsafe_math_optimizations
2103 && sqrtfn
2104 && hw_sqrt_exists
2105 && (speed_p || real_equal (&c, &dconst1_4))
2106 && !HONOR_SIGNED_ZEROS (mode))
2108 unsigned int max_depth = speed_p
2109 ? param_max_pow_sqrt_depth
2110 : 2;
2112 tree expand_with_sqrts
2113 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2115 if (expand_with_sqrts)
2116 return expand_with_sqrts;
2119 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2120 n = real_to_integer (&c2);
2121 real_from_integer (&cint, VOIDmode, n, SIGNED);
2122 c2_is_int = real_identical (&c2, &cint);
2124 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2126 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2127 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2129 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2130 different from pow(x, 1./3.) due to rounding and behavior with
2131 negative x, we need to constrain this transformation to unsafe
2132 math and positive x or finite math. */
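/* For illustration (assuming -funsafe-math-optimizations and nonnegative x):
     pow (x, 2./3.) -> cbrt (x) * cbrt (x)
     pow (x, 5./3.) -> x * cbrt (x) * cbrt (x)  */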
2133 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2134 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2135 real_round (&c2, mode, &c2);
2136 n = real_to_integer (&c2);
2137 real_from_integer (&cint, VOIDmode, n, SIGNED);
2138 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2139 real_convert (&c2, mode, &c2);
2141 if (flag_unsafe_math_optimizations
2142 && cbrtfn
2143 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2144 && real_identical (&c2, &c)
2145 && !c2_is_int
2146 && optimize_function_for_speed_p (cfun)
2147 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2149 tree powi_x_ndiv3 = NULL_TREE;
2151 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2152 possible or profitable, give up. Skip the degenerate case when
2153 abs(n) < 3, where the result is always 1. */
2154 if (absu_hwi (n) >= 3)
2156 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2157 abs_hwi (n / 3));
2158 if (!powi_x_ndiv3)
2159 return NULL_TREE;
2162 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2163 as that creates an unnecessary variable. Instead, just produce
2164 either cbrt(x) or cbrt(x) * cbrt(x). */
2165 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2167 if (absu_hwi (n) % 3 == 1)
2168 powi_cbrt_x = cbrt_x;
2169 else
2170 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2171 cbrt_x, cbrt_x);
2173 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2174 if (absu_hwi (n) < 3)
2175 result = powi_cbrt_x;
2176 else
2177 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2178 powi_x_ndiv3, powi_cbrt_x);
2180 /* If n is negative, reciprocate the result. */
2181 if (n < 0)
2182 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2183 build_real (type, dconst1), result);
2185 return result;
2188 /* No optimizations succeeded. */
2189 return NULL_TREE;
2192 /* ARG is the argument to a cabs builtin call in GSI with location info
2193 LOC. Create a sequence of statements prior to GSI that calculates
2194 sqrt(R*R + I*I), where R and I are the real and imaginary components
2195 of ARG, respectively. Return an expression holding the result. */
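/* Roughly, for hypothetical user code such as

     double f (double _Complex z) { return cabs (z); }

   this emits sqrt (creal (z) * creal (z) + cimag (z) * cimag (z)),
   provided unsafe math is enabled, a hardware sqrt exists and the
   block is optimized for speed.  */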
2197 static tree
2198 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2200 tree real_part, imag_part, addend1, addend2, sum, result;
2201 tree type = TREE_TYPE (TREE_TYPE (arg));
2202 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2203 machine_mode mode = TYPE_MODE (type);
2205 if (!flag_unsafe_math_optimizations
2206 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2207 || !sqrtfn
2208 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2209 return NULL_TREE;
2211 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2212 REALPART_EXPR, arg);
2213 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2214 real_part, real_part);
2215 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2216 IMAGPART_EXPR, arg);
2217 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2218 imag_part, imag_part);
2219 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2220 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2222 return result;
2225 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2226 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
2227 an optimal number of multiplies, when n is a constant. */
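/* A typical candidate for the sincos part (hypothetical user code,
   assuming the C library provides sincos or cexp):

     void f (double x, double *s, double *c)
     {
       *s = sin (x);   // same SSA argument for both calls, so they can
       *c = cos (x);   // be rewritten to share a single cexpi/sincos
     }
  */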
2229 namespace {
2231 const pass_data pass_data_cse_sincos =
2233 GIMPLE_PASS, /* type */
2234 "sincos", /* name */
2235 OPTGROUP_NONE, /* optinfo_flags */
2236 TV_TREE_SINCOS, /* tv_id */
2237 PROP_ssa, /* properties_required */
2238 PROP_gimple_opt_math, /* properties_provided */
2239 0, /* properties_destroyed */
2240 0, /* todo_flags_start */
2241 TODO_update_ssa, /* todo_flags_finish */
2244 class pass_cse_sincos : public gimple_opt_pass
2246 public:
2247 pass_cse_sincos (gcc::context *ctxt)
2248 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2251 /* opt_pass methods: */
2252 virtual bool gate (function *)
2254 /* We no longer require either sincos or cexp, since powi expansion
2255 piggybacks on this pass. */
2256 return optimize;
2259 virtual unsigned int execute (function *);
2261 }; // class pass_cse_sincos
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2266 basic_block bb;
2267 bool cfg_changed = false;
2269 calculate_dominance_info (CDI_DOMINATORS);
2270 memset (&sincos_stats, 0, sizeof (sincos_stats));
2272 FOR_EACH_BB_FN (bb, fun)
2274 gimple_stmt_iterator gsi;
2275 bool cleanup_eh = false;
2277 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2279 gimple *stmt = gsi_stmt (gsi);
2281 /* Only the last stmt in a bb could throw, no need to call
2282 gimple_purge_dead_eh_edges if we change something in the middle
2283 of a basic block. */
2284 cleanup_eh = false;
2286 if (is_gimple_call (stmt)
2287 && gimple_call_lhs (stmt))
2289 tree arg, arg0, arg1, result;
2290 HOST_WIDE_INT n;
2291 location_t loc;
2293 switch (gimple_call_combined_fn (stmt))
2295 CASE_CFN_COS:
2296 CASE_CFN_SIN:
2297 CASE_CFN_CEXPI:
2298 arg = gimple_call_arg (stmt, 0);
2299 /* Make sure we have either sincos or cexp. */
2300 if (!targetm.libc_has_function (function_c99_math_complex,
2301 TREE_TYPE (arg))
2302 && !targetm.libc_has_function (function_sincos,
2303 TREE_TYPE (arg)))
2304 break;
2306 if (TREE_CODE (arg) == SSA_NAME)
2307 cfg_changed |= execute_cse_sincos_1 (arg);
2308 break;
2310 CASE_CFN_POW:
2311 arg0 = gimple_call_arg (stmt, 0);
2312 arg1 = gimple_call_arg (stmt, 1);
2314 loc = gimple_location (stmt);
2315 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2317 if (result)
2319 tree lhs = gimple_get_lhs (stmt);
2320 gassign *new_stmt = gimple_build_assign (lhs, result);
2321 gimple_set_location (new_stmt, loc);
2322 unlink_stmt_vdef (stmt);
2323 gsi_replace (&gsi, new_stmt, true);
2324 cleanup_eh = true;
2325 if (gimple_vdef (stmt))
2326 release_ssa_name (gimple_vdef (stmt));
2328 break;
2330 CASE_CFN_POWI:
2331 arg0 = gimple_call_arg (stmt, 0);
2332 arg1 = gimple_call_arg (stmt, 1);
2333 loc = gimple_location (stmt);
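/* powi (-1.0, n) depends only on the parity of n, so expand it to a
   COND_EXPR selecting between -1.0 and 1.0 on (n & 1) instead of
   building a multiplication chain.  */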
2335 if (real_minus_onep (arg0))
2337 tree t0, t1, cond, one, minus_one;
2338 gassign *stmt;
2340 t0 = TREE_TYPE (arg0);
2341 t1 = TREE_TYPE (arg1);
2342 one = build_real (t0, dconst1);
2343 minus_one = build_real (t0, dconstm1);
2345 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2346 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2347 arg1, build_int_cst (t1, 1));
2348 gimple_set_location (stmt, loc);
2349 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2351 result = make_temp_ssa_name (t0, NULL, "powi");
2352 stmt = gimple_build_assign (result, COND_EXPR, cond,
2353 minus_one, one);
2354 gimple_set_location (stmt, loc);
2355 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2357 else
2359 if (!tree_fits_shwi_p (arg1))
2360 break;
2362 n = tree_to_shwi (arg1);
2363 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2366 if (result)
2368 tree lhs = gimple_get_lhs (stmt);
2369 gassign *new_stmt = gimple_build_assign (lhs, result);
2370 gimple_set_location (new_stmt, loc);
2371 unlink_stmt_vdef (stmt);
2372 gsi_replace (&gsi, new_stmt, true);
2373 cleanup_eh = true;
2374 if (gimple_vdef (stmt))
2375 release_ssa_name (gimple_vdef (stmt));
2377 break;
2379 CASE_CFN_CABS:
2380 arg0 = gimple_call_arg (stmt, 0);
2381 loc = gimple_location (stmt);
2382 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2384 if (result)
2386 tree lhs = gimple_get_lhs (stmt);
2387 gassign *new_stmt = gimple_build_assign (lhs, result);
2388 gimple_set_location (new_stmt, loc);
2389 unlink_stmt_vdef (stmt);
2390 gsi_replace (&gsi, new_stmt, true);
2391 cleanup_eh = true;
2392 if (gimple_vdef (stmt))
2393 release_ssa_name (gimple_vdef (stmt));
2395 break;
2397 default:;
2401 if (cleanup_eh)
2402 cfg_changed |= gimple_purge_dead_eh_edges (bb);
2405 statistics_counter_event (fun, "sincos statements inserted",
2406 sincos_stats.inserted);
2407 statistics_counter_event (fun, "conv statements removed",
2408 sincos_stats.conv_removed);
2410 return cfg_changed ? TODO_cleanup_cfg : 0;
2413 } // anon namespace
2415 gimple_opt_pass *
2416 make_pass_cse_sincos (gcc::context *ctxt)
2418 return new pass_cse_sincos (ctxt);
2421 /* Return true if stmt is a type conversion operation that can be stripped
2422 when used in a widening multiply operation. */
2423 static bool
2424 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2426 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2428 if (TREE_CODE (result_type) == INTEGER_TYPE)
2430 tree op_type;
2431 tree inner_op_type;
2433 if (!CONVERT_EXPR_CODE_P (rhs_code))
2434 return false;
2436 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2438 /* If the type of OP has the same precision as the result, then
2439 we can strip this conversion. The multiply operation will be
2440 selected to create the correct extension as a by-product. */
2441 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2442 return true;
2444 /* We can also strip a conversion if it preserves the signed-ness of
2445 the operation and doesn't narrow the range. */
2446 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2448 /* If the inner-most type is unsigned, then we can strip any
2449 intermediate widening operation. If it's signed, then the
2450 intermediate widening operation must also be signed. */
2451 if ((TYPE_UNSIGNED (inner_op_type)
2452 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2453 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2454 return true;
2456 return false;
2459 return rhs_code == FIXED_CONVERT_EXPR;
2462 /* Return true if RHS is a suitable operand for a widening multiplication,
2463 assuming a target type of TYPE.
2464 There are two cases:
2466 - RHS makes some value at least twice as wide. Store that value
2467 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2469 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2470 but leave *TYPE_OUT untouched. */
2472 static bool
2473 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2474 tree *new_rhs_out)
2476 gimple *stmt;
2477 tree type1, rhs1;
2479 if (TREE_CODE (rhs) == SSA_NAME)
2481 stmt = SSA_NAME_DEF_STMT (rhs);
2482 if (is_gimple_assign (stmt))
2484 if (! widening_mult_conversion_strippable_p (type, stmt))
2485 rhs1 = rhs;
2486 else
2488 rhs1 = gimple_assign_rhs1 (stmt);
2490 if (TREE_CODE (rhs1) == INTEGER_CST)
2492 *new_rhs_out = rhs1;
2493 *type_out = NULL;
2494 return true;
2498 else
2499 rhs1 = rhs;
2501 type1 = TREE_TYPE (rhs1);
2503 if (TREE_CODE (type1) != TREE_CODE (type)
2504 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2505 return false;
2507 *new_rhs_out = rhs1;
2508 *type_out = type1;
2509 return true;
2512 if (TREE_CODE (rhs) == INTEGER_CST)
2514 *new_rhs_out = rhs;
2515 *type_out = NULL;
2516 return true;
2519 return false;
2522 /* Return true if STMT performs a widening multiplication, assuming the
2523 output type is TYPE. If so, store the unwidened types of the operands
2524 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2525 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2526 and *TYPE2_OUT would give the operands of the multiplication. */
2528 static bool
2529 is_widening_mult_p (gimple *stmt,
2530 tree *type1_out, tree *rhs1_out,
2531 tree *type2_out, tree *rhs2_out)
2533 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2535 if (TREE_CODE (type) == INTEGER_TYPE)
2537 if (TYPE_OVERFLOW_TRAPS (type))
2538 return false;
2540 else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2541 return false;
2543 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2544 rhs1_out))
2545 return false;
2547 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2548 rhs2_out))
2549 return false;
2551 if (*type1_out == NULL)
2553 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2554 return false;
2555 *type1_out = *type2_out;
2558 if (*type2_out == NULL)
2560 if (!int_fits_type_p (*rhs2_out, *type1_out))
2561 return false;
2562 *type2_out = *type1_out;
2565 /* Ensure that the larger of the two operands comes first. */
2566 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2568 std::swap (*type1_out, *type2_out);
2569 std::swap (*rhs1_out, *rhs2_out);
2572 return true;
2575 /* Check to see if the CALL statement is an invocation of copysign
2576 with 1. being the first argument. */
2577 static bool
2578 is_copysign_call_with_1 (gimple *call)
2580 gcall *c = dyn_cast <gcall *> (call);
2581 if (! c)
2582 return false;
2584 enum combined_fn code = gimple_call_combined_fn (c);
2586 if (code == CFN_LAST)
2587 return false;
2589 if (builtin_fn_p (code))
2591 switch (as_builtin_fn (code))
2593 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2594 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2595 return real_onep (gimple_call_arg (c, 0));
2596 default:
2597 return false;
2601 if (internal_fn_p (code))
2603 switch (as_internal_fn (code))
2605 case IFN_COPYSIGN:
2606 return real_onep (gimple_call_arg (c, 0));
2607 default:
2608 return false;
2612 return false;
2615 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2616 This only happens when the xorsign optab is defined; if the
2617 pattern is not an xorsign pattern or if expansion fails, FALSE is
2618 returned, otherwise TRUE is returned. */
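/* The shape being matched is roughly (hypothetical user code; the
   rewrite only happens when the target provides the xorsign optab):

     double f (double x, double y)
     {
       return x * __builtin_copysign (1.0, y);   // -> .XORSIGN (x, y)
     }
  */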
2619 static bool
2620 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2622 tree treeop0, treeop1, lhs, type;
2623 location_t loc = gimple_location (stmt);
2624 lhs = gimple_assign_lhs (stmt);
2625 treeop0 = gimple_assign_rhs1 (stmt);
2626 treeop1 = gimple_assign_rhs2 (stmt);
2627 type = TREE_TYPE (lhs);
2628 machine_mode mode = TYPE_MODE (type);
2630 if (HONOR_SNANS (type))
2631 return false;
2633 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2635 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2636 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2638 call0 = SSA_NAME_DEF_STMT (treeop1);
2639 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2640 return false;
2642 treeop1 = treeop0;
2644 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2645 return false;
2647 gcall *c = as_a<gcall*> (call0);
2648 treeop0 = gimple_call_arg (c, 1);
2650 gcall *call_stmt
2651 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2652 gimple_set_lhs (call_stmt, lhs);
2653 gimple_set_location (call_stmt, loc);
2654 gsi_replace (gsi, call_stmt, true);
2655 return true;
2658 return false;
2661 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2662 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2663 value is true iff we converted the statement. */
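/* The canonical source form this catches is (hypothetical user code;
   int and long long stand in for any suitably sized pair of types):

     long long f (int a, int b)
     {
       return (long long) a * b;   // -> WIDEN_MULT_EXPR (a, b)
     }
  */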
2665 static bool
2666 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2668 tree lhs, rhs1, rhs2, type, type1, type2;
2669 enum insn_code handler;
2670 scalar_int_mode to_mode, from_mode, actual_mode;
2671 optab op;
2672 int actual_precision;
2673 location_t loc = gimple_location (stmt);
2674 bool from_unsigned1, from_unsigned2;
2676 lhs = gimple_assign_lhs (stmt);
2677 type = TREE_TYPE (lhs);
2678 if (TREE_CODE (type) != INTEGER_TYPE)
2679 return false;
2681 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2682 return false;
2684 to_mode = SCALAR_INT_TYPE_MODE (type);
2685 from_mode = SCALAR_INT_TYPE_MODE (type1);
2686 if (to_mode == from_mode)
2687 return false;
2689 from_unsigned1 = TYPE_UNSIGNED (type1);
2690 from_unsigned2 = TYPE_UNSIGNED (type2);
2692 if (from_unsigned1 && from_unsigned2)
2693 op = umul_widen_optab;
2694 else if (!from_unsigned1 && !from_unsigned2)
2695 op = smul_widen_optab;
2696 else
2697 op = usmul_widen_optab;
2699 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2700 &actual_mode);
2702 if (handler == CODE_FOR_nothing)
2704 if (op != smul_widen_optab)
2706 /* We can use a signed multiply with unsigned types as long as
2707 there is a wider mode to use, or it is the smaller of the two
2708 types that is unsigned. Note that type1 >= type2, always. */
2709 if ((TYPE_UNSIGNED (type1)
2710 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2711 || (TYPE_UNSIGNED (type2)
2712 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2714 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2715 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2716 return false;
2719 op = smul_widen_optab;
2720 handler = find_widening_optab_handler_and_mode (op, to_mode,
2721 from_mode,
2722 &actual_mode);
2724 if (handler == CODE_FOR_nothing)
2725 return false;
2727 from_unsigned1 = from_unsigned2 = false;
2729 else
2731 /* Expand can synthesize smul_widen_optab if the target
2732 supports umul_widen_optab. */
2733 op = umul_widen_optab;
2734 handler = find_widening_optab_handler_and_mode (op, to_mode,
2735 from_mode,
2736 &actual_mode);
2737 if (handler == CODE_FOR_nothing)
2738 return false;
2742 /* Ensure that the inputs to the handler are in the correct precision
2743 for the opcode. This will be the full mode size. */
2744 actual_precision = GET_MODE_PRECISION (actual_mode);
2745 if (2 * actual_precision > TYPE_PRECISION (type))
2746 return false;
2747 if (actual_precision != TYPE_PRECISION (type1)
2748 || from_unsigned1 != TYPE_UNSIGNED (type1))
2749 rhs1 = build_and_insert_cast (gsi, loc,
2750 build_nonstandard_integer_type
2751 (actual_precision, from_unsigned1), rhs1);
2752 if (actual_precision != TYPE_PRECISION (type2)
2753 || from_unsigned2 != TYPE_UNSIGNED (type2))
2754 rhs2 = build_and_insert_cast (gsi, loc,
2755 build_nonstandard_integer_type
2756 (actual_precision, from_unsigned2), rhs2);
2758 /* Handle constants. */
2759 if (TREE_CODE (rhs1) == INTEGER_CST)
2760 rhs1 = fold_convert (type1, rhs1);
2761 if (TREE_CODE (rhs2) == INTEGER_CST)
2762 rhs2 = fold_convert (type2, rhs2);
2764 gimple_assign_set_rhs1 (stmt, rhs1);
2765 gimple_assign_set_rhs2 (stmt, rhs2);
2766 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2767 update_stmt (stmt);
2768 widen_mul_stats.widen_mults_inserted++;
2769 return true;
2772 /* Process a single gimple statement STMT, which is found at the
2773 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2774 rhs (given by CODE), and try to convert it into a
2775 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2776 is true iff we converted the statement. */
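/* Likewise, the source shape for the multiply-and-accumulate case is
   roughly (hypothetical user code; only converted when the target has
   a widening madd pattern):

     long long f (long long acc, int a, int b)
     {
       return acc + (long long) a * b;   // -> WIDEN_MULT_PLUS_EXPR
     }
  */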
2778 static bool
2779 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2780 enum tree_code code)
2782 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2783 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2784 tree type, type1, type2, optype;
2785 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2786 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2787 optab this_optab;
2788 enum tree_code wmult_code;
2789 enum insn_code handler;
2790 scalar_mode to_mode, from_mode, actual_mode;
2791 location_t loc = gimple_location (stmt);
2792 int actual_precision;
2793 bool from_unsigned1, from_unsigned2;
2795 lhs = gimple_assign_lhs (stmt);
2796 type = TREE_TYPE (lhs);
2797 if (TREE_CODE (type) != INTEGER_TYPE
2798 && TREE_CODE (type) != FIXED_POINT_TYPE)
2799 return false;
2801 if (code == MINUS_EXPR)
2802 wmult_code = WIDEN_MULT_MINUS_EXPR;
2803 else
2804 wmult_code = WIDEN_MULT_PLUS_EXPR;
2806 rhs1 = gimple_assign_rhs1 (stmt);
2807 rhs2 = gimple_assign_rhs2 (stmt);
2809 if (TREE_CODE (rhs1) == SSA_NAME)
2811 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2812 if (is_gimple_assign (rhs1_stmt))
2813 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2816 if (TREE_CODE (rhs2) == SSA_NAME)
2818 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2819 if (is_gimple_assign (rhs2_stmt))
2820 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2823 /* Allow for one conversion statement between the multiply
2824 and addition/subtraction statement. If there is more than
2825 one conversion then we assume they would invalidate this
2826 transformation. If that's not the case then they should have
2827 been folded before now. */
2828 if (CONVERT_EXPR_CODE_P (rhs1_code))
2830 conv1_stmt = rhs1_stmt;
2831 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2832 if (TREE_CODE (rhs1) == SSA_NAME)
2834 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2835 if (is_gimple_assign (rhs1_stmt))
2836 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2838 else
2839 return false;
2841 if (CONVERT_EXPR_CODE_P (rhs2_code))
2843 conv2_stmt = rhs2_stmt;
2844 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2845 if (TREE_CODE (rhs2) == SSA_NAME)
2847 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2848 if (is_gimple_assign (rhs2_stmt))
2849 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2851 else
2852 return false;
2855 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2856 is_widening_mult_p, but we still need the rhs returns.
2858 It might also appear that it would be sufficient to use the existing
2859 operands of the widening multiply, but that would limit the choice of
2860 multiply-and-accumulate instructions.
2862 If the widened-multiplication result has more than one use, it is
2863 probably wiser not to do the conversion. Also restrict this operation
2864 to a single basic block to avoid moving the multiply to a different block
2865 with a higher execution frequency. */
2866 if (code == PLUS_EXPR
2867 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2869 if (!has_single_use (rhs1)
2870 || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2871 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2872 &type2, &mult_rhs2))
2873 return false;
2874 add_rhs = rhs2;
2875 conv_stmt = conv1_stmt;
2877 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2879 if (!has_single_use (rhs2)
2880 || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2881 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2882 &type2, &mult_rhs2))
2883 return false;
2884 add_rhs = rhs1;
2885 conv_stmt = conv2_stmt;
2887 else
2888 return false;
2890 to_mode = SCALAR_TYPE_MODE (type);
2891 from_mode = SCALAR_TYPE_MODE (type1);
2892 if (to_mode == from_mode)
2893 return false;
2895 from_unsigned1 = TYPE_UNSIGNED (type1);
2896 from_unsigned2 = TYPE_UNSIGNED (type2);
2897 optype = type1;
2899 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2900 if (from_unsigned1 != from_unsigned2)
2902 if (!INTEGRAL_TYPE_P (type))
2903 return false;
2904 /* We can use a signed multiply with unsigned types as long as
2905 there is a wider mode to use, or it is the smaller of the two
2906 types that is unsigned. Note that type1 >= type2, always. */
2907 if ((from_unsigned1
2908 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2909 || (from_unsigned2
2910 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2912 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2913 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2914 return false;
2917 from_unsigned1 = from_unsigned2 = false;
2918 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2919 false);
2922 /* If there was a conversion between the multiply and addition
2923 then we need to make sure it fits a multiply-and-accumulate.
2924 There should be a single mode change which does not change the
2925 value. */
2926 if (conv_stmt)
2928 /* We use the original, unmodified data types for this. */
2929 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2930 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2931 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2932 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2934 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2936 /* Conversion is a truncate. */
2937 if (TYPE_PRECISION (to_type) < data_size)
2938 return false;
2940 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2942 /* Conversion is an extend. Check it's the right sort. */
2943 if (TYPE_UNSIGNED (from_type) != is_unsigned
2944 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2945 return false;
2947 /* else convert is a no-op for our purposes. */
2950 /* Verify that the machine can perform a widening multiply
2951 accumulate in this mode/signedness combination, otherwise
2952 this transformation is likely to pessimize code. */
2953 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2954 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2955 from_mode, &actual_mode);
2957 if (handler == CODE_FOR_nothing)
2958 return false;
2960 /* Ensure that the inputs to the handler are in the correct precision
2961 for the opcode. This will be the full mode size. */
2962 actual_precision = GET_MODE_PRECISION (actual_mode);
2963 if (actual_precision != TYPE_PRECISION (type1)
2964 || from_unsigned1 != TYPE_UNSIGNED (type1))
2965 mult_rhs1 = build_and_insert_cast (gsi, loc,
2966 build_nonstandard_integer_type
2967 (actual_precision, from_unsigned1),
2968 mult_rhs1);
2969 if (actual_precision != TYPE_PRECISION (type2)
2970 || from_unsigned2 != TYPE_UNSIGNED (type2))
2971 mult_rhs2 = build_and_insert_cast (gsi, loc,
2972 build_nonstandard_integer_type
2973 (actual_precision, from_unsigned2),
2974 mult_rhs2);
2976 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2977 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2979 /* Handle constants. */
2980 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2981 mult_rhs1 = fold_convert (type1, mult_rhs1);
2982 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2983 mult_rhs2 = fold_convert (type2, mult_rhs2);
2985 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2986 add_rhs);
2987 update_stmt (gsi_stmt (*gsi));
2988 widen_mul_stats.maccs_inserted++;
2989 return true;
2992 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
2993 OP2 and which we know is used in statements that can be, together with the
2994 multiplication, converted to FMAs, perform the transformation. */
2996 static void
2997 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
2999 tree type = TREE_TYPE (mul_result);
3000 gimple *use_stmt;
3001 imm_use_iterator imm_iter;
3002 gcall *fma_stmt;
3004 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3006 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3007 tree addop, mulop1 = op1, result = mul_result;
3008 bool negate_p = false;
3009 gimple_seq seq = NULL;
3011 if (is_gimple_debug (use_stmt))
3012 continue;
3014 if (is_gimple_assign (use_stmt)
3015 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3017 result = gimple_assign_lhs (use_stmt);
3018 use_operand_p use_p;
3019 gimple *neguse_stmt;
3020 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3021 gsi_remove (&gsi, true);
3022 release_defs (use_stmt);
3024 use_stmt = neguse_stmt;
3025 gsi = gsi_for_stmt (use_stmt);
3026 negate_p = true;
3029 tree cond, else_value, ops[3];
3030 tree_code code;
3031 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3032 ops, &else_value))
3033 gcc_unreachable ();
3034 addop = ops[0] == result ? ops[1] : ops[0];
3036 if (code == MINUS_EXPR)
3038 if (ops[0] == result)
3039 /* a * b - c -> a * b + (-c) */
3040 addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3041 else
3042 /* a - b * c -> (-b) * c + a */
3043 negate_p = !negate_p;
3046 if (negate_p)
3047 mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3049 if (seq)
3050 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3052 if (cond)
3053 fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3054 op2, addop, else_value);
3055 else
3056 fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3057 gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3058 gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3059 use_stmt));
3060 gsi_replace (&gsi, fma_stmt, true);
3061 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3062 regardless of where the negation occurs. */
3063 gimple *orig_stmt = gsi_stmt (gsi);
3064 if (fold_stmt (&gsi, follow_all_ssa_edges))
3066 if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3067 gcc_unreachable ();
3068 update_stmt (gsi_stmt (gsi));
3071 if (dump_file && (dump_flags & TDF_DETAILS))
3073 fprintf (dump_file, "Generated FMA ");
3074 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3075 fprintf (dump_file, "\n");
3078 /* If the FMA result is negated in a single use, fold the negation
3079 too. */
3080 orig_stmt = gsi_stmt (gsi);
3081 use_operand_p use_p;
3082 gimple *neg_stmt;
3083 if (is_gimple_call (orig_stmt)
3084 && gimple_call_internal_p (orig_stmt)
3085 && gimple_call_lhs (orig_stmt)
3086 && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3087 && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3088 && is_gimple_assign (neg_stmt)
3089 && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3090 && !stmt_could_throw_p (cfun, neg_stmt))
3092 gsi = gsi_for_stmt (neg_stmt);
3093 if (fold_stmt (&gsi, follow_all_ssa_edges))
3095 if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3096 gcc_unreachable ();
3097 update_stmt (gsi_stmt (gsi));
3098 if (dump_file && (dump_flags & TDF_DETAILS))
3100 fprintf (dump_file, "Folded FMA negation ");
3101 print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3102 fprintf (dump_file, "\n");
3107 widen_mul_stats.fmas_inserted++;
3111 /* Data necessary to perform the actual transformation from a multiplication
3112 and an addition to an FMA once the decision has been taken that it should be
3113 done, and to then delete the multiplication statement from the function IL. */
3115 struct fma_transformation_info
3117 gimple *mul_stmt;
3118 tree mul_result;
3119 tree op1;
3120 tree op2;
3123 /* Structure containing the current state of FMA deferring, i.e. whether we are
3124 deferring, whether to continue deferring, and all data necessary to come
3125 back and perform all deferred transformations. */
3127 class fma_deferring_state
3129 public:
3130 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3131 do any deferring. */
3133 fma_deferring_state (bool perform_deferring)
3134 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3135 m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3137 /* List of FMA candidates for which the transformation has been determined to
3138 be possible but which, at this point in the BB analysis, we do not consider
3139 beneficial. */
3140 auto_vec<fma_transformation_info, 8> m_candidates;
3142 /* Set of results of multiplications that are part of already deferred FMA
3143 candidates. */
3144 hash_set<tree> m_mul_result_set;
3146 /* The PHI that supposedly feeds back the result of one FMA to another over a
3147 loop boundary. */
3148 gphi *m_initial_phi;
3150 /* Result of the last produced FMA candidate or NULL if there has not been
3151 one. */
3152 tree m_last_result;
3154 /* If true, deferring might still be profitable. If false, transform all
3155 candidates and no longer defer. */
3156 bool m_deferring_p;
3159 /* Transform all deferred FMA candidates and mark STATE as no longer
3160 deferring. */
3162 static void
3163 cancel_fma_deferring (fma_deferring_state *state)
3165 if (!state->m_deferring_p)
3166 return;
3168 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3170 if (dump_file && (dump_flags & TDF_DETAILS))
3171 fprintf (dump_file, "Generating deferred FMA\n");
3173 const fma_transformation_info &fti = state->m_candidates[i];
3174 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3176 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3177 gsi_remove (&gsi, true);
3178 release_defs (fti.mul_stmt);
3180 state->m_deferring_p = false;
3183 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3184 Otherwise return NULL. */
3186 static gphi *
3187 result_of_phi (tree op)
3189 if (TREE_CODE (op) != SSA_NAME)
3190 return NULL;
3192 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3195 /* After processing statements of a BB and recording STATE, return true if the
3196 initial phi is fed by the last FMA candidate result or one such result from
3197 previously processed BBs marked in LAST_RESULT_SET. */
3199 static bool
3200 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3201 hash_set<tree> *last_result_set)
3203 ssa_op_iter iter;
3204 use_operand_p use;
3205 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3207 tree t = USE_FROM_PTR (use);
3208 if (t == state->m_last_result
3209 || last_result_set->contains (t))
3210 return true;
3213 return false;
3216 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3217 with uses in additions and subtractions to form fused multiply-add
3218 operations. Returns true if successful and MUL_STMT should be removed.
3219 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3220 on MUL_COND, otherwise it is unconditional.
3222 If STATE indicates that we are deferring FMA transformation, that means
3223 that we do not produce FMAs for basic blocks which look like:
3225 <bb 6>
3226 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3227 _65 = _14 * _16;
3228 accumulator_66 = _65 + accumulator_111;
3230 or its unrolled version, i.e. with several FMA candidates that feed the result
3231 of one into the addend of another. Instead, we add them to a list in STATE
3232 and if we later discover an FMA candidate that is not part of such a chain,
3233 we go back and perform all deferred past candidates. */
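/* The simplest positive case looks like (hypothetical user code,
   assuming FP contraction is enabled and the target has a direct FMA
   pattern):

     double f (double a, double b, double c)
     {
       return a * b + c;   // -> .FMA (a, b, c)
     }
  */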
3235 static bool
3236 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3237 fma_deferring_state *state, tree mul_cond = NULL_TREE)
3239 tree mul_result = gimple_get_lhs (mul_stmt);
3240 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3241 if the statement was left just for the side-effects. */
3242 if (!mul_result)
3243 return false;
3244 tree type = TREE_TYPE (mul_result);
3245 gimple *use_stmt, *neguse_stmt;
3246 use_operand_p use_p;
3247 imm_use_iterator imm_iter;
3249 if (FLOAT_TYPE_P (type)
3250 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3251 return false;
3253 /* We don't want to do bitfield reduction ops. */
3254 if (INTEGRAL_TYPE_P (type)
3255 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3256 return false;
3258 /* If the target doesn't support it, don't generate it. We assume that
3259 if fma isn't available then fms, fnma or fnms are not either. */
3260 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3261 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3262 return false;
3264 /* If the multiplication has zero uses, it is kept around probably because
3265 of -fnon-call-exceptions. Don't optimize it away in that case,
3266 it is DCE's job. */
3267 if (has_zero_uses (mul_result))
3268 return false;
3270 bool check_defer
3271 = (state->m_deferring_p
3272 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3273 param_avoid_fma_max_bits));
3274 bool defer = check_defer;
3275 bool seen_negate_p = false;
3276 /* Make sure that the multiplication statement becomes dead after
3277 the transformation, i.e. that all uses are transformed to FMAs.
3278 This means we assume that an FMA operation has the same cost
3279 as an addition. */
3280 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3282 tree result = mul_result;
3283 bool negate_p = false;
3285 use_stmt = USE_STMT (use_p);
3287 if (is_gimple_debug (use_stmt))
3288 continue;
3290 /* For now restrict this operation to single basic blocks. In theory
3291 we would want to support sinking the multiplication in
3292 m = a*b;
3293 if ()
3294 ma = m + c;
3295 else
3296 d = m;
3297 to form a fma in the then block and sink the multiplication to the
3298 else block. */
3299 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3300 return false;
3302 /* A negate on the multiplication leads to FNMA. */
3303 if (is_gimple_assign (use_stmt)
3304 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3306 ssa_op_iter iter;
3307 use_operand_p usep;
3309 /* If (due to earlier missed optimizations) we have two
3310 negates of the same value, treat them as equivalent
3311 to a single negate with multiple uses. */
3312 if (seen_negate_p)
3313 return false;
3315 result = gimple_assign_lhs (use_stmt);
3317 /* Make sure the negate statement becomes dead with this
3318 single transformation. */
3319 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3320 &use_p, &neguse_stmt))
3321 return false;
3323 /* Make sure the multiplication isn't also used on that stmt. */
3324 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3325 if (USE_FROM_PTR (usep) == mul_result)
3326 return false;
3328 /* Re-validate. */
3329 use_stmt = neguse_stmt;
3330 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3331 return false;
3333 negate_p = seen_negate_p = true;
3336 tree cond, else_value, ops[3];
3337 tree_code code;
3338 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3339 &else_value))
3340 return false;
3342 switch (code)
3344 case MINUS_EXPR:
3345 if (ops[1] == result)
3346 negate_p = !negate_p;
3347 break;
3348 case PLUS_EXPR:
3349 break;
3350 default:
3351 /* FMA can only be formed from PLUS and MINUS. */
3352 return false;
3355 if (mul_cond && cond != mul_cond)
3356 return false;
3358 if (cond)
3360 if (cond == result || else_value == result)
3361 return false;
3362 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3363 return false;
3366 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3367 we'll visit later, we might be able to get a more profitable
3368 match with fnma.
3369 OTOH, if we don't, a negate / fma pair likely has lower latency
3370 than a mult / subtract pair. */
3371 if (code == MINUS_EXPR
3372 && !negate_p
3373 && ops[0] == result
3374 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3375 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3376 && TREE_CODE (ops[1]) == SSA_NAME
3377 && has_single_use (ops[1]))
3379 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3380 if (is_gimple_assign (stmt2)
3381 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3382 return false;
3385 /* We can't handle a * b + a * b. */
3386 if (ops[0] == ops[1])
3387 return false;
3388 /* If deferring, make sure we are not looking at an instruction that
3389 wouldn't have existed if we were not. */
3390 if (state->m_deferring_p
3391 && (state->m_mul_result_set.contains (ops[0])
3392 || state->m_mul_result_set.contains (ops[1])))
3393 return false;
3395 if (check_defer)
3397 tree use_lhs = gimple_get_lhs (use_stmt);
3398 if (state->m_last_result)
3400 if (ops[1] == state->m_last_result
3401 || ops[0] == state->m_last_result)
3402 defer = true;
3403 else
3404 defer = false;
3406 else
3408 gcc_checking_assert (!state->m_initial_phi);
3409 gphi *phi;
3410 if (ops[0] == result)
3411 phi = result_of_phi (ops[1]);
3412 else
3414 gcc_assert (ops[1] == result);
3415 phi = result_of_phi (ops[0]);
3418 if (phi)
3420 state->m_initial_phi = phi;
3421 defer = true;
3423 else
3424 defer = false;
3427 state->m_last_result = use_lhs;
3428 check_defer = false;
3430 else
3431 defer = false;
3433 /* While it is possible to validate whether or not the exact form that
3434 we've recognized is available in the backend, the assumption is that
3435 if the deferring logic above did not trigger, the transformation is
3436 never a loss. For instance, suppose the target only has the plain FMA
3437 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3438 MUL+SUB for FMA+NEG, which is still two operations. Consider
3439 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3440 form the two NEGs are independent and could be run in parallel. */
3443 if (defer)
3445 fma_transformation_info fti;
3446 fti.mul_stmt = mul_stmt;
3447 fti.mul_result = mul_result;
3448 fti.op1 = op1;
3449 fti.op2 = op2;
3450 state->m_candidates.safe_push (fti);
3451 state->m_mul_result_set.add (mul_result);
3453 if (dump_file && (dump_flags & TDF_DETAILS))
3455 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3456 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3457 fprintf (dump_file, "\n");
3460 return false;
3462 else
3464 if (state->m_deferring_p)
3465 cancel_fma_deferring (state);
3466 convert_mult_to_fma_1 (mul_result, op1, op2);
3467 return true;
3472 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3473 a check for non-zero like:
3474 _1 = x_4(D) * y_5(D);
3475 *res_7(D) = _1;
3476 if (x_4(D) != 0)
3477 goto <bb 3>; [50.00%]
3478 else
3479 goto <bb 4>; [50.00%]
3481 <bb 3> [local count: 536870913]:
3482 _2 = _1 / x_4(D);
3483 _9 = _2 != y_5(D);
3484 _10 = (int) _9;
3486 <bb 4> [local count: 1073741824]:
3487 # iftmp.0_3 = PHI <_10(3), 0(2)>
3488 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3489 optimize the x_4(D) != 0 condition to 1. */
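/* At the source level this corresponds to the common hand-written
   overflow guard (hypothetical user code):

     int mul_overflows (unsigned x, unsigned y, unsigned *res)
     {
       unsigned r = x * y;
       *res = r;
       return x != 0 && r / x != y;   // the division and the x != 0 test
     }                                 // both become redundant
  */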
3491 static void
3492 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3493 gimple *div_stmt, bool *cfg_changed)
3495 basic_block bb = gimple_bb (cond_stmt);
3496 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3497 return;
3498 edge pred_edge = single_pred_edge (bb);
3499 basic_block pred_bb = pred_edge->src;
3500 if (EDGE_COUNT (pred_bb->succs) != 2)
3501 return;
3502 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3503 edge other_succ_edge = NULL;
3504 if (gimple_code (cond_stmt) == GIMPLE_COND)
3506 if (EDGE_COUNT (bb->succs) != 2)
3507 return;
3508 other_succ_edge = EDGE_SUCC (bb, 0);
3509 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3511 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3512 other_succ_edge = EDGE_SUCC (bb, 1);
3514 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3515 other_succ_edge = EDGE_SUCC (bb, 0);
3516 if (other_edge->dest != other_succ_edge->dest)
3517 return;
3519 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3520 return;
3521 gimple *zero_cond = last_stmt (pred_bb);
3522 if (zero_cond == NULL
3523 || gimple_code (zero_cond) != GIMPLE_COND
3524 || (gimple_cond_code (zero_cond)
3525 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3526 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3527 return;
3528 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3529 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3530 return;
3531 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3533 /* Allow the divisor to be the result of a same-precision cast
3534 from zero_cond_lhs. */
3535 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3536 if (TREE_CODE (rhs2) != SSA_NAME)
3537 return;
3538 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3539 if (!gimple_assign_cast_p (g)
3540 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3541 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3542 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3543 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3544 return;
3546 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3547 mul_stmts.quick_push (div_stmt);
3548 if (is_gimple_debug (gsi_stmt (gsi)))
3549 gsi_next_nondebug (&gsi);
3550 unsigned cast_count = 0;
3551 while (gsi_stmt (gsi) != cond_stmt)
3553 /* If the original mul_stmt has a single use, allow it in the same bb;
3554 we are then looking just at __builtin_mul_overflow_p.
3555 In that case, though, the original mul_stmt will be replaced
3556 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3557 gimple *mul_stmt;
3558 unsigned int i;
3559 bool ok = false;
3560 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3562 if (gsi_stmt (gsi) == mul_stmt)
3564 ok = true;
3565 break;
3568 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3569 ok = true;
3570 if (!ok)
3571 return;
3572 gsi_next_nondebug (&gsi);
3574 if (gimple_code (cond_stmt) == GIMPLE_COND)
3576 basic_block succ_bb = other_edge->dest;
3577 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3578 gsi_next (&gpi))
3580 gphi *phi = gpi.phi ();
3581 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3582 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3583 if (!operand_equal_p (v1, v2, 0))
3584 return;
3587 else
3589 tree lhs = gimple_assign_lhs (cond_stmt);
3590 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3591 return;
3592 gsi_next_nondebug (&gsi);
3593 if (!gsi_end_p (gsi))
3595 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3596 return;
3597 gimple *cast_stmt = gsi_stmt (gsi);
3598 if (!gimple_assign_cast_p (cast_stmt))
3599 return;
3600 tree new_lhs = gimple_assign_lhs (cast_stmt);
3601 gsi_next_nondebug (&gsi);
3602 if (!gsi_end_p (gsi)
3603 || !new_lhs
3604 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3605 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3606 return;
3607 lhs = new_lhs;
3609 edge succ_edge = single_succ_edge (bb);
3610 basic_block succ_bb = succ_edge->dest;
3611 gsi = gsi_start_phis (succ_bb);
3612 if (gsi_end_p (gsi))
3613 return;
3614 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3615 gsi_next (&gsi);
3616 if (!gsi_end_p (gsi))
3617 return;
3618 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3619 return;
3620 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3621 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3623 tree cond = gimple_assign_rhs1 (cond_stmt);
3624 if (TREE_CODE (cond) == NE_EXPR)
3626 if (!operand_equal_p (other_val,
3627 gimple_assign_rhs3 (cond_stmt), 0))
3628 return;
3630 else if (!operand_equal_p (other_val,
3631 gimple_assign_rhs2 (cond_stmt), 0))
3632 return;
3634 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3636 if (!integer_zerop (other_val))
3637 return;
3639 else if (!integer_onep (other_val))
3640 return;
3642 gcond *zero_gcond = as_a <gcond *> (zero_cond);
3643 if (pred_edge->flags & EDGE_TRUE_VALUE)
3644 gimple_cond_make_true (zero_gcond);
3645 else
3646 gimple_cond_make_false (zero_gcond);
3647 update_stmt (zero_cond);
3648 *cfg_changed = true;
3651 /* Helper function for arith_overflow_check_p. Return true
3652 if VAL1 is equal to VAL2 cast to the corresponding integral type
3653 with the other signedness, or vice versa. */
3655 static bool
3656 arith_cast_equal_p (tree val1, tree val2)
3658 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3659 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3660 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3661 return false;
3662 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3663 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3664 return true;
3665 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3666 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3667 return true;
3668 return false;
3671 /* Helper function of match_arith_overflow. Return 1
3672 if USE_STMT is an unsigned overflow check ovf != 0 for
3673 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
3674 and 0 otherwise. */
3676 static int
3677 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3678 tree maxval, tree *other)
3680 enum tree_code ccode = ERROR_MARK;
3681 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3682 enum tree_code code = gimple_assign_rhs_code (stmt);
3683 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3684 tree rhs1 = gimple_assign_rhs1 (stmt);
3685 tree rhs2 = gimple_assign_rhs2 (stmt);
3686 tree multop = NULL_TREE, divlhs = NULL_TREE;
3687 gimple *cur_use_stmt = use_stmt;
3689 if (code == MULT_EXPR)
3691 if (!is_gimple_assign (use_stmt))
3692 return 0;
3693 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3694 return 0;
3695 if (gimple_assign_rhs1 (use_stmt) != lhs)
3696 return 0;
3697 if (cast_stmt)
3699 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3700 multop = rhs2;
3701 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3702 multop = rhs1;
3703 else
3704 return 0;
3706 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3707 multop = rhs2;
3708 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3709 multop = rhs1;
3710 else
3711 return 0;
3712 if (stmt_ends_bb_p (use_stmt))
3713 return 0;
3714 divlhs = gimple_assign_lhs (use_stmt);
3715 if (!divlhs)
3716 return 0;
3717 use_operand_p use;
3718 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3719 return 0;
3721 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3723 ccode = gimple_cond_code (cur_use_stmt);
3724 crhs1 = gimple_cond_lhs (cur_use_stmt);
3725 crhs2 = gimple_cond_rhs (cur_use_stmt);
3727 else if (is_gimple_assign (cur_use_stmt))
3729 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3731 ccode = gimple_assign_rhs_code (cur_use_stmt);
3732 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3733 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3735 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3737 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3738 if (COMPARISON_CLASS_P (cond))
3740 ccode = TREE_CODE (cond);
3741 crhs1 = TREE_OPERAND (cond, 0);
3742 crhs2 = TREE_OPERAND (cond, 1);
3744 else
3745 return 0;
3747 else
3748 return 0;
3750 else
3751 return 0;
3753 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3754 return 0;
3756 switch (ccode)
3758 case GT_EXPR:
3759 case LE_EXPR:
3760 if (maxval)
3762 /* r = a + b; r > maxval or r <= maxval */
3763 if (crhs1 == lhs
3764 && TREE_CODE (crhs2) == INTEGER_CST
3765 && tree_int_cst_equal (crhs2, maxval))
3766 return ccode == GT_EXPR ? 1 : -1;
3767 break;
3769 /* r = a - b; r > a or r <= a
3770 r = a + b; a > r or a <= r or b > r or b <= r. */
3771 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3772 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3773 && crhs2 == lhs))
3774 return ccode == GT_EXPR ? 1 : -1;
3775 /* r = ~a; b > r or b <= r. */
3776 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3778 if (other)
3779 *other = crhs1;
3780 return ccode == GT_EXPR ? 1 : -1;
3782 break;
3783 case LT_EXPR:
3784 case GE_EXPR:
3785 if (maxval)
3786 break;
3787 /* r = a - b; a < r or a >= r
3788 r = a + b; r < a or r >= a or r < b or r >= b. */
3789 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3790 || (code == PLUS_EXPR && crhs1 == lhs
3791 && (crhs2 == rhs1 || crhs2 == rhs2)))
3792 return ccode == LT_EXPR ? 1 : -1;
3793 /* r = ~a; r < b or r >= b. */
3794 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3796 if (other)
3797 *other = crhs2;
3798 return ccode == LT_EXPR ? 1 : -1;
3800 break;
3801 case EQ_EXPR:
3802 case NE_EXPR:
3803 /* r = a * b; _1 = r / a; _1 == b
3804 r = a * b; _1 = r / b; _1 == a
3805 r = a * b; _1 = r / a; _1 != b
3806 r = a * b; _1 = r / b; _1 != a. */
3807 if (code == MULT_EXPR)
3809 if (cast_stmt)
3811 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3812 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3814 use_stmt = cur_use_stmt;
3815 return ccode == NE_EXPR ? 1 : -1;
3818 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3819 || (crhs2 == divlhs && crhs1 == multop))
3821 use_stmt = cur_use_stmt;
3822 return ccode == NE_EXPR ? 1 : -1;
3825 break;
3826 default:
3827 break;
3829 return 0;
3832 /* Recognize for unsigned x
3833 x = y - z;
3834 if (x > y)
3835 where there are other uses of x and replace it with
3836 _7 = .SUB_OVERFLOW (y, z);
3837 x = REALPART_EXPR <_7>;
3838 _8 = IMAGPART_EXPR <_7>;
3839 if (_8)
3840 and similarly for addition.
3842 Also recognize:
3843 yc = (type) y;
3844 zc = (type) z;
3845 x = yc + zc;
3846 if (x > max)
3847 where y and z have unsigned types with maximum max
3848 and there are other uses of x and all of those cast x
3849 back to that unsigned type and again replace it with
3850 _7 = .ADD_OVERFLOW (y, z);
3851 _9 = REALPART_EXPR <_7>;
3852 _8 = IMAGPART_EXPR <_7>;
3853 if (_8)
3854 and replace (utype) x with _9.
3856 Also recognize:
3857 x = ~z;
3858 if (y > x)
3859 and replace it with
3860 _7 = .ADD_OVERFLOW (y, z);
3861 _8 = IMAGPART_EXPR <_7>;
3862 if (_8)
3864 And also recognize:
3865 z = x * y;
3866 if (x != 0)
3867 goto <bb 3>; [50.00%]
3868 else
3869 goto <bb 4>; [50.00%]
3871 <bb 3> [local count: 536870913]:
3872 _2 = z / x;
3873 _9 = _2 != y;
3874 _10 = (int) _9;
3876 <bb 4> [local count: 1073741824]:
3877 # iftmp.0_3 = PHI <_10(3), 0(2)>
3878 and replace it with
3879 _7 = .MUL_OVERFLOW (x, y);
3880 z = IMAGPART_EXPR <_7>;
3881 _8 = IMAGPART_EXPR <_7>;
3882 _9 = _8 != 0;
3883 iftmp.0_3 = (int) _9; */
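/* As a hedged user-level sketch (hypothetical function, for exposition
   only), the first pattern above typically comes from C such as

     unsigned int
     sub_sat (unsigned int y, unsigned int z)
     {
       unsigned int x = y - z;
       if (x > y)	// unsigned wrap-around happened
	 return 0;	// saturate to zero
       return x;	// other use of x; becomes REALPART_EXPR <_7>
     }

   where after the transform a single .SUB_OVERFLOW call feeds both the
   result and the overflow test.  */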
3885 static bool
3886 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3887 enum tree_code code, bool *cfg_changed)
3889 tree lhs = gimple_assign_lhs (stmt);
3890 tree type = TREE_TYPE (lhs);
3891 use_operand_p use_p;
3892 imm_use_iterator iter;
3893 bool use_seen = false;
3894 bool ovf_use_seen = false;
3895 gimple *use_stmt;
3896 gimple *add_stmt = NULL;
3897 bool add_first = false;
3898 gimple *cond_stmt = NULL;
3899 gimple *cast_stmt = NULL;
3900 tree cast_lhs = NULL_TREE;
3902 gcc_checking_assert (code == PLUS_EXPR
3903 || code == MINUS_EXPR
3904 || code == MULT_EXPR
3905 || code == BIT_NOT_EXPR);
3906 if (!INTEGRAL_TYPE_P (type)
3907 || !TYPE_UNSIGNED (type)
3908 || has_zero_uses (lhs)
3909 || (code != PLUS_EXPR
3910 && code != MULT_EXPR
3911 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3912 TYPE_MODE (type)) == CODE_FOR_nothing))
3913 return false;
3915 tree rhs1 = gimple_assign_rhs1 (stmt);
3916 tree rhs2 = gimple_assign_rhs2 (stmt);
3917 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
3919 use_stmt = USE_STMT (use_p);
3920 if (is_gimple_debug (use_stmt))
3921 continue;
3923 tree other = NULL_TREE;
3924 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
3926 if (code == BIT_NOT_EXPR)
3928 gcc_assert (other);
3929 if (TREE_CODE (other) != SSA_NAME)
3930 return false;
3931 if (rhs2 == NULL)
3932 rhs2 = other;
3933 else
3934 return false;
3935 cond_stmt = use_stmt;
3937 ovf_use_seen = true;
3939 else
3941 use_seen = true;
3942 if (code == MULT_EXPR
3943 && cast_stmt == NULL
3944 && gimple_assign_cast_p (use_stmt))
3946 cast_lhs = gimple_assign_lhs (use_stmt);
3947 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3948 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3949 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3950 == TYPE_PRECISION (TREE_TYPE (lhs))))
3951 cast_stmt = use_stmt;
3952 else
3953 cast_lhs = NULL_TREE;
3956 if (ovf_use_seen && use_seen)
3957 break;
3960 if (!ovf_use_seen
3961 && code == MULT_EXPR
3962 && cast_stmt)
3964 if (TREE_CODE (rhs1) != SSA_NAME
3965 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
3966 return false;
3967 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
3969 use_stmt = USE_STMT (use_p);
3970 if (is_gimple_debug (use_stmt))
3971 continue;
3973 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
3974 NULL_TREE, NULL))
3975 ovf_use_seen = true;
3978 else
3980 cast_stmt = NULL;
3981 cast_lhs = NULL_TREE;
3984 tree maxval = NULL_TREE;
3985 if (!ovf_use_seen
3986 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
3987 || (code == PLUS_EXPR
3988 && optab_handler (uaddv4_optab,
3989 TYPE_MODE (type)) == CODE_FOR_nothing)
3990 || (code == MULT_EXPR
3991 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
3992 TYPE_MODE (type)) == CODE_FOR_nothing))
3994 if (code != PLUS_EXPR)
3995 return false;
3996 if (TREE_CODE (rhs1) != SSA_NAME
3997 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
3998 return false;
3999 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4000 tree type1 = TREE_TYPE (rhs1);
4001 if (!INTEGRAL_TYPE_P (type1)
4002 || !TYPE_UNSIGNED (type1)
4003 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4004 || (TYPE_PRECISION (type1)
4005 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4006 return false;
4007 if (TREE_CODE (rhs2) == INTEGER_CST)
4009 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4010 TYPE_PRECISION (type1),
4011 UNSIGNED), 0))
4012 return false;
4013 rhs2 = fold_convert (type1, rhs2);
4015 else
4017 if (TREE_CODE (rhs2) != SSA_NAME
4018 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4019 return false;
4020 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4021 tree type2 = TREE_TYPE (rhs2);
4022 if (!INTEGRAL_TYPE_P (type2)
4023 || !TYPE_UNSIGNED (type2)
4024 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4025 || (TYPE_PRECISION (type2)
4026 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4027 return false;
4029 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4030 type = type1;
4031 else
4032 type = TREE_TYPE (rhs2);
4034 if (TREE_CODE (type) != INTEGER_TYPE
4035 || optab_handler (uaddv4_optab,
4036 TYPE_MODE (type)) == CODE_FOR_nothing)
4037 return false;
4039 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4040 UNSIGNED));
4041 ovf_use_seen = false;
4042 use_seen = false;
4043 basic_block use_bb = NULL;
4044 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4046 use_stmt = USE_STMT (use_p);
4047 if (is_gimple_debug (use_stmt))
4048 continue;
4050 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4052 ovf_use_seen = true;
4053 use_bb = gimple_bb (use_stmt);
4055 else
4057 if (!gimple_assign_cast_p (use_stmt)
4058 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4059 return false;
4060 tree use_lhs = gimple_assign_lhs (use_stmt);
4061 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4062 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4063 > TYPE_PRECISION (type)))
4064 return false;
4065 use_seen = true;
4068 if (!ovf_use_seen)
4069 return false;
4070 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4072 if (!use_seen)
4073 return false;
4074 tree new_rhs1 = make_ssa_name (type);
4075 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4076 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4077 rhs1 = new_rhs1;
4079 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4081 if (!use_seen)
4082 return false;
4083 tree new_rhs2 = make_ssa_name (type);
4084 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4085 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4086 rhs2 = new_rhs2;
4088 else if (!use_seen)
4090 /* If there are no uses of the wider addition, check whether
4091 forwprop has created a narrower addition instead.
4092 Require it to be in the same bb as the overflow check. */
4093 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4095 use_stmt = USE_STMT (use_p);
4096 if (is_gimple_debug (use_stmt))
4097 continue;
4099 if (use_stmt == stmt)
4100 continue;
4102 if (!is_gimple_assign (use_stmt)
4103 || gimple_bb (use_stmt) != use_bb
4104 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4105 continue;
4107 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4109 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4110 rhs2, 0))
4111 continue;
4113 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4115 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4116 continue;
4118 else
4119 continue;
4121 add_stmt = use_stmt;
4122 break;
4124 if (add_stmt == NULL)
4125 return false;
4127 /* If stmt and add_stmt are in the same bb, we need to find out
4128 which one is earlier. If they are in different bbs, we've
4129 checked add_stmt is in the same bb as one of the uses of the
4130 stmt lhs, so stmt needs to dominate add_stmt too. */
4131 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4133 gimple_stmt_iterator gsif = *gsi;
4134 gimple_stmt_iterator gsib = *gsi;
4135 int i;
4136 /* Search both forward and backward from stmt and have a small
4137 upper bound. */
4138 for (i = 0; i < 128; i++)
4140 if (!gsi_end_p (gsib))
4142 gsi_prev_nondebug (&gsib);
4143 if (gsi_stmt (gsib) == add_stmt)
4145 add_first = true;
4146 break;
4149 else if (gsi_end_p (gsif))
4150 break;
4151 if (!gsi_end_p (gsif))
4153 gsi_next_nondebug (&gsif);
4154 if (gsi_stmt (gsif) == add_stmt)
4155 break;
4158 if (i == 128)
4159 return false;
4160 if (add_first)
4161 *gsi = gsi_for_stmt (add_stmt);
4166 if (code == BIT_NOT_EXPR)
4167 *gsi = gsi_for_stmt (cond_stmt);
4169 auto_vec<gimple *, 8> mul_stmts;
4170 if (code == MULT_EXPR && cast_stmt)
4172 type = TREE_TYPE (cast_lhs);
4173 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4174 if (gimple_assign_cast_p (g)
4175 && useless_type_conversion_p (type,
4176 TREE_TYPE (gimple_assign_rhs1 (g)))
4177 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4178 rhs1 = gimple_assign_rhs1 (g);
4179 else
4181 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4182 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4183 rhs1 = gimple_assign_lhs (g);
4184 mul_stmts.quick_push (g);
4186 if (TREE_CODE (rhs2) == INTEGER_CST)
4187 rhs2 = fold_convert (type, rhs2);
4188 else
4190 g = SSA_NAME_DEF_STMT (rhs2);
4191 if (gimple_assign_cast_p (g)
4192 && useless_type_conversion_p (type,
4193 TREE_TYPE (gimple_assign_rhs1 (g)))
4194 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4195 rhs2 = gimple_assign_rhs1 (g);
4196 else
4198 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4199 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4200 rhs2 = gimple_assign_lhs (g);
4201 mul_stmts.quick_push (g);
4205 tree ctype = build_complex_type (type);
4206 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4207 ? IFN_MUL_OVERFLOW
4208 : code != MINUS_EXPR
4209 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4210 2, rhs1, rhs2);
4211 tree ctmp = make_ssa_name (ctype);
4212 gimple_call_set_lhs (g, ctmp);
4213 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4214 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4215 gassign *g2;
4216 if (code != BIT_NOT_EXPR)
4218 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4219 build1 (REALPART_EXPR, type, ctmp));
4220 if (maxval || cast_stmt)
4222 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4223 if (add_first)
4224 *gsi = gsi_for_stmt (stmt);
4226 else
4227 gsi_replace (gsi, g2, true);
4228 if (code == MULT_EXPR)
4230 mul_stmts.quick_push (g);
4231 mul_stmts.quick_push (g2);
4232 if (cast_stmt)
4234 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4235 gsi_replace (gsi, g2, true);
4236 mul_stmts.quick_push (g2);
4240 tree ovf = make_ssa_name (type);
4241 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4242 build1 (IMAGPART_EXPR, type, ctmp));
4243 if (code != BIT_NOT_EXPR)
4244 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4245 else
4246 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4247 if (code == MULT_EXPR)
4248 mul_stmts.quick_push (g2);
4250 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4252 if (is_gimple_debug (use_stmt))
4253 continue;
4255 gimple *orig_use_stmt = use_stmt;
4256 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4257 maxval, NULL);
4258 if (ovf_use == 0)
4260 gcc_assert (code != BIT_NOT_EXPR);
4261 if (maxval)
4263 tree use_lhs = gimple_assign_lhs (use_stmt);
4264 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4265 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4266 TREE_TYPE (new_lhs)))
4267 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4268 update_stmt (use_stmt);
4270 continue;
4272 if (gimple_code (use_stmt) == GIMPLE_COND)
4274 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4275 gimple_cond_set_lhs (cond_stmt, ovf);
4276 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4277 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4279 else
4281 gcc_checking_assert (is_gimple_assign (use_stmt));
4282 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4284 gimple_assign_set_rhs1 (use_stmt, ovf);
4285 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4286 gimple_assign_set_rhs_code (use_stmt,
4287 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4289 else
4291 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4292 == COND_EXPR);
4293 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4294 boolean_type_node, ovf,
4295 build_int_cst (type, 0));
4296 gimple_assign_set_rhs1 (use_stmt, cond);
4299 update_stmt (use_stmt);
4300 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4302 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4303 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4304 cfg_changed);
4305 gsi_remove (&gsi2, true);
4306 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4309 if (maxval)
4311 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4312 gsi_remove (&gsi2, true);
4313 if (add_stmt)
4315 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4316 new_lhs);
4317 gsi2 = gsi_for_stmt (add_stmt);
4318 gsi_replace (&gsi2, g, true);
4321 else if (code == BIT_NOT_EXPR)
4323 *gsi = gsi_for_stmt (stmt);
4324 gsi_remove (gsi, true);
4325 release_ssa_name (lhs);
4326 return true;
4328 return false;
4331 /* Return true if target has support for divmod. */
4333 static bool
4334 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4336 /* If target supports hardware divmod insn, use it for divmod. */
4337 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4338 return true;
4340 /* Check if libfunc for divmod is available. */
4341 rtx libfunc = optab_libfunc (divmod_optab, mode);
4342 if (libfunc != NULL_RTX)
4344 /* If an optab_handler exists for div_optab, perhaps in a wider mode,
4345 we don't want to use the libfunc even if it exists for the given mode. */
4346 machine_mode div_mode;
4347 FOR_EACH_MODE_FROM (div_mode, mode)
4348 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4349 return false;
4351 return targetm.expand_divmod_libfunc != NULL;
4354 return false;
4357 /* Check if STMT is a candidate for the divmod transform. */
4359 static bool
4360 divmod_candidate_p (gassign *stmt)
4362 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4363 machine_mode mode = TYPE_MODE (type);
4364 optab divmod_optab, div_optab;
4366 if (TYPE_UNSIGNED (type))
4368 divmod_optab = udivmod_optab;
4369 div_optab = udiv_optab;
4371 else
4373 divmod_optab = sdivmod_optab;
4374 div_optab = sdiv_optab;
4377 tree op1 = gimple_assign_rhs1 (stmt);
4378 tree op2 = gimple_assign_rhs2 (stmt);
4380 /* Disable the transform if either is a constant, since division-by-constant
4381 may have specialized expansion. */
4382 if (CONSTANT_CLASS_P (op1))
4383 return false;
4385 if (CONSTANT_CLASS_P (op2))
4387 if (integer_pow2p (op2))
4388 return false;
4390 if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4391 && TYPE_PRECISION (type) <= BITS_PER_WORD)
4392 return false;
4394 /* If the divisor is not a power of 2 and the precision is wider than
4395 a HWI, expand_divmod punts on that, so in that case it is better
4396 to use the divmod optab or libfunc. Similarly if choose_multiplier
4397 might need pre/post shifts of BITS_PER_WORD or more. */
4400 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4401 expand using the [su]divv optabs. */
4402 if (TYPE_OVERFLOW_TRAPS (type))
4403 return false;
4405 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4406 return false;
4408 return true;
4411 /* This function looks for:
4412 t1 = a TRUNC_DIV_EXPR b;
4413 t2 = a TRUNC_MOD_EXPR b;
4414 and transforms it to the following sequence:
4415 complex_tmp = DIVMOD (a, b);
4416 t1 = REALPART_EXPR(complex_tmp);
4417 t2 = IMAGPART_EXPR(complex_tmp);
4418 For conditions enabling the transform see divmod_candidate_p().
4420 The pass has three parts:
4421 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4422 other trunc_div_expr and trunc_mod_expr stmts.
4423 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4424 to stmts vector.
4425 3) Insert DIVMOD call just before top_stmt and update entries in
4426 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
4427 IMAGPART_EXPR for mod). */
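/* A minimal user-level sketch (hypothetical function, for exposition only)
   of the code this transform targets:

     void
     split (unsigned int a, unsigned int b, unsigned int *q, unsigned int *r)
     {
       *q = a / b;	// becomes REALPART_EXPR <divmod_tmp>
       *r = a % b;	// becomes IMAGPART_EXPR <divmod_tmp>
     }

   so that one DIVMOD internal call (and ultimately a single divmod
   instruction or library call) computes both results.  */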
4429 static bool
4430 convert_to_divmod (gassign *stmt)
4432 if (stmt_can_throw_internal (cfun, stmt)
4433 || !divmod_candidate_p (stmt))
4434 return false;
4436 tree op1 = gimple_assign_rhs1 (stmt);
4437 tree op2 = gimple_assign_rhs2 (stmt);
4439 imm_use_iterator use_iter;
4440 gimple *use_stmt;
4441 auto_vec<gimple *> stmts;
4443 gimple *top_stmt = stmt;
4444 basic_block top_bb = gimple_bb (stmt);
4446 /* Part 1: Try to set top_stmt to the "topmost" stmt that dominates
4447 at least stmt and possibly other trunc_div/trunc_mod stmts
4448 having the same operands as stmt. */
4450 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4452 if (is_gimple_assign (use_stmt)
4453 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4454 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4455 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4456 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4458 if (stmt_can_throw_internal (cfun, use_stmt))
4459 continue;
4461 basic_block bb = gimple_bb (use_stmt);
4463 if (bb == top_bb)
4465 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4466 top_stmt = use_stmt;
4468 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4470 top_bb = bb;
4471 top_stmt = use_stmt;
4476 tree top_op1 = gimple_assign_rhs1 (top_stmt);
4477 tree top_op2 = gimple_assign_rhs2 (top_stmt);
4479 stmts.safe_push (top_stmt);
4480 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4482 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
4483 to the stmts vector. The 2nd loop will always add stmt to the stmts vector,
4484 since gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4485 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
4487 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4489 if (is_gimple_assign (use_stmt)
4490 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4491 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4492 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4493 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4495 if (use_stmt == top_stmt
4496 || stmt_can_throw_internal (cfun, use_stmt)
4497 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4498 continue;
4500 stmts.safe_push (use_stmt);
4501 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4502 div_seen = true;
4506 if (!div_seen)
4507 return false;
4509 /* Part 3: Create a call to the internal fn DIVMOD:
4510 divmod_tmp = DIVMOD (op1, op2). */
4512 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4513 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4514 call_stmt, "divmod_tmp");
4515 gimple_call_set_lhs (call_stmt, res);
4516 /* We rejected throwing statements above. */
4517 gimple_call_set_nothrow (call_stmt, true);
4519 /* Insert the call before top_stmt. */
4520 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4521 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4523 widen_mul_stats.divmod_calls_inserted++;
4525 /* Update all statements in stmts vector:
4526 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4527 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4529 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4531 tree new_rhs;
4533 switch (gimple_assign_rhs_code (use_stmt))
4535 case TRUNC_DIV_EXPR:
4536 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4537 break;
4539 case TRUNC_MOD_EXPR:
4540 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4541 break;
4543 default:
4544 gcc_unreachable ();
4547 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4548 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4549 update_stmt (use_stmt);
4552 return true;
4555 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4556 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
4557 value is true iff we converted the statement. */
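/* A hedged illustration (hypothetical function, assuming 32-bit int and a
   target with an unsigned highpart multiply pattern):

     unsigned int
     mulhi (unsigned int a, unsigned int b)
     {
       return (unsigned int) (((unsigned long long) a * b) >> 32);
     }

   Here the widened multiply followed by a right shift by the narrow
   precision can be rewritten into a MULT_HIGHPART_EXPR on a and b.  */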
4559 static bool
4560 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4562 tree lhs = gimple_assign_lhs (stmt);
4563 tree stype = TREE_TYPE (lhs);
4564 tree sarg0 = gimple_assign_rhs1 (stmt);
4565 tree sarg1 = gimple_assign_rhs2 (stmt);
4567 if (TREE_CODE (stype) != INTEGER_TYPE
4568 || TREE_CODE (sarg1) != INTEGER_CST
4569 || TREE_CODE (sarg0) != SSA_NAME
4570 || !tree_fits_uhwi_p (sarg1)
4571 || !has_single_use (sarg0))
4572 return false;
4574 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4575 if (!def)
4576 return false;
4578 enum tree_code mcode = gimple_assign_rhs_code (def);
4579 if (mcode == NOP_EXPR)
4581 tree tmp = gimple_assign_rhs1 (def);
4582 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4583 return false;
4584 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4585 if (!def)
4586 return false;
4587 mcode = gimple_assign_rhs_code (def);
4590 if (mcode != WIDEN_MULT_EXPR
4591 || gimple_bb (def) != gimple_bb (stmt))
4592 return false;
4593 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4594 if (TREE_CODE (mtype) != INTEGER_TYPE
4595 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4596 return false;
4598 tree mop1 = gimple_assign_rhs1 (def);
4599 tree mop2 = gimple_assign_rhs2 (def);
4600 tree optype = TREE_TYPE (mop1);
4601 bool unsignedp = TYPE_UNSIGNED (optype);
4602 unsigned int prec = TYPE_PRECISION (optype);
4604 if (unsignedp != TYPE_UNSIGNED (mtype)
4605 || TYPE_PRECISION (mtype) != 2 * prec)
4606 return false;
4608 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4609 if (bits < prec || bits >= 2 * prec)
4610 return false;
4612 /* For the time being, require operands to have the same sign. */
4613 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4614 return false;
4616 machine_mode mode = TYPE_MODE (optype);
4617 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4618 if (optab_handler (tab, mode) == CODE_FOR_nothing)
4619 return false;
4621 location_t loc = gimple_location (stmt);
4622 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4623 MULT_HIGHPART_EXPR, mop1, mop2);
4624 tree highpart2 = highpart1;
4625 tree ntype = optype;
4627 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4629 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4630 : signed_type_for (optype);
4631 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4633 if (bits > prec)
4634 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4635 RSHIFT_EXPR, highpart2,
4636 build_int_cst (ntype, bits - prec));
4638 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4639 gsi_replace (gsi, new_stmt, true);
4641 widen_mul_stats.highpart_mults_inserted++;
4642 return true;
4645 /* If target has spaceship<MODE>3 expander, pattern recognize
4646 <bb 2> [local count: 1073741824]:
4647 if (a_2(D) == b_3(D))
4648 goto <bb 6>; [34.00%]
4649 else
4650 goto <bb 3>; [66.00%]
4652 <bb 3> [local count: 708669601]:
4653 if (a_2(D) < b_3(D))
4654 goto <bb 6>; [1.04%]
4655 else
4656 goto <bb 4>; [98.96%]
4658 <bb 4> [local count: 701299439]:
4659 if (a_2(D) > b_3(D))
4660 goto <bb 5>; [48.89%]
4661 else
4662 goto <bb 6>; [51.11%]
4664 <bb 5> [local count: 342865295]:
4666 <bb 6> [local count: 1073741824]:
4667 and turn it into:
4668 <bb 2> [local count: 1073741824]:
4669 _1 = .SPACESHIP (a_2(D), b_3(D));
4670 if (_1 == 0)
4671 goto <bb 6>; [34.00%]
4672 else
4673 goto <bb 3>; [66.00%]
4675 <bb 3> [local count: 708669601]:
4676 if (_1 == -1)
4677 goto <bb 6>; [1.04%]
4678 else
4679 goto <bb 4>; [98.96%]
4681 <bb 4> [local count: 701299439]:
4682 if (_1 == 1)
4683 goto <bb 5>; [48.89%]
4684 else
4685 goto <bb 6>; [51.11%]
4687 <bb 5> [local count: 342865295]:
4689 <bb 6> [local count: 1073741824]:
4690 so that the backend can emit optimal comparison and
4691 conditional jump sequence. */
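/* As a hedged source-level sketch (hypothetical function, assuming the
   target provides a spaceship<MODE>3 expander for double), the CFG above is
   roughly what a three-way floating point comparison lowers to:

     int
     cmp3 (double a, double b)
     {
       if (a == b)
	 return 0;
       if (a < b)
	 return -1;
       if (a > b)
	 return 1;
       return 2;	// unordered: at least one operand is a NaN
     }

   After the transform the three conditions test the single .SPACESHIP
   result against 0, -1 and 1 instead of repeating the comparison.  */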
4693 static void
4694 optimize_spaceship (gimple *stmt)
4696 enum tree_code code = gimple_cond_code (stmt);
4697 if (code != EQ_EXPR && code != NE_EXPR)
4698 return;
4699 tree arg1 = gimple_cond_lhs (stmt);
4700 tree arg2 = gimple_cond_rhs (stmt);
4701 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
4702 || optab_handler (spaceship_optab,
4703 TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
4704 || operand_equal_p (arg1, arg2, 0))
4705 return;
4707 basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
4708 edge em1 = NULL, e1 = NULL, e2 = NULL;
4709 bb1 = EDGE_SUCC (bb0, 1)->dest;
4710 if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
4711 bb1 = EDGE_SUCC (bb0, 0)->dest;
4713 gimple *g = last_stmt (bb1);
4714 if (g == NULL
4715 || gimple_code (g) != GIMPLE_COND
4716 || !single_pred_p (bb1)
4717 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4718 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4719 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4720 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4721 || !cond_only_block_p (bb1))
4722 return;
4724 enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4725 ? LT_EXPR : GT_EXPR);
4726 switch (gimple_cond_code (g))
4728 case LT_EXPR:
4729 case LE_EXPR:
4730 break;
4731 case GT_EXPR:
4732 case GE_EXPR:
4733 ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
4734 break;
4735 default:
4736 return;
4739 for (int i = 0; i < 2; ++i)
4741 /* With NaNs, </<=/>/>= are all false, so we need to look for the
4742 third comparison on the false edge of the second, non-equality
4743 comparison. */
4744 if (HONOR_NANS (TREE_TYPE (arg1))
4745 && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
4746 continue;
4748 bb2 = EDGE_SUCC (bb1, i)->dest;
4749 g = last_stmt (bb2);
4750 if (g == NULL
4751 || gimple_code (g) != GIMPLE_COND
4752 || !single_pred_p (bb2)
4753 || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4754 ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4755 : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4756 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4757 || !cond_only_block_p (bb2)
4758 || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
4759 continue;
4761 enum tree_code ccode2
4762 = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
4763 switch (gimple_cond_code (g))
4765 case LT_EXPR:
4766 case LE_EXPR:
4767 break;
4768 case GT_EXPR:
4769 case GE_EXPR:
4770 ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
4771 break;
4772 default:
4773 continue;
4775 if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
4776 continue;
4778 if ((ccode == LT_EXPR)
4779 ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
4781 em1 = EDGE_SUCC (bb1, 1 - i);
4782 e1 = EDGE_SUCC (bb2, 0);
4783 e2 = EDGE_SUCC (bb2, 1);
4784 if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
4785 std::swap (e1, e2);
4787 else
4789 e1 = EDGE_SUCC (bb1, 1 - i);
4790 em1 = EDGE_SUCC (bb2, 0);
4791 e2 = EDGE_SUCC (bb2, 1);
4792 if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
4793 std::swap (em1, e2);
4795 break;
4798 if (em1 == NULL)
4800 if ((ccode == LT_EXPR)
4801 ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
4803 em1 = EDGE_SUCC (bb1, 1);
4804 e1 = EDGE_SUCC (bb1, 0);
4805 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4807 else
4809 em1 = EDGE_SUCC (bb1, 0);
4810 e1 = EDGE_SUCC (bb1, 1);
4811 e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4815 g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
4816 tree lhs = make_ssa_name (integer_type_node);
4817 gimple_call_set_lhs (g, lhs);
4818 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4819 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4821 gcond *cond = as_a <gcond *> (stmt);
4822 gimple_cond_set_lhs (cond, lhs);
4823 gimple_cond_set_rhs (cond, integer_zero_node);
4824 update_stmt (stmt);
4826 g = last_stmt (bb1);
4827 cond = as_a <gcond *> (g);
4828 gimple_cond_set_lhs (cond, lhs);
4829 if (em1->src == bb1 && e2 != em1)
4831 gimple_cond_set_rhs (cond, integer_minus_one_node);
4832 gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
4833 ? EQ_EXPR : NE_EXPR);
4835 else
4837 gcc_assert (e1->src == bb1 && e2 != e1);
4838 gimple_cond_set_rhs (cond, integer_one_node);
4839 gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
4840 ? EQ_EXPR : NE_EXPR);
4842 update_stmt (g);
4844 if (e2 != e1 && e2 != em1)
4846 g = last_stmt (bb2);
4847 cond = as_a <gcond *> (g);
4848 gimple_cond_set_lhs (cond, lhs);
4849 if (em1->src == bb2)
4850 gimple_cond_set_rhs (cond, integer_minus_one_node);
4851 else
4853 gcc_assert (e1->src == bb2);
4854 gimple_cond_set_rhs (cond, integer_one_node);
4856 gimple_cond_set_code (cond,
4857 (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
4858 update_stmt (g);
4861 wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
4862 wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
4863 value_range vr (TREE_TYPE (lhs), wm1, w2);
4864 set_range_info (lhs, vr);
4868 /* Find integer multiplications where the operands are extended from
4869 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4870 or MULT_HIGHPART_EXPR where appropriate. */
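/* As a brief illustration (hypothetical function, assuming the target has a
   widening multiply instruction):

     long long
     wmul (int a, int b)
     {
       return (long long) a * (long long) b;
     }

   Here the MULT_EXPR on the sign-extended operands is replaced by a
   WIDEN_MULT_EXPR on the original int operands.  */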
4872 namespace {
4874 const pass_data pass_data_optimize_widening_mul =
4876 GIMPLE_PASS, /* type */
4877 "widening_mul", /* name */
4878 OPTGROUP_NONE, /* optinfo_flags */
4879 TV_TREE_WIDEN_MUL, /* tv_id */
4880 PROP_ssa, /* properties_required */
4881 0, /* properties_provided */
4882 0, /* properties_destroyed */
4883 0, /* todo_flags_start */
4884 TODO_update_ssa, /* todo_flags_finish */
4887 class pass_optimize_widening_mul : public gimple_opt_pass
4889 public:
4890 pass_optimize_widening_mul (gcc::context *ctxt)
4891 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4894 /* opt_pass methods: */
4895 virtual bool gate (function *)
4897 return flag_expensive_optimizations && optimize;
4900 virtual unsigned int execute (function *);
4902 }; // class pass_optimize_widening_mul
4904 /* Walker class to perform the transformation in reverse dominance order. */
4906 class math_opts_dom_walker : public dom_walker
4908 public:
4909 /* Constructor; CFG_CHANGED_P is a pointer to a boolean flag that will be set
4910 if walking modifies the CFG. */
4912 math_opts_dom_walker (bool *cfg_changed_p)
4913 : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4914 m_cfg_changed_p (cfg_changed_p) {}
4916 /* The actual actions performed in the walk. */
4918 virtual void after_dom_children (basic_block);
4920 /* Set of results of chains of multiply and add statement combinations that
4921 were not transformed into FMAs because of active deferring. */
4922 hash_set<tree> m_last_result_set;
4924 /* Pointer to a flag of the user that needs to be set if CFG has been
4925 modified. */
4926 bool *m_cfg_changed_p;
4929 void
4930 math_opts_dom_walker::after_dom_children (basic_block bb)
4932 gimple_stmt_iterator gsi;
4934 fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
4936 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
4938 gimple *stmt = gsi_stmt (gsi);
4939 enum tree_code code;
4941 if (is_gimple_assign (stmt))
4943 code = gimple_assign_rhs_code (stmt);
4944 switch (code)
4946 case MULT_EXPR:
4947 if (!convert_mult_to_widen (stmt, &gsi)
4948 && !convert_expand_mult_copysign (stmt, &gsi)
4949 && convert_mult_to_fma (stmt,
4950 gimple_assign_rhs1 (stmt),
4951 gimple_assign_rhs2 (stmt),
4952 &fma_state))
4954 gsi_remove (&gsi, true);
4955 release_defs (stmt);
4956 continue;
4958 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4959 break;
4961 case PLUS_EXPR:
4962 case MINUS_EXPR:
4963 if (!convert_plusminus_to_widen (&gsi, stmt, code))
4964 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
4965 break;
4967 case BIT_NOT_EXPR:
4968 if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
4969 continue;
4970 break;
4972 case TRUNC_MOD_EXPR:
4973 convert_to_divmod (as_a<gassign *> (stmt));
4974 break;
4976 case RSHIFT_EXPR:
4977 convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
4978 break;
4980 default:;
4983 else if (is_gimple_call (stmt))
4985 switch (gimple_call_combined_fn (stmt))
4987 CASE_CFN_POW:
4988 if (gimple_call_lhs (stmt)
4989 && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
4990 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
4991 &dconst2)
4992 && convert_mult_to_fma (stmt,
4993 gimple_call_arg (stmt, 0),
4994 gimple_call_arg (stmt, 0),
4995 &fma_state))
4997 unlink_stmt_vdef (stmt);
4998 if (gsi_remove (&gsi, true)
4999 && gimple_purge_dead_eh_edges (bb))
5000 *m_cfg_changed_p = true;
5001 release_defs (stmt);
5002 continue;
5004 break;
5006 case CFN_COND_MUL:
5007 if (convert_mult_to_fma (stmt,
5008 gimple_call_arg (stmt, 1),
5009 gimple_call_arg (stmt, 2),
5010 &fma_state,
5011 gimple_call_arg (stmt, 0)))
5014 gsi_remove (&gsi, true);
5015 release_defs (stmt);
5016 continue;
5018 break;
5020 case CFN_LAST:
5021 cancel_fma_deferring (&fma_state);
5022 break;
5024 default:
5025 break;
5028 else if (gimple_code (stmt) == GIMPLE_COND)
5029 optimize_spaceship (stmt);
5030 gsi_next (&gsi);
5032 if (fma_state.m_deferring_p
5033 && fma_state.m_initial_phi)
5035 gcc_checking_assert (fma_state.m_last_result);
5036 if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5037 &m_last_result_set))
5038 cancel_fma_deferring (&fma_state);
5039 else
5040 m_last_result_set.add (fma_state.m_last_result);
5045 unsigned int
5046 pass_optimize_widening_mul::execute (function *fun)
5048 bool cfg_changed = false;
5050 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5051 calculate_dominance_info (CDI_DOMINATORS);
5052 renumber_gimple_stmt_uids (cfun);
5054 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5056 statistics_counter_event (fun, "widening multiplications inserted",
5057 widen_mul_stats.widen_mults_inserted);
5058 statistics_counter_event (fun, "widening maccs inserted",
5059 widen_mul_stats.maccs_inserted);
5060 statistics_counter_event (fun, "fused multiply-adds inserted",
5061 widen_mul_stats.fmas_inserted);
5062 statistics_counter_event (fun, "divmod calls inserted",
5063 widen_mul_stats.divmod_calls_inserted);
5064 statistics_counter_event (fun, "highpart multiplications inserted",
5065 widen_mul_stats.highpart_mults_inserted);
5067 return cfg_changed ? TODO_cleanup_cfg : 0;
5070 } // anon namespace
5072 gimple_opt_pass *
5073 make_pass_optimize_widening_mul (gcc::context *ctxt)
5075 return new pass_optimize_widening_mul (ctxt);