gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* The first mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43    First of all, experiments have shown that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53    contain one.  (If divisions don't trap, we can instead insert the
54    divisions elsewhere, namely in blocks that are common dominators
55    of those that contain the division.)
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59    that has fewer than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63    and walks it twice.  The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
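   As a small illustration (a made-up CFG, not taken from real code):
   suppose block B2 falls through unconditionally to block B3, each block
   contains one division by the same SSA name, and N is 2.  The post-order
   walk assigns B3 a count of 1 and B2 a count of 2, because B3
   post-dominates B2; the pre-order walk then inserts the reciprocal just
   before the division in B2 and rewrites both divisions into
   multiplications.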
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-fold.h"
104 #include "gimple-iterator.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "params.h"
113 #include "internal-fn.h"
114 #include "case-cfn-macros.h"
115 #include "optabs-libfuncs.h"
116 #include "tree-eh.h"
117 #include "targhooks.h"
119 /* This structure represents one basic block that either computes a
120    division, or is a common dominator for basic blocks that compute a
121 division. */
122 struct occurrence {
123 /* The basic block represented by this structure. */
124 basic_block bb;
126 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
127 inserted in BB. */
128 tree recip_def;
130 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
131 was inserted in BB. */
132 gimple *recip_def_stmt;
134 /* Pointer to a list of "struct occurrence"s for blocks dominated
135 by BB. */
136 struct occurrence *children;
138   /* Pointer to the next "struct occurrence" in the list of blocks
139 sharing a common dominator. */
140 struct occurrence *next;
142 /* The number of divisions that are in BB before compute_merit. The
143 number of divisions that are in BB or post-dominate it after
144 compute_merit. */
145 int num_divisions;
147 /* True if the basic block has a division, false if it is a common
148 dominator for basic blocks that do. If it is false and trapping
149 math is active, BB is not a candidate for inserting a reciprocal. */
150 bool bb_has_division;
153 static struct
155 /* Number of 1.0/X ops inserted. */
156 int rdivs_inserted;
158 /* Number of 1.0/FUNC ops inserted. */
159 int rfuncs_inserted;
160 } reciprocal_stats;
162 static struct
164 /* Number of cexpi calls inserted. */
165 int inserted;
166 } sincos_stats;
168 static struct
170 /* Number of widening multiplication ops inserted. */
171 int widen_mults_inserted;
173 /* Number of integer multiply-and-accumulate ops inserted. */
174 int maccs_inserted;
176 /* Number of fp fused multiply-add ops inserted. */
177 int fmas_inserted;
179 /* Number of divmod calls inserted. */
180 int divmod_calls_inserted;
181 } widen_mul_stats;
183 /* The instance of "struct occurrence" representing the highest
184 interesting block in the dominator tree. */
185 static struct occurrence *occ_head;
187 /* Allocation pool for getting instances of "struct occurrence". */
188 static object_allocator<occurrence> *occ_pool;
192 /* Allocate and return a new struct occurrence for basic block BB,
193 whose children list is headed by CHILDREN. */
194 static struct occurrence *
195 occ_new (basic_block bb, struct occurrence *children)
197 struct occurrence *occ;
199 bb->aux = occ = occ_pool->allocate ();
200 memset (occ, 0, sizeof (struct occurrence));
202 occ->bb = bb;
203 occ->children = children;
204 return occ;
208 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
209 list of "struct occurrence"s, one per basic block, having IDOM as
210 their common dominator.
212 We try to insert NEW_OCC as deep as possible in the tree, and we also
213 insert any other block that is a common dominator for BB and one
214 block already in the tree. */
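/* For example (hypothetical blocks): if the tree already holds an
   occurrence for block B5, and NEW_OCC is for a block B7 such that
   neither dominates the other but both are dominated by a block B3 that
   is strictly deeper than IDOM, then an occurrence for B3 is created on
   the fly and B5 and B7 become its two children.  */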
216 static void
217 insert_bb (struct occurrence *new_occ, basic_block idom,
218 struct occurrence **p_head)
220 struct occurrence *occ, **p_occ;
222 for (p_occ = p_head; (occ = *p_occ) != NULL; )
224 basic_block bb = new_occ->bb, occ_bb = occ->bb;
225 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
226 if (dom == bb)
228 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
229 from its list. */
230 *p_occ = occ->next;
231 occ->next = new_occ->children;
232 new_occ->children = occ;
234 	  /* Try the next block (it might also be dominated by BB).  */
237 else if (dom == occ_bb)
239 /* OCC_BB dominates BB. Tail recurse to look deeper. */
240 insert_bb (new_occ, dom, &occ->children);
241 return;
244 else if (dom != idom)
246 gcc_assert (!dom->aux);
248 /* There is a dominator between IDOM and BB, add it and make
249 two children out of NEW_OCC and OCC. First, remove OCC from
250 its list. */
251 *p_occ = occ->next;
252 new_occ->next = occ;
253 occ->next = NULL;
255 /* None of the previous blocks has DOM as a dominator: if we tail
256 recursed, we would reexamine them uselessly. Just switch BB with
257 DOM, and go on looking for blocks dominated by DOM. */
258 new_occ = occ_new (dom, new_occ);
261 else
263 /* Nothing special, go on with the next element. */
264 p_occ = &occ->next;
268 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
269 new_occ->next = *p_head;
270 *p_head = new_occ;
273 /* Register that we found a division in BB. */
275 static inline void
276 register_division_in (basic_block bb)
278 struct occurrence *occ;
280 occ = (struct occurrence *) bb->aux;
281 if (!occ)
283 occ = occ_new (bb, NULL);
284 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
287 occ->bb_has_division = true;
288 occ->num_divisions++;
292 /* Compute the number of divisions that postdominate each block in OCC and
293 its children. */
295 static void
296 compute_merit (struct occurrence *occ)
298 struct occurrence *occ_child;
299 basic_block dom = occ->bb;
301 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
303 basic_block bb;
304 if (occ_child->children)
305 compute_merit (occ_child);
307 if (flag_exceptions)
308 bb = single_noncomplex_succ (dom);
309 else
310 bb = dom;
312 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
313 occ->num_divisions += occ_child->num_divisions;
318 /* Return whether USE_STMT is a floating-point division by DEF. */
319 static inline bool
320 is_division_by (gimple *use_stmt, tree def)
322 return is_gimple_assign (use_stmt)
323 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
324 && gimple_assign_rhs2 (use_stmt) == def
325 	 /* Do not recognize x / x as a valid division, as we would get
326 	    confused later by replacing all immediate uses of x in such
327 a stmt. */
328 && gimple_assign_rhs1 (use_stmt) != def;
331 /* Walk the subset of the dominator tree rooted at OCC, setting the
332 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
333 the given basic block. The field may be left NULL, of course,
334 if it is not possible or profitable to do the optimization.
336    DEF_GSI is an iterator pointing at the statement defining DEF.
337 If RECIP_DEF is set, a dominator already has a computation that can
338 be used. */
340 static void
341 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
342 tree def, tree recip_def, int threshold)
344 tree type;
345 gassign *new_stmt;
346 gimple_stmt_iterator gsi;
347 struct occurrence *occ_child;
349 if (!recip_def
350 && (occ->bb_has_division || !flag_trapping_math)
351 && occ->num_divisions >= threshold)
353 /* Make a variable with the replacement and substitute it. */
354 type = TREE_TYPE (def);
355 recip_def = create_tmp_reg (type, "reciptmp");
356 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
357 build_one_cst (type), def);
359 if (occ->bb_has_division)
361 /* Case 1: insert before an existing division. */
362 gsi = gsi_after_labels (occ->bb);
363 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
364 gsi_next (&gsi);
366 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
368 else if (def_gsi && occ->bb == def_gsi->bb)
370 /* Case 2: insert right after the definition. Note that this will
371 never happen if the definition statement can throw, because in
372 that case the sole successor of the statement's basic block will
373 dominate all the uses as well. */
374 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
376 else
378 /* Case 3: insert in a basic block not containing defs/uses. */
379 gsi = gsi_after_labels (occ->bb);
380 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
383 reciprocal_stats.rdivs_inserted++;
385 occ->recip_def_stmt = new_stmt;
388 occ->recip_def = recip_def;
389 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
390 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
394 /* Replace the division at USE_P with a multiplication by the reciprocal, if
395 possible. */
397 static inline void
398 replace_reciprocal (use_operand_p use_p)
400 gimple *use_stmt = USE_STMT (use_p);
401 basic_block bb = gimple_bb (use_stmt);
402 struct occurrence *occ = (struct occurrence *) bb->aux;
404 if (optimize_bb_for_speed_p (bb)
405 && occ->recip_def && use_stmt != occ->recip_def_stmt)
407 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
408 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
409 SET_USE (use_p, occ->recip_def);
410 fold_stmt_inplace (&gsi);
411 update_stmt (use_stmt);
416 /* Free OCC and return one more "struct occurrence" to be freed. */
418 static struct occurrence *
419 free_bb (struct occurrence *occ)
421 struct occurrence *child, *next;
423 /* First get the two pointers hanging off OCC. */
424 next = occ->next;
425 child = occ->children;
426 occ->bb->aux = NULL;
427 occ_pool->remove (occ);
429 /* Now ensure that we don't recurse unless it is necessary. */
430 if (!child)
431 return next;
432 else
434 while (next)
435 next = free_bb (next);
437 return child;
442 /* Look for floating-point divisions among DEF's uses, and try to
443 replace them by multiplications with the reciprocal. Add
444 as many statements computing the reciprocal as needed.
446 DEF must be a GIMPLE register of a floating-point type. */
448 static void
449 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
451 use_operand_p use_p;
452 imm_use_iterator use_iter;
453 struct occurrence *occ;
454 int count = 0, threshold;
456 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
458 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
460 gimple *use_stmt = USE_STMT (use_p);
461 if (is_division_by (use_stmt, def))
463 register_division_in (gimple_bb (use_stmt));
464 count++;
468 /* Do the expensive part only if we can hope to optimize something. */
469 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
470 if (count >= threshold)
472 gimple *use_stmt;
473 for (occ = occ_head; occ; occ = occ->next)
475 compute_merit (occ);
476 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
479 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
481 if (is_division_by (use_stmt, def))
483 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
484 replace_reciprocal (use_p);
489 for (occ = occ_head; occ; )
490 occ = free_bb (occ);
492 occ_head = NULL;
495 /* Return an internal function that implements the reciprocal of CALL,
496 or IFN_LAST if there is no such function that the target supports. */
498 internal_fn
499 internal_fn_reciprocal (gcall *call)
501 internal_fn ifn;
503 switch (gimple_call_combined_fn (call))
505 CASE_CFN_SQRT:
506 CASE_CFN_SQRT_FN:
507 ifn = IFN_RSQRT;
508 break;
510 default:
511 return IFN_LAST;
514 tree_pair types = direct_internal_fn_types (ifn, call);
515 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
516 return IFN_LAST;
518 return ifn;
521 /* Go through all the floating-point SSA_NAMEs, and call
522 execute_cse_reciprocals_1 on each of them. */
523 namespace {
525 const pass_data pass_data_cse_reciprocals =
527 GIMPLE_PASS, /* type */
528 "recip", /* name */
529 OPTGROUP_NONE, /* optinfo_flags */
530 TV_NONE, /* tv_id */
531 PROP_ssa, /* properties_required */
532 0, /* properties_provided */
533 0, /* properties_destroyed */
534 0, /* todo_flags_start */
535 TODO_update_ssa, /* todo_flags_finish */
538 class pass_cse_reciprocals : public gimple_opt_pass
540 public:
541 pass_cse_reciprocals (gcc::context *ctxt)
542 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
545 /* opt_pass methods: */
546 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
547 virtual unsigned int execute (function *);
549 }; // class pass_cse_reciprocals
551 unsigned int
552 pass_cse_reciprocals::execute (function *fun)
554 basic_block bb;
555 tree arg;
557 occ_pool = new object_allocator<occurrence> ("dominators for recip");
559 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
560 calculate_dominance_info (CDI_DOMINATORS);
561 calculate_dominance_info (CDI_POST_DOMINATORS);
563 if (flag_checking)
564 FOR_EACH_BB_FN (bb, fun)
565 gcc_assert (!bb->aux);
567 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
568 if (FLOAT_TYPE_P (TREE_TYPE (arg))
569 && is_gimple_reg (arg))
571 tree name = ssa_default_def (fun, arg);
572 if (name)
573 execute_cse_reciprocals_1 (NULL, name);
576 FOR_EACH_BB_FN (bb, fun)
578 tree def;
580 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
581 gsi_next (&gsi))
583 gphi *phi = gsi.phi ();
584 def = PHI_RESULT (phi);
585 if (! virtual_operand_p (def)
586 && FLOAT_TYPE_P (TREE_TYPE (def)))
587 execute_cse_reciprocals_1 (NULL, def);
590 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
591 gsi_next (&gsi))
593 gimple *stmt = gsi_stmt (gsi);
595 if (gimple_has_lhs (stmt)
596 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
597 && FLOAT_TYPE_P (TREE_TYPE (def))
598 && TREE_CODE (def) == SSA_NAME)
599 execute_cse_reciprocals_1 (&gsi, def);
602 if (optimize_bb_for_size_p (bb))
603 continue;
605 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
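/* For instance, with -freciprocal-math and a target that provides an
   rsqrt instruction, t = a / sqrt (b) is rewritten here as
   t = a * rsqrt (b), provided every use of the sqrt result is such a
   division.  */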
606 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
607 gsi_next (&gsi))
609 gimple *stmt = gsi_stmt (gsi);
611 if (is_gimple_assign (stmt)
612 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
614 tree arg1 = gimple_assign_rhs2 (stmt);
615 gimple *stmt1;
617 if (TREE_CODE (arg1) != SSA_NAME)
618 continue;
620 stmt1 = SSA_NAME_DEF_STMT (arg1);
622 if (is_gimple_call (stmt1)
623 && gimple_call_lhs (stmt1))
625 bool fail;
626 imm_use_iterator ui;
627 use_operand_p use_p;
628 tree fndecl = NULL_TREE;
630 gcall *call = as_a <gcall *> (stmt1);
631 internal_fn ifn = internal_fn_reciprocal (call);
632 if (ifn == IFN_LAST)
634 fndecl = gimple_call_fndecl (call);
635 if (!fndecl
636 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_MD)
637 continue;
638 fndecl = targetm.builtin_reciprocal (fndecl);
639 if (!fndecl)
640 continue;
643 /* Check that all uses of the SSA name are divisions,
644 otherwise replacing the defining statement will do
645 the wrong thing. */
646 fail = false;
647 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
649 gimple *stmt2 = USE_STMT (use_p);
650 if (is_gimple_debug (stmt2))
651 continue;
652 if (!is_gimple_assign (stmt2)
653 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
654 || gimple_assign_rhs1 (stmt2) == arg1
655 || gimple_assign_rhs2 (stmt2) != arg1)
657 fail = true;
658 break;
661 if (fail)
662 continue;
664 gimple_replace_ssa_lhs (call, arg1);
665 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
667 auto_vec<tree, 4> args;
668 for (unsigned int i = 0;
669 i < gimple_call_num_args (call); i++)
670 args.safe_push (gimple_call_arg (call, i));
671 gcall *stmt2;
672 if (ifn == IFN_LAST)
673 stmt2 = gimple_build_call_vec (fndecl, args);
674 else
675 stmt2 = gimple_build_call_internal_vec (ifn, args);
676 gimple_call_set_lhs (stmt2, arg1);
677 if (gimple_vdef (call))
679 gimple_set_vdef (stmt2, gimple_vdef (call));
680 SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2;
682 gimple_call_set_nothrow (stmt2,
683 gimple_call_nothrow_p (call));
684 gimple_set_vuse (stmt2, gimple_vuse (call));
685 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
686 gsi_replace (&gsi2, stmt2, true);
688 else
690 if (ifn == IFN_LAST)
691 gimple_call_set_fndecl (call, fndecl);
692 else
693 gimple_call_set_internal_fn (call, ifn);
694 update_stmt (call);
696 reciprocal_stats.rfuncs_inserted++;
698 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
700 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
701 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
702 fold_stmt_inplace (&gsi);
703 update_stmt (stmt);
710 statistics_counter_event (fun, "reciprocal divs inserted",
711 reciprocal_stats.rdivs_inserted);
712 statistics_counter_event (fun, "reciprocal functions inserted",
713 reciprocal_stats.rfuncs_inserted);
715 free_dominance_info (CDI_DOMINATORS);
716 free_dominance_info (CDI_POST_DOMINATORS);
717 delete occ_pool;
718 return 0;
721 } // anon namespace
723 gimple_opt_pass *
724 make_pass_cse_reciprocals (gcc::context *ctxt)
726 return new pass_cse_reciprocals (ctxt);
729 /* Records an occurrence at statement USE_STMT in the vector of statements
730    STMTS if it is dominated by *TOP_BB, dominates it, or *TOP_BB has not
731    yet been initialized.  Returns true if the occurrence was pushed on
732 the vector. Adjusts *TOP_BB to be the basic block dominating all
733 statements in the vector. */
735 static bool
736 maybe_record_sincos (vec<gimple *> *stmts,
737 basic_block *top_bb, gimple *use_stmt)
739 basic_block use_bb = gimple_bb (use_stmt);
740 if (*top_bb
741 && (*top_bb == use_bb
742 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
743 stmts->safe_push (use_stmt);
744 else if (!*top_bb
745 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
747 stmts->safe_push (use_stmt);
748 *top_bb = use_bb;
750 else
751 return false;
753 return true;
756 /* Look for sin, cos and cexpi calls with the same argument NAME and
757 create a single call to cexpi CSEing the result in this case.
758 We first walk over all immediate uses of the argument collecting
759 statements that we can CSE in a vector and in a second pass replace
760 the statement rhs with a REALPART or IMAGPART expression on the
761 result of the cexpi call we insert before the use statement that
762 dominates all other candidates. */
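/* For example (illustrative GIMPLE, SSA names made up):

     s_1 = sin (x_5);
     c_2 = cos (x_5);

   becomes

     sincostmp_9 = cexpi (x_5);
     s_1 = IMAGPART_EXPR <sincostmp_9>;
     c_2 = REALPART_EXPR <sincostmp_9>;  */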
764 static bool
765 execute_cse_sincos_1 (tree name)
767 gimple_stmt_iterator gsi;
768 imm_use_iterator use_iter;
769 tree fndecl, res, type;
770 gimple *def_stmt, *use_stmt, *stmt;
771 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
772 auto_vec<gimple *> stmts;
773 basic_block top_bb = NULL;
774 int i;
775 bool cfg_changed = false;
777 type = TREE_TYPE (name);
778 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
780 if (gimple_code (use_stmt) != GIMPLE_CALL
781 || !gimple_call_lhs (use_stmt))
782 continue;
784 switch (gimple_call_combined_fn (use_stmt))
786 CASE_CFN_COS:
787 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
788 break;
790 CASE_CFN_SIN:
791 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
792 break;
794 CASE_CFN_CEXPI:
795 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
796 break;
798 default:;
802 if (seen_cos + seen_sin + seen_cexpi <= 1)
803 return false;
805 /* Simply insert cexpi at the beginning of top_bb but not earlier than
806 the name def statement. */
807 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
808 if (!fndecl)
809 return false;
810 stmt = gimple_build_call (fndecl, 1, name);
811 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
812 gimple_call_set_lhs (stmt, res);
814 def_stmt = SSA_NAME_DEF_STMT (name);
815 if (!SSA_NAME_IS_DEFAULT_DEF (name)
816 && gimple_code (def_stmt) != GIMPLE_PHI
817 && gimple_bb (def_stmt) == top_bb)
819 gsi = gsi_for_stmt (def_stmt);
820 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
822 else
824 gsi = gsi_after_labels (top_bb);
825 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
827 sincos_stats.inserted++;
829 /* And adjust the recorded old call sites. */
830 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
832 tree rhs = NULL;
834 switch (gimple_call_combined_fn (use_stmt))
836 CASE_CFN_COS:
837 rhs = fold_build1 (REALPART_EXPR, type, res);
838 break;
840 CASE_CFN_SIN:
841 rhs = fold_build1 (IMAGPART_EXPR, type, res);
842 break;
844 CASE_CFN_CEXPI:
845 rhs = res;
846 break;
848 default:;
849 gcc_unreachable ();
852 /* Replace call with a copy. */
853 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
855 gsi = gsi_for_stmt (use_stmt);
856 gsi_replace (&gsi, stmt, true);
857 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
858 cfg_changed = true;
861 return cfg_changed;
864 /* To evaluate powi(x,n), the floating point value x raised to the
865 constant integer exponent n, we use a hybrid algorithm that
866 combines the "window method" with look-up tables. For an
867 introduction to exponentiation algorithms and "addition chains",
868 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
869 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
870 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
871 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
873 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
874 multiplications to inline before calling the system library's pow
875 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
876 so this default never requires calling pow, powf or powl. */
878 #ifndef POWI_MAX_MULTS
879 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
880 #endif
882 /* The size of the "optimal power tree" lookup table. All
883 exponents less than this value are simply looked up in the
884 powi_table below. This threshold is also used to size the
885 cache of pseudo registers that hold intermediate results. */
886 #define POWI_TABLE_SIZE 256
888 /* The size, in bits, of the window used in the "window method"
889 exponentiation algorithm. This is equivalent to a radix of
890 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
891 #define POWI_WINDOW_SIZE 3
893 /* The following table is an efficient representation of an
894 "optimal power tree". For each value, i, the corresponding
895    value, j, in the table states that an optimal evaluation
896 sequence for calculating pow(x,i) can be found by evaluating
897 pow(x,j)*pow(x,i-j). An optimal power tree for the first
898 100 integers is given in Knuth's "Seminumerical algorithms". */
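/* For example, powi_table[15] is 9, so pow (x, 15) is evaluated as
   pow (x, 9) * pow (x, 6); recursing through the table,
   pow (x, 9) = pow (x, 6) * pow (x, 3),
   pow (x, 6) = pow (x, 3) * pow (x, 3),
   pow (x, 3) = pow (x, 2) * x and pow (x, 2) = x * x,
   i.e. five multiplications in total.  */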
900 static const unsigned char powi_table[POWI_TABLE_SIZE] =
902 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
903 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
904 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
905 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
906 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
907 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
908 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
909 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
910 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
911 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
912 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
913 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
914 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
915 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
916 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
917 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
918 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
919 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
920 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
921 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
922 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
923 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
924 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
925 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
926 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
927 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
928 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
929 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
930 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
931 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
932 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
933 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
937 /* Return the number of multiplications required to calculate
938 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
939 subroutine of powi_cost. CACHE is an array indicating
940 which exponents have already been calculated. */
942 static int
943 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
945 /* If we've already calculated this exponent, then this evaluation
946 doesn't require any additional multiplications. */
947 if (cache[n])
948 return 0;
950 cache[n] = true;
951 return powi_lookup_cost (n - powi_table[n], cache)
952 + powi_lookup_cost (powi_table[n], cache) + 1;
955 /* Return the number of multiplications required to calculate
956 powi(x,n) for an arbitrary x, given the exponent N. This
957 function needs to be kept in sync with powi_as_mults below. */
959 static int
960 powi_cost (HOST_WIDE_INT n)
962 bool cache[POWI_TABLE_SIZE];
963 unsigned HOST_WIDE_INT digit;
964 unsigned HOST_WIDE_INT val;
965 int result;
967 if (n == 0)
968 return 0;
970 /* Ignore the reciprocal when calculating the cost. */
971 val = (n < 0) ? -n : n;
973 /* Initialize the exponent cache. */
974 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
975 cache[1] = true;
977 result = 0;
979 while (val >= POWI_TABLE_SIZE)
981 if (val & 1)
983 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
984 result += powi_lookup_cost (digit, cache)
985 + POWI_WINDOW_SIZE + 1;
986 val >>= POWI_WINDOW_SIZE;
988 else
990 val >>= 1;
991 result++;
995 return result + powi_lookup_cost (val, cache);
998 /* Recursive subroutine of powi_as_mults. This function takes the
999 array, CACHE, of already calculated exponents and an exponent N and
1000 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1002 static tree
1003 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1004 HOST_WIDE_INT n, tree *cache)
1006 tree op0, op1, ssa_target;
1007 unsigned HOST_WIDE_INT digit;
1008 gassign *mult_stmt;
1010 if (n < POWI_TABLE_SIZE && cache[n])
1011 return cache[n];
1013 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1015 if (n < POWI_TABLE_SIZE)
1017 cache[n] = ssa_target;
1018 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1019 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1021 else if (n & 1)
1023 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1024 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1025 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1027 else
1029 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1030 op1 = op0;
1033 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1034 gimple_set_location (mult_stmt, loc);
1035 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1037 return ssa_target;
1040 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1041 This function needs to be kept in sync with powi_cost above. */
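/* For instance, for N == 5 this emits (with illustrative SSA names)

     powmult_1 = ARG0 * ARG0;            <-- ARG0**2
     powmult_2 = ARG0 * powmult_1;       <-- ARG0**3
     powmult_3 = powmult_1 * powmult_2;  <-- ARG0**5

   and returns powmult_3, using powi_cost (5) == 3 multiplications.  */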
1043 static tree
1044 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1045 tree arg0, HOST_WIDE_INT n)
1047 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1048 gassign *div_stmt;
1049 tree target;
1051 if (n == 0)
1052 return build_real (type, dconst1);
1054 memset (cache, 0, sizeof (cache));
1055 cache[1] = arg0;
1057 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1058 if (n >= 0)
1059 return result;
1061 /* If the original exponent was negative, reciprocate the result. */
1062 target = make_temp_ssa_name (type, NULL, "powmult");
1063 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1064 build_real (type, dconst1), result);
1065 gimple_set_location (div_stmt, loc);
1066 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1068 return target;
1071 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1072 location info LOC. If the arguments are appropriate, create an
1073 equivalent sequence of statements prior to GSI using an optimal
1074    number of multiplications, and return an expression holding the
1075 result. */
1077 static tree
1078 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1079 tree arg0, HOST_WIDE_INT n)
1081 /* Avoid largest negative number. */
1082 if (n != -n
1083 && ((n >= -1 && n <= 2)
1084 || (optimize_function_for_speed_p (cfun)
1085 && powi_cost (n) <= POWI_MAX_MULTS)))
1086 return powi_as_mults (gsi, loc, arg0, n);
1088 return NULL_TREE;
1091 /* Build a gimple call statement that calls FN with argument ARG.
1092 Set the lhs of the call statement to a fresh SSA name. Insert the
1093 statement prior to GSI's current position, and return the fresh
1094 SSA name. */
1096 static tree
1097 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1098 tree fn, tree arg)
1100 gcall *call_stmt;
1101 tree ssa_target;
1103 call_stmt = gimple_build_call (fn, 1, arg);
1104 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1105 gimple_set_lhs (call_stmt, ssa_target);
1106 gimple_set_location (call_stmt, loc);
1107 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1109 return ssa_target;
1112 /* Build a gimple binary operation with the given CODE and arguments
1113    ARG0, ARG1, assigning the result to a new SSA name created from
1114    NAME.  Insert the statement prior to GSI's current position, and
1115    return the fresh SSA name.  */
1117 static tree
1118 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1119 const char *name, enum tree_code code,
1120 tree arg0, tree arg1)
1122 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1123 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1124 gimple_set_location (stmt, loc);
1125 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1126 return result;
1129 /* Build a gimple reference operation with the given CODE and argument
1130 ARG, assigning the result to a new SSA name of TYPE with NAME.
1131 Insert the statement prior to GSI's current position, and return
1132 the fresh SSA name. */
1134 static inline tree
1135 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1136 const char *name, enum tree_code code, tree arg0)
1138 tree result = make_temp_ssa_name (type, NULL, name);
1139 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1140 gimple_set_location (stmt, loc);
1141 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1142 return result;
1145 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1146 prior to GSI's current position, and return the fresh SSA name. */
1148 static tree
1149 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1150 tree type, tree val)
1152 tree result = make_ssa_name (type);
1153 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1154 gimple_set_location (stmt, loc);
1155 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1156 return result;
1159 struct pow_synth_sqrt_info
1161 bool *factors;
1162 unsigned int deepest;
1163 unsigned int num_mults;
1166 /* Return true iff the real value C can be represented as a
1167 sum of powers of 0.5 up to N. That is:
1168 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1169 Record in INFO the various parameters of the synthesis algorithm such
1170 as the factors a[i], the maximum 0.5 power and the number of
1171 multiplications that will be required. */
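/* For example, C == 0.625 is 0.5 + 0.125 == 0.5**1 + 0.5**3, so for
   N >= 3 this returns true with factors {1, 0, 1}, deepest == 3 and
   num_mults == 1 (one multiplication joins the sqrt (x) and
   sqrt (sqrt (sqrt (x))) chains).  */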
1173 bool
1174 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1175 struct pow_synth_sqrt_info *info)
1177 REAL_VALUE_TYPE factor = dconsthalf;
1178 REAL_VALUE_TYPE remainder = c;
1180 info->deepest = 0;
1181 info->num_mults = 0;
1182 memset (info->factors, 0, n * sizeof (bool));
1184 for (unsigned i = 0; i < n; i++)
1186 REAL_VALUE_TYPE res;
1188 /* If something inexact happened bail out now. */
1189 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1190 return false;
1192 /* We have hit zero. The number is representable as a sum
1193 of powers of 0.5. */
1194 if (real_equal (&res, &dconst0))
1196 info->factors[i] = true;
1197 info->deepest = i + 1;
1198 return true;
1200 else if (!REAL_VALUE_NEGATIVE (res))
1202 remainder = res;
1203 info->factors[i] = true;
1204 info->num_mults++;
1206 else
1207 info->factors[i] = false;
1209 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1211 return false;
1214 /* Return the tree corresponding to FN being applied
1215 to ARG N times at GSI and LOC.
1216 Look up previous results from CACHE if need be.
1217 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1219 static tree
1220 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1221 tree fn, location_t loc, tree *cache)
1223 tree res = cache[n];
1224 if (!res)
1226 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1227 res = build_and_insert_call (gsi, loc, fn, prev);
1228 cache[n] = res;
1231 return res;
1234 /* Print to STREAM the repeated application of function FNAME to ARG
1235 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1236 "foo (foo (x))". */
1238 static void
1239 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1240 unsigned int n)
1242 if (n == 0)
1243 fprintf (stream, "%s", arg);
1244 else
1246 fprintf (stream, "%s (", fname);
1247 print_nested_fn (stream, fname, arg, n - 1);
1248 fprintf (stream, ")");
1252 /* Print to STREAM the fractional sequence of sqrt chains
1253 applied to ARG, described by INFO. Used for the dump file. */
1255 static void
1256 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1257 struct pow_synth_sqrt_info *info)
1259 for (unsigned int i = 0; i < info->deepest; i++)
1261 bool is_set = info->factors[i];
1262 if (is_set)
1264 print_nested_fn (stream, "sqrt", arg, i + 1);
1265 if (i != info->deepest - 1)
1266 fprintf (stream, " * ");
1271 /* Print to STREAM a representation of raising ARG to an integer
1272 power N. Used for the dump file. */
1274 static void
1275 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1277 if (n > 1)
1278 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1279 else if (n == 1)
1280 fprintf (stream, "%s", arg);
1283 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1284 square roots. Place at GSI and LOC. Limit the maximum depth
1285 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1286 result of the expanded sequence or NULL_TREE if the expansion failed.
1288 This routine assumes that ARG1 is a real number with a fractional part
1289 (the integer exponent case will have been handled earlier in
1290 gimple_expand_builtin_pow).
1292 For ARG1 > 0.0:
1293 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1294 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1295 FRAC_PART == ARG1 - WHOLE_PART:
1296 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1297 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1298 if it can be expressed as such, that is if FRAC_PART satisfies:
1299 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1300 where integer a[i] is either 0 or 1.
1302 Example:
1303 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1304 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1306 For ARG1 < 0.0 there are two approaches:
1307 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1308 is calculated as above.
1310 Example:
1311 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1312 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1314 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1315 FRAC_PART := ARG1 - WHOLE_PART
1316 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1317 Example:
1318 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1319 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1321 For ARG1 < 0.0 we choose between (A) and (B) depending on
1322 how many multiplications we'd have to do.
1323 So, for the example in (B): POW (x, -5.875), if we were to
1324 follow algorithm (A) we would produce:
1325 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1326 which contains more multiplications than approach (B).
1328 Hopefully, this approach will eliminate potentially expensive POW library
1329 calls when unsafe floating point math is enabled and allow the compiler to
1330 further optimise the multiplies, square roots and divides produced by this
1331 function. */
1333 static tree
1334 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1335 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1337 tree type = TREE_TYPE (arg0);
1338 machine_mode mode = TYPE_MODE (type);
1339 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1340 bool one_over = true;
1342 if (!sqrtfn)
1343 return NULL_TREE;
1345 if (TREE_CODE (arg1) != REAL_CST)
1346 return NULL_TREE;
1348 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1350 gcc_assert (max_depth > 0);
1351 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1353 struct pow_synth_sqrt_info synth_info;
1354 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1355 synth_info.deepest = 0;
1356 synth_info.num_mults = 0;
1358 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1359 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1361 /* The whole and fractional parts of exp. */
1362 REAL_VALUE_TYPE whole_part;
1363 REAL_VALUE_TYPE frac_part;
1365 real_floor (&whole_part, mode, &exp);
1366 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1369 REAL_VALUE_TYPE ceil_whole = dconst0;
1370 REAL_VALUE_TYPE ceil_fract = dconst0;
1372 if (neg_exp)
1374 real_ceil (&ceil_whole, mode, &exp);
1375 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1378 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1379 return NULL_TREE;
1381 /* Check whether it's more profitable to not use 1.0 / ... */
1382 if (neg_exp)
1384 struct pow_synth_sqrt_info alt_synth_info;
1385 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1386 alt_synth_info.deepest = 0;
1387 alt_synth_info.num_mults = 0;
1389 if (representable_as_half_series_p (ceil_fract, max_depth,
1390 &alt_synth_info)
1391 && alt_synth_info.deepest <= synth_info.deepest
1392 && alt_synth_info.num_mults < synth_info.num_mults)
1394 whole_part = ceil_whole;
1395 frac_part = ceil_fract;
1396 synth_info.deepest = alt_synth_info.deepest;
1397 synth_info.num_mults = alt_synth_info.num_mults;
1398 memcpy (synth_info.factors, alt_synth_info.factors,
1399 (max_depth + 1) * sizeof (bool));
1400 one_over = false;
1404 HOST_WIDE_INT n = real_to_integer (&whole_part);
1405 REAL_VALUE_TYPE cint;
1406 real_from_integer (&cint, VOIDmode, n, SIGNED);
1408 if (!real_identical (&whole_part, &cint))
1409 return NULL_TREE;
1411 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1412 return NULL_TREE;
1414 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1416 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1418 /* Calculate the integer part of the exponent. */
1419 if (n > 1)
1421 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1422 if (!integer_res)
1423 return NULL_TREE;
1426 if (dump_file)
1428 char string[64];
1430 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1431 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1433 if (neg_exp)
1435 if (one_over)
1437 fprintf (dump_file, "1.0 / (");
1438 dump_integer_part (dump_file, "x", n);
1439 if (n > 0)
1440 fprintf (dump_file, " * ");
1441 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1442 fprintf (dump_file, ")");
1444 else
1446 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1447 fprintf (dump_file, " / (");
1448 dump_integer_part (dump_file, "x", n);
1449 fprintf (dump_file, ")");
1452 else
1454 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1455 if (n > 0)
1456 fprintf (dump_file, " * ");
1457 dump_integer_part (dump_file, "x", n);
1460 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1464 tree fract_res = NULL_TREE;
1465 cache[0] = arg0;
1467 /* Calculate the fractional part of the exponent. */
1468 for (unsigned i = 0; i < synth_info.deepest; i++)
1470 if (synth_info.factors[i])
1472 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1474 if (!fract_res)
1475 fract_res = sqrt_chain;
1477 else
1478 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1479 fract_res, sqrt_chain);
1483 tree res = NULL_TREE;
1485 if (neg_exp)
1487 if (one_over)
1489 if (n > 0)
1490 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1491 fract_res, integer_res);
1492 else
1493 res = fract_res;
1495 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1496 build_real (type, dconst1), res);
1498 else
1500 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1501 fract_res, integer_res);
1504 else
1505 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1506 fract_res, integer_res);
1507 return res;
1510 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1511 with location info LOC. If possible, create an equivalent and
1512 less expensive sequence of statements prior to GSI, and return an
1513    expression holding the result.  */
1515 static tree
1516 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1517 tree arg0, tree arg1)
1519 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1520 REAL_VALUE_TYPE c2, dconst3;
1521 HOST_WIDE_INT n;
1522 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1523 machine_mode mode;
1524 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1525 bool hw_sqrt_exists, c_is_int, c2_is_int;
1527 dconst1_4 = dconst1;
1528 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1530 /* If the exponent isn't a constant, there's nothing of interest
1531 to be done. */
1532 if (TREE_CODE (arg1) != REAL_CST)
1533 return NULL_TREE;
1535 /* Don't perform the operation if flag_signaling_nans is on
1536 and the operand is a signaling NaN. */
1537 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
1538 && ((TREE_CODE (arg0) == REAL_CST
1539 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
1540 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
1541 return NULL_TREE;
1543 /* If the exponent is equivalent to an integer, expand to an optimal
1544 multiplication sequence when profitable. */
1545 c = TREE_REAL_CST (arg1);
1546 n = real_to_integer (&c);
1547 real_from_integer (&cint, VOIDmode, n, SIGNED);
1548 c_is_int = real_identical (&c, &cint);
1550 if (c_is_int
1551 && ((n >= -1 && n <= 2)
1552 || (flag_unsafe_math_optimizations
1553 && speed_p
1554 && powi_cost (n) <= POWI_MAX_MULTS)))
1555 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1557 /* Attempt various optimizations using sqrt and cbrt. */
1558 type = TREE_TYPE (arg0);
1559 mode = TYPE_MODE (type);
1560 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1562 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1563 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1564 sqrt(-0) = -0. */
1565 if (sqrtfn
1566 && real_equal (&c, &dconsthalf)
1567 && !HONOR_SIGNED_ZEROS (mode))
1568 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1570 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1572 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1573 optimizations since 1./3. is not exactly representable. If x
1574 is negative and finite, the correct value of pow(x,1./3.) is
1575 a NaN with the "invalid" exception raised, because the value
1576 of 1./3. actually has an even denominator. The correct value
1577 of cbrt(x) is a negative real value. */
1578 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1579 dconst1_3 = real_value_truncate (mode, dconst_third ());
1581 if (flag_unsafe_math_optimizations
1582 && cbrtfn
1583 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1584 && real_equal (&c, &dconst1_3))
1585 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1587 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1588 if we don't have a hardware sqrt insn. */
1589 dconst1_6 = dconst1_3;
1590 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1592 if (flag_unsafe_math_optimizations
1593 && sqrtfn
1594 && cbrtfn
1595 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1596 && speed_p
1597 && hw_sqrt_exists
1598 && real_equal (&c, &dconst1_6))
1600 /* sqrt(x) */
1601 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1603 /* cbrt(sqrt(x)) */
1604 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1608 /* Attempt to expand the POW as a product of square root chains.
1609      Expand the 0.25 case even when optimizing for size.  */
1610 if (flag_unsafe_math_optimizations
1611 && sqrtfn
1612 && hw_sqrt_exists
1613 && (speed_p || real_equal (&c, &dconst1_4))
1614 && !HONOR_SIGNED_ZEROS (mode))
1616 unsigned int max_depth = speed_p
1617 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1618 : 2;
1620 tree expand_with_sqrts
1621 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1623 if (expand_with_sqrts)
1624 return expand_with_sqrts;
1627 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1628 n = real_to_integer (&c2);
1629 real_from_integer (&cint, VOIDmode, n, SIGNED);
1630 c2_is_int = real_identical (&c2, &cint);
1632 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1634 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1635 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1637 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1638 different from pow(x, 1./3.) due to rounding and behavior with
1639 negative x, we need to constrain this transformation to unsafe
1640 math and positive x or finite math. */
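  /* For example, pow (x, 2./3.) becomes cbrt (x) * cbrt (x), and
     pow (x, 5./3.) becomes x * (cbrt (x) * cbrt (x)), i.e.
     powi (x, 1) * powi (cbrt (x), 2).  */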
1641 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1642 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1643 real_round (&c2, mode, &c2);
1644 n = real_to_integer (&c2);
1645 real_from_integer (&cint, VOIDmode, n, SIGNED);
1646 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1647 real_convert (&c2, mode, &c2);
1649 if (flag_unsafe_math_optimizations
1650 && cbrtfn
1651 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
1652 && real_identical (&c2, &c)
1653 && !c2_is_int
1654 && optimize_function_for_speed_p (cfun)
1655 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1657 tree powi_x_ndiv3 = NULL_TREE;
1659 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1660 possible or profitable, give up. Skip the degenerate case when
1661 abs(n) < 3, where the result is always 1. */
1662 if (absu_hwi (n) >= 3)
1664 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1665 abs_hwi (n / 3));
1666 if (!powi_x_ndiv3)
1667 return NULL_TREE;
1670 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1671 as that creates an unnecessary variable. Instead, just produce
1672 either cbrt(x) or cbrt(x) * cbrt(x). */
1673 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1675 if (absu_hwi (n) % 3 == 1)
1676 powi_cbrt_x = cbrt_x;
1677 else
1678 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1679 cbrt_x, cbrt_x);
1681 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1682 if (absu_hwi (n) < 3)
1683 result = powi_cbrt_x;
1684 else
1685 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1686 powi_x_ndiv3, powi_cbrt_x);
1688 /* If n is negative, reciprocate the result. */
1689 if (n < 0)
1690 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1691 build_real (type, dconst1), result);
1693 return result;
1696 /* No optimizations succeeded. */
1697 return NULL_TREE;
1700 /* ARG is the argument to a cabs builtin call in GSI with location info
1701 LOC. Create a sequence of statements prior to GSI that calculates
1702 sqrt(R*R + I*I), where R and I are the real and imaginary components
1703 of ARG, respectively. Return an expression holding the result. */
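/* For instance, cabs (z_1) is lowered to roughly (illustrative names)

     cabs_r = REALPART_EXPR <z_1>;
     cabs_i = IMAGPART_EXPR <z_1>;
     cabs_s = cabs_r * cabs_r + cabs_i * cabs_i;
     result = sqrt (cabs_s);

   This is only done under -funsafe-math-optimizations because the naive
   sum of squares can overflow or lose precision where the library cabs
   would not.  */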
1705 static tree
1706 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1708 tree real_part, imag_part, addend1, addend2, sum, result;
1709 tree type = TREE_TYPE (TREE_TYPE (arg));
1710 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1711 machine_mode mode = TYPE_MODE (type);
1713 if (!flag_unsafe_math_optimizations
1714 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1715 || !sqrtfn
1716 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1717 return NULL_TREE;
1719 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1720 REALPART_EXPR, arg);
1721 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1722 real_part, real_part);
1723 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1724 IMAGPART_EXPR, arg);
1725 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1726 imag_part, imag_part);
1727 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1728 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1730 return result;
1733 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1734 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1735 an optimal number of multiplies, when n is a constant. */
1737 namespace {
1739 const pass_data pass_data_cse_sincos =
1741 GIMPLE_PASS, /* type */
1742 "sincos", /* name */
1743 OPTGROUP_NONE, /* optinfo_flags */
1744 TV_NONE, /* tv_id */
1745 PROP_ssa, /* properties_required */
1746 PROP_gimple_opt_math, /* properties_provided */
1747 0, /* properties_destroyed */
1748 0, /* todo_flags_start */
1749 TODO_update_ssa, /* todo_flags_finish */
1752 class pass_cse_sincos : public gimple_opt_pass
1754 public:
1755 pass_cse_sincos (gcc::context *ctxt)
1756 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1759 /* opt_pass methods: */
1760 virtual bool gate (function *)
1762 /* We no longer require either sincos or cexp, since powi expansion
1763 piggybacks on this pass. */
1764 return optimize;
1767 virtual unsigned int execute (function *);
1769 }; // class pass_cse_sincos
1771 unsigned int
1772 pass_cse_sincos::execute (function *fun)
1774 basic_block bb;
1775 bool cfg_changed = false;
1777 calculate_dominance_info (CDI_DOMINATORS);
1778 memset (&sincos_stats, 0, sizeof (sincos_stats));
1780 FOR_EACH_BB_FN (bb, fun)
1782 gimple_stmt_iterator gsi;
1783 bool cleanup_eh = false;
1785 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1787 gimple *stmt = gsi_stmt (gsi);
1789 /* Only the last stmt in a bb could throw, no need to call
1790 gimple_purge_dead_eh_edges if we change something in the middle
1791 of a basic block. */
1792 cleanup_eh = false;
1794 if (is_gimple_call (stmt)
1795 && gimple_call_lhs (stmt))
1797 tree arg, arg0, arg1, result;
1798 HOST_WIDE_INT n;
1799 location_t loc;
1801 switch (gimple_call_combined_fn (stmt))
1803 CASE_CFN_COS:
1804 CASE_CFN_SIN:
1805 CASE_CFN_CEXPI:
1806 /* Make sure we have either sincos or cexp. */
1807 if (!targetm.libc_has_function (function_c99_math_complex)
1808 && !targetm.libc_has_function (function_sincos))
1809 break;
1811 arg = gimple_call_arg (stmt, 0);
1812 if (TREE_CODE (arg) == SSA_NAME)
1813 cfg_changed |= execute_cse_sincos_1 (arg);
1814 break;
1816 CASE_CFN_POW:
1817 arg0 = gimple_call_arg (stmt, 0);
1818 arg1 = gimple_call_arg (stmt, 1);
1820 loc = gimple_location (stmt);
1821 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1823 if (result)
1825 tree lhs = gimple_get_lhs (stmt);
1826 gassign *new_stmt = gimple_build_assign (lhs, result);
1827 gimple_set_location (new_stmt, loc);
1828 unlink_stmt_vdef (stmt);
1829 gsi_replace (&gsi, new_stmt, true);
1830 cleanup_eh = true;
1831 if (gimple_vdef (stmt))
1832 release_ssa_name (gimple_vdef (stmt));
1834 break;
1836 CASE_CFN_POWI:
1837 arg0 = gimple_call_arg (stmt, 0);
1838 arg1 = gimple_call_arg (stmt, 1);
1839 loc = gimple_location (stmt);
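	      /* powi (-1.0, n) is 1.0 for even n and -1.0 for odd n, so it
		 can be computed with a single AND of the exponent's low bit
		 and a COND_EXPR instead of a chain of multiplications.  */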
1841 if (real_minus_onep (arg0))
1843 tree t0, t1, cond, one, minus_one;
1844 gassign *stmt;
1846 t0 = TREE_TYPE (arg0);
1847 t1 = TREE_TYPE (arg1);
1848 one = build_real (t0, dconst1);
1849 minus_one = build_real (t0, dconstm1);
1851 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1852 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1853 arg1, build_int_cst (t1, 1));
1854 gimple_set_location (stmt, loc);
1855 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1857 result = make_temp_ssa_name (t0, NULL, "powi");
1858 stmt = gimple_build_assign (result, COND_EXPR, cond,
1859 minus_one, one);
1860 gimple_set_location (stmt, loc);
1861 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1863 else
1865 if (!tree_fits_shwi_p (arg1))
1866 break;
1868 n = tree_to_shwi (arg1);
1869 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1872 if (result)
1874 tree lhs = gimple_get_lhs (stmt);
1875 gassign *new_stmt = gimple_build_assign (lhs, result);
1876 gimple_set_location (new_stmt, loc);
1877 unlink_stmt_vdef (stmt);
1878 gsi_replace (&gsi, new_stmt, true);
1879 cleanup_eh = true;
1880 if (gimple_vdef (stmt))
1881 release_ssa_name (gimple_vdef (stmt));
1883 break;
1885 CASE_CFN_CABS:
1886 arg0 = gimple_call_arg (stmt, 0);
1887 loc = gimple_location (stmt);
1888 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1890 if (result)
1892 tree lhs = gimple_get_lhs (stmt);
1893 gassign *new_stmt = gimple_build_assign (lhs, result);
1894 gimple_set_location (new_stmt, loc);
1895 unlink_stmt_vdef (stmt);
1896 gsi_replace (&gsi, new_stmt, true);
1897 cleanup_eh = true;
1898 if (gimple_vdef (stmt))
1899 release_ssa_name (gimple_vdef (stmt));
1901 break;
1903 default:;
1907 if (cleanup_eh)
1908 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1911 statistics_counter_event (fun, "sincos statements inserted",
1912 sincos_stats.inserted);
1914 return cfg_changed ? TODO_cleanup_cfg : 0;
1917 } // anon namespace
1919 gimple_opt_pass *
1920 make_pass_cse_sincos (gcc::context *ctxt)
1922 return new pass_cse_sincos (ctxt);
1925 /* Return true if stmt is a type conversion operation that can be stripped
1926 when used in a widening multiply operation. */
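/* An illustrative sketch (hypothetical types): given
     short a, b;
     int r = (int) a * (int) b;
   the two (int) conversions are strippable, because the widening
   multiply selected later recreates the extension from short to int
   as a by-product.  */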
1927 static bool
1928 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
1930 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1932 if (TREE_CODE (result_type) == INTEGER_TYPE)
1934 tree op_type;
1935 tree inner_op_type;
1937 if (!CONVERT_EXPR_CODE_P (rhs_code))
1938 return false;
1940 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
1942 /* If the type of OP has the same precision as the result, then
1943 we can strip this conversion. The multiply operation will be
1944 selected to create the correct extension as a by-product. */
1945 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
1946 return true;
1948 /* We can also strip a conversion if it preserves the signed-ness of
1949 the operation and doesn't narrow the range. */
1950 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
1952 /* If the inner-most type is unsigned, then we can strip any
1953 intermediate widening operation. If it's signed, then the
1954 intermediate widening operation must also be signed. */
1955 if ((TYPE_UNSIGNED (inner_op_type)
1956 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
1957 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
1958 return true;
1960 return false;
1963 return rhs_code == FIXED_CONVERT_EXPR;
1966 /* Return true if RHS is a suitable operand for a widening multiplication,
1967 assuming a target type of TYPE.
1968 There are two cases:
1970 - RHS makes some value at least twice as wide. Store that value
1971 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
1973 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1974 but leave *TYPE_OUT untouched. */
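/* Illustrative sketch (hypothetical operands): with TYPE a 32-bit int,
   an RHS defined as (int) c where c is a 16-bit short falls under the
   first case (c and its 16-bit type are stored), while an RHS that is
   simply the integer constant 3 falls under the second case and leaves
   *TYPE_OUT untouched.  */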
1976 static bool
1977 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
1978 tree *new_rhs_out)
1980 gimple *stmt;
1981 tree type1, rhs1;
1983 if (TREE_CODE (rhs) == SSA_NAME)
1985 stmt = SSA_NAME_DEF_STMT (rhs);
1986 if (is_gimple_assign (stmt))
1988 if (! widening_mult_conversion_strippable_p (type, stmt))
1989 rhs1 = rhs;
1990 else
1992 rhs1 = gimple_assign_rhs1 (stmt);
1994 if (TREE_CODE (rhs1) == INTEGER_CST)
1996 *new_rhs_out = rhs1;
1997 *type_out = NULL;
1998 return true;
2002 else
2003 rhs1 = rhs;
2005 type1 = TREE_TYPE (rhs1);
2007 if (TREE_CODE (type1) != TREE_CODE (type)
2008 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2009 return false;
2011 *new_rhs_out = rhs1;
2012 *type_out = type1;
2013 return true;
2016 if (TREE_CODE (rhs) == INTEGER_CST)
2018 *new_rhs_out = rhs;
2019 *type_out = NULL;
2020 return true;
2023 return false;
2026 /* Return true if STMT performs a widening multiplication, assuming the
2027 output type is TYPE. If so, store the unwidened types of the operands
2028 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2029 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2030 and *TYPE2_OUT would give the operands of the multiplication. */
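/* For example (a sketch with hypothetical types): for
     unsigned short a, b;
     unsigned int p = (unsigned int) a * (unsigned int) b;
   this returns true with *TYPE1_OUT == *TYPE2_OUT == unsigned short
   and *RHS1_OUT == a, *RHS2_OUT == b.  */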
2032 static bool
2033 is_widening_mult_p (gimple *stmt,
2034 tree *type1_out, tree *rhs1_out,
2035 tree *type2_out, tree *rhs2_out)
2037 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2039 if (TREE_CODE (type) != INTEGER_TYPE
2040 && TREE_CODE (type) != FIXED_POINT_TYPE)
2041 return false;
2043 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2044 rhs1_out))
2045 return false;
2047 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2048 rhs2_out))
2049 return false;
2051 if (*type1_out == NULL)
2053 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2054 return false;
2055 *type1_out = *type2_out;
2058 if (*type2_out == NULL)
2060 if (!int_fits_type_p (*rhs2_out, *type1_out))
2061 return false;
2062 *type2_out = *type1_out;
2065 /* Ensure that the larger of the two operands comes first. */
2066 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2068 std::swap (*type1_out, *type2_out);
2069 std::swap (*rhs1_out, *rhs2_out);
2072 return true;
2075 /* Check to see if the CALL statement is an invocation of copysign
2076 with 1.0 as its first argument. */
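/* E.g. a call such as copysign (1.0, y) (or the float/long double and
   internal-function variants) matches; copysign (x, y) with any other
   first argument does not.  */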
2077 static bool
2078 is_copysign_call_with_1 (gimple *call)
2080 gcall *c = dyn_cast <gcall *> (call);
2081 if (! c)
2082 return false;
2084 enum combined_fn code = gimple_call_combined_fn (c);
2086 if (code == CFN_LAST)
2087 return false;
2089 if (builtin_fn_p (code))
2091 switch (as_builtin_fn (code))
2093 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2094 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2095 return real_onep (gimple_call_arg (c, 0));
2096 default:
2097 return false;
2101 if (internal_fn_p (code))
2103 switch (as_internal_fn (code))
2105 case IFN_COPYSIGN:
2106 return real_onep (gimple_call_arg (c, 0));
2107 default:
2108 return false;
2112 return false;
2115 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2116 This is only done when the xorsign optab is defined; if the pattern
2117 does not match or expansion fails, FALSE is returned, otherwise
2118 TRUE is returned. */
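/* Illustrative sketch: with float x and y and a target providing the
   xorsign optab,
     tmp = __builtin_copysignf (1.0f, y);
     res = x * tmp;
   is rewritten into a single internal call res = IFN_XORSIGN (x, y).  */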
2119 static bool
2120 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2122 tree treeop0, treeop1, lhs, type;
2123 location_t loc = gimple_location (stmt);
2124 lhs = gimple_assign_lhs (stmt);
2125 treeop0 = gimple_assign_rhs1 (stmt);
2126 treeop1 = gimple_assign_rhs2 (stmt);
2127 type = TREE_TYPE (lhs);
2128 machine_mode mode = TYPE_MODE (type);
2130 if (HONOR_SNANS (type))
2131 return false;
2133 if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2135 gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2136 if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2138 call0 = SSA_NAME_DEF_STMT (treeop1);
2139 if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2140 return false;
2142 treeop1 = treeop0;
2144 if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2145 return false;
2147 gcall *c = as_a<gcall*> (call0);
2148 treeop0 = gimple_call_arg (c, 1);
2150 gcall *call_stmt
2151 = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2152 gimple_set_lhs (call_stmt, lhs);
2153 gimple_set_location (call_stmt, loc);
2154 gsi_replace (gsi, call_stmt, true);
2155 return true;
2158 return false;
2161 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2162 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2163 value is true iff we converted the statement. */
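/* A sketch of the rewrite (hypothetical 16/32-bit types): for
     short a, b;
     int r = (int) a * (int) b;
   the MULT_EXPR is turned in place into r = a WIDEN_MULT_EXPR b when
   the target provides a suitable [us]mul_widen optab.  */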
2165 static bool
2166 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2168 tree lhs, rhs1, rhs2, type, type1, type2;
2169 enum insn_code handler;
2170 scalar_int_mode to_mode, from_mode, actual_mode;
2171 optab op;
2172 int actual_precision;
2173 location_t loc = gimple_location (stmt);
2174 bool from_unsigned1, from_unsigned2;
2176 lhs = gimple_assign_lhs (stmt);
2177 type = TREE_TYPE (lhs);
2178 if (TREE_CODE (type) != INTEGER_TYPE)
2179 return false;
2181 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2182 return false;
2184 to_mode = SCALAR_INT_TYPE_MODE (type);
2185 from_mode = SCALAR_INT_TYPE_MODE (type1);
2186 if (to_mode == from_mode)
2187 return false;
2189 from_unsigned1 = TYPE_UNSIGNED (type1);
2190 from_unsigned2 = TYPE_UNSIGNED (type2);
2192 if (from_unsigned1 && from_unsigned2)
2193 op = umul_widen_optab;
2194 else if (!from_unsigned1 && !from_unsigned2)
2195 op = smul_widen_optab;
2196 else
2197 op = usmul_widen_optab;
2199 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2200 &actual_mode);
2202 if (handler == CODE_FOR_nothing)
2204 if (op != smul_widen_optab)
2206 /* We can use a signed multiply with unsigned types as long as
2207 there is a wider mode to use, or it is the smaller of the two
2208 types that is unsigned. Note that type1 >= type2, always. */
2209 if ((TYPE_UNSIGNED (type1)
2210 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2211 || (TYPE_UNSIGNED (type2)
2212 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2214 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2215 || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2216 return false;
2219 op = smul_widen_optab;
2220 handler = find_widening_optab_handler_and_mode (op, to_mode,
2221 from_mode,
2222 &actual_mode);
2224 if (handler == CODE_FOR_nothing)
2225 return false;
2227 from_unsigned1 = from_unsigned2 = false;
2229 else
2230 return false;
2233 /* Ensure that the inputs to the handler are in the correct precision
2234 for the opcode. This will be the full mode size. */
2235 actual_precision = GET_MODE_PRECISION (actual_mode);
2236 if (2 * actual_precision > TYPE_PRECISION (type))
2237 return false;
2238 if (actual_precision != TYPE_PRECISION (type1)
2239 || from_unsigned1 != TYPE_UNSIGNED (type1))
2240 rhs1 = build_and_insert_cast (gsi, loc,
2241 build_nonstandard_integer_type
2242 (actual_precision, from_unsigned1), rhs1);
2243 if (actual_precision != TYPE_PRECISION (type2)
2244 || from_unsigned2 != TYPE_UNSIGNED (type2))
2245 rhs2 = build_and_insert_cast (gsi, loc,
2246 build_nonstandard_integer_type
2247 (actual_precision, from_unsigned2), rhs2);
2249 /* Handle constants. */
2250 if (TREE_CODE (rhs1) == INTEGER_CST)
2251 rhs1 = fold_convert (type1, rhs1);
2252 if (TREE_CODE (rhs2) == INTEGER_CST)
2253 rhs2 = fold_convert (type2, rhs2);
2255 gimple_assign_set_rhs1 (stmt, rhs1);
2256 gimple_assign_set_rhs2 (stmt, rhs2);
2257 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2258 update_stmt (stmt);
2259 widen_mul_stats.widen_mults_inserted++;
2260 return true;
2263 /* Process a single gimple statement STMT, which is found at the
2264 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2265 rhs (given by CODE), and try to convert it into a
2266 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2267 is true iff we converted the statement. */
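/* A sketch of the rewrite (hypothetical types): for
     short a, b;  int acc;
     acc = acc + (int) a * (int) b;
   the PLUS_EXPR becomes acc = WIDEN_MULT_PLUS_EXPR <a, b, acc> when the
   target has a widening multiply-accumulate pattern for that mode.  */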
2269 static bool
2270 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2271 enum tree_code code)
2273 gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2274 gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2275 tree type, type1, type2, optype;
2276 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2277 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2278 optab this_optab;
2279 enum tree_code wmult_code;
2280 enum insn_code handler;
2281 scalar_mode to_mode, from_mode, actual_mode;
2282 location_t loc = gimple_location (stmt);
2283 int actual_precision;
2284 bool from_unsigned1, from_unsigned2;
2286 lhs = gimple_assign_lhs (stmt);
2287 type = TREE_TYPE (lhs);
2288 if (TREE_CODE (type) != INTEGER_TYPE
2289 && TREE_CODE (type) != FIXED_POINT_TYPE)
2290 return false;
2292 if (code == MINUS_EXPR)
2293 wmult_code = WIDEN_MULT_MINUS_EXPR;
2294 else
2295 wmult_code = WIDEN_MULT_PLUS_EXPR;
2297 rhs1 = gimple_assign_rhs1 (stmt);
2298 rhs2 = gimple_assign_rhs2 (stmt);
2300 if (TREE_CODE (rhs1) == SSA_NAME)
2302 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2303 if (is_gimple_assign (rhs1_stmt))
2304 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2307 if (TREE_CODE (rhs2) == SSA_NAME)
2309 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2310 if (is_gimple_assign (rhs2_stmt))
2311 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2314 /* Allow for one conversion statement between the multiply
2315 and addition/subtraction statement. If there is more than
2316 one conversion then we assume they would invalidate this
2317 transformation. If that's not the case then they should have
2318 been folded before now. */
2319 if (CONVERT_EXPR_CODE_P (rhs1_code))
2321 conv1_stmt = rhs1_stmt;
2322 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2323 if (TREE_CODE (rhs1) == SSA_NAME)
2325 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2326 if (is_gimple_assign (rhs1_stmt))
2327 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2329 else
2330 return false;
2332 if (CONVERT_EXPR_CODE_P (rhs2_code))
2334 conv2_stmt = rhs2_stmt;
2335 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2336 if (TREE_CODE (rhs2) == SSA_NAME)
2338 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2339 if (is_gimple_assign (rhs2_stmt))
2340 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2342 else
2343 return false;
2346 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2347 is_widening_mult_p, but we still need the operand values it returns.
2349 It might also appear that it would be sufficient to use the existing
2350 operands of the widening multiply, but that would limit the choice of
2351 multiply-and-accumulate instructions.
2353 If the widened-multiplication result has more than one use, it is
2354 probably wiser not to do the conversion. */
2355 if (code == PLUS_EXPR
2356 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2358 if (!has_single_use (rhs1)
2359 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2360 &type2, &mult_rhs2))
2361 return false;
2362 add_rhs = rhs2;
2363 conv_stmt = conv1_stmt;
2365 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2367 if (!has_single_use (rhs2)
2368 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2369 &type2, &mult_rhs2))
2370 return false;
2371 add_rhs = rhs1;
2372 conv_stmt = conv2_stmt;
2374 else
2375 return false;
2377 to_mode = SCALAR_TYPE_MODE (type);
2378 from_mode = SCALAR_TYPE_MODE (type1);
2379 if (to_mode == from_mode)
2380 return false;
2382 from_unsigned1 = TYPE_UNSIGNED (type1);
2383 from_unsigned2 = TYPE_UNSIGNED (type2);
2384 optype = type1;
2386 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2387 if (from_unsigned1 != from_unsigned2)
2389 if (!INTEGRAL_TYPE_P (type))
2390 return false;
2391 /* We can use a signed multiply with unsigned types as long as
2392 there is a wider mode to use, or it is the smaller of the two
2393 types that is unsigned. Note that type1 >= type2, always. */
2394 if ((from_unsigned1
2395 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2396 || (from_unsigned2
2397 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2399 if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2400 || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2401 return false;
2404 from_unsigned1 = from_unsigned2 = false;
2405 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2406 false);
2409 /* If there was a conversion between the multiply and addition
2410 then we need to make sure it fits a multiply-and-accumulate.
2411 There should be a single mode change which does not change the
2412 value. */
2413 if (conv_stmt)
2415 /* We use the original, unmodified data types for this. */
2416 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2417 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2418 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2419 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2421 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2423 /* Conversion is a truncate. */
2424 if (TYPE_PRECISION (to_type) < data_size)
2425 return false;
2427 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2429 /* Conversion is an extend. Check it's the right sort. */
2430 if (TYPE_UNSIGNED (from_type) != is_unsigned
2431 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2432 return false;
2434 /* else convert is a no-op for our purposes. */
2437 /* Verify that the machine can perform a widening multiply
2438 accumulate in this mode/signedness combination, otherwise
2439 this transformation is likely to pessimize code. */
2440 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2441 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2442 from_mode, &actual_mode);
2444 if (handler == CODE_FOR_nothing)
2445 return false;
2447 /* Ensure that the inputs to the handler are in the correct precision
2448 for the opcode. This will be the full mode size. */
2449 actual_precision = GET_MODE_PRECISION (actual_mode);
2450 if (actual_precision != TYPE_PRECISION (type1)
2451 || from_unsigned1 != TYPE_UNSIGNED (type1))
2452 mult_rhs1 = build_and_insert_cast (gsi, loc,
2453 build_nonstandard_integer_type
2454 (actual_precision, from_unsigned1),
2455 mult_rhs1);
2456 if (actual_precision != TYPE_PRECISION (type2)
2457 || from_unsigned2 != TYPE_UNSIGNED (type2))
2458 mult_rhs2 = build_and_insert_cast (gsi, loc,
2459 build_nonstandard_integer_type
2460 (actual_precision, from_unsigned2),
2461 mult_rhs2);
2463 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2464 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2466 /* Handle constants. */
2467 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2468 mult_rhs1 = fold_convert (type1, mult_rhs1);
2469 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2470 mult_rhs2 = fold_convert (type2, mult_rhs2);
2472 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
2473 add_rhs);
2474 update_stmt (gsi_stmt (*gsi));
2475 widen_mul_stats.maccs_inserted++;
2476 return true;
2479 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2480 with uses in additions and subtractions to form fused multiply-add
2481 operations. Returns true if successful and MUL_STMT should be removed. */
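/* A sketch of the transformation (assuming -ffp-contract=fast and an
   fma optab for the mode): for
     double a, b, c, d;
     tmp = a * b;
     d = tmp + c;
   the addition becomes d = FMA_EXPR <a, b, c> and the multiplication
   becomes dead.  Negates and subtractions are folded into the FMA
   operands in the same way.  */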
2483 static bool
2484 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
2486 tree mul_result = gimple_get_lhs (mul_stmt);
2487 tree type = TREE_TYPE (mul_result);
2488 gimple *use_stmt, *neguse_stmt;
2489 gassign *fma_stmt;
2490 use_operand_p use_p;
2491 imm_use_iterator imm_iter;
2493 if (FLOAT_TYPE_P (type)
2494 && flag_fp_contract_mode == FP_CONTRACT_OFF)
2495 return false;
2497 /* We don't want to do bitfield reduction ops. */
2498 if (INTEGRAL_TYPE_P (type)
2499 && !type_has_mode_precision_p (type))
2500 return false;
2502 /* If the target doesn't support it, don't generate it. We assume that
2503 if fma isn't available then fms, fnma or fnms are not either. */
2504 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2505 return false;
2507 /* If the multiplication has zero uses, it is kept around probably because
2508 of -fnon-call-exceptions. Don't optimize it away in that case,
2509 it is DCE's job. */
2510 if (has_zero_uses (mul_result))
2511 return false;
2513 /* Make sure that the multiplication statement becomes dead after
2514 the transformation, i.e. that all uses are transformed to FMAs.
2515 This means we assume that an FMA operation has the same cost
2516 as an addition. */
2517 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2519 enum tree_code use_code;
2520 tree result = mul_result;
2521 bool negate_p = false;
2523 use_stmt = USE_STMT (use_p);
2525 if (is_gimple_debug (use_stmt))
2526 continue;
2528 /* For now restrict these operations to single basic blocks. In theory
2529 we would want to support sinking the multiplication in
2530 m = a*b;
2531 if ()
2532 ma = m + c;
2533 else
2534 d = m;
2535 to form an fma in the then block and sink the multiplication to the
2536 else block. */
2537 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2538 return false;
2540 if (!is_gimple_assign (use_stmt))
2541 return false;
2543 use_code = gimple_assign_rhs_code (use_stmt);
2545 /* A negate on the multiplication leads to FNMA. */
2546 if (use_code == NEGATE_EXPR)
2548 ssa_op_iter iter;
2549 use_operand_p usep;
2551 result = gimple_assign_lhs (use_stmt);
2553 /* Make sure the negate statement becomes dead with this
2554 single transformation. */
2555 if (!single_imm_use (gimple_assign_lhs (use_stmt),
2556 &use_p, &neguse_stmt))
2557 return false;
2559 /* Make sure the multiplication isn't also used on that stmt. */
2560 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2561 if (USE_FROM_PTR (usep) == mul_result)
2562 return false;
2564 /* Re-validate. */
2565 use_stmt = neguse_stmt;
2566 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2567 return false;
2568 if (!is_gimple_assign (use_stmt))
2569 return false;
2571 use_code = gimple_assign_rhs_code (use_stmt);
2572 negate_p = true;
2575 switch (use_code)
2577 case MINUS_EXPR:
2578 if (gimple_assign_rhs2 (use_stmt) == result)
2579 negate_p = !negate_p;
2580 break;
2581 case PLUS_EXPR:
2582 break;
2583 default:
2584 /* FMA can only be formed from PLUS and MINUS. */
2585 return false;
2588 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
2589 by a MULT_EXPR that we'll visit later, we might be able to
2590 get a more profitable match with fnma.
2591 OTOH, if we don't, a negate / fma pair has likely lower latency
2592 than a mult / subtract pair. */
2593 if (use_code == MINUS_EXPR && !negate_p
2594 && gimple_assign_rhs1 (use_stmt) == result
2595 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
2596 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
2598 tree rhs2 = gimple_assign_rhs2 (use_stmt);
2600 if (TREE_CODE (rhs2) == SSA_NAME)
2602 gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
2603 if (has_single_use (rhs2)
2604 && is_gimple_assign (stmt2)
2605 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
2606 return false;
2610 /* We can't handle a * b + a * b. */
2611 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2612 return false;
2614 /* While it is possible to validate whether or not the exact form
2615 that we've recognized is available in the backend, the assumption
2616 is that the transformation is never a loss. For instance, suppose
2617 the target only has the plain FMA pattern available. Consider
2618 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2619 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
2620 still have 3 operations, but in the FMA form the two NEGs are
2621 independent and could be run in parallel. */
2624 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2626 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2627 enum tree_code use_code;
2628 tree addop, mulop1 = op1, result = mul_result;
2629 bool negate_p = false;
2631 if (is_gimple_debug (use_stmt))
2632 continue;
2634 use_code = gimple_assign_rhs_code (use_stmt);
2635 if (use_code == NEGATE_EXPR)
2637 result = gimple_assign_lhs (use_stmt);
2638 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2639 gsi_remove (&gsi, true);
2640 release_defs (use_stmt);
2642 use_stmt = neguse_stmt;
2643 gsi = gsi_for_stmt (use_stmt);
2644 use_code = gimple_assign_rhs_code (use_stmt);
2645 negate_p = true;
2648 if (gimple_assign_rhs1 (use_stmt) == result)
2650 addop = gimple_assign_rhs2 (use_stmt);
2651 /* a * b - c -> a * b + (-c) */
2652 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2653 addop = force_gimple_operand_gsi (&gsi,
2654 build1 (NEGATE_EXPR,
2655 type, addop),
2656 true, NULL_TREE, true,
2657 GSI_SAME_STMT);
2659 else
2661 addop = gimple_assign_rhs1 (use_stmt);
2662 /* a - b * c -> (-b) * c + a */
2663 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2664 negate_p = !negate_p;
2667 if (negate_p)
2668 mulop1 = force_gimple_operand_gsi (&gsi,
2669 build1 (NEGATE_EXPR,
2670 type, mulop1),
2671 true, NULL_TREE, true,
2672 GSI_SAME_STMT);
2674 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
2675 FMA_EXPR, mulop1, op2, addop);
2676 gsi_replace (&gsi, fma_stmt, true);
2677 widen_mul_stats.fmas_inserted++;
2680 return true;
2684 /* Helper function of match_uaddsub_overflow. Return 1
2685 if USE_STMT is an unsigned overflow check ovf != 0 for
2686 STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
2687 and 0 otherwise. */
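/* For instance, with STMT being r = a + b (unsigned), a USE_STMT of
   the form if (r < a) is an overflow check and yields 1, if (r >= a)
   yields -1, and any unrelated use yields 0.  */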
2689 static int
2690 uaddsub_overflow_check_p (gimple *stmt, gimple *use_stmt)
2692 enum tree_code ccode = ERROR_MARK;
2693 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
2694 if (gimple_code (use_stmt) == GIMPLE_COND)
2696 ccode = gimple_cond_code (use_stmt);
2697 crhs1 = gimple_cond_lhs (use_stmt);
2698 crhs2 = gimple_cond_rhs (use_stmt);
2700 else if (is_gimple_assign (use_stmt))
2702 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
2704 ccode = gimple_assign_rhs_code (use_stmt);
2705 crhs1 = gimple_assign_rhs1 (use_stmt);
2706 crhs2 = gimple_assign_rhs2 (use_stmt);
2708 else if (gimple_assign_rhs_code (use_stmt) == COND_EXPR)
2710 tree cond = gimple_assign_rhs1 (use_stmt);
2711 if (COMPARISON_CLASS_P (cond))
2713 ccode = TREE_CODE (cond);
2714 crhs1 = TREE_OPERAND (cond, 0);
2715 crhs2 = TREE_OPERAND (cond, 1);
2717 else
2718 return 0;
2720 else
2721 return 0;
2723 else
2724 return 0;
2726 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
2727 return 0;
2729 enum tree_code code = gimple_assign_rhs_code (stmt);
2730 tree lhs = gimple_assign_lhs (stmt);
2731 tree rhs1 = gimple_assign_rhs1 (stmt);
2732 tree rhs2 = gimple_assign_rhs2 (stmt);
2734 switch (ccode)
2736 case GT_EXPR:
2737 case LE_EXPR:
2738 /* r = a - b; r > a or r <= a
2739 r = a + b; a > r or a <= r or b > r or b <= r. */
2740 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
2741 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
2742 && crhs2 == lhs))
2743 return ccode == GT_EXPR ? 1 : -1;
2744 break;
2745 case LT_EXPR:
2746 case GE_EXPR:
2747 /* r = a - b; a < r or a >= r
2748 r = a + b; r < a or r >= a or r < b or r >= b. */
2749 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
2750 || (code == PLUS_EXPR && crhs1 == lhs
2751 && (crhs2 == rhs1 || crhs2 == rhs2)))
2752 return ccode == LT_EXPR ? 1 : -1;
2753 break;
2754 default:
2755 break;
2757 return 0;
2760 /* Recognize for unsigned x
2761 x = y - z;
2762 if (x > y)
2763 where there are other uses of x and replace it with
2764 _7 = SUB_OVERFLOW (y, z);
2765 x = REALPART_EXPR <_7>;
2766 _8 = IMAGPART_EXPR <_7>;
2767 if (_8)
2768 and similarly for addition. */
2770 static bool
2771 match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
2772 enum tree_code code)
2774 tree lhs = gimple_assign_lhs (stmt);
2775 tree type = TREE_TYPE (lhs);
2776 use_operand_p use_p;
2777 imm_use_iterator iter;
2778 bool use_seen = false;
2779 bool ovf_use_seen = false;
2780 gimple *use_stmt;
2782 gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
2783 if (!INTEGRAL_TYPE_P (type)
2784 || !TYPE_UNSIGNED (type)
2785 || has_zero_uses (lhs)
2786 || has_single_use (lhs)
2787 || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
2788 TYPE_MODE (type)) == CODE_FOR_nothing)
2789 return false;
2791 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
2793 use_stmt = USE_STMT (use_p);
2794 if (is_gimple_debug (use_stmt))
2795 continue;
2797 if (uaddsub_overflow_check_p (stmt, use_stmt))
2798 ovf_use_seen = true;
2799 else
2800 use_seen = true;
2801 if (ovf_use_seen && use_seen)
2802 break;
2805 if (!ovf_use_seen || !use_seen)
2806 return false;
2808 tree ctype = build_complex_type (type);
2809 tree rhs1 = gimple_assign_rhs1 (stmt);
2810 tree rhs2 = gimple_assign_rhs2 (stmt);
2811 gcall *g = gimple_build_call_internal (code == PLUS_EXPR
2812 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
2813 2, rhs1, rhs2);
2814 tree ctmp = make_ssa_name (ctype);
2815 gimple_call_set_lhs (g, ctmp);
2816 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2817 gassign *g2 = gimple_build_assign (lhs, REALPART_EXPR,
2818 build1 (REALPART_EXPR, type, ctmp));
2819 gsi_replace (gsi, g2, true);
2820 tree ovf = make_ssa_name (type);
2821 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
2822 build1 (IMAGPART_EXPR, type, ctmp));
2823 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
2825 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2827 if (is_gimple_debug (use_stmt))
2828 continue;
2830 int ovf_use = uaddsub_overflow_check_p (stmt, use_stmt);
2831 if (ovf_use == 0)
2832 continue;
2833 if (gimple_code (use_stmt) == GIMPLE_COND)
2835 gcond *cond_stmt = as_a <gcond *> (use_stmt);
2836 gimple_cond_set_lhs (cond_stmt, ovf);
2837 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
2838 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
2840 else
2842 gcc_checking_assert (is_gimple_assign (use_stmt));
2843 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
2845 gimple_assign_set_rhs1 (use_stmt, ovf);
2846 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
2847 gimple_assign_set_rhs_code (use_stmt,
2848 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
2850 else
2852 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
2853 == COND_EXPR);
2854 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
2855 boolean_type_node, ovf,
2856 build_int_cst (type, 0));
2857 gimple_assign_set_rhs1 (use_stmt, cond);
2860 update_stmt (use_stmt);
2862 return true;
2865 /* Return true if target has support for divmod. */
2867 static bool
2868 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
2870 /* If target supports hardware divmod insn, use it for divmod. */
2871 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
2872 return true;
2874 /* Check if libfunc for divmod is available. */
2875 rtx libfunc = optab_libfunc (divmod_optab, mode);
2876 if (libfunc != NULL_RTX)
2878 /* If optab_handler exists for div_optab, perhaps in a wider mode,
2879 we don't want to use the libfunc even if it exists for the given mode. */
2880 machine_mode div_mode;
2881 FOR_EACH_MODE_FROM (div_mode, mode)
2882 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
2883 return false;
2885 return targetm.expand_divmod_libfunc != NULL;
2888 return false;
2891 /* Check if stmt is a candidate for the divmod transform. */
2893 static bool
2894 divmod_candidate_p (gassign *stmt)
2896 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2897 machine_mode mode = TYPE_MODE (type);
2898 optab divmod_optab, div_optab;
2900 if (TYPE_UNSIGNED (type))
2902 divmod_optab = udivmod_optab;
2903 div_optab = udiv_optab;
2905 else
2907 divmod_optab = sdivmod_optab;
2908 div_optab = sdiv_optab;
2911 tree op1 = gimple_assign_rhs1 (stmt);
2912 tree op2 = gimple_assign_rhs2 (stmt);
2914 /* Disable the transform if either is a constant, since division-by-constant
2915 may have specialized expansion. */
2916 if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2))
2917 return false;
2919 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
2920 expand using the [su]divv optabs. */
2921 if (TYPE_OVERFLOW_TRAPS (type))
2922 return false;
2924 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
2925 return false;
2927 return true;
2930 /* This function looks for:
2931 t1 = a TRUNC_DIV_EXPR b;
2932 t2 = a TRUNC_MOD_EXPR b;
2933 and transforms it to the following sequence:
2934 complex_tmp = DIVMOD (a, b);
2935 t1 = REALPART_EXPR(complex_tmp);
2936 t2 = IMAGPART_EXPR(complex_tmp);
2937 For conditions enabling the transform see divmod_candidate_p().
2939 The pass has three parts:
2940 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
2941 other trunc_div_expr and trunc_mod_expr stmts.
2942 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
2943 to stmts vector.
2944 3) Insert DIVMOD call just before top_stmt and update entries in
2945 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
2946 IMAGPART_EXPR for mod). */
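/* At the source level this corresponds to (an illustrative example)
     q = a / b;
     r = a % b;
   with the same operands, both being rewritten to use one DIVMOD
   internal call whose real part feeds q and whose imaginary part
   feeds r.  */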
2948 static bool
2949 convert_to_divmod (gassign *stmt)
2951 if (stmt_can_throw_internal (stmt)
2952 || !divmod_candidate_p (stmt))
2953 return false;
2955 tree op1 = gimple_assign_rhs1 (stmt);
2956 tree op2 = gimple_assign_rhs2 (stmt);
2958 imm_use_iterator use_iter;
2959 gimple *use_stmt;
2960 auto_vec<gimple *> stmts;
2962 gimple *top_stmt = stmt;
2963 basic_block top_bb = gimple_bb (stmt);
2965 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
2966 at least stmt and possibly other trunc_div/trunc_mod stmts
2967 having same operands as stmt. */
2969 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
2971 if (is_gimple_assign (use_stmt)
2972 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
2973 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
2974 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
2975 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
2977 if (stmt_can_throw_internal (use_stmt))
2978 continue;
2980 basic_block bb = gimple_bb (use_stmt);
2982 if (bb == top_bb)
2984 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
2985 top_stmt = use_stmt;
2987 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
2989 top_bb = bb;
2990 top_stmt = use_stmt;
2995 tree top_op1 = gimple_assign_rhs1 (top_stmt);
2996 tree top_op2 = gimple_assign_rhs2 (top_stmt);
2998 stmts.safe_push (top_stmt);
2999 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
3001 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
3002 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
3003 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
3004 2nd loop ends up adding at least a single trunc_mod_expr stmt. */
3006 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
3008 if (is_gimple_assign (use_stmt)
3009 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
3010 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
3011 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
3012 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
3014 if (use_stmt == top_stmt
3015 || stmt_can_throw_internal (use_stmt)
3016 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
3017 continue;
3019 stmts.safe_push (use_stmt);
3020 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
3021 div_seen = true;
3025 if (!div_seen)
3026 return false;
3028 /* Part 3: Create a call to the internal fn DIVMOD:
3029 divmod_tmp = DIVMOD (op1, op2). */
3031 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
3032 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
3033 call_stmt, "divmod_tmp");
3034 gimple_call_set_lhs (call_stmt, res);
3035 /* We rejected throwing statements above. */
3036 gimple_call_set_nothrow (call_stmt, true);
3038 /* Insert the call before top_stmt. */
3039 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
3040 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
3042 widen_mul_stats.divmod_calls_inserted++;
3044 /* Update all statements in stmts vector:
3045 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
3046 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
3048 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
3050 tree new_rhs;
3052 switch (gimple_assign_rhs_code (use_stmt))
3054 case TRUNC_DIV_EXPR:
3055 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
3056 break;
3058 case TRUNC_MOD_EXPR:
3059 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
3060 break;
3062 default:
3063 gcc_unreachable ();
3066 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3067 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
3068 update_stmt (use_stmt);
3071 return true;
3074 /* Find integer multiplications where the operands are extended from
3075 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3076 where appropriate. */
3078 namespace {
3080 const pass_data pass_data_optimize_widening_mul =
3082 GIMPLE_PASS, /* type */
3083 "widening_mul", /* name */
3084 OPTGROUP_NONE, /* optinfo_flags */
3085 TV_NONE, /* tv_id */
3086 PROP_ssa, /* properties_required */
3087 0, /* properties_provided */
3088 0, /* properties_destroyed */
3089 0, /* todo_flags_start */
3090 TODO_update_ssa, /* todo_flags_finish */
3093 class pass_optimize_widening_mul : public gimple_opt_pass
3095 public:
3096 pass_optimize_widening_mul (gcc::context *ctxt)
3097 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3100 /* opt_pass methods: */
3101 virtual bool gate (function *)
3103 return flag_expensive_optimizations && optimize;
3106 virtual unsigned int execute (function *);
3108 }; // class pass_optimize_widening_mul
3110 unsigned int
3111 pass_optimize_widening_mul::execute (function *fun)
3113 basic_block bb;
3114 bool cfg_changed = false;
3116 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3117 calculate_dominance_info (CDI_DOMINATORS);
3118 renumber_gimple_stmt_uids ();
3120 FOR_EACH_BB_FN (bb, fun)
3122 gimple_stmt_iterator gsi;
3124 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3126 gimple *stmt = gsi_stmt (gsi);
3127 enum tree_code code;
3129 if (is_gimple_assign (stmt))
3131 code = gimple_assign_rhs_code (stmt);
3132 switch (code)
3134 case MULT_EXPR:
3135 if (!convert_mult_to_widen (stmt, &gsi)
3136 && !convert_expand_mult_copysign (stmt, &gsi)
3137 && convert_mult_to_fma (stmt,
3138 gimple_assign_rhs1 (stmt),
3139 gimple_assign_rhs2 (stmt)))
3141 gsi_remove (&gsi, true);
3142 release_defs (stmt);
3143 continue;
3145 break;
3147 case PLUS_EXPR:
3148 case MINUS_EXPR:
3149 if (!convert_plusminus_to_widen (&gsi, stmt, code))
3150 match_uaddsub_overflow (&gsi, stmt, code);
3151 break;
3153 case TRUNC_MOD_EXPR:
3154 convert_to_divmod (as_a<gassign *> (stmt));
3155 break;
3157 default:;
3160 else if (is_gimple_call (stmt)
3161 && gimple_call_lhs (stmt))
3163 tree fndecl = gimple_call_fndecl (stmt);
3164 if (fndecl
3165 && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
3167 switch (DECL_FUNCTION_CODE (fndecl))
3169 case BUILT_IN_POWF:
3170 case BUILT_IN_POW:
3171 case BUILT_IN_POWL:
3172 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3173 && real_equal
3174 (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3175 &dconst2)
3176 && convert_mult_to_fma (stmt,
3177 gimple_call_arg (stmt, 0),
3178 gimple_call_arg (stmt, 0)))
3180 unlink_stmt_vdef (stmt);
3181 if (gsi_remove (&gsi, true)
3182 && gimple_purge_dead_eh_edges (bb))
3183 cfg_changed = true;
3184 release_defs (stmt);
3185 continue;
3187 break;
3189 default:;
3193 gsi_next (&gsi);
3197 statistics_counter_event (fun, "widening multiplications inserted",
3198 widen_mul_stats.widen_mults_inserted);
3199 statistics_counter_event (fun, "widening maccs inserted",
3200 widen_mul_stats.maccs_inserted);
3201 statistics_counter_event (fun, "fused multiply-adds inserted",
3202 widen_mul_stats.fmas_inserted);
3203 statistics_counter_event (fun, "divmod calls inserted",
3204 widen_mul_stats.divmod_calls_inserted);
3206 return cfg_changed ? TODO_cleanup_cfg : 0;
3209 } // anon namespace
3211 gimple_opt_pass *
3212 make_pass_optimize_widening_mul (gcc::context *ctxt)
3214 return new pass_optimize_widening_mul (ctxt);