/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   effective to optimize two divisions by the same number.  We make
   this a param, and it shall be called N in the remainder of this
   comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.c, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
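
/* As a small illustration (an editorial sketch, not taken from the original
   sources), suppose block BB3 is followed by BB4, both compute a division by
   the same SSA name D, and the target's threshold N is 2.  Both divisions
   post-dominate BB3, so a single reciprocal is inserted there, right before
   the first division, and both uses are rewritten:

	BB3:  recip_D = 1.0 / D;
	      a = b * recip_D;	   (was a = b / D)
	BB4:  c = e * recip_D;	   (was c = e / D)

   If trapping math is active, the reciprocal may only be placed in a block
   that already contains a division; here BB3 qualifies, so the result is
   the same.  */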
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "flags.h"
#include "tree.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "target.h"
#include "gimple-pretty-print.h"
#include "builtins.h"

/* FIXME: RTL headers have to be included here for optabs.  */
#include "rtl.h"		/* Because optabs.h wants enum rtx_code.  */
#include "expr.h"		/* Because optabs.h wants sepops.  */
#include "insn-codes.h"
#include "optabs.h"
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static alloc_pool occ_pool;
/* Allocate and return a new struct occurrence for basic block BB, and
   whose children list is headed by CHILDREN.  */
static struct occurrence *
occ_new (basic_block bb, struct occurrence *children)
{
  struct occurrence *occ;

  bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
  memset (occ, 0, sizeof (struct occurrence));

  occ->bb = bb;
  occ->children = children;
  return occ;
}


/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}

/* Register that we found a division in BB.  */

static inline void
register_division_in (basic_block bb)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = occ_new (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions++;
}
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}


/* Return whether USE_STMT is a floating-point division by DEF.  */
static inline bool
is_division_by (gimple use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as a valid division, as we are getting
	    confused later by replacing all immediate uses of x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
					       build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}
/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}


/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  occ->bb->aux = NULL;
  pool_free (occ_pool, occ);

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals
unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = create_alloc_pool ("dominators for recip",
				sizeof (struct occurrence),
				n_basic_blocks_for_fn (fun) / 3 + 1);

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

#ifdef ENABLE_CHECKING
  FOR_EACH_BB_FN (bb, fun)
    gcc_assert (!bb->aux);
#endif

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  free_alloc_pool (occ_pool);
  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple> *stmts,
		     basic_block *top_bb, gimple use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}

/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */
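
/* For example (an editorial illustration, not lifted from the testsuite),
   when the C library provides cexpi or sincos, a fragment such as

	s = sinf (x);
	c = cosf (x);

   is rewritten so that both values come from a single cexpi call, roughly:

	sincostmp = __builtin_cexpif (x);
	s = IMAGPART_EXPR <sincostmp>;
	c = REALPART_EXPR <sincostmp>;  */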
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple def_stmt, use_stmt, stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  vec<gimple> stmts = vNULL;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt)
	  || !(fndecl = gimple_call_fndecl (use_stmt))
	  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
	continue;

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  if (seen_cos + seen_sin + seen_cexpi <= 1)
    {
      stmts.release ();
      return false;
    }

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL;
      fndecl = gimple_call_fndecl (use_stmt);

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  stmts.release ();

  return cfg_changed;
}
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits, of the window used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
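
/* As a worked example (illustrative, not part of the original comments),
   powi(x,13) is expanded using the table above via

	powi_table[13] = 10, powi_table[10] = 5, powi_table[5] = 3,
	powi_table[3] = 2,

   giving the addition chain x^2 = x*x, x^3 = x^2*x, x^5 = x^3*x^2,
   x^10 = x^5*x^5, x^13 = x^10*x^3, i.e. five multiplications, which is
   also the value powi_cost below computes for n = 13.  */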
/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}

/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = (n < 0) ? -n : n;

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_real (type, dconst1);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
					   build_real (type, dconst1),
					   result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}

/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  /* Avoid largest negative number.  */
  if (n != -n
      && ((n >= -1 && n <= 2)
	  || (optimize_function_for_speed_p (cfun)
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name for variable
   TARGET.  Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

/* Build a gimple reference operation with the given CODE and argument
   ARG, assigning the result to a new SSA name of TYPE with NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
		      const char *name, enum tree_code code, tree arg0)
{
  tree result = make_temp_ssa_name (type, NULL, name);
  gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}

/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type, NULL);
  gassign *stmt
    = gimple_build_assign_with_ops (NOP_EXPR, result, val, NULL_TREE);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
  return result;
}
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && optimize_bb_for_speed_p (gsi_bb (*gsi))
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && REAL_VALUES_EQUAL (c, dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
     a builtin sqrt instruction is smaller than a call to pow with 0.25,
     so do this optimization even if -Os.  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && REAL_VALUES_EQUAL (c, dconst1_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
     optimizing for space.  Don't do this optimization if we don't have
     a hardware sqrt insn.  */
  real_from_integer (&dconst3_4, VOIDmode, 3, SIGNED);
  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && optimize_function_for_speed_p (cfun)
      && REAL_VALUES_EQUAL (c, dconst3_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x))  */
      sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);

      /* sqrt(x) * sqrt(sqrt(x))  */
      return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				     sqrt_arg0, sqrt_sqrt);
    }

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && REAL_VALUES_EQUAL (c, dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && optimize_function_for_speed_p (cfun)
      && hw_sqrt_exists
      && REAL_VALUES_EQUAL (c, dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,c), where n = 2c for some nonzero integer n
     and c not an integer, into

       sqrt(x) * powi(x, n/2),                n > 0;
       1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.

     Do not calculate the powi factor when n/2 = 0.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && c2_is_int
      && !c_is_int
      && optimize_function_for_speed_p (cfun))
    {
      tree powi_x_ndiv2 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 n is 1 or -1, where the result is always 1.  */
      if (absu_hwi (n) != 1)
	{
	  powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 2));
	  if (!powi_x_ndiv2)
	    return NULL_TREE;
	}

      /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
	 result of the optimal multiply sequence just calculated.  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      if (absu_hwi (n) == 1)
	result = sqrt_arg0;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 sqrt_arg0, powi_x_ndiv2);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);
      return result;
    }

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
/* ARG is the argument to a cabs builtin call in GSI with location info
   LOC.  Create a sequence of statements prior to GSI that calculates
   sqrt(R*R + I*I), where R and I are the real and imaginary components
   of ARG, respectively.  Return an expression holding the result.  */

static tree
gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
{
  tree real_part, imag_part, addend1, addend2, sum, result;
  tree type = TREE_TYPE (TREE_TYPE (arg));
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  machine_mode mode = TYPE_MODE (type);

  if (!flag_unsafe_math_optimizations
      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
      || !sqrtfn
      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
    return NULL_TREE;

  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    REALPART_EXPR, arg);
  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    real_part, real_part);
  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    IMAGPART_EXPR, arg);
  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    imag_part, imag_part);
  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
  result = build_and_insert_call (gsi, loc, sqrtfn, sum);

  return result;
}
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  */

namespace {

const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */
      return optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos
unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt)
	      && (fndecl = gimple_call_fndecl (stmt))
	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (DECL_FUNCTION_CODE (fndecl))
		{
		CASE_FLT_FN (BUILT_IN_COS):
		CASE_FLT_FN (BUILT_IN_SIN):
		CASE_FLT_FN (BUILT_IN_CEXPI):
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex)
		      && !targetm.libc_has_function (function_sincos))
		    break;

		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_FLT_FN (BUILT_IN_POW):
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_FLT_FN (BUILT_IN_POWI):
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      tree t0, t1, cond, one, minus_one;
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, cond,
							   arg1,
							   build_int_cst (t1,
									  1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign_with_ops (COND_EXPR, result,
							   cond,
							   minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_FLT_FN (BUILT_IN_CABS):
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);

  free_dominance_info (CDI_DOMINATORS);
  return cfg_changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of octet sized markers:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the target byte index minus one.

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with a base address (the array or structure the
   load is made from), an offset from the base address and a range which gives
   the difference between the highest and lowest accessed memory location to
   make such a symbolic number.  The range is thus different from size, which
   reflects the size of the type of the current expression.  Note that for a
   non-memory source, range holds the same value as size.

   For instance, for an array char a[], (short) a[0] | (short) a[3] would have
   a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
   still have a size of 2 but this time a range of 1.  */

struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  HOST_WIDE_INT bytepos;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
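
/* For illustration (an editorial note, not part of the original comments):
   a plain 32-bit load yields the symbolic number 0x04030201, i.e. CMPNOP
   reduced to four markers, whereas a hand-written 32-bit byte swap such as

	(x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24)

   yields 0x01020304, i.e. CMPXCHG reduced to four markers, and can therefore
   be rewritten to a __builtin_bswap32 when the target supports it.  */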
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

static inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}

/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

static inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

static bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

bool
find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, volatilep;
  tree offset, base_addr;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &volatilep, false);

  if (TREE_CODE (base_addr) == MEM_REF)
    {
      offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  offset_int boff, coff = mem_ref_offset (base_addr);
	  boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (wi::neg_p (bit_offset))
	{
	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
	  offset_int tem = bit_offset.and_not (mask);
	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
	     Subtract it to BIT_OFFSET and add it (scaled) to OFFSET.  */
	  bit_offset -= tem;
	  tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset,
				 wide_int_to_tree (sizetype, tem));
	  else
	    offset = wide_int_to_tree (sizetype, tem);
	}

      bitpos += bit_offset.to_shwi ();
    }

  if (bitpos % BITS_PER_UNIT)
    return false;
  if (bitsize % BITS_PER_UNIT)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bitpos / BITS_PER_UNIT;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
1830 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
1831 the operation given by the rhs of STMT on the result. If the operation
1832 can be performed, the function returns a gimple stmt whose rhs's first
1833 tree is the expression of the source operand; otherwise it returns
1834 NULL. */
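/* As an illustrative sketch of the symbolic evaluation done below (values
   assumed, not taken from this file): starting from the 32-bit symbolic
   number 0x04030201 for X, evaluating X << 8 gives 0x03020100 (the low
   byte becomes zero) and, for unsigned X, X >> 24 gives 0x00000004;
   OR-ing such byte-aligned pieces then merges the markers.  */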
1836 static gimple
1837 find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
1839 enum tree_code code;
1840 tree rhs1, rhs2 = NULL;
1841 gimple rhs1_stmt, rhs2_stmt, source_stmt1;
1842 enum gimple_rhs_class rhs_class;
1844 if (!limit || !is_gimple_assign (stmt))
1845 return NULL;
1847 rhs1 = gimple_assign_rhs1 (stmt);
1849 if (find_bswap_or_nop_load (stmt, rhs1, n))
1850 return stmt;
1852 if (TREE_CODE (rhs1) != SSA_NAME)
1853 return NULL;
1855 code = gimple_assign_rhs_code (stmt);
1856 rhs_class = gimple_assign_rhs_class (stmt);
1857 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1859 if (rhs_class == GIMPLE_BINARY_RHS)
1860 rhs2 = gimple_assign_rhs2 (stmt);
1862 /* Handle unary rhs and binary rhs with integer constants as second
1863 operand. */
1865 if (rhs_class == GIMPLE_UNARY_RHS
1866 || (rhs_class == GIMPLE_BINARY_RHS
1867 && TREE_CODE (rhs2) == INTEGER_CST))
1869 if (code != BIT_AND_EXPR
1870 && code != LSHIFT_EXPR
1871 && code != RSHIFT_EXPR
1872 && code != LROTATE_EXPR
1873 && code != RROTATE_EXPR
1874 && !CONVERT_EXPR_CODE_P (code))
1875 return NULL;
1877 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
1879 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
1880 we have to initialize the symbolic number. */
1881 if (!source_stmt1)
1883 if (gimple_assign_load_p (stmt)
1884 || !init_symbolic_number (n, rhs1))
1885 return NULL;
1886 source_stmt1 = stmt;
1889 switch (code)
1891 case BIT_AND_EXPR:
1893 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1894 uint64_t val = int_cst_value (rhs2), mask = 0;
1895 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
1897 /* Only constants masking full bytes are allowed (e.g. 0xff00ff00 but not 0x0ff0). */
1898 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
1899 if ((val & tmp) != 0 && (val & tmp) != tmp)
1900 return NULL;
1901 else if (val & tmp)
1902 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
1904 n->n &= mask;
1906 break;
1907 case LSHIFT_EXPR:
1908 case RSHIFT_EXPR:
1909 case LROTATE_EXPR:
1910 case RROTATE_EXPR:
1911 if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
1912 return NULL;
1913 break;
1914 CASE_CONVERT:
1916 int i, type_size, old_type_size;
1917 tree type;
1919 type = gimple_expr_type (stmt);
1920 type_size = TYPE_PRECISION (type);
1921 if (type_size % BITS_PER_UNIT != 0)
1922 return NULL;
1923 type_size /= BITS_PER_UNIT;
1924 if (type_size > 64 / BITS_PER_MARKER)
1925 return NULL;
1927 /* Sign extension: result is dependent on the value. */
1928 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1929 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
1930 && HEAD_MARKER (n->n, old_type_size))
1931 for (i = 0; i < type_size - old_type_size; i++)
1932 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
1933 << ((type_size - 1 - i) * BITS_PER_MARKER);
1935 if (type_size < 64 / BITS_PER_MARKER)
1937 /* If STMT casts to a smaller type, mask out the bits not
1938 belonging to the target type. */
1939 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
1941 n->type = type;
1942 if (!n->base_addr)
1943 n->range = type_size;
1945 break;
1946 default:
1947 return NULL;
1949 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
1952 /* Handle binary rhs. */
1954 if (rhs_class == GIMPLE_BINARY_RHS)
1956 int i, size;
1957 struct symbolic_number n1, n2;
1958 uint64_t mask;
1959 gimple source_stmt2;
1961 if (code != BIT_IOR_EXPR)
1962 return NULL;
1964 if (TREE_CODE (rhs2) != SSA_NAME)
1965 return NULL;
1967 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1969 switch (code)
1971 case BIT_IOR_EXPR:
1972 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
1974 if (!source_stmt1)
1975 return NULL;
1977 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
1979 if (!source_stmt2)
1980 return NULL;
1982 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
1983 return NULL;
1985 if (!n1.vuse != !n2.vuse ||
1986 (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
1987 return NULL;
1989 if (gimple_assign_rhs1 (source_stmt1)
1990 != gimple_assign_rhs1 (source_stmt2))
1992 int64_t inc;
1993 HOST_WIDE_INT off_sub;
1994 struct symbolic_number *n_ptr;
1996 if (!n1.base_addr || !n2.base_addr
1997 || !operand_equal_p (n1.base_addr, n2.base_addr, 0))
1998 return NULL;
1999 if (!n1.offset != !n2.offset ||
2000 (n1.offset && !operand_equal_p (n1.offset, n2.offset, 0)))
2001 return NULL;
2003 /* We swap n1 with n2 to have n1 < n2. */
2004 if (n2.bytepos < n1.bytepos)
2006 struct symbolic_number tmpn;
2008 tmpn = n2;
2009 n2 = n1;
2010 n1 = tmpn;
2011 source_stmt1 = source_stmt2;
2014 off_sub = n2.bytepos - n1.bytepos;
2016 /* Check that the range of memory covered can be represented by
2017 a symbolic number. */
2018 if (off_sub + n2.range > 64 / BITS_PER_MARKER)
2019 return NULL;
2020 n->range = n2.range + off_sub;
2022 /* Re-interpret the byte markers in the symbolic number holding the
2023 value of larger weight according to the target endianness. */
2024 inc = BYTES_BIG_ENDIAN ? off_sub + n2.range - n1.range : off_sub;
2025 size = TYPE_PRECISION (n1.type) / BITS_PER_UNIT;
2026 if (BYTES_BIG_ENDIAN)
2027 n_ptr = &n1;
2028 else
2029 n_ptr = &n2;
2030 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2032 unsigned marker =
2033 (n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2034 if (marker && marker != MARKER_BYTE_UNKNOWN)
2035 n_ptr->n += inc;
2038 else
2039 n->range = n1.range;
2041 if (!n1.alias_set
2042 || alias_ptr_types_compatible_p (n1.alias_set, n2.alias_set))
2043 n->alias_set = n1.alias_set;
2044 else
2045 n->alias_set = ptr_type_node;
2046 n->vuse = n1.vuse;
2047 n->base_addr = n1.base_addr;
2048 n->offset = n1.offset;
2049 n->bytepos = n1.bytepos;
2050 n->type = n1.type;
2051 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2052 for (i = 0, mask = MARKER_MASK; i < size;
2053 i++, mask <<= BITS_PER_MARKER)
2055 uint64_t masked1, masked2;
2057 masked1 = n1.n & mask;
2058 masked2 = n2.n & mask;
2059 if (masked1 && masked2 && masked1 != masked2)
2060 return NULL;
2062 n->n = n1.n | n2.n;
2064 if (!verify_symbolic_number_p (n, stmt))
2065 return NULL;
2067 break;
2068 default:
2069 return NULL;
2071 return source_stmt1;
2073 return NULL;
2076 /* Check if STMT completes a bswap implementation or a read in a given
2077 endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
2078 accordingly. It also sets N to describe the operations performed:
2079 the size of the resulting expression and whether it works on a
2080 memory source, and if so its alias set and vuse. Finally, the
2081 function returns a stmt whose rhs's first tree is the source
2082 expression. */
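/* For example (an illustrative source-level pattern, not part of this
   file), the manual byte swap

     y = ((x & 0x000000ff) << 24)
	 | ((x & 0x0000ff00) << 8)
	 | ((x & 0x00ff0000) >> 8)
	 | ((x & 0xff000000) >> 24);

   on a 32-bit unsigned X is recognized here and later replaced by a
   __builtin_bswap32 call in bswap_replace.  */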
2084 static gimple
2085 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
2087 /* The number which the find_bswap_or_nop_1 result should match in order
2088 to have a full byte swap. The number is shifted to the right
2089 according to the size of the symbolic number before using it. */
2090 uint64_t cmpxchg = CMPXCHG;
2091 uint64_t cmpnop = CMPNOP;
2093 gimple source_stmt;
2094 int limit;
2096 /* The last parameter determines the search depth limit. It usually
2097 correlates directly to the number n of bytes to be touched. We
2098 increase that number by log2(n) + 1 here in order to also
2099 cover signed -> unsigned conversions of the src operand, as can be seen
2100 in libgcc, and the initial shift/and operation of the src operand. */
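  /* Worked example (assuming a 4-byte expression): TYPE_SIZE_UNIT is 4,
     so the limit becomes 4 + 1 + ceil_log2 (4) = 7 statements.  */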
2101 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2102 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2103 source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2105 if (!source_stmt)
2106 return NULL;
2108 /* Find the real size of the result (highest non-zero byte). */
2109 if (n->base_addr)
2111 int rsize;
2112 uint64_t tmpn;
2114 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2115 n->range = rsize;
2118 /* Zero out the extra bits of N and CMP*. */
2119 if (n->range < (int) sizeof (int64_t))
2121 uint64_t mask;
2123 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2124 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2125 cmpnop &= mask;
2128 /* A complete byte swap should make the symbolic number start with
2129 the largest digit in the highest order byte. An unchanged symbolic
2130 number indicates a read with the same endianness as the target. */
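  /* For instance (assumed values, for illustration only): with a 4-byte
     access the adjusted constants are cmpnop == 0x04030201 (bytes in
     unchanged order) and cmpxchg == 0x01020304 (bytes fully reversed).  */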
2131 if (n->n == cmpnop)
2132 *bswap = false;
2133 else if (n->n == cmpxchg)
2134 *bswap = true;
2135 else
2136 return NULL;
2138 /* Without a memory source, an unchanged value means the code only performs useless bit manipulation; nothing to optimize. */
2139 if (!n->base_addr && n->n == cmpnop)
2140 return NULL;
2142 n->range *= BITS_PER_UNIT;
2143 return source_stmt;
2146 namespace {
2148 const pass_data pass_data_optimize_bswap =
2150 GIMPLE_PASS, /* type */
2151 "bswap", /* name */
2152 OPTGROUP_NONE, /* optinfo_flags */
2153 TV_NONE, /* tv_id */
2154 PROP_ssa, /* properties_required */
2155 0, /* properties_provided */
2156 0, /* properties_destroyed */
2157 0, /* todo_flags_start */
2158 0, /* todo_flags_finish */
2161 class pass_optimize_bswap : public gimple_opt_pass
2163 public:
2164 pass_optimize_bswap (gcc::context *ctxt)
2165 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2168 /* opt_pass methods: */
2169 virtual bool gate (function *)
2171 return flag_expensive_optimizations && optimize;
2174 virtual unsigned int execute (function *);
2176 }; // class pass_optimize_bswap
2178 /* Perform the bswap optimization: replace the expression computed in the rhs
2179 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2180 Which of these alternatives replaces the rhs is given by N->base_addr (non
2181 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2182 load to perform are also given in N, while the bswap builtin to invoke is
2183 given in FNDECL. Finally, if a load is involved, SRC_STMT refers to one of
2184 the load statements used to construct the rhs in CUR_STMT and N->range gives
2185 the size of the rhs expression for maintaining some statistics.
2187 Note that if the replacement involves a load, CUR_STMT is moved just before
2188 SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2189 changing basic block. */
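/* For example (an illustrative pattern, not from this file), on a
   little-endian target and with P an unsigned char pointer,

     v = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);

   is rewritten into a single 32-bit load, while the reversed byte order
   becomes a load followed by a __builtin_bswap32 call.  */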
2191 static bool
2192 bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
2193 tree load_type, struct symbolic_number *n, bool bswap)
2195 gimple_stmt_iterator gsi;
2196 tree src, tmp, tgt;
2197 gimple bswap_stmt;
2199 gsi = gsi_for_stmt (cur_stmt);
2200 src = gimple_assign_rhs1 (src_stmt);
2201 tgt = gimple_assign_lhs (cur_stmt);
2203 /* Need to load the value from memory first. */
2204 if (n->base_addr)
2206 gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
2207 tree addr_expr, addr_tmp, val_expr, val_tmp;
2208 tree load_offset_ptr, aligned_load_type;
2209 gimple addr_stmt, load_stmt;
2210 unsigned align;
2212 align = get_object_alignment (src);
2213 if (bswap
2214 && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
2215 && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
2216 return false;
2218 /* Move cur_stmt just before one of the original loads
2219 to ensure it has the same VUSE. See PR61517 for what could
2220 go wrong. */
2221 gsi_move_before (&gsi, &gsi_ins);
2222 gsi = gsi_for_stmt (cur_stmt);
2224 /* Compute address to load from and cast according to the size
2225 of the load. */
2226 addr_expr = build_fold_addr_expr (unshare_expr (src));
2227 if (is_gimple_min_invariant (addr_expr))
2228 addr_tmp = addr_expr;
2229 else
2231 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2232 "load_src");
2233 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2234 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2237 /* Perform the load. */
2238 aligned_load_type = load_type;
2239 if (align < TYPE_ALIGN (load_type))
2240 aligned_load_type = build_aligned_type (load_type, align);
2241 load_offset_ptr = build_int_cst (n->alias_set, 0);
2242 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2243 load_offset_ptr);
2245 if (!bswap)
2247 if (n->range == 16)
2248 nop_stats.found_16bit++;
2249 else if (n->range == 32)
2250 nop_stats.found_32bit++;
2251 else
2253 gcc_assert (n->range == 64);
2254 nop_stats.found_64bit++;
2257 /* Convert the result of load if necessary. */
2258 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2260 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2261 "load_dst");
2262 load_stmt = gimple_build_assign (val_tmp, val_expr);
2263 gimple_set_vuse (load_stmt, n->vuse);
2264 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2265 gimple_assign_set_rhs_with_ops_1 (&gsi, NOP_EXPR, val_tmp,
2266 NULL_TREE, NULL_TREE);
2268 else
2270 gimple_assign_set_rhs_with_ops_1 (&gsi, MEM_REF, val_expr,
2271 NULL_TREE, NULL_TREE);
2272 gimple_set_vuse (cur_stmt, n->vuse);
2274 update_stmt (cur_stmt);
2276 if (dump_file)
2278 fprintf (dump_file,
2279 "%d bit load in target endianness found at: ",
2280 (int)n->range);
2281 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2283 return true;
2285 else
2287 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2288 load_stmt = gimple_build_assign (val_tmp, val_expr);
2289 gimple_set_vuse (load_stmt, n->vuse);
2290 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2292 src = val_tmp;
2295 if (n->range == 16)
2296 bswap_stats.found_16bit++;
2297 else if (n->range == 32)
2298 bswap_stats.found_32bit++;
2299 else
2301 gcc_assert (n->range == 64);
2302 bswap_stats.found_64bit++;
2305 tmp = src;
2307 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
2308 values are handled this way, as rotating a 2N-bit value by N bits is
2309 generally not equivalent to a bswap. Consider for instance 0x01020304
2310 rotated by 16 bits, which gives 0x03040102, while a bswap of that value is 0x04030201. */
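  /* E.g. the illustrative source pattern (uint16_t) ((x >> 8) | (x << 8))
     is emitted as a left rotate by 8 bits rather than a bswap call.  */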
2311 if (bswap && n->range == 16)
2313 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2314 bswap_type = TREE_TYPE (src);
2315 src = fold_build2 (LROTATE_EXPR, bswap_type, src, count);
2316 bswap_stmt = gimple_build_assign (NULL, src);
2318 else
2320 /* Convert the src expression if necessary. */
2321 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2323 gimple convert_stmt;
2324 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2325 convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tmp, src,
2326 NULL);
2327 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2330 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2333 tmp = tgt;
2335 /* Convert the result if necessary. */
2336 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2338 gimple convert_stmt;
2339 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2340 convert_stmt = gimple_build_assign_with_ops (NOP_EXPR, tgt, tmp, NULL);
2341 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2344 gimple_set_lhs (bswap_stmt, tmp);
2346 if (dump_file)
2348 fprintf (dump_file, "%d bit bswap implementation found at: ",
2349 (int)n->range);
2350 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2353 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2354 gsi_remove (&gsi, true);
2355 return true;
2358 /* Find manual byte swap implementations as well as loads in a given
2359 endianness. Byte swaps are turned into a bswap builtin invocation,
2360 while endian loads are converted to a bswap builtin invocation or a
2361 simple load according to the target endianness. */
2363 unsigned int
2364 pass_optimize_bswap::execute (function *fun)
2366 basic_block bb;
2367 bool bswap16_p, bswap32_p, bswap64_p;
2368 bool changed = false;
2369 tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2371 if (BITS_PER_UNIT != 8)
2372 return 0;
2374 bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16)
2375 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing);
2376 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2377 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2378 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2379 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2380 || (bswap32_p && word_mode == SImode)));
2382 /* Determine the argument type of the builtins. The code later on
2383 assumes that the return and argument type are the same. */
2384 if (bswap16_p)
2386 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
2387 bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2390 if (bswap32_p)
2392 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2393 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2396 if (bswap64_p)
2398 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2399 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2402 memset (&nop_stats, 0, sizeof (nop_stats));
2403 memset (&bswap_stats, 0, sizeof (bswap_stats));
2405 FOR_EACH_BB_FN (bb, fun)
2407 gimple_stmt_iterator gsi;
2409 /* We do a reverse scan for bswap patterns to make sure we get the
2410 widest match. As bswap pattern matching doesn't handle previously
2411 inserted smaller bswap replacements as sub-patterns, the wider
2412 variant wouldn't be detected. */
2413 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2415 gimple src_stmt, cur_stmt = gsi_stmt (gsi);
2416 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2417 enum tree_code code;
2418 struct symbolic_number n;
2419 bool bswap;
2421 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2422 might be moved to a different basic block by bswap_replace and gsi
2423 must not point to it in that case. Doing the gsi_prev here makes
2424 sure that gsi points to the statement previous to cur_stmt while
2425 still ensuring that all statements in this basic block are
2426 considered. */
2427 gsi_prev (&gsi);
2429 if (!is_gimple_assign (cur_stmt))
2430 continue;
2432 code = gimple_assign_rhs_code (cur_stmt);
2433 switch (code)
2435 case LROTATE_EXPR:
2436 case RROTATE_EXPR:
2437 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2438 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2439 % BITS_PER_UNIT)
2440 continue;
2441 /* Fall through. */
2442 case BIT_IOR_EXPR:
2443 break;
2444 default:
2445 continue;
2448 src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2450 if (!src_stmt)
2451 continue;
2453 switch (n.range)
2455 case 16:
2456 /* Already in canonical form, nothing to do. */
2457 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2458 continue;
2459 load_type = uint16_type_node;
2460 if (bswap16_p)
2462 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
2463 bswap_type = bswap16_type;
2465 break;
2466 case 32:
2467 load_type = uint32_type_node;
2468 if (bswap32_p)
2470 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2471 bswap_type = bswap32_type;
2473 break;
2474 case 64:
2475 load_type = uint64_type_node;
2476 if (bswap64_p)
2478 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2479 bswap_type = bswap64_type;
2481 break;
2482 default:
2483 continue;
2486 if (bswap && !fndecl)
2487 continue;
2489 if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
2490 &n, bswap))
2491 changed = true;
2495 statistics_counter_event (fun, "16-bit nop implementations found",
2496 nop_stats.found_16bit);
2497 statistics_counter_event (fun, "32-bit nop implementations found",
2498 nop_stats.found_32bit);
2499 statistics_counter_event (fun, "64-bit nop implementations found",
2500 nop_stats.found_64bit);
2501 statistics_counter_event (fun, "16-bit bswap implementations found",
2502 bswap_stats.found_16bit);
2503 statistics_counter_event (fun, "32-bit bswap implementations found",
2504 bswap_stats.found_32bit);
2505 statistics_counter_event (fun, "64-bit bswap implementations found",
2506 bswap_stats.found_64bit);
2508 return (changed ? TODO_update_ssa : 0);
2511 } // anon namespace
2513 gimple_opt_pass *
2514 make_pass_optimize_bswap (gcc::context *ctxt)
2516 return new pass_optimize_bswap (ctxt);
2519 /* Return true if stmt is a type conversion operation that can be stripped
2520 when used in a widening multiply operation. */
2521 static bool
2522 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2524 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2526 if (TREE_CODE (result_type) == INTEGER_TYPE)
2528 tree op_type;
2529 tree inner_op_type;
2531 if (!CONVERT_EXPR_CODE_P (rhs_code))
2532 return false;
2534 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2536 /* If the type of OP has the same precision as the result, then
2537 we can strip this conversion. The multiply operation will be
2538 selected to create the correct extension as a by-product. */
2539 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2540 return true;
2542 /* We can also strip a conversion if it preserves the signed-ness of
2543 the operation and doesn't narrow the range. */
2544 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2546 /* If the inner-most type is unsigned, then we can strip any
2547 intermediate widening operation. If it's signed, then the
2548 intermediate widening operation must also be signed. */
2549 if ((TYPE_UNSIGNED (inner_op_type)
2550 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2551 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2552 return true;
2554 return false;
2557 return rhs_code == FIXED_CONVERT_EXPR;
2560 /* Return true if RHS is a suitable operand for a widening multiplication,
2561 assuming a target type of TYPE.
2562 There are two cases:
2564 - RHS makes some value at least twice as wide. Store that value
2565 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2567 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2568 but leave *TYPE_OUT untouched. */
2570 static bool
2571 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2572 tree *new_rhs_out)
2574 gimple stmt;
2575 tree type1, rhs1;
2577 if (TREE_CODE (rhs) == SSA_NAME)
2579 stmt = SSA_NAME_DEF_STMT (rhs);
2580 if (is_gimple_assign (stmt))
2582 if (! widening_mult_conversion_strippable_p (type, stmt))
2583 rhs1 = rhs;
2584 else
2586 rhs1 = gimple_assign_rhs1 (stmt);
2588 if (TREE_CODE (rhs1) == INTEGER_CST)
2590 *new_rhs_out = rhs1;
2591 *type_out = NULL;
2592 return true;
2596 else
2597 rhs1 = rhs;
2599 type1 = TREE_TYPE (rhs1);
2601 if (TREE_CODE (type1) != TREE_CODE (type)
2602 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2603 return false;
2605 *new_rhs_out = rhs1;
2606 *type_out = type1;
2607 return true;
2610 if (TREE_CODE (rhs) == INTEGER_CST)
2612 *new_rhs_out = rhs;
2613 *type_out = NULL;
2614 return true;
2617 return false;
2620 /* Return true if STMT performs a widening multiplication, assuming the
2621 output type is TYPE. If so, store the unwidened types of the operands
2622 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2623 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2624 and *TYPE2_OUT would give the operands of the multiplication. */
2626 static bool
2627 is_widening_mult_p (gimple stmt,
2628 tree *type1_out, tree *rhs1_out,
2629 tree *type2_out, tree *rhs2_out)
2631 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2633 if (TREE_CODE (type) != INTEGER_TYPE
2634 && TREE_CODE (type) != FIXED_POINT_TYPE)
2635 return false;
2637 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2638 rhs1_out))
2639 return false;
2641 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2642 rhs2_out))
2643 return false;
2645 if (*type1_out == NULL)
2647 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2648 return false;
2649 *type1_out = *type2_out;
2652 if (*type2_out == NULL)
2654 if (!int_fits_type_p (*rhs2_out, *type1_out))
2655 return false;
2656 *type2_out = *type1_out;
2659 /* Ensure that the larger of the two operands comes first. */
2660 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2662 tree tmp;
2663 tmp = *type1_out;
2664 *type1_out = *type2_out;
2665 *type2_out = tmp;
2666 tmp = *rhs1_out;
2667 *rhs1_out = *rhs2_out;
2668 *rhs2_out = tmp;
2671 return true;
2674 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2675 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2676 value is true iff we converted the statement. */
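/* For instance (illustrative only), for

     int64_t f (int32_t a, int32_t b)
     {
       return (int64_t) a * (int64_t) b;
     }

   the MULT_EXPR on the widened operands is rewritten into a
   WIDEN_MULT_EXPR taking the original 32-bit operands, provided the
   target implements a suitable widening multiply pattern.  */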
2678 static bool
2679 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
2681 tree lhs, rhs1, rhs2, type, type1, type2;
2682 enum insn_code handler;
2683 machine_mode to_mode, from_mode, actual_mode;
2684 optab op;
2685 int actual_precision;
2686 location_t loc = gimple_location (stmt);
2687 bool from_unsigned1, from_unsigned2;
2689 lhs = gimple_assign_lhs (stmt);
2690 type = TREE_TYPE (lhs);
2691 if (TREE_CODE (type) != INTEGER_TYPE)
2692 return false;
2694 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2695 return false;
2697 to_mode = TYPE_MODE (type);
2698 from_mode = TYPE_MODE (type1);
2699 from_unsigned1 = TYPE_UNSIGNED (type1);
2700 from_unsigned2 = TYPE_UNSIGNED (type2);
2702 if (from_unsigned1 && from_unsigned2)
2703 op = umul_widen_optab;
2704 else if (!from_unsigned1 && !from_unsigned2)
2705 op = smul_widen_optab;
2706 else
2707 op = usmul_widen_optab;
2709 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2710 0, &actual_mode);
2712 if (handler == CODE_FOR_nothing)
2714 if (op != smul_widen_optab)
2716 /* We can use a signed multiply with unsigned types as long as
2717 there is a wider mode to use, or it is the smaller of the two
2718 types that is unsigned. Note that type1 >= type2, always. */
2719 if ((TYPE_UNSIGNED (type1)
2720 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2721 || (TYPE_UNSIGNED (type2)
2722 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2724 from_mode = GET_MODE_WIDER_MODE (from_mode);
2725 if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2726 return false;
2729 op = smul_widen_optab;
2730 handler = find_widening_optab_handler_and_mode (op, to_mode,
2731 from_mode, 0,
2732 &actual_mode);
2734 if (handler == CODE_FOR_nothing)
2735 return false;
2737 from_unsigned1 = from_unsigned2 = false;
2739 else
2740 return false;
2743 /* Ensure that the inputs to the handler are in the correct precision
2744 for the opcode. This will be the full mode size. */
2745 actual_precision = GET_MODE_PRECISION (actual_mode);
2746 if (2 * actual_precision > TYPE_PRECISION (type))
2747 return false;
2748 if (actual_precision != TYPE_PRECISION (type1)
2749 || from_unsigned1 != TYPE_UNSIGNED (type1))
2750 rhs1 = build_and_insert_cast (gsi, loc,
2751 build_nonstandard_integer_type
2752 (actual_precision, from_unsigned1), rhs1);
2753 if (actual_precision != TYPE_PRECISION (type2)
2754 || from_unsigned2 != TYPE_UNSIGNED (type2))
2755 rhs2 = build_and_insert_cast (gsi, loc,
2756 build_nonstandard_integer_type
2757 (actual_precision, from_unsigned2), rhs2);
2759 /* Handle constants. */
2760 if (TREE_CODE (rhs1) == INTEGER_CST)
2761 rhs1 = fold_convert (type1, rhs1);
2762 if (TREE_CODE (rhs2) == INTEGER_CST)
2763 rhs2 = fold_convert (type2, rhs2);
2765 gimple_assign_set_rhs1 (stmt, rhs1);
2766 gimple_assign_set_rhs2 (stmt, rhs2);
2767 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2768 update_stmt (stmt);
2769 widen_mul_stats.widen_mults_inserted++;
2770 return true;
2773 /* Process a single gimple statement STMT, which is found at the
2774 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
2775 rhs (given by CODE), and try to convert it into a
2776 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2777 is true iff we converted the statement. */
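/* For instance (illustrative only), for

     int64_t g (int64_t acc, int32_t a, int32_t b)
     {
       return acc + (int64_t) a * (int64_t) b;
     }

   the PLUS_EXPR and the widening multiply feeding it are combined into
   a single WIDEN_MULT_PLUS_EXPR when the target provides a widening
   multiply-and-accumulate pattern.  */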
2779 static bool
2780 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
2781 enum tree_code code)
2783 gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
2784 gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
2785 tree type, type1, type2, optype;
2786 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2787 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2788 optab this_optab;
2789 enum tree_code wmult_code;
2790 enum insn_code handler;
2791 machine_mode to_mode, from_mode, actual_mode;
2792 location_t loc = gimple_location (stmt);
2793 int actual_precision;
2794 bool from_unsigned1, from_unsigned2;
2796 lhs = gimple_assign_lhs (stmt);
2797 type = TREE_TYPE (lhs);
2798 if (TREE_CODE (type) != INTEGER_TYPE
2799 && TREE_CODE (type) != FIXED_POINT_TYPE)
2800 return false;
2802 if (code == MINUS_EXPR)
2803 wmult_code = WIDEN_MULT_MINUS_EXPR;
2804 else
2805 wmult_code = WIDEN_MULT_PLUS_EXPR;
2807 rhs1 = gimple_assign_rhs1 (stmt);
2808 rhs2 = gimple_assign_rhs2 (stmt);
2810 if (TREE_CODE (rhs1) == SSA_NAME)
2812 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2813 if (is_gimple_assign (rhs1_stmt))
2814 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2817 if (TREE_CODE (rhs2) == SSA_NAME)
2819 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2820 if (is_gimple_assign (rhs2_stmt))
2821 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2824 /* Allow for one conversion statement between the multiply
2825 and the addition/subtraction statement. If there is more than
2826 one conversion then we assume they would invalidate this
2827 transformation. If that's not the case then they should have
2828 been folded before now. */
2829 if (CONVERT_EXPR_CODE_P (rhs1_code))
2831 conv1_stmt = rhs1_stmt;
2832 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2833 if (TREE_CODE (rhs1) == SSA_NAME)
2835 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2836 if (is_gimple_assign (rhs1_stmt))
2837 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2839 else
2840 return false;
2842 if (CONVERT_EXPR_CODE_P (rhs2_code))
2844 conv2_stmt = rhs2_stmt;
2845 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2846 if (TREE_CODE (rhs2) == SSA_NAME)
2848 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2849 if (is_gimple_assign (rhs2_stmt))
2850 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2852 else
2853 return false;
2856 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2857 is_widening_mult_p, but we still need the rhs values it returns.
2859 It might also appear that it would be sufficient to use the existing
2860 operands of the widening multiply, but that would limit the choice of
2861 multiply-and-accumulate instructions.
2863 If the widened-multiplication result has more than one use, it is
2864 probably wiser not to do the conversion. */
2865 if (code == PLUS_EXPR
2866 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2868 if (!has_single_use (rhs1)
2869 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2870 &type2, &mult_rhs2))
2871 return false;
2872 add_rhs = rhs2;
2873 conv_stmt = conv1_stmt;
2875 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2877 if (!has_single_use (rhs2)
2878 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2879 &type2, &mult_rhs2))
2880 return false;
2881 add_rhs = rhs1;
2882 conv_stmt = conv2_stmt;
2884 else
2885 return false;
2887 to_mode = TYPE_MODE (type);
2888 from_mode = TYPE_MODE (type1);
2889 from_unsigned1 = TYPE_UNSIGNED (type1);
2890 from_unsigned2 = TYPE_UNSIGNED (type2);
2891 optype = type1;
2893 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2894 if (from_unsigned1 != from_unsigned2)
2896 if (!INTEGRAL_TYPE_P (type))
2897 return false;
2898 /* We can use a signed multiply with unsigned types as long as
2899 there is a wider mode to use, or it is the smaller of the two
2900 types that is unsigned. Note that type1 >= type2, always. */
2901 if ((from_unsigned1
2902 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2903 || (from_unsigned2
2904 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2906 from_mode = GET_MODE_WIDER_MODE (from_mode);
2907 if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2908 return false;
2911 from_unsigned1 = from_unsigned2 = false;
2912 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2913 false);
2916 /* If there was a conversion between the multiply and addition
2917 then we need to make sure it fits a multiply-and-accumulate.
2918 There should be a single mode change which does not change the
2919 value. */
2920 if (conv_stmt)
2922 /* We use the original, unmodified data types for this. */
2923 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2924 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2925 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2926 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2928 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2930 /* Conversion is a truncate. */
2931 if (TYPE_PRECISION (to_type) < data_size)
2932 return false;
2934 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2936 /* Conversion is an extend. Check it's the right sort. */
2937 if (TYPE_UNSIGNED (from_type) != is_unsigned
2938 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2939 return false;
2941 /* else convert is a no-op for our purposes. */
2944 /* Verify that the machine can perform a widening multiply
2945 accumulate in this mode/signedness combination, otherwise
2946 this transformation is likely to pessimize code. */
2947 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2948 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2949 from_mode, 0, &actual_mode);
2951 if (handler == CODE_FOR_nothing)
2952 return false;
2954 /* Ensure that the inputs to the handler are in the correct precision
2955 for the opcode. This will be the full mode size. */
2956 actual_precision = GET_MODE_PRECISION (actual_mode);
2957 if (actual_precision != TYPE_PRECISION (type1)
2958 || from_unsigned1 != TYPE_UNSIGNED (type1))
2959 mult_rhs1 = build_and_insert_cast (gsi, loc,
2960 build_nonstandard_integer_type
2961 (actual_precision, from_unsigned1),
2962 mult_rhs1);
2963 if (actual_precision != TYPE_PRECISION (type2)
2964 || from_unsigned2 != TYPE_UNSIGNED (type2))
2965 mult_rhs2 = build_and_insert_cast (gsi, loc,
2966 build_nonstandard_integer_type
2967 (actual_precision, from_unsigned2),
2968 mult_rhs2);
2970 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2971 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2973 /* Handle constants. */
2974 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2975 mult_rhs1 = fold_convert (type1, mult_rhs1);
2976 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2977 mult_rhs2 = fold_convert (type2, mult_rhs2);
2979 gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
2980 add_rhs);
2981 update_stmt (gsi_stmt (*gsi));
2982 widen_mul_stats.maccs_inserted++;
2983 return true;
2986 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2987 with uses in additions and subtractions to form fused multiply-add
2988 operations. Returns true if successful and MUL_STMT should be removed. */
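/* For instance (illustrative only), with FP contraction enabled and a
   target that implements the fma optab,

     double h (double a, double b, double c)
     {
       return a * b + c;
     }

   is rewritten so that the addition becomes a single FMA_EXPR using A,
   B and C, and the separate multiplication becomes dead.  */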
2990 static bool
2991 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
2993 tree mul_result = gimple_get_lhs (mul_stmt);
2994 tree type = TREE_TYPE (mul_result);
2995 gimple use_stmt, neguse_stmt;
2996 gassign *fma_stmt;
2997 use_operand_p use_p;
2998 imm_use_iterator imm_iter;
3000 if (FLOAT_TYPE_P (type)
3001 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3002 return false;
3004 /* We don't want to do bitfield reduction ops. */
3005 if (INTEGRAL_TYPE_P (type)
3006 && (TYPE_PRECISION (type)
3007 != GET_MODE_PRECISION (TYPE_MODE (type))))
3008 return false;
3010 /* If the target doesn't support it, don't generate it. We assume that
3011 if fma isn't available then fms, fnma or fnms are not either. */
3012 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3013 return false;
3015 /* If the multiplication has zero uses, it is kept around probably because
3016 of -fnon-call-exceptions. Don't optimize it away in that case;
3017 that is DCE's job. */
3018 if (has_zero_uses (mul_result))
3019 return false;
3021 /* Make sure that the multiplication statement becomes dead after
3022 the transformation, thus that all uses are transformed to FMAs.
3023 This means we assume that an FMA operation has the same cost
3024 as an addition. */
3025 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3027 enum tree_code use_code;
3028 tree result = mul_result;
3029 bool negate_p = false;
3031 use_stmt = USE_STMT (use_p);
3033 if (is_gimple_debug (use_stmt))
3034 continue;
3036 /* For now restrict these operations to single basic blocks. In theory
3037 we would want to support sinking the multiplication in
3038 m = a*b;
3039 if ()
3040 ma = m + c;
3041 else
3042 d = m;
3043 to form a fma in the then block and sink the multiplication to the
3044 else block. */
3045 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3046 return false;
3048 if (!is_gimple_assign (use_stmt))
3049 return false;
3051 use_code = gimple_assign_rhs_code (use_stmt);
3053 /* A negate on the multiplication leads to FNMA. */
3054 if (use_code == NEGATE_EXPR)
3056 ssa_op_iter iter;
3057 use_operand_p usep;
3059 result = gimple_assign_lhs (use_stmt);
3061 /* Make sure the negate statement becomes dead with this
3062 single transformation. */
3063 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3064 &use_p, &neguse_stmt))
3065 return false;
3067 /* Make sure the multiplication isn't also used on that stmt. */
3068 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3069 if (USE_FROM_PTR (usep) == mul_result)
3070 return false;
3072 /* Re-validate. */
3073 use_stmt = neguse_stmt;
3074 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3075 return false;
3076 if (!is_gimple_assign (use_stmt))
3077 return false;
3079 use_code = gimple_assign_rhs_code (use_stmt);
3080 negate_p = true;
3083 switch (use_code)
3085 case MINUS_EXPR:
3086 if (gimple_assign_rhs2 (use_stmt) == result)
3087 negate_p = !negate_p;
3088 break;
3089 case PLUS_EXPR:
3090 break;
3091 default:
3092 /* FMA can only be formed from PLUS and MINUS. */
3093 return false;
3096 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3097 by a MULT_EXPR that we'll visit later, we might be able to
3098 get a more profitable match with fnma.
3099 OTOH, if we don't, a negate / fma pair likely has lower latency
3100 than a mult / subtract pair. */
3101 if (use_code == MINUS_EXPR && !negate_p
3102 && gimple_assign_rhs1 (use_stmt) == result
3103 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3104 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3106 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3108 if (TREE_CODE (rhs2) == SSA_NAME)
3110 gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
3111 if (has_single_use (rhs2)
3112 && is_gimple_assign (stmt2)
3113 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3114 return false;
3118 /* We can't handle a * b + a * b. */
3119 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3120 return false;
3122 /* While it is possible to validate whether or not the exact form
3123 that we've recognized is available in the backend, the assumption
3124 is that the transformation is never a loss. For instance, suppose
3125 the target only has the plain FMA pattern available. Consider
3126 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3127 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3128 still have 3 operations, but in the FMA form the two NEGs are
3129 independent and could be run in parallel. */
3132 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3134 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3135 enum tree_code use_code;
3136 tree addop, mulop1 = op1, result = mul_result;
3137 bool negate_p = false;
3139 if (is_gimple_debug (use_stmt))
3140 continue;
3142 use_code = gimple_assign_rhs_code (use_stmt);
3143 if (use_code == NEGATE_EXPR)
3145 result = gimple_assign_lhs (use_stmt);
3146 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3147 gsi_remove (&gsi, true);
3148 release_defs (use_stmt);
3150 use_stmt = neguse_stmt;
3151 gsi = gsi_for_stmt (use_stmt);
3152 use_code = gimple_assign_rhs_code (use_stmt);
3153 negate_p = true;
3156 if (gimple_assign_rhs1 (use_stmt) == result)
3158 addop = gimple_assign_rhs2 (use_stmt);
3159 /* a * b - c -> a * b + (-c) */
3160 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3161 addop = force_gimple_operand_gsi (&gsi,
3162 build1 (NEGATE_EXPR,
3163 type, addop),
3164 true, NULL_TREE, true,
3165 GSI_SAME_STMT);
3167 else
3169 addop = gimple_assign_rhs1 (use_stmt);
3170 /* a - b * c -> (-b) * c + a */
3171 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3172 negate_p = !negate_p;
3175 if (negate_p)
3176 mulop1 = force_gimple_operand_gsi (&gsi,
3177 build1 (NEGATE_EXPR,
3178 type, mulop1),
3179 true, NULL_TREE, true,
3180 GSI_SAME_STMT);
3182 fma_stmt = gimple_build_assign_with_ops (FMA_EXPR,
3183 gimple_assign_lhs (use_stmt),
3184 mulop1, op2,
3185 addop);
3186 gsi_replace (&gsi, fma_stmt, true);
3187 widen_mul_stats.fmas_inserted++;
3190 return true;
3193 /* Find integer multiplications where the operands are extended from
3194 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3195 where appropriate. */
3197 namespace {
3199 const pass_data pass_data_optimize_widening_mul =
3201 GIMPLE_PASS, /* type */
3202 "widening_mul", /* name */
3203 OPTGROUP_NONE, /* optinfo_flags */
3204 TV_NONE, /* tv_id */
3205 PROP_ssa, /* properties_required */
3206 0, /* properties_provided */
3207 0, /* properties_destroyed */
3208 0, /* todo_flags_start */
3209 TODO_update_ssa, /* todo_flags_finish */
3212 class pass_optimize_widening_mul : public gimple_opt_pass
3214 public:
3215 pass_optimize_widening_mul (gcc::context *ctxt)
3216 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3219 /* opt_pass methods: */
3220 virtual bool gate (function *)
3222 return flag_expensive_optimizations && optimize;
3225 virtual unsigned int execute (function *);
3227 }; // class pass_optimize_widening_mul
3229 unsigned int
3230 pass_optimize_widening_mul::execute (function *fun)
3232 basic_block bb;
3233 bool cfg_changed = false;
3235 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3237 FOR_EACH_BB_FN (bb, fun)
3239 gimple_stmt_iterator gsi;
3241 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3243 gimple stmt = gsi_stmt (gsi);
3244 enum tree_code code;
3246 if (is_gimple_assign (stmt))
3248 code = gimple_assign_rhs_code (stmt);
3249 switch (code)
3251 case MULT_EXPR:
3252 if (!convert_mult_to_widen (stmt, &gsi)
3253 && convert_mult_to_fma (stmt,
3254 gimple_assign_rhs1 (stmt),
3255 gimple_assign_rhs2 (stmt)))
3257 gsi_remove (&gsi, true);
3258 release_defs (stmt);
3259 continue;
3261 break;
3263 case PLUS_EXPR:
3264 case MINUS_EXPR:
3265 convert_plusminus_to_widen (&gsi, stmt, code);
3266 break;
3268 default:;
3271 else if (is_gimple_call (stmt)
3272 && gimple_call_lhs (stmt))
3274 tree fndecl = gimple_call_fndecl (stmt);
3275 if (fndecl
3276 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3278 switch (DECL_FUNCTION_CODE (fndecl))
3280 case BUILT_IN_POWF:
3281 case BUILT_IN_POW:
3282 case BUILT_IN_POWL:
3283 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3284 && REAL_VALUES_EQUAL
3285 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3286 dconst2)
3287 && convert_mult_to_fma (stmt,
3288 gimple_call_arg (stmt, 0),
3289 gimple_call_arg (stmt, 0)))
3291 unlink_stmt_vdef (stmt);
3292 if (gsi_remove (&gsi, true)
3293 && gimple_purge_dead_eh_edges (bb))
3294 cfg_changed = true;
3295 release_defs (stmt);
3296 continue;
3298 break;
3300 default:;
3304 gsi_next (&gsi);
3308 statistics_counter_event (fun, "widening multiplications inserted",
3309 widen_mul_stats.widen_mults_inserted);
3310 statistics_counter_event (fun, "widening maccs inserted",
3311 widen_mul_stats.maccs_inserted);
3312 statistics_counter_event (fun, "fused multiply-adds inserted",
3313 widen_mul_stats.fmas_inserted);
3315 return cfg_changed ? TODO_cleanup_cfg : 0;
3318 } // anon namespace
3320 gimple_opt_pass *
3321 make_pass_optimize_widening_mul (gcc::context *ctxt)
3323 return new pass_optimize_widening_mul (ctxt);