PR target/47111
[official-gcc.git] / gcc / tree-ssa-math-opts.c
blobed9b7a9d5467bdc0b69bfc1eef76f7171e4e345b
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Currently, the only mini-pass in this file tries to CSE reciprocal
22 operations. These are common in sequences such as this one:
24 modulus = sqrt(x*x + y*y + z*z);
25 x = x / modulus;
26 y = y / modulus;
27 z = z / modulus;
29 that can be optimized to
31 modulus = sqrt(x*x + y*y + z*z);
32 rmodulus = 1.0 / modulus;
33 x = x * rmodulus;
34 y = y * rmodulus;
35 z = z * rmodulus;
37 We do this for loop invariant divisors, and with this pass whenever
38 we notice that a division has the same divisor multiple times.
40 Of course, like in PRE, we don't insert a division if a dominator
41 already has one. However, this cannot be done as an extension of
42 PRE for several reasons.
44 First of all, with some experiments it was found out that the
45 transformation is not always useful if there are only two divisions
46 hy the same divisor. This is probably because modern processors
47 can pipeline the divisions; on older, in-order processors it should
48 still be effective to optimize two divisions by the same number.
49 We make this a param, and it shall be called N in the remainder of
50 this comment.
52 Second, if trapping math is active, we have less freedom on where
53 to insert divisions: we can only do so in basic blocks that already
54 contain one. (If divisions don't trap, instead, we can insert
55 divisions elsewhere, which will be in blocks that are common dominators
56 of those that have the division).
58 We really don't want to compute the reciprocal unless a division will
59 be found. To do this, we won't insert the division in a basic block
60 that has less than N divisions *post-dominating* it.
62 The algorithm constructs a subset of the dominator tree, holding the
63 blocks containing the divisions and the common dominators to them,
64 and walk it twice. The first walk is in post-order, and it annotates
65 each block with the number of divisions that post-dominate it: this
66 gives information on where divisions can be inserted profitably.
67 The second walk is in pre-order, and it inserts divisions as explained
68 above, and replaces divisions by multiplications.
70 In the best case, the cost of the pass is O(n_statements). In the
71 worst-case, the cost is due to creating the dominator tree subset,
72 with a cost of O(n_basic_blocks ^ 2); however this can only happen
73 for n_statements / n_basic_blocks statements. So, the amortized cost
74 of creating the dominator tree subset is O(n_basic_blocks) and the
75 worst-case cost of the pass is O(n_statements * n_basic_blocks).
77 More practically, the cost will be small because there are few
78 divisions, and they tend to be in the same basic block, so insert_bb
79 is called very few times.
81 If we did this using domwalk.c, an efficient implementation would have
82 to work on all the variables in a single pass, because we could not
83 work on just a subset of the dominator tree, as we do now, and the
84 cost would also be something like O(n_statements * n_basic_blocks).
85 The data structures would be more complex in order to work on all the
86 variables in a single pass. */
88 #include "config.h"
89 #include "system.h"
90 #include "coretypes.h"
91 #include "tm.h"
92 #include "flags.h"
93 #include "tree.h"
94 #include "tree-flow.h"
95 #include "timevar.h"
96 #include "tree-pass.h"
97 #include "alloc-pool.h"
98 #include "basic-block.h"
99 #include "target.h"
100 #include "gimple-pretty-print.h"
102 /* FIXME: RTL headers have to be included here for optabs. */
103 #include "rtl.h" /* Because optabs.h wants enum rtx_code. */
104 #include "expr.h" /* Because optabs.h wants sepops. */
105 #include "optabs.h"
107 /* This structure represents one basic block that either computes a
108 division, or is a common dominator for basic block that compute a
109 division. */
110 struct occurrence {
111 /* The basic block represented by this structure. */
112 basic_block bb;
114 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
115 inserted in BB. */
116 tree recip_def;
118 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
119 was inserted in BB. */
120 gimple recip_def_stmt;
122 /* Pointer to a list of "struct occurrence"s for blocks dominated
123 by BB. */
124 struct occurrence *children;
126 /* Pointer to the next "struct occurrence"s in the list of blocks
127 sharing a common dominator. */
128 struct occurrence *next;
130 /* The number of divisions that are in BB before compute_merit. The
131 number of divisions that are in BB or post-dominate it after
132 compute_merit. */
133 int num_divisions;
135 /* True if the basic block has a division, false if it is a common
136 dominator for basic blocks that do. If it is false and trapping
137 math is active, BB is not a candidate for inserting a reciprocal. */
138 bool bb_has_division;
142 /* The instance of "struct occurrence" representing the highest
143 interesting block in the dominator tree. */
144 static struct occurrence *occ_head;
146 /* Allocation pool for getting instances of "struct occurrence". */
147 static alloc_pool occ_pool;
151 /* Allocate and return a new struct occurrence for basic block BB, and
152 whose children list is headed by CHILDREN. */
153 static struct occurrence *
154 occ_new (basic_block bb, struct occurrence *children)
156 struct occurrence *occ;
158 bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
159 memset (occ, 0, sizeof (struct occurrence));
161 occ->bb = bb;
162 occ->children = children;
163 return occ;
167 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
168 list of "struct occurrence"s, one per basic block, having IDOM as
169 their common dominator.
171 We try to insert NEW_OCC as deep as possible in the tree, and we also
172 insert any other block that is a common dominator for BB and one
173 block already in the tree. */
175 static void
176 insert_bb (struct occurrence *new_occ, basic_block idom,
177 struct occurrence **p_head)
179 struct occurrence *occ, **p_occ;
181 for (p_occ = p_head; (occ = *p_occ) != NULL; )
183 basic_block bb = new_occ->bb, occ_bb = occ->bb;
184 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
185 if (dom == bb)
187 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
188 from its list. */
189 *p_occ = occ->next;
190 occ->next = new_occ->children;
191 new_occ->children = occ;
193 /* Try the next block (it may as well be dominated by BB). */
196 else if (dom == occ_bb)
198 /* OCC_BB dominates BB. Tail recurse to look deeper. */
199 insert_bb (new_occ, dom, &occ->children);
200 return;
203 else if (dom != idom)
205 gcc_assert (!dom->aux);
207 /* There is a dominator between IDOM and BB, add it and make
208 two children out of NEW_OCC and OCC. First, remove OCC from
209 its list. */
210 *p_occ = occ->next;
211 new_occ->next = occ;
212 occ->next = NULL;
214 /* None of the previous blocks has DOM as a dominator: if we tail
215 recursed, we would reexamine them uselessly. Just switch BB with
216 DOM, and go on looking for blocks dominated by DOM. */
217 new_occ = occ_new (dom, new_occ);
220 else
222 /* Nothing special, go on with the next element. */
223 p_occ = &occ->next;
227 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
228 new_occ->next = *p_head;
229 *p_head = new_occ;
232 /* Register that we found a division in BB. */
234 static inline void
235 register_division_in (basic_block bb)
237 struct occurrence *occ;
239 occ = (struct occurrence *) bb->aux;
240 if (!occ)
242 occ = occ_new (bb, NULL);
243 insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
246 occ->bb_has_division = true;
247 occ->num_divisions++;
251 /* Compute the number of divisions that postdominate each block in OCC and
252 its children. */
254 static void
255 compute_merit (struct occurrence *occ)
257 struct occurrence *occ_child;
258 basic_block dom = occ->bb;
260 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
262 basic_block bb;
263 if (occ_child->children)
264 compute_merit (occ_child);
266 if (flag_exceptions)
267 bb = single_noncomplex_succ (dom);
268 else
269 bb = dom;
271 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
272 occ->num_divisions += occ_child->num_divisions;
277 /* Return whether USE_STMT is a floating-point division by DEF. */
278 static inline bool
279 is_division_by (gimple use_stmt, tree def)
281 return is_gimple_assign (use_stmt)
282 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
283 && gimple_assign_rhs2 (use_stmt) == def
284 /* Do not recognize x / x as valid division, as we are getting
285 confused later by replacing all immediate uses x in such
286 a stmt. */
287 && gimple_assign_rhs1 (use_stmt) != def;
290 /* Walk the subset of the dominator tree rooted at OCC, setting the
291 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
292 the given basic block. The field may be left NULL, of course,
293 if it is not possible or profitable to do the optimization.
295 DEF_BSI is an iterator pointing at the statement defining DEF.
296 If RECIP_DEF is set, a dominator already has a computation that can
297 be used. */
299 static void
300 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
301 tree def, tree recip_def, int threshold)
303 tree type;
304 gimple new_stmt;
305 gimple_stmt_iterator gsi;
306 struct occurrence *occ_child;
308 if (!recip_def
309 && (occ->bb_has_division || !flag_trapping_math)
310 && occ->num_divisions >= threshold)
312 /* Make a variable with the replacement and substitute it. */
313 type = TREE_TYPE (def);
314 recip_def = make_rename_temp (type, "reciptmp");
315 new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
316 build_one_cst (type), def);
318 if (occ->bb_has_division)
320 /* Case 1: insert before an existing division. */
321 gsi = gsi_after_labels (occ->bb);
322 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
323 gsi_next (&gsi);
325 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
327 else if (def_gsi && occ->bb == def_gsi->bb)
329 /* Case 2: insert right after the definition. Note that this will
330 never happen if the definition statement can throw, because in
331 that case the sole successor of the statement's basic block will
332 dominate all the uses as well. */
333 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
335 else
337 /* Case 3: insert in a basic block not containing defs/uses. */
338 gsi = gsi_after_labels (occ->bb);
339 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
342 occ->recip_def_stmt = new_stmt;
345 occ->recip_def = recip_def;
346 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
347 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
351 /* Replace the division at USE_P with a multiplication by the reciprocal, if
352 possible. */
354 static inline void
355 replace_reciprocal (use_operand_p use_p)
357 gimple use_stmt = USE_STMT (use_p);
358 basic_block bb = gimple_bb (use_stmt);
359 struct occurrence *occ = (struct occurrence *) bb->aux;
361 if (optimize_bb_for_speed_p (bb)
362 && occ->recip_def && use_stmt != occ->recip_def_stmt)
364 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
365 SET_USE (use_p, occ->recip_def);
366 fold_stmt_inplace (use_stmt);
367 update_stmt (use_stmt);
372 /* Free OCC and return one more "struct occurrence" to be freed. */
374 static struct occurrence *
375 free_bb (struct occurrence *occ)
377 struct occurrence *child, *next;
379 /* First get the two pointers hanging off OCC. */
380 next = occ->next;
381 child = occ->children;
382 occ->bb->aux = NULL;
383 pool_free (occ_pool, occ);
385 /* Now ensure that we don't recurse unless it is necessary. */
386 if (!child)
387 return next;
388 else
390 while (next)
391 next = free_bb (next);
393 return child;
398 /* Look for floating-point divisions among DEF's uses, and try to
399 replace them by multiplications with the reciprocal. Add
400 as many statements computing the reciprocal as needed.
402 DEF must be a GIMPLE register of a floating-point type. */
404 static void
405 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
407 use_operand_p use_p;
408 imm_use_iterator use_iter;
409 struct occurrence *occ;
410 int count = 0, threshold;
412 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
414 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
416 gimple use_stmt = USE_STMT (use_p);
417 if (is_division_by (use_stmt, def))
419 register_division_in (gimple_bb (use_stmt));
420 count++;
424 /* Do the expensive part only if we can hope to optimize something. */
425 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
426 if (count >= threshold)
428 gimple use_stmt;
429 for (occ = occ_head; occ; occ = occ->next)
431 compute_merit (occ);
432 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
435 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
437 if (is_division_by (use_stmt, def))
439 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
440 replace_reciprocal (use_p);
445 for (occ = occ_head; occ; )
446 occ = free_bb (occ);
448 occ_head = NULL;
451 static bool
452 gate_cse_reciprocals (void)
454 return optimize && flag_reciprocal_math;
457 /* Go through all the floating-point SSA_NAMEs, and call
458 execute_cse_reciprocals_1 on each of them. */
459 static unsigned int
460 execute_cse_reciprocals (void)
462 basic_block bb;
463 tree arg;
465 occ_pool = create_alloc_pool ("dominators for recip",
466 sizeof (struct occurrence),
467 n_basic_blocks / 3 + 1);
469 calculate_dominance_info (CDI_DOMINATORS);
470 calculate_dominance_info (CDI_POST_DOMINATORS);
472 #ifdef ENABLE_CHECKING
473 FOR_EACH_BB (bb)
474 gcc_assert (!bb->aux);
475 #endif
477 for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
478 if (gimple_default_def (cfun, arg)
479 && FLOAT_TYPE_P (TREE_TYPE (arg))
480 && is_gimple_reg (arg))
481 execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));
483 FOR_EACH_BB (bb)
485 gimple_stmt_iterator gsi;
486 gimple phi;
487 tree def;
489 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
491 phi = gsi_stmt (gsi);
492 def = PHI_RESULT (phi);
493 if (FLOAT_TYPE_P (TREE_TYPE (def))
494 && is_gimple_reg (def))
495 execute_cse_reciprocals_1 (NULL, def);
498 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
500 gimple stmt = gsi_stmt (gsi);
502 if (gimple_has_lhs (stmt)
503 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
504 && FLOAT_TYPE_P (TREE_TYPE (def))
505 && TREE_CODE (def) == SSA_NAME)
506 execute_cse_reciprocals_1 (&gsi, def);
509 if (optimize_bb_for_size_p (bb))
510 continue;
512 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
513 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
515 gimple stmt = gsi_stmt (gsi);
516 tree fndecl;
518 if (is_gimple_assign (stmt)
519 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
521 tree arg1 = gimple_assign_rhs2 (stmt);
522 gimple stmt1;
524 if (TREE_CODE (arg1) != SSA_NAME)
525 continue;
527 stmt1 = SSA_NAME_DEF_STMT (arg1);
529 if (is_gimple_call (stmt1)
530 && gimple_call_lhs (stmt1)
531 && (fndecl = gimple_call_fndecl (stmt1))
532 && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
533 || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
535 enum built_in_function code;
536 bool md_code, fail;
537 imm_use_iterator ui;
538 use_operand_p use_p;
540 code = DECL_FUNCTION_CODE (fndecl);
541 md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
543 fndecl = targetm.builtin_reciprocal (code, md_code, false);
544 if (!fndecl)
545 continue;
547 /* Check that all uses of the SSA name are divisions,
548 otherwise replacing the defining statement will do
549 the wrong thing. */
550 fail = false;
551 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
553 gimple stmt2 = USE_STMT (use_p);
554 if (is_gimple_debug (stmt2))
555 continue;
556 if (!is_gimple_assign (stmt2)
557 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
558 || gimple_assign_rhs1 (stmt2) == arg1
559 || gimple_assign_rhs2 (stmt2) != arg1)
561 fail = true;
562 break;
565 if (fail)
566 continue;
568 gimple_replace_lhs (stmt1, arg1);
569 gimple_call_set_fndecl (stmt1, fndecl);
570 update_stmt (stmt1);
572 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
574 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
575 fold_stmt_inplace (stmt);
576 update_stmt (stmt);
583 free_dominance_info (CDI_DOMINATORS);
584 free_dominance_info (CDI_POST_DOMINATORS);
585 free_alloc_pool (occ_pool);
586 return 0;
589 struct gimple_opt_pass pass_cse_reciprocals =
592 GIMPLE_PASS,
593 "recip", /* name */
594 gate_cse_reciprocals, /* gate */
595 execute_cse_reciprocals, /* execute */
596 NULL, /* sub */
597 NULL, /* next */
598 0, /* static_pass_number */
599 TV_NONE, /* tv_id */
600 PROP_ssa, /* properties_required */
601 0, /* properties_provided */
602 0, /* properties_destroyed */
603 0, /* todo_flags_start */
604 TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
605 | TODO_verify_stmts /* todo_flags_finish */
609 /* Records an occurrence at statement USE_STMT in the vector of trees
610 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
611 is not yet initialized. Returns true if the occurrence was pushed on
612 the vector. Adjusts *TOP_BB to be the basic block dominating all
613 statements in the vector. */
615 static bool
616 maybe_record_sincos (VEC(gimple, heap) **stmts,
617 basic_block *top_bb, gimple use_stmt)
619 basic_block use_bb = gimple_bb (use_stmt);
620 if (*top_bb
621 && (*top_bb == use_bb
622 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
623 VEC_safe_push (gimple, heap, *stmts, use_stmt);
624 else if (!*top_bb
625 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
627 VEC_safe_push (gimple, heap, *stmts, use_stmt);
628 *top_bb = use_bb;
630 else
631 return false;
633 return true;
636 /* Look for sin, cos and cexpi calls with the same argument NAME and
637 create a single call to cexpi CSEing the result in this case.
638 We first walk over all immediate uses of the argument collecting
639 statements that we can CSE in a vector and in a second pass replace
640 the statement rhs with a REALPART or IMAGPART expression on the
641 result of the cexpi call we insert before the use statement that
642 dominates all other candidates. */
644 static bool
645 execute_cse_sincos_1 (tree name)
647 gimple_stmt_iterator gsi;
648 imm_use_iterator use_iter;
649 tree fndecl, res, type;
650 gimple def_stmt, use_stmt, stmt;
651 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
652 VEC(gimple, heap) *stmts = NULL;
653 basic_block top_bb = NULL;
654 int i;
655 bool cfg_changed = false;
657 type = TREE_TYPE (name);
658 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
660 if (gimple_code (use_stmt) != GIMPLE_CALL
661 || !gimple_call_lhs (use_stmt)
662 || !(fndecl = gimple_call_fndecl (use_stmt))
663 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
664 continue;
666 switch (DECL_FUNCTION_CODE (fndecl))
668 CASE_FLT_FN (BUILT_IN_COS):
669 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
670 break;
672 CASE_FLT_FN (BUILT_IN_SIN):
673 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
674 break;
676 CASE_FLT_FN (BUILT_IN_CEXPI):
677 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
678 break;
680 default:;
684 if (seen_cos + seen_sin + seen_cexpi <= 1)
686 VEC_free(gimple, heap, stmts);
687 return false;
690 /* Simply insert cexpi at the beginning of top_bb but not earlier than
691 the name def statement. */
692 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
693 if (!fndecl)
694 return false;
695 res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
696 stmt = gimple_build_call (fndecl, 1, name);
697 res = make_ssa_name (res, stmt);
698 gimple_call_set_lhs (stmt, res);
700 def_stmt = SSA_NAME_DEF_STMT (name);
701 if (!SSA_NAME_IS_DEFAULT_DEF (name)
702 && gimple_code (def_stmt) != GIMPLE_PHI
703 && gimple_bb (def_stmt) == top_bb)
705 gsi = gsi_for_stmt (def_stmt);
706 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
708 else
710 gsi = gsi_after_labels (top_bb);
711 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
713 update_stmt (stmt);
715 /* And adjust the recorded old call sites. */
716 for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
718 tree rhs = NULL;
719 fndecl = gimple_call_fndecl (use_stmt);
721 switch (DECL_FUNCTION_CODE (fndecl))
723 CASE_FLT_FN (BUILT_IN_COS):
724 rhs = fold_build1 (REALPART_EXPR, type, res);
725 break;
727 CASE_FLT_FN (BUILT_IN_SIN):
728 rhs = fold_build1 (IMAGPART_EXPR, type, res);
729 break;
731 CASE_FLT_FN (BUILT_IN_CEXPI):
732 rhs = res;
733 break;
735 default:;
736 gcc_unreachable ();
739 /* Replace call with a copy. */
740 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
742 gsi = gsi_for_stmt (use_stmt);
743 gsi_replace (&gsi, stmt, true);
744 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
745 cfg_changed = true;
748 VEC_free(gimple, heap, stmts);
750 return cfg_changed;
753 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
754 on the SSA_NAME argument of each of them. */
756 static unsigned int
757 execute_cse_sincos (void)
759 basic_block bb;
760 bool cfg_changed = false;
762 calculate_dominance_info (CDI_DOMINATORS);
764 FOR_EACH_BB (bb)
766 gimple_stmt_iterator gsi;
768 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
770 gimple stmt = gsi_stmt (gsi);
771 tree fndecl;
773 if (is_gimple_call (stmt)
774 && gimple_call_lhs (stmt)
775 && (fndecl = gimple_call_fndecl (stmt))
776 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
778 tree arg;
780 switch (DECL_FUNCTION_CODE (fndecl))
782 CASE_FLT_FN (BUILT_IN_COS):
783 CASE_FLT_FN (BUILT_IN_SIN):
784 CASE_FLT_FN (BUILT_IN_CEXPI):
785 arg = gimple_call_arg (stmt, 0);
786 if (TREE_CODE (arg) == SSA_NAME)
787 cfg_changed |= execute_cse_sincos_1 (arg);
788 break;
790 default:;
796 free_dominance_info (CDI_DOMINATORS);
797 return cfg_changed ? TODO_cleanup_cfg : 0;
800 static bool
801 gate_cse_sincos (void)
803 /* Make sure we have either sincos or cexp. */
804 return (TARGET_HAS_SINCOS
805 || TARGET_C99_FUNCTIONS)
806 && optimize;
809 struct gimple_opt_pass pass_cse_sincos =
812 GIMPLE_PASS,
813 "sincos", /* name */
814 gate_cse_sincos, /* gate */
815 execute_cse_sincos, /* execute */
816 NULL, /* sub */
817 NULL, /* next */
818 0, /* static_pass_number */
819 TV_NONE, /* tv_id */
820 PROP_ssa, /* properties_required */
821 0, /* properties_provided */
822 0, /* properties_destroyed */
823 0, /* todo_flags_start */
824 TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
825 | TODO_verify_stmts /* todo_flags_finish */
829 /* A symbolic number is used to detect byte permutation and selection
830 patterns. Therefore the field N contains an artificial number
831 consisting of byte size markers:
833 0 - byte has the value 0
834 1..size - byte contains the content of the byte
835 number indexed with that value minus one */
837 struct symbolic_number {
838 unsigned HOST_WIDEST_INT n;
839 int size;
842 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
843 number N. Return false if the requested operation is not permitted
844 on a symbolic number. */
846 static inline bool
847 do_shift_rotate (enum tree_code code,
848 struct symbolic_number *n,
849 int count)
851 if (count % 8 != 0)
852 return false;
854 /* Zero out the extra bits of N in order to avoid them being shifted
855 into the significant bits. */
856 if (n->size < (int)sizeof (HOST_WIDEST_INT))
857 n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
859 switch (code)
861 case LSHIFT_EXPR:
862 n->n <<= count;
863 break;
864 case RSHIFT_EXPR:
865 n->n >>= count;
866 break;
867 case LROTATE_EXPR:
868 n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
869 break;
870 case RROTATE_EXPR:
871 n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
872 break;
873 default:
874 return false;
876 return true;
879 /* Perform sanity checking for the symbolic number N and the gimple
880 statement STMT. */
882 static inline bool
883 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
885 tree lhs_type;
887 lhs_type = gimple_expr_type (stmt);
889 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
890 return false;
892 if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
893 return false;
895 return true;
898 /* find_bswap_1 invokes itself recursively with N and tries to perform
899 the operation given by the rhs of STMT on the result. If the
900 operation could successfully be executed the function returns the
901 tree expression of the source operand and NULL otherwise. */
903 static tree
904 find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
906 enum tree_code code;
907 tree rhs1, rhs2 = NULL;
908 gimple rhs1_stmt, rhs2_stmt;
909 tree source_expr1;
910 enum gimple_rhs_class rhs_class;
912 if (!limit || !is_gimple_assign (stmt))
913 return NULL_TREE;
915 rhs1 = gimple_assign_rhs1 (stmt);
917 if (TREE_CODE (rhs1) != SSA_NAME)
918 return NULL_TREE;
920 code = gimple_assign_rhs_code (stmt);
921 rhs_class = gimple_assign_rhs_class (stmt);
922 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
924 if (rhs_class == GIMPLE_BINARY_RHS)
925 rhs2 = gimple_assign_rhs2 (stmt);
927 /* Handle unary rhs and binary rhs with integer constants as second
928 operand. */
930 if (rhs_class == GIMPLE_UNARY_RHS
931 || (rhs_class == GIMPLE_BINARY_RHS
932 && TREE_CODE (rhs2) == INTEGER_CST))
934 if (code != BIT_AND_EXPR
935 && code != LSHIFT_EXPR
936 && code != RSHIFT_EXPR
937 && code != LROTATE_EXPR
938 && code != RROTATE_EXPR
939 && code != NOP_EXPR
940 && code != CONVERT_EXPR)
941 return NULL_TREE;
943 source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
945 /* If find_bswap_1 returned NULL STMT is a leaf node and we have
946 to initialize the symbolic number. */
947 if (!source_expr1)
949 /* Set up the symbolic number N by setting each byte to a
950 value between 1 and the byte size of rhs1. The highest
951 order byte is set to n->size and the lowest order
952 byte to 1. */
953 n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
954 if (n->size % BITS_PER_UNIT != 0)
955 return NULL_TREE;
956 n->size /= BITS_PER_UNIT;
957 n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
958 (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
960 if (n->size < (int)sizeof (HOST_WIDEST_INT))
961 n->n &= ((unsigned HOST_WIDEST_INT)1 <<
962 (n->size * BITS_PER_UNIT)) - 1;
964 source_expr1 = rhs1;
967 switch (code)
969 case BIT_AND_EXPR:
971 int i;
972 unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
973 unsigned HOST_WIDEST_INT tmp = val;
975 /* Only constants masking full bytes are allowed. */
976 for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
977 if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
978 return NULL_TREE;
980 n->n &= val;
982 break;
983 case LSHIFT_EXPR:
984 case RSHIFT_EXPR:
985 case LROTATE_EXPR:
986 case RROTATE_EXPR:
987 if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
988 return NULL_TREE;
989 break;
990 CASE_CONVERT:
992 int type_size;
994 type_size = TYPE_PRECISION (gimple_expr_type (stmt));
995 if (type_size % BITS_PER_UNIT != 0)
996 return NULL_TREE;
998 if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
1000 /* If STMT casts to a smaller type mask out the bits not
1001 belonging to the target type. */
1002 n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1004 n->size = type_size / BITS_PER_UNIT;
1006 break;
1007 default:
1008 return NULL_TREE;
1010 return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1013 /* Handle binary rhs. */
1015 if (rhs_class == GIMPLE_BINARY_RHS)
1017 struct symbolic_number n1, n2;
1018 tree source_expr2;
1020 if (code != BIT_IOR_EXPR)
1021 return NULL_TREE;
1023 if (TREE_CODE (rhs2) != SSA_NAME)
1024 return NULL_TREE;
1026 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1028 switch (code)
1030 case BIT_IOR_EXPR:
1031 source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1033 if (!source_expr1)
1034 return NULL_TREE;
1036 source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1038 if (source_expr1 != source_expr2
1039 || n1.size != n2.size)
1040 return NULL_TREE;
1042 n->size = n1.size;
1043 n->n = n1.n | n2.n;
1045 if (!verify_symbolic_number_p (n, stmt))
1046 return NULL_TREE;
1048 break;
1049 default:
1050 return NULL_TREE;
1052 return source_expr1;
1054 return NULL_TREE;
1057 /* Check if STMT completes a bswap implementation consisting of ORs,
1058 SHIFTs and ANDs. Return the source tree expression on which the
1059 byte swap is performed and NULL if no bswap was found. */
1061 static tree
1062 find_bswap (gimple stmt)
1064 /* The number which the find_bswap result should match in order to
1065 have a full byte swap. The number is shifted to the left according
1066 to the size of the symbolic number before using it. */
1067 unsigned HOST_WIDEST_INT cmp =
1068 sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1069 (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1071 struct symbolic_number n;
1072 tree source_expr;
1074 /* The last parameter determines the depth search limit. It usually
1075 correlates directly to the number of bytes to be touched. We
1076 increase that number by one here in order to also cover signed ->
1077 unsigned conversions of the src operand as can be seen in
1078 libgcc. */
1079 source_expr = find_bswap_1 (stmt, &n,
1080 TREE_INT_CST_LOW (
1081 TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1);
1083 if (!source_expr)
1084 return NULL_TREE;
1086 /* Zero out the extra bits of N and CMP. */
1087 if (n.size < (int)sizeof (HOST_WIDEST_INT))
1089 unsigned HOST_WIDEST_INT mask =
1090 ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1092 n.n &= mask;
1093 cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
1096 /* A complete byte swap should make the symbolic number to start
1097 with the largest digit in the highest order byte. */
1098 if (cmp != n.n)
1099 return NULL_TREE;
1101 return source_expr;
1104 /* Find manual byte swap implementations and turn them into a bswap
1105 builtin invokation. */
1107 static unsigned int
1108 execute_optimize_bswap (void)
1110 basic_block bb;
1111 bool bswap32_p, bswap64_p;
1112 bool changed = false;
1113 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1115 if (BITS_PER_UNIT != 8)
1116 return 0;
1118 if (sizeof (HOST_WIDEST_INT) < 8)
1119 return 0;
1121 bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
1122 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1123 bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
1124 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1125 || (bswap32_p && word_mode == SImode)));
1127 if (!bswap32_p && !bswap64_p)
1128 return 0;
1130 /* Determine the argument type of the builtins. The code later on
1131 assumes that the return and argument type are the same. */
1132 if (bswap32_p)
1134 tree fndecl = built_in_decls[BUILT_IN_BSWAP32];
1135 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1138 if (bswap64_p)
1140 tree fndecl = built_in_decls[BUILT_IN_BSWAP64];
1141 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1144 FOR_EACH_BB (bb)
1146 gimple_stmt_iterator gsi;
1148 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1150 gimple stmt = gsi_stmt (gsi);
1151 tree bswap_src, bswap_type;
1152 tree bswap_tmp;
1153 tree fndecl = NULL_TREE;
1154 int type_size;
1155 gimple call;
1157 if (!is_gimple_assign (stmt)
1158 || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1159 continue;
1161 type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1163 switch (type_size)
1165 case 32:
1166 if (bswap32_p)
1168 fndecl = built_in_decls[BUILT_IN_BSWAP32];
1169 bswap_type = bswap32_type;
1171 break;
1172 case 64:
1173 if (bswap64_p)
1175 fndecl = built_in_decls[BUILT_IN_BSWAP64];
1176 bswap_type = bswap64_type;
1178 break;
1179 default:
1180 continue;
1183 if (!fndecl)
1184 continue;
1186 bswap_src = find_bswap (stmt);
1188 if (!bswap_src)
1189 continue;
1191 changed = true;
1193 bswap_tmp = bswap_src;
1195 /* Convert the src expression if necessary. */
1196 if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1198 gimple convert_stmt;
1200 bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
1201 add_referenced_var (bswap_tmp);
1202 bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1204 convert_stmt = gimple_build_assign_with_ops (
1205 CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
1206 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1209 call = gimple_build_call (fndecl, 1, bswap_tmp);
1211 bswap_tmp = gimple_assign_lhs (stmt);
1213 /* Convert the result if necessary. */
1214 if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1216 gimple convert_stmt;
1218 bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
1219 add_referenced_var (bswap_tmp);
1220 bswap_tmp = make_ssa_name (bswap_tmp, NULL);
1221 convert_stmt = gimple_build_assign_with_ops (
1222 CONVERT_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1223 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1226 gimple_call_set_lhs (call, bswap_tmp);
1228 if (dump_file)
1230 fprintf (dump_file, "%d bit bswap implementation found at: ",
1231 (int)type_size);
1232 print_gimple_stmt (dump_file, stmt, 0, 0);
1235 gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1236 gsi_remove (&gsi, true);
1240 return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
1241 | TODO_verify_stmts : 0);
1244 static bool
1245 gate_optimize_bswap (void)
1247 return flag_expensive_optimizations && optimize;
1250 struct gimple_opt_pass pass_optimize_bswap =
1253 GIMPLE_PASS,
1254 "bswap", /* name */
1255 gate_optimize_bswap, /* gate */
1256 execute_optimize_bswap, /* execute */
1257 NULL, /* sub */
1258 NULL, /* next */
1259 0, /* static_pass_number */
1260 TV_NONE, /* tv_id */
1261 PROP_ssa, /* properties_required */
1262 0, /* properties_provided */
1263 0, /* properties_destroyed */
1264 0, /* todo_flags_start */
1265 0 /* todo_flags_finish */
1269 /* Return true if RHS is a suitable operand for a widening multiplication.
1270 There are two cases:
1272 - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT
1273 if so, and store its type in *TYPE_OUT.
1275 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1276 but leave *TYPE_OUT untouched. */
1278 static bool
1279 is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
1281 gimple stmt;
1282 tree type, type1, rhs1;
1283 enum tree_code rhs_code;
1285 if (TREE_CODE (rhs) == SSA_NAME)
1287 type = TREE_TYPE (rhs);
1288 stmt = SSA_NAME_DEF_STMT (rhs);
1289 if (!is_gimple_assign (stmt))
1290 return false;
1292 rhs_code = gimple_assign_rhs_code (stmt);
1293 if (TREE_CODE (type) == INTEGER_TYPE
1294 ? !CONVERT_EXPR_CODE_P (rhs_code)
1295 : rhs_code != FIXED_CONVERT_EXPR)
1296 return false;
1298 rhs1 = gimple_assign_rhs1 (stmt);
1299 type1 = TREE_TYPE (rhs1);
1300 if (TREE_CODE (type1) != TREE_CODE (type)
1301 || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
1302 return false;
1304 *new_rhs_out = rhs1;
1305 *type_out = type1;
1306 return true;
1309 if (TREE_CODE (rhs) == INTEGER_CST)
1311 *new_rhs_out = rhs;
1312 *type_out = NULL;
1313 return true;
1316 return false;
1319 /* Return true if STMT performs a widening multiplication. If so,
1320 store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT
1321 respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting
1322 those operands to types *TYPE1_OUT and *TYPE2_OUT would give the
1323 operands of the multiplication. */
1325 static bool
1326 is_widening_mult_p (gimple stmt,
1327 tree *type1_out, tree *rhs1_out,
1328 tree *type2_out, tree *rhs2_out)
1330 tree type;
1332 type = TREE_TYPE (gimple_assign_lhs (stmt));
1333 if (TREE_CODE (type) != INTEGER_TYPE
1334 && TREE_CODE (type) != FIXED_POINT_TYPE)
1335 return false;
1337 if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out))
1338 return false;
1340 if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out))
1341 return false;
1343 if (*type1_out == NULL)
1345 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
1346 return false;
1347 *type1_out = *type2_out;
1350 if (*type2_out == NULL)
1352 if (!int_fits_type_p (*rhs2_out, *type1_out))
1353 return false;
1354 *type2_out = *type1_out;
1357 return true;
1360 /* Process a single gimple statement STMT, which has a MULT_EXPR as
1361 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
1362 value is true iff we converted the statement. */
1364 static bool
1365 convert_mult_to_widen (gimple stmt)
1367 tree lhs, rhs1, rhs2, type, type1, type2;
1368 enum insn_code handler;
1370 lhs = gimple_assign_lhs (stmt);
1371 type = TREE_TYPE (lhs);
1372 if (TREE_CODE (type) != INTEGER_TYPE)
1373 return false;
1375 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
1376 return false;
1378 if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2))
1379 handler = optab_handler (umul_widen_optab, TYPE_MODE (type));
1380 else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2))
1381 handler = optab_handler (smul_widen_optab, TYPE_MODE (type));
1382 else
1383 handler = optab_handler (usmul_widen_optab, TYPE_MODE (type));
1385 if (handler == CODE_FOR_nothing)
1386 return false;
1388 gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
1389 gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
1390 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
1391 update_stmt (stmt);
1392 return true;
1395 /* Process a single gimple statement STMT, which is found at the
1396 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
1397 rhs (given by CODE), and try to convert it into a
1398 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
1399 is true iff we converted the statement. */
1401 static bool
1402 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
1403 enum tree_code code)
1405 gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
1406 tree type, type1, type2;
1407 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
1408 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
1409 optab this_optab;
1410 enum tree_code wmult_code;
1412 lhs = gimple_assign_lhs (stmt);
1413 type = TREE_TYPE (lhs);
1414 if (TREE_CODE (type) != INTEGER_TYPE
1415 && TREE_CODE (type) != FIXED_POINT_TYPE)
1416 return false;
1418 if (code == MINUS_EXPR)
1419 wmult_code = WIDEN_MULT_MINUS_EXPR;
1420 else
1421 wmult_code = WIDEN_MULT_PLUS_EXPR;
1423 rhs1 = gimple_assign_rhs1 (stmt);
1424 rhs2 = gimple_assign_rhs2 (stmt);
1426 if (TREE_CODE (rhs1) == SSA_NAME)
1428 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1429 if (is_gimple_assign (rhs1_stmt))
1430 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
1432 else
1433 return false;
1435 if (TREE_CODE (rhs2) == SSA_NAME)
1437 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1438 if (is_gimple_assign (rhs2_stmt))
1439 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
1441 else
1442 return false;
1444 if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
1446 if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
1447 &type2, &mult_rhs2))
1448 return false;
1449 add_rhs = rhs2;
1451 else if (rhs2_code == MULT_EXPR)
1453 if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
1454 &type2, &mult_rhs2))
1455 return false;
1456 add_rhs = rhs1;
1458 else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
1460 mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
1461 mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
1462 type1 = TREE_TYPE (mult_rhs1);
1463 type2 = TREE_TYPE (mult_rhs2);
1464 add_rhs = rhs2;
1466 else if (rhs2_code == WIDEN_MULT_EXPR)
1468 mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
1469 mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
1470 type1 = TREE_TYPE (mult_rhs1);
1471 type2 = TREE_TYPE (mult_rhs2);
1472 add_rhs = rhs1;
1474 else
1475 return false;
1477 if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
1478 return false;
1480 /* Verify that the machine can perform a widening multiply
1481 accumulate in this mode/signedness combination, otherwise
1482 this transformation is likely to pessimize code. */
1483 this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
1484 if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
1485 return false;
1487 /* ??? May need some type verification here? */
1489 gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
1490 fold_convert (type1, mult_rhs1),
1491 fold_convert (type2, mult_rhs2),
1492 add_rhs);
1493 update_stmt (gsi_stmt (*gsi));
1494 return true;
1497 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
1498 with uses in additions and subtractions to form fused multiply-add
1499 operations. Returns true if successful and MUL_STMT should be removed. */
1501 static bool
1502 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
1504 tree mul_result = gimple_get_lhs (mul_stmt);
1505 tree type = TREE_TYPE (mul_result);
1506 gimple use_stmt, neguse_stmt, fma_stmt;
1507 use_operand_p use_p;
1508 imm_use_iterator imm_iter;
1510 if (FLOAT_TYPE_P (type)
1511 && flag_fp_contract_mode == FP_CONTRACT_OFF)
1512 return false;
1514 /* We don't want to do bitfield reduction ops. */
1515 if (INTEGRAL_TYPE_P (type)
1516 && (TYPE_PRECISION (type)
1517 != GET_MODE_PRECISION (TYPE_MODE (type))))
1518 return false;
1520 /* If the target doesn't support it, don't generate it. We assume that
1521 if fma isn't available then fms, fnma or fnms are not either. */
1522 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
1523 return false;
1525 /* Make sure that the multiplication statement becomes dead after
1526 the transformation, thus that all uses are transformed to FMAs.
1527 This means we assume that an FMA operation has the same cost
1528 as an addition. */
1529 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
1531 enum tree_code use_code;
1532 tree result = mul_result;
1533 bool negate_p = false;
1535 use_stmt = USE_STMT (use_p);
1537 if (is_gimple_debug (use_stmt))
1538 continue;
1540 /* For now restrict this operations to single basic blocks. In theory
1541 we would want to support sinking the multiplication in
1542 m = a*b;
1543 if ()
1544 ma = m + c;
1545 else
1546 d = m;
1547 to form a fma in the then block and sink the multiplication to the
1548 else block. */
1549 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
1550 return false;
1552 if (!is_gimple_assign (use_stmt))
1553 return false;
1555 use_code = gimple_assign_rhs_code (use_stmt);
1557 /* A negate on the multiplication leads to FNMA. */
1558 if (use_code == NEGATE_EXPR)
1560 result = gimple_assign_lhs (use_stmt);
1562 /* Make sure the negate statement becomes dead with this
1563 single transformation. */
1564 if (!single_imm_use (gimple_assign_lhs (use_stmt),
1565 &use_p, &neguse_stmt))
1566 return false;
1568 /* Re-validate. */
1569 use_stmt = neguse_stmt;
1570 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
1571 return false;
1572 if (!is_gimple_assign (use_stmt))
1573 return false;
1575 use_code = gimple_assign_rhs_code (use_stmt);
1576 negate_p = true;
1579 switch (use_code)
1581 case MINUS_EXPR:
1582 if (gimple_assign_rhs2 (use_stmt) == result)
1583 negate_p = !negate_p;
1584 break;
1585 case PLUS_EXPR:
1586 break;
1587 default:
1588 /* FMA can only be formed from PLUS and MINUS. */
1589 return false;
1592 /* We can't handle a * b + a * b. */
1593 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
1594 return false;
1596 /* While it is possible to validate whether or not the exact form
1597 that we've recognized is available in the backend, the assumption
1598 is that the transformation is never a loss. For instance, suppose
1599 the target only has the plain FMA pattern available. Consider
1600 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
1601 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
1602 still have 3 operations, but in the FMA form the two NEGs are
1603 independant and could be run in parallel. */
1606 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
1608 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1609 enum tree_code use_code;
1610 tree addop, mulop1 = op1, result = mul_result;
1611 bool negate_p = false;
1613 if (is_gimple_debug (use_stmt))
1614 continue;
1616 use_code = gimple_assign_rhs_code (use_stmt);
1617 if (use_code == NEGATE_EXPR)
1619 result = gimple_assign_lhs (use_stmt);
1620 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
1621 gsi_remove (&gsi, true);
1622 release_defs (use_stmt);
1624 use_stmt = neguse_stmt;
1625 gsi = gsi_for_stmt (use_stmt);
1626 use_code = gimple_assign_rhs_code (use_stmt);
1627 negate_p = true;
1630 if (gimple_assign_rhs1 (use_stmt) == result)
1632 addop = gimple_assign_rhs2 (use_stmt);
1633 /* a * b - c -> a * b + (-c) */
1634 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
1635 addop = force_gimple_operand_gsi (&gsi,
1636 build1 (NEGATE_EXPR,
1637 type, addop),
1638 true, NULL_TREE, true,
1639 GSI_SAME_STMT);
1641 else
1643 addop = gimple_assign_rhs1 (use_stmt);
1644 /* a - b * c -> (-b) * c + a */
1645 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
1646 negate_p = !negate_p;
1649 if (negate_p)
1650 mulop1 = force_gimple_operand_gsi (&gsi,
1651 build1 (NEGATE_EXPR,
1652 type, mulop1),
1653 true, NULL_TREE, true,
1654 GSI_SAME_STMT);
1656 fma_stmt = gimple_build_assign_with_ops3 (FMA_EXPR,
1657 gimple_assign_lhs (use_stmt),
1658 mulop1, op2,
1659 addop);
1660 gsi_replace (&gsi, fma_stmt, true);
1663 return true;
1666 /* Find integer multiplications where the operands are extended from
1667 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
1668 where appropriate. */
1670 static unsigned int
1671 execute_optimize_widening_mul (void)
1673 basic_block bb;
1674 bool cfg_changed = false;
1676 FOR_EACH_BB (bb)
1678 gimple_stmt_iterator gsi;
1680 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
1682 gimple stmt = gsi_stmt (gsi);
1683 enum tree_code code;
1685 if (is_gimple_assign (stmt))
1687 code = gimple_assign_rhs_code (stmt);
1688 switch (code)
1690 case MULT_EXPR:
1691 if (!convert_mult_to_widen (stmt)
1692 && convert_mult_to_fma (stmt,
1693 gimple_assign_rhs1 (stmt),
1694 gimple_assign_rhs2 (stmt)))
1696 gsi_remove (&gsi, true);
1697 release_defs (stmt);
1698 continue;
1700 break;
1702 case PLUS_EXPR:
1703 case MINUS_EXPR:
1704 convert_plusminus_to_widen (&gsi, stmt, code);
1705 break;
1707 default:;
1710 else if (is_gimple_call (stmt)
1711 && gimple_call_lhs (stmt))
1713 tree fndecl = gimple_call_fndecl (stmt);
1714 if (fndecl
1715 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1717 switch (DECL_FUNCTION_CODE (fndecl))
1719 case BUILT_IN_POWF:
1720 case BUILT_IN_POW:
1721 case BUILT_IN_POWL:
1722 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
1723 && REAL_VALUES_EQUAL
1724 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
1725 dconst2)
1726 && convert_mult_to_fma (stmt,
1727 gimple_call_arg (stmt, 0),
1728 gimple_call_arg (stmt, 0)))
1730 unlink_stmt_vdef (stmt);
1731 gsi_remove (&gsi, true);
1732 release_defs (stmt);
1733 if (gimple_purge_dead_eh_edges (bb))
1734 cfg_changed = true;
1735 continue;
1737 break;
1739 default:;
1743 gsi_next (&gsi);
1747 return cfg_changed ? TODO_cleanup_cfg : 0;
1750 static bool
1751 gate_optimize_widening_mul (void)
1753 return flag_expensive_optimizations && optimize;
1756 struct gimple_opt_pass pass_optimize_widening_mul =
1759 GIMPLE_PASS,
1760 "widening_mul", /* name */
1761 gate_optimize_widening_mul, /* gate */
1762 execute_optimize_widening_mul, /* execute */
1763 NULL, /* sub */
1764 NULL, /* next */
1765 0, /* static_pass_number */
1766 TV_NONE, /* tv_id */
1767 PROP_ssa, /* properties_required */
1768 0, /* properties_provided */
1769 0, /* properties_destroyed */
1770 0, /* todo_flags_start */
1771 TODO_verify_ssa
1772 | TODO_verify_stmts
1773 | TODO_dump_func
1774 | TODO_update_ssa /* todo_flags_finish */