/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments found that the transformation is not always
   useful if there are only two divisions by the same divisor.  This is
   probably because modern processors can pipeline the divisions; on older,
   in-order processors it should still be effective to optimize two
   divisions by the same number.  We make this a param, and it shall be
   called N in the remainder of this comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division.)

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has fewer than N divisions *post-dominating* it.
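   For instance, with N = 2, a block that ends in a conditional whose two
   arms each contain one division by the same value gets no reciprocal:
   at run time only one of the two divisions executes, and indeed neither
   arm post-dominates the block.  (Added illustration; not part of the
   original comment.)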
   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.c, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */

#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "case-cfn-macros.h"

/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence" in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};

static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;
  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;
  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;
  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;
  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;

/* Allocate and return a new struct occurrence for basic block BB, and
   whose children list is headed by CHILDREN.  */
static struct occurrence *
occ_new (basic_block bb, struct occurrence *children)
{
  struct occurrence *occ;

  bb->aux = occ = occ_pool->allocate ();
  memset (occ, 0, sizeof (struct occurrence));

  occ->bb = bb;
  occ->children = children;
  return occ;
}

/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);

      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}

/* Register that we found a division in BB.  */

static inline void
register_division_in (basic_block bb)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = occ_new (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions++;
}

/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}

/* Return whether USE_STMT is a floating-point division by DEF.  */

static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def;
}

/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}

/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}

/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  occ_pool->remove (occ);

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}

/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}

/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals

unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    gcc_assert (!bb->aux);

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}

/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}

/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */

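/* Illustrative example (added; not part of the original sources): for

     s = sin (x);
     c = cos (x);

   the pass inserts "sincostmp = cexpi (x)" before the dominating statement
   and rewrites the two calls as

     s = IMAGPART_EXPR <sincostmp>;
     c = REALPART_EXPR <sincostmp>;

   assuming cexpi (or sincos) is available for the type of x.  */
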
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL_TREE;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}

/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

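/* Added note for illustration: the worst case for the binary method is an
   exponent with all bits set, n = 2**k - 1, which needs k - 1 squarings
   plus k - 1 extra multiplies, i.e. 2*bits(n) - 2 multiplications, while
   n = 2**k needs only k squarings.  */
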
#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits, of the window used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };

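/* Worked example (added for illustration): powi_table[5] == 3, so x**5 is
   evaluated as x**3 * x**2 with the shared x**2 cached:

     powmult_1 = x * x;                      (x**2)
     powmult_2 = powmult_1 * x;              (x**3)
     powmult_3 = powmult_2 * powmult_1;      (x**5)

   i.e. three multiplications, which is also the count that powi_lookup_cost
   computes by recursing on powi_table and memoizing already-seen exponents.  */
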
/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}

/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = (n < 0) ? -n : n;

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}

/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_real (type, dconst1);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}

/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  /* Avoid largest negative number.  */
  if (n != -n
      && ((n >= -1 && n <= 2)
	  || (optimize_function_for_speed_p (cfun)
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}

/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}

/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name for variable
   TARGET.  Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}

/* Build a gimple reference operation with the given CODE and argument
   ARG, assigning the result to a new SSA name of TYPE with NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
		      const char *name, enum tree_code code, tree arg0)
{
  tree result = make_temp_ssa_name (type, NULL, name);
  gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}

/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type);
  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}

struct pow_synth_sqrt_info
{
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};

/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */

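/* Worked example (added for illustration): c = 0.625 = 0.5**1 + 0.5**3,
   so factors[] becomes {1, 0, 1}, the deepest 0.5 power is 3, and one
   extra multiplication (sqrt (x) * sqrt (sqrt (sqrt (x)))) is recorded
   in num_mults.  */
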
static bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
	 of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}

/* Return the tree corresponding to FN being applied
   to ARG N times at GSI and LOC.
   Look up previous results from CACHE if need be.
   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */

static tree
get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
	      tree fn, location_t loc, tree *cache)
{
  tree res = cache[n];
  if (!res)
    {
      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
      res = build_and_insert_call (gsi, loc, fn, prev);
      cache[n] = res;
    }

  return res;
}

/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}

/* Print to STREAM the fractional sequence of sqrt chains
   applied to ARG, described by INFO.  Used for the dump file.  */

static void
dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
			       struct pow_synth_sqrt_info *info)
{
  for (unsigned int i = 0; i < info->deepest; i++)
    {
      bool is_set = info->factors[i];
      if (is_set)
	{
	  print_nested_fn (stream, "sqrt", arg, i + 1);
	  if (i != info->deepest - 1)
	    fprintf (stream, " * ");
	}
    }
}

/* Print to STREAM a representation of raising ARG to an integer
   power N.  Used for the dump file.  */

static void
dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
{
  if (n > 1)
    fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
  else if (n == 1)
    fprintf (stream, "%s", arg);
}

/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
   square roots.  Place at GSI and LOC.  Limit the maximum depth
   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
   result of the expanded sequence or NULL_TREE if the expansion failed.

   This routine assumes that ARG1 is a real number with a fractional part
   (the integer exponent case will have been handled earlier in
   gimple_expand_builtin_pow).

   For ARG1 > 0.0:
   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
     FRAC_PART == ARG1 - WHOLE_PART:
     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
     if it can be expressed as such, that is if FRAC_PART satisfies:
     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
     where integer a[i] is either 0 or 1.

     Example:
     POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
       --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))

   For ARG1 < 0.0 there are two approaches:
   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
	 is calculated as above.

     Example:
     POW (x, -5.625) == 1.0 / POW (x, 5.625)
       --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))

   * (B) : WHOLE_PART := - ceil (abs (ARG1))
	   FRAC_PART := ARG1 - WHOLE_PART
	   and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).

     Example:
     POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
       --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))

   For ARG1 < 0.0 we choose between (A) and (B) depending on
   how many multiplications we'd have to do.
   So, for the example in (B): POW (x, -5.875), if we were to
   follow algorithm (A) we would produce:
   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
   which contains more multiplications than approach (B).

   Hopefully, this approach will eliminate potentially expensive POW library
   calls when unsafe floating point math is enabled and allow the compiler to
   further optimise the multiplies, square roots and divides produced by this
   transformation.  */

static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  bool one_over = true;

  if (!sqrtfn)
    return NULL_TREE;

  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);

  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
    }

  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));
  cache[0] = arg0;

  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
		fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }

  tree fract_res = NULL_TREE;

  /* Calculate the fractional part of the exponent.  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;
	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  if (neg_exp)
    {
      if (one_over)
	{
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
				      fract_res, integer_res);
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}

/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && speed_p
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && real_equal (&c, &dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_equal (&c, &dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && speed_p
      && hw_sqrt_exists
      && real_equal (&c, &dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
  if (flag_unsafe_math_optimizations
      && sqrtfn
      && hw_sqrt_exists
      && (speed_p || real_equal (&c, &dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
    {
      unsigned int max_depth = speed_p
				? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
				: 2;

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
    }

  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}

/* ARG is the argument to a cabs builtin call in GSI with location info
   LOC.  Create a sequence of statements prior to GSI that calculates
   sqrt(R*R + I*I), where R and I are the real and imaginary components
   of ARG, respectively.  Return an expression holding the result.  */

static tree
gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
{
  tree real_part, imag_part, addend1, addend2, sum, result;
  tree type = TREE_TYPE (TREE_TYPE (arg));
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  machine_mode mode = TYPE_MODE (type);

  if (!flag_unsafe_math_optimizations
      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
      || !sqrtfn
      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
    return NULL_TREE;

  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    REALPART_EXPR, arg);
  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    real_part, real_part);
  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    IMAGPART_EXPR, arg);
  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    imag_part, imag_part);
  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
  result = build_and_insert_call (gsi, loc, sqrtfn, sum);

  return result;
}

/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  */

const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */
      return optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos

unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_COS:
		CASE_CFN_SIN:
		CASE_CFN_CEXPI:
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex)
		      && !targetm.libc_has_function (function_sincos))
		    break;

		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_CFN_POW:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_POWI:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      tree t0, t1, cond, one, minus_one;
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_CABS:
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}

/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of octet sized markers:

   0       - target byte has the value 0
   FF      - target byte has an unknown value (eg. due to sign extension)
   1..size - marker value is the target byte index minus one.

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with a base address (the array or structure the
   load is made from), an offset from the base address and a range which gives
   the difference between the highest and lowest accessed memory location to
   make such a symbolic number.  The range is thus different from size which
   reflects the size of the type of the current expression.  Note that for
   non-memory sources, range holds the same value as size.

   For instance, for an array char a[], (short) a[0] | (short) a[3] would have
   a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
   still have a size of 2 but this time a range of 1.  */

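/* Illustrative example (added; not in the original sources): on a
   little-endian target a plain 32-bit load is described by the symbolic
   number 0x04030201: the marker of result byte I is I + 1, naming source
   byte I.  If the statements being analyzed reshuffle the bytes so that
   the final number is 0x01020304, the computation is a byte swap (compare
   with CMPXCHG below); if it is still 0x04030201, it is a nop (CMPNOP).  */
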
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  HOST_WIDE_INT bytepos;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)

/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

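/* Worked example (added for illustration): for a 32-bit unsigned source the
   initial symbolic number is 0x04030201.  A right shift by 16 bits becomes
   a shift by two markers and yields 0x00000403: the two low result bytes
   now name source bytes 2 and 3, and the two high bytes are known zero.  */
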
static inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}

/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

static inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

static bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}

/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

static bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep, false);

  if (TREE_CODE (base_addr) == MEM_REF)
    {
      offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  offset_int boff, coff = mem_ref_offset (base_addr);
	  boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (wi::neg_p (bit_offset))
	{
	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
	  offset_int tem = bit_offset.and_not (mask);
	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
	     Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
	  bit_offset -= tem;
	  tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset,
				 wide_int_to_tree (sizetype, tem));
	  else
	    offset = wide_int_to_tree (sizetype, tem);
	}

      bitpos += bit_offset.to_shwi ();
    }

  if (bitpos % BITS_PER_UNIT)
    return false;
  if (bitsize % BITS_PER_UNIT)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bitpos / BITS_PER_UNIT;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
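
/* Illustration (hypothetical gimple, little-endian target): for a load such
   as _1 = MEM[(const unsigned char *)data_2(D) + 1B], the code above records
   data_2(D) as N->base_addr and 1 as N->bytepos, and initializes the symbolic
   number for a one-byte access, so that several such loads from the same base
   can later be combined by perform_symbolic_merge.  */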
/* Compute the symbolic number N representing the result of a bitwise OR on
   two symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

static gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
  if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
    {
      uint64_t inc;
      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      if (n1->bytepos < n2->bytepos)
	{
	  n_start = n1;
	  start_sub = n2->bytepos - n1->bytepos;
	  source_stmt = source_stmt1;
	}
      else
	{
	  n_start = n2;
	  start_sub = n1->bytepos - n2->bytepos;
	  source_stmt = source_stmt2;
	}

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = n1->bytepos + (n1->range - 1);
      end2 = n2->bytepos + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - n_start->bytepos + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
	return NULL;
    }
  n->n = n1->n | n2->n;

  return source_stmt;
}
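
/* Worked example (a sketch, little-endian target, 8-bit markers): merging the
   two operands of data[0] | (data[1] << 8), where data points to unsigned
   chars, gives two symbolic numbers with bytepos 0 and 1 and values 0x01 and
   0x0100.  The second number has its byte marks incremented by START_SUB == 1
   to become 0x0200, so the merged result covers a range of 2 bytes with
   N->n == 0x0201, which matches CMPNOP: the OR is recognized as a plain
   16-bit load in target endianness.  */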
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */

static gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = gimple_expr_type (stmt);
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	}
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
	return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
      if (!source_stmt1)
	return NULL;

      source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
      if (!source_stmt2)
	return NULL;

      if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	return NULL;

      if (!n1.vuse != !n2.vuse
	  || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
	return NULL;

      source_stmt
	= perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
      if (!source_stmt)
	return NULL;

      if (!verify_symbolic_number_p (n, stmt))
	return NULL;

      return source_stmt;
    }
  return NULL;
}
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  Finally, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

static gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  uint64_t cmpxchg = CMPXCHG;
  uint64_t cmpnop = CMPNOP;

  gimple *source_stmt;
  int limit;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by log2(n) + 1 here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!source_stmt)
    return NULL;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    {
      int rsize;
      uint64_t tmpn;

      for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
      n->range = rsize;
    }

  /* Zero out the extra bits of N and CMP*.  */
  if (n->range < (int) sizeof (int64_t))
    {
      uint64_t mask;

      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      cmpnop &= mask;
    }

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop)
    return NULL;

  n->range *= BITS_PER_UNIT;
  return source_stmt;
}
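
/* A short numeric illustration of the masking above (a sketch, assuming the
   CMPXCHG and CMPNOP constants defined earlier in this file): for a 2-byte
   result MASK is 0xffff, CMPXCHG is shifted down to 0x0102 and CMPNOP is
   masked to 0x0201, so a merged symbolic number of 0x0102 proves a 16-bit
   byte swap while 0x0201 proves a plain 16-bit read.  */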
const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given by FNDECL.  Finally, if a load is involved, SRC_STMT refers to one of
   the load statements involved to construct the rhs in CUR_STMT and N->range
   gives the size of the rhs expression for maintaining some statistics.

   Note that if the replacement involves a load, CUR_STMT is moved just after
   SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
   changing basic block.  */

static bool
bswap_replace (gimple *cur_stmt, gimple *src_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap)
{
  gimple_stmt_iterator gsi;
  tree src, tmp, tgt;
  gimple *bswap_stmt;

  gsi = gsi_for_stmt (cur_stmt);
  src = gimple_assign_rhs1 (src_stmt);
  tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *addr_stmt, *load_stmt;
      unsigned align;
      HOST_WIDE_INT load_offset = 0;

      align = get_object_alignment (src);
      /* If the new access is smaller than the original one, we need
	 to perform big endian adjustment.  */
      if (BYTES_BIG_ENDIAN)
	{
	  HOST_WIDE_INT bitsize, bitpos;
	  machine_mode mode;
	  tree offset;
	  int unsignedp, reversep, volatilep;

	  get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
			       &unsignedp, &reversep, &volatilep, false);
	  if (n->range < (unsigned HOST_WIDE_INT) bitsize)
	    {
	      load_offset = (bitsize - n->range) / BITS_PER_UNIT;
	      unsigned HOST_WIDE_INT l
		= (load_offset * BITS_PER_UNIT) & (align - 1);
	      if (l)
		align = l & -l;
	    }
	}

      if (bswap
	  && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
	  && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
	return false;

      /* Move cur_stmt just before one of the loads of the original
	 to ensure it has the same VUSE.  See PR61517 for what could
	 go wrong.  */
      gsi_move_before (&gsi, &gsi_ins);
      gsi = gsi_for_stmt (cur_stmt);

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (unshare_expr (src));
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = addr_expr;
      else
	{
	  addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
					 "load_src");
	  addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
	  gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
	    }
	  else
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	    }
	  update_stmt (cur_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
	    }
	  return true;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	  src = val_tmp;
	}
    }

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit
     values are considered as rotation of 2N bit values by N bits is generally
     not equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
    }

  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);

  return true;
}
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */
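
/* For instance, on a little-endian target a manual byte swap such as the
   following (an illustrative source fragment, not taken from any testcase):

     unsigned int
     swap32 (unsigned int x)
     {
       return ((x & 0x000000ff) << 24)
              | ((x & 0x0000ff00) << 8)
              | ((x & 0x00ff0000) >> 8)
              | ((x & 0xff000000) >> 24);
     }

   is rewritten into a single __builtin_bswap32 call, while an equivalent
   byte-by-byte big-endian read from memory becomes a bswap of a 32-bit load
   and a little-endian read becomes a plain 32-bit load.  */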
unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  if (BITS_PER_UNIT != 8)
    return 0;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *src_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Doing the gsi_prev here
	     makes sure that gsi points to the statement previous to cur_stmt
	     while still making sure that all statements are considered in
	     this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	      break;
	    default:
	      continue;
	    }

	  src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

	  if (!src_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
			     &n, bswap))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}
gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}
/* Return true if stmt is a type conversion operation that can be stripped
   when used in a widening multiply operation.  */

static bool
widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
{
  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE (result_type) == INTEGER_TYPE)
    {
      tree op_type;
      tree inner_op_type;

      if (!CONVERT_EXPR_CODE_P (rhs_code))
	return false;

      op_type = TREE_TYPE (gimple_assign_lhs (stmt));

      /* If the type of OP has the same precision as the result, then
	 we can strip this conversion.  The multiply operation will be
	 selected to create the correct extension as a by-product.  */
      if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
	return true;

      /* We can also strip a conversion if it preserves the signed-ness of
	 the operation and doesn't narrow the range.  */
      inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      /* If the inner-most type is unsigned, then we can strip any
	 intermediate widening operation.  If it's signed, then the
	 intermediate widening operation must also be signed.  */
      if ((TYPE_UNSIGNED (inner_op_type)
	   || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
	  && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
	return true;

      return false;
    }

  return rhs_code == FIXED_CONVERT_EXPR;
}
/* Return true if RHS is a suitable operand for a widening multiplication,
   assuming a target type of TYPE.
   There are two cases:

     - RHS makes some value at least twice as wide.  Store that value
       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.

     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
       but leave *TYPE_OUT untouched.  */

static bool
is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
			tree *new_rhs_out)
{
  gimple *stmt;
  tree type1, rhs1;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      stmt = SSA_NAME_DEF_STMT (rhs);
      if (is_gimple_assign (stmt))
	{
	  if (! widening_mult_conversion_strippable_p (type, stmt))
	    rhs1 = rhs;
	  else
	    {
	      rhs1 = gimple_assign_rhs1 (stmt);

	      if (TREE_CODE (rhs1) == INTEGER_CST)
		{
		  *new_rhs_out = rhs1;
		  *type_out = NULL;
		  return true;
		}
	    }
	}
      else
	rhs1 = rhs;

      type1 = TREE_TYPE (rhs1);

      if (TREE_CODE (type1) != TREE_CODE (type)
	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
	return false;

      *new_rhs_out = rhs1;
      *type_out = type1;
      return true;
    }

  if (TREE_CODE (rhs) == INTEGER_CST)
    {
      *new_rhs_out = rhs;
      *type_out = NULL;
      return true;
    }

  return false;
}
/* Return true if STMT performs a widening multiplication, assuming the
   output type is TYPE.  If so, store the unwidened types of the operands
   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
   and *TYPE2_OUT would give the operands of the multiplication.  */

static bool
is_widening_mult_p (gimple *stmt,
		    tree *type1_out, tree *rhs1_out,
		    tree *type2_out, tree *rhs2_out)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));

  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
			       rhs1_out))
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
			       rhs2_out))
    return false;

  if (*type1_out == NULL)
    {
      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
	return false;
      *type1_out = *type2_out;
    }

  if (*type2_out == NULL)
    {
      if (!int_fits_type_p (*rhs2_out, *type1_out))
	return false;
      *type2_out = *type1_out;
    }

  /* Ensure that the larger of the two operands comes first.  */
  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
    {
      std::swap (*type1_out, *type2_out);
      std::swap (*rhs1_out, *rhs2_out);
    }

  return true;
}
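
/* The kind of source this recognizes is, for instance (an illustrative
   fragment, not from any particular testcase):

     long long
     widen (int a, int b)
     {
       return (long long) a * (long long) b;
     }

   Here both operands are 32-bit values extended to 64 bits before the
   multiplication, so the MULT_EXPR can be rewritten as a WIDEN_MULT_EXPR on
   the unwidened operands when the target provides a widening multiply
   instruction.  */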
/* Process a single gimple statement STMT, which has a MULT_EXPR as
   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2, type, type1, type2;
  enum insn_code handler;
  machine_mode to_mode, from_mode, actual_mode;
  optab op;
  int actual_precision;
  location_t loc = gimple_location (stmt);
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE)
    return false;

  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
    return false;

  to_mode = TYPE_MODE (type);
  from_mode = TYPE_MODE (type1);
  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);

  if (from_unsigned1 && from_unsigned2)
    op = umul_widen_optab;
  else if (!from_unsigned1 && !from_unsigned2)
    op = smul_widen_optab;
  else
    op = usmul_widen_optab;

  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
						  0, &actual_mode);

  if (handler == CODE_FOR_nothing)
    {
      if (op != smul_widen_optab)
	{
	  /* We can use a signed multiply with unsigned types as long as
	     there is a wider mode to use, or it is the smaller of the two
	     types that is unsigned.  Note that type1 >= type2, always.  */
	  if ((TYPE_UNSIGNED (type1)
	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	      || (TYPE_UNSIGNED (type2)
		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	    {
	      from_mode = GET_MODE_WIDER_MODE (from_mode);
	      if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
		return false;
	    }

	  op = smul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode, 0,
							  &actual_mode);

	  if (handler == CODE_FOR_nothing)
	    return false;

	  from_unsigned1 = from_unsigned2 = false;
	}
      else
	return false;
    }

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (2 * actual_precision > TYPE_PRECISION (type))
    return false;
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    rhs1 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				    (actual_precision, from_unsigned1), rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    rhs2 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				    (actual_precision, from_unsigned2), rhs2);

  /* Handle constants.  */
  if (TREE_CODE (rhs1) == INTEGER_CST)
    rhs1 = fold_convert (type1, rhs1);
  if (TREE_CODE (rhs2) == INTEGER_CST)
    rhs2 = fold_convert (type2, rhs2);

  gimple_assign_set_rhs1 (stmt, rhs1);
  gimple_assign_set_rhs2 (stmt, rhs2);
  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
  update_stmt (stmt);
  widen_mul_stats.widen_mults_inserted++;
  return true;
}
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */
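
/* A typical source pattern this targets is a multiply-accumulate such as
   (an illustrative fragment, not from any particular testcase):

     long long
     macc (long long acc, int a, int b)
     {
       return acc + (long long) a * (long long) b;
     }

   where the widening multiply feeding the addition can be combined into a
   single WIDEN_MULT_PLUS_EXPR when the target has a widening
   multiply-and-accumulate instruction.  */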
static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
			    enum tree_code code)
{
  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  machine_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there are more than
     one conversions then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
	  if (is_gimple_assign (rhs1_stmt))
	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
	}
      else
	return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
	{
	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
	  if (is_gimple_assign (rhs2_stmt))
	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
	}
      else
	return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      if (!has_single_use (rhs2)
	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  to_mode = TYPE_MODE (type);
  from_mode = TYPE_MODE (type1);
  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
	return false;

      /* We can use a signed multiply with unsigned types as long as
	 there is a wider mode to use, or it is the smaller of the two
	 types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	  || (from_unsigned2
	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	{
	  from_mode = GET_MODE_WIDER_MODE (from_mode);
	  if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
	    return false;
	}

      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
					       false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
	{
	  /* Conversion is a truncate.  */
	  if (TYPE_PRECISION (to_type) < data_size)
	    return false;
	}
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
	{
	  /* Conversion is an extend.  Check it's the right sort.  */
	  if (TYPE_UNSIGNED (from_type) != is_unsigned
	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
	    return false;
	}
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
						  from_mode, 0, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
					 (actual_precision, from_unsigned1),
				       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
					 (actual_precision, from_unsigned2),
				       mult_rhs2);

  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
				  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.  */
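
/* The canonical example is a floating-point expression such as (an
   illustrative fragment, not from any particular testcase):

     double
     madd (double a, double b, double c)
     {
       return a * b + c;
     }

   With -ffp-contract=fast and a target providing an fma pattern, the
   multiplication and the addition are fused into a single FMA_EXPR, which
   usually maps to one fused multiply-add instruction.  */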
static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2)
{
  tree mul_result = gimple_get_lhs (mul_stmt);
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt, *neguse_stmt;
  gassign *fma_stmt;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  if (FLOAT_TYPE_P (type)
      && flag_fp_contract_mode == FP_CONTRACT_OFF)
    return false;

  /* We don't want to do bitfield reduction ops.  */
  if (INTEGRAL_TYPE_P (type)
      && (TYPE_PRECISION (type)
	  != GET_MODE_PRECISION (TYPE_MODE (type))))
    return false;

  /* If the target doesn't support it, don't generate it.  We assume that
     if fma isn't available then fms, fnma or fnms are not either.  */
  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* If the multiplication has zero uses, it is kept around probably because
     of -fnon-call-exceptions.  Don't optimize it away in that case,
     it is DCE job.  */
  if (has_zero_uses (mul_result))
    return false;

  /* Make sure that the multiplication statement becomes dead after
     the transformation, thus that all uses are transformed to FMAs.
     This means we assume that an FMA operation has the same cost
     as an addition.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
    {
      enum tree_code use_code;
      tree result = mul_result;
      bool negate_p = false;

      use_stmt = USE_STMT (use_p);

      if (is_gimple_debug (use_stmt))
	continue;

      /* For now restrict these operations to single basic blocks.  In theory
	 we would want to support sinking the multiplication in
	 m = a*b;
	 if ()
	   ma = m + c;
	 else
	   d = m;
	 to form a fma in the then block and sink the multiplication to the
	 else block.  */
      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	return false;

      if (!is_gimple_assign (use_stmt))
	return false;

      use_code = gimple_assign_rhs_code (use_stmt);

      /* A negate on the multiplication leads to FNMA.  */
      if (use_code == NEGATE_EXPR)
	{
	  ssa_op_iter iter;
	  use_operand_p usep;

	  result = gimple_assign_lhs (use_stmt);

	  /* Make sure the negate statement becomes dead with this
	     single transformation.  */
	  if (!single_imm_use (gimple_assign_lhs (use_stmt),
			       &use_p, &neguse_stmt))
	    return false;

	  /* Make sure the multiplication isn't also used on that stmt.  */
	  FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
	    if (USE_FROM_PTR (usep) == mul_result)
	      return false;

	  use_stmt = neguse_stmt;
	  if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
	    return false;
	  if (!is_gimple_assign (use_stmt))
	    return false;

	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      switch (use_code)
	{
	case MINUS_EXPR:
	  if (gimple_assign_rhs2 (use_stmt) == result)
	    negate_p = !negate_p;
	  break;
	case PLUS_EXPR:
	  break;
	default:
	  /* FMA can only be formed from PLUS and MINUS.  */
	  return false;
	}

      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
	 by a MULT_EXPR that we'll visit later, we might be able to
	 get a more profitable match with fnma.
	 OTOH, if we don't, a negate / fma pair has likely lower latency
	 than a mult / subtract pair.  */
      if (use_code == MINUS_EXPR && !negate_p
	  && gimple_assign_rhs1 (use_stmt) == result
	  && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
	  && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
	{
	  tree rhs2 = gimple_assign_rhs2 (use_stmt);

	  if (TREE_CODE (rhs2) == SSA_NAME)
	    {
	      gimple *stmt2 = SSA_NAME_DEF_STMT (rhs2);
	      if (has_single_use (rhs2)
		  && is_gimple_assign (stmt2)
		  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
		return false;
	    }
	}

      /* We can't handle a * b + a * b.  */
      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
	return false;

      /* While it is possible to validate whether or not the exact form
	 that we've recognized is available in the backend, the assumption
	 is that the transformation is never a loss.  For instance, suppose
	 the target only has the plain FMA pattern available.  Consider
	 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
	 is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
	 still have 3 operations, but in the FMA form the two NEGs are
	 independent and could be run in parallel.  */
    }

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      enum tree_code use_code;
      tree addop, mulop1 = op1, result = mul_result;
      bool negate_p = false;

      if (is_gimple_debug (use_stmt))
	continue;

      use_code = gimple_assign_rhs_code (use_stmt);
      if (use_code == NEGATE_EXPR)
	{
	  result = gimple_assign_lhs (use_stmt);
	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
	  gsi_remove (&gsi, true);
	  release_defs (use_stmt);

	  use_stmt = neguse_stmt;
	  gsi = gsi_for_stmt (use_stmt);
	  use_code = gimple_assign_rhs_code (use_stmt);
	  negate_p = true;
	}

      if (gimple_assign_rhs1 (use_stmt) == result)
	{
	  addop = gimple_assign_rhs2 (use_stmt);
	  /* a * b - c -> a * b + (-c)  */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    addop = force_gimple_operand_gsi (&gsi,
					      build1 (NEGATE_EXPR,
						      type, addop),
					      true, NULL_TREE, true,
					      GSI_SAME_STMT);
	}
      else
	{
	  addop = gimple_assign_rhs1 (use_stmt);
	  /* a - b * c -> (-b) * c + a */
	  if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
	    negate_p = !negate_p;
	}

      if (negate_p)
	mulop1 = force_gimple_operand_gsi (&gsi,
					   build1 (NEGATE_EXPR,
						   type, mulop1),
					   true, NULL_TREE, true,
					   GSI_SAME_STMT);

      fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
				      FMA_EXPR, mulop1, op2, addop);
      gsi_replace (&gsi, fma_stmt, true);
      widen_mul_stats.fmas_inserted++;
    }

  return true;
}
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   where appropriate.  */

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul
unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);
	  enum tree_code code;

	  if (is_gimple_assign (stmt))
	    {
	      code = gimple_assign_rhs_code (stmt);
	      switch (code)
		{
		case MULT_EXPR:
		  if (!convert_mult_to_widen (stmt, &gsi)
		      && convert_mult_to_fma (stmt,
					      gimple_assign_rhs1 (stmt),
					      gimple_assign_rhs2 (stmt)))
		    {
		      gsi_remove (&gsi, true);
		      release_defs (stmt);
		      continue;
		    }
		  break;

		case PLUS_EXPR:
		case MINUS_EXPR:
		  convert_plusminus_to_widen (&gsi, stmt, code);
		  break;

		default:;
		}
	    }
	  else if (is_gimple_call (stmt)
		   && gimple_call_lhs (stmt))
	    {
	      tree fndecl = gimple_call_fndecl (stmt);
	      if (fndecl
		  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
		{
		  switch (DECL_FUNCTION_CODE (fndecl))
		    {
		    case BUILT_IN_POWF:
		    case BUILT_IN_POW:
		    case BUILT_IN_POWL:
		      if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
			  && real_equal
			       (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				&dconst2)
			  && convert_mult_to_fma (stmt,
						  gimple_call_arg (stmt, 0),
						  gimple_call_arg (stmt, 0)))
			{
			  unlink_stmt_vdef (stmt);
			  if (gsi_remove (&gsi, true)
			      && gimple_purge_dead_eh_edges (bb))
			    cfg_changed = true;
			  release_defs (stmt);
			  continue;
			}
		      break;

		    default:;
		    }
		}
	    }
	  gsi_next (&gsi);
	}
    }

  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}