/* NOTE(extraction): the three lines below this file's license header were
   gitweb navigation residue, preserved here as provenance:
   commit subject "ada: Fix minor glitch in finish_record_type",
   path gcc/tree-ssa-math-opts.cc (official-gcc.git),
   blob 3db69ad5733cf86a197ad2f0a169f926e7b6b755.  */
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, with some experiments it was found out that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walk it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "backend.h"
91 #include "target.h"
92 #include "rtl.h"
93 #include "tree.h"
94 #include "gimple.h"
95 #include "predict.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
98 #include "ssa.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
101 #include "alias.h"
102 #include "fold-const.h"
103 #include "gimple-iterator.h"
104 #include "gimple-fold.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
115 #include "tree-eh.h"
116 #include "targhooks.h"
117 #include "domwalk.h"
118 #include "tree-ssa-math-opts.h"
119 #include "dbgcnt.h"
121 /* This structure represents one basic block that either computes a
122 division, or is a common dominator for basic block that compute a
123 division. */
124 struct occurrence {
125 /* The basic block represented by this structure. */
126 basic_block bb = basic_block();
128 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
129 inserted in BB. */
130 tree recip_def = tree();
132 /* If non-NULL, the SSA_NAME holding the definition for a squared
133 reciprocal inserted in BB. */
134 tree square_recip_def = tree();
136 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
137 was inserted in BB. */
138 gimple *recip_def_stmt = nullptr;
140 /* Pointer to a list of "struct occurrence"s for blocks dominated
141 by BB. */
142 struct occurrence *children = nullptr;
144 /* Pointer to the next "struct occurrence"s in the list of blocks
145 sharing a common dominator. */
146 struct occurrence *next = nullptr;
148 /* The number of divisions that are in BB before compute_merit. The
149 number of divisions that are in BB or post-dominate it after
150 compute_merit. */
151 int num_divisions = 0;
153 /* True if the basic block has a division, false if it is a common
154 dominator for basic blocks that do. If it is false and trapping
155 math is active, BB is not a candidate for inserting a reciprocal. */
156 bool bb_has_division = false;
158 /* Construct a struct occurrence for basic block BB, and whose
159 children list is headed by CHILDREN. */
160 occurrence (basic_block bb, struct occurrence *children)
161 : bb (bb), children (children)
163 bb->aux = this;
166 /* Destroy a struct occurrence and remove it from its basic block. */
167 ~occurrence ()
169 bb->aux = nullptr;
172 /* Allocate memory for a struct occurrence from OCC_POOL. */
173 static void* operator new (size_t);
175 /* Return memory for a struct occurrence to OCC_POOL. */
176 static void operator delete (void*, size_t);
/* Counters reported via statistics_counter_event at the end of the
   reciprocal pass.  */
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

/* Counters for the sincos mini-pass.  */
static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;

  /* Number of conversions removed.  */
  int conv_removed;
} sincos_stats;

/* Counters for the widening-multiply mini-pass.  */
static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;

  /* Number of highpart multiplication ops inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;
216 /* The instance of "struct occurrence" representing the highest
217 interesting block in the dominator tree. */
218 static struct occurrence *occ_head;
220 /* Allocation pool for getting instances of "struct occurrence". */
221 static object_allocator<occurrence> *occ_pool;
223 void* occurrence::operator new (size_t n)
225 gcc_assert (n == sizeof(occurrence));
226 return occ_pool->allocate_raw ();
229 void occurrence::operator delete (void *occ, size_t n)
231 gcc_assert (n == sizeof(occurrence));
232 occ_pool->remove_raw (occ);
235 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
236 list of "struct occurrence"s, one per basic block, having IDOM as
237 their common dominator.
239 We try to insert NEW_OCC as deep as possible in the tree, and we also
240 insert any other block that is a common dominator for BB and one
241 block already in the tree. */
243 static void
244 insert_bb (struct occurrence *new_occ, basic_block idom,
245 struct occurrence **p_head)
247 struct occurrence *occ, **p_occ;
249 for (p_occ = p_head; (occ = *p_occ) != NULL; )
251 basic_block bb = new_occ->bb, occ_bb = occ->bb;
252 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
253 if (dom == bb)
255 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
256 from its list. */
257 *p_occ = occ->next;
258 occ->next = new_occ->children;
259 new_occ->children = occ;
261 /* Try the next block (it may as well be dominated by BB). */
264 else if (dom == occ_bb)
266 /* OCC_BB dominates BB. Tail recurse to look deeper. */
267 insert_bb (new_occ, dom, &occ->children);
268 return;
271 else if (dom != idom)
273 gcc_assert (!dom->aux);
275 /* There is a dominator between IDOM and BB, add it and make
276 two children out of NEW_OCC and OCC. First, remove OCC from
277 its list. */
278 *p_occ = occ->next;
279 new_occ->next = occ;
280 occ->next = NULL;
282 /* None of the previous blocks has DOM as a dominator: if we tail
283 recursed, we would reexamine them uselessly. Just switch BB with
284 DOM, and go on looking for blocks dominated by DOM. */
285 new_occ = new occurrence (dom, new_occ);
288 else
290 /* Nothing special, go on with the next element. */
291 p_occ = &occ->next;
295 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
296 new_occ->next = *p_head;
297 *p_head = new_occ;
300 /* Register that we found a division in BB.
301 IMPORTANCE is a measure of how much weighting to give
302 that division. Use IMPORTANCE = 2 to register a single
303 division. If the division is going to be found multiple
304 times use 1 (as it is with squares). */
306 static inline void
307 register_division_in (basic_block bb, int importance)
309 struct occurrence *occ;
311 occ = (struct occurrence *) bb->aux;
312 if (!occ)
314 occ = new occurrence (bb, NULL);
315 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
318 occ->bb_has_division = true;
319 occ->num_divisions += importance;
323 /* Compute the number of divisions that postdominate each block in OCC and
324 its children. */
326 static void
327 compute_merit (struct occurrence *occ)
329 struct occurrence *occ_child;
330 basic_block dom = occ->bb;
332 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
334 basic_block bb;
335 if (occ_child->children)
336 compute_merit (occ_child);
338 if (flag_exceptions)
339 bb = single_noncomplex_succ (dom);
340 else
341 bb = dom;
343 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
344 occ->num_divisions += occ_child->num_divisions;
349 /* Return whether USE_STMT is a floating-point division by DEF. */
350 static inline bool
351 is_division_by (gimple *use_stmt, tree def)
353 return is_gimple_assign (use_stmt)
354 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
355 && gimple_assign_rhs2 (use_stmt) == def
356 /* Do not recognize x / x as valid division, as we are getting
357 confused later by replacing all immediate uses x in such
358 a stmt. */
359 && gimple_assign_rhs1 (use_stmt) != def
360 && !stmt_can_throw_internal (cfun, use_stmt);
363 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
364 static inline bool
365 is_mult_by (gimple *use_stmt, tree def, tree a)
367 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
368 && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
370 tree op0 = gimple_assign_rhs1 (use_stmt);
371 tree op1 = gimple_assign_rhs2 (use_stmt);
373 return (op0 == def && op1 == a)
374 || (op0 == a && op1 == def);
376 return 0;
379 /* Return whether USE_STMT is DEF * DEF. */
380 static inline bool
381 is_square_of (gimple *use_stmt, tree def)
383 return is_mult_by (use_stmt, def, def);
386 /* Return whether USE_STMT is a floating-point division by
387 DEF * DEF. */
388 static inline bool
389 is_division_by_square (gimple *use_stmt, tree def)
391 if (gimple_code (use_stmt) == GIMPLE_ASSIGN
392 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
393 && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
394 && !stmt_can_throw_internal (cfun, use_stmt))
396 tree denominator = gimple_assign_rhs2 (use_stmt);
397 if (TREE_CODE (denominator) == SSA_NAME)
398 return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
400 return 0;
403 /* Walk the subset of the dominator tree rooted at OCC, setting the
404 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
405 the given basic block. The field may be left NULL, of course,
406 if it is not possible or profitable to do the optimization.
408 DEF_BSI is an iterator pointing at the statement defining DEF.
409 If RECIP_DEF is set, a dominator already has a computation that can
410 be used.
412 If should_insert_square_recip is set, then this also inserts
413 the square of the reciprocal immediately after the definition
414 of the reciprocal. */
416 static void
417 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
418 tree def, tree recip_def, tree square_recip_def,
419 int should_insert_square_recip, int threshold)
421 tree type;
422 gassign *new_stmt, *new_square_stmt;
423 gimple_stmt_iterator gsi;
424 struct occurrence *occ_child;
426 if (!recip_def
427 && (occ->bb_has_division || !flag_trapping_math)
428 /* Divide by two as all divisions are counted twice in
429 the costing loop. */
430 && occ->num_divisions / 2 >= threshold)
432 /* Make a variable with the replacement and substitute it. */
433 type = TREE_TYPE (def);
434 recip_def = create_tmp_reg (type, "reciptmp");
435 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
436 build_one_cst (type), def);
438 if (should_insert_square_recip)
440 square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
441 new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
442 recip_def, recip_def);
445 if (occ->bb_has_division)
447 /* Case 1: insert before an existing division. */
448 gsi = gsi_after_labels (occ->bb);
449 while (!gsi_end_p (gsi)
450 && (!is_division_by (gsi_stmt (gsi), def))
451 && (!is_division_by_square (gsi_stmt (gsi), def)))
452 gsi_next (&gsi);
454 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
455 if (should_insert_square_recip)
456 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
458 else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
460 /* Case 2: insert right after the definition. Note that this will
461 never happen if the definition statement can throw, because in
462 that case the sole successor of the statement's basic block will
463 dominate all the uses as well. */
464 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
465 if (should_insert_square_recip)
466 gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
468 else
470 /* Case 3: insert in a basic block not containing defs/uses. */
471 gsi = gsi_after_labels (occ->bb);
472 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
473 if (should_insert_square_recip)
474 gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
477 reciprocal_stats.rdivs_inserted++;
479 occ->recip_def_stmt = new_stmt;
482 occ->recip_def = recip_def;
483 occ->square_recip_def = square_recip_def;
484 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
485 insert_reciprocals (def_gsi, occ_child, def, recip_def,
486 square_recip_def, should_insert_square_recip,
487 threshold);
490 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
491 Take as argument the use for (x * x). */
492 static inline void
493 replace_reciprocal_squares (use_operand_p use_p)
495 gimple *use_stmt = USE_STMT (use_p);
496 basic_block bb = gimple_bb (use_stmt);
497 struct occurrence *occ = (struct occurrence *) bb->aux;
499 if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
500 && occ->recip_def)
502 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
503 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
504 gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
505 SET_USE (use_p, occ->square_recip_def);
506 fold_stmt_inplace (&gsi);
507 update_stmt (use_stmt);
512 /* Replace the division at USE_P with a multiplication by the reciprocal, if
513 possible. */
515 static inline void
516 replace_reciprocal (use_operand_p use_p)
518 gimple *use_stmt = USE_STMT (use_p);
519 basic_block bb = gimple_bb (use_stmt);
520 struct occurrence *occ = (struct occurrence *) bb->aux;
522 if (optimize_bb_for_speed_p (bb)
523 && occ->recip_def && use_stmt != occ->recip_def_stmt)
525 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
526 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
527 SET_USE (use_p, occ->recip_def);
528 fold_stmt_inplace (&gsi);
529 update_stmt (use_stmt);
534 /* Free OCC and return one more "struct occurrence" to be freed. */
536 static struct occurrence *
537 free_bb (struct occurrence *occ)
539 struct occurrence *child, *next;
541 /* First get the two pointers hanging off OCC. */
542 next = occ->next;
543 child = occ->children;
544 delete occ;
546 /* Now ensure that we don't recurse unless it is necessary. */
547 if (!child)
548 return next;
549 else
551 while (next)
552 next = free_bb (next);
554 return child;
558 /* Transform sequences like
559 t = sqrt (a)
560 x = 1.0 / t;
561 r1 = x * x;
562 r2 = a * x;
563 into:
564 t = sqrt (a)
565 r1 = 1.0 / a;
566 r2 = t;
567 x = r1 * r2;
568 depending on the uses of x, r1, r2. This removes one multiplication and
569 allows the sqrt and division operations to execute in parallel.
570 DEF_GSI is the gsi of the initial division by sqrt that defines
571 DEF (x in the example above). */
573 static void
574 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
576 gimple *use_stmt;
577 imm_use_iterator use_iter;
578 gimple *stmt = gsi_stmt (*def_gsi);
579 tree x = def;
580 tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
581 tree div_rhs1 = gimple_assign_rhs1 (stmt);
583 if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
584 || TREE_CODE (div_rhs1) != REAL_CST
585 || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
586 return;
588 gcall *sqrt_stmt
589 = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
591 if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
592 return;
594 switch (gimple_call_combined_fn (sqrt_stmt))
596 CASE_CFN_SQRT:
597 CASE_CFN_SQRT_FN:
598 break;
600 default:
601 return;
603 tree a = gimple_call_arg (sqrt_stmt, 0);
605 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
607 /* Statements that use x in x * x. */
608 auto_vec<gimple *> sqr_stmts;
609 /* Statements that use x in a * x. */
610 auto_vec<gimple *> mult_stmts;
611 bool has_other_use = false;
612 bool mult_on_main_path = false;
614 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
616 if (is_gimple_debug (use_stmt))
617 continue;
618 if (is_square_of (use_stmt, x))
620 sqr_stmts.safe_push (use_stmt);
621 if (gimple_bb (use_stmt) == gimple_bb (stmt))
622 mult_on_main_path = true;
624 else if (is_mult_by (use_stmt, x, a))
626 mult_stmts.safe_push (use_stmt);
627 if (gimple_bb (use_stmt) == gimple_bb (stmt))
628 mult_on_main_path = true;
630 else
631 has_other_use = true;
634 /* In the x * x and a * x cases we just rewire stmt operands or
635 remove multiplications. In the has_other_use case we introduce
636 a multiplication so make sure we don't introduce a multiplication
637 on a path where there was none. */
638 if (has_other_use && !mult_on_main_path)
639 return;
641 if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
642 return;
644 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
645 to be able to compose it from the sqr and mult cases. */
646 if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
647 return;
649 if (dump_file)
651 fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
652 print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
653 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
654 fprintf (dump_file, "\n");
657 bool delete_div = !has_other_use;
658 tree sqr_ssa_name = NULL_TREE;
659 if (!sqr_stmts.is_empty ())
661 /* r1 = x * x. Transform the original
662 x = 1.0 / t
663 into
664 tmp1 = 1.0 / a
665 r1 = tmp1. */
667 sqr_ssa_name
668 = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
670 if (dump_file)
672 fprintf (dump_file, "Replacing original division\n");
673 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
674 fprintf (dump_file, "with new division\n");
676 stmt
677 = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
678 gimple_assign_rhs1 (stmt), a);
679 gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
680 gsi_remove (def_gsi, true);
681 *def_gsi = gsi_for_stmt (stmt);
682 fold_stmt_inplace (def_gsi);
683 update_stmt (stmt);
685 if (dump_file)
686 print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
688 delete_div = false;
689 gimple *sqr_stmt;
690 unsigned int i;
691 FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
693 gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
694 gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
695 update_stmt (sqr_stmt);
698 if (!mult_stmts.is_empty ())
700 /* r2 = a * x. Transform this into:
701 r2 = t (The original sqrt (a)). */
702 unsigned int i;
703 gimple *mult_stmt = NULL;
704 FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
706 gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
708 if (dump_file)
710 fprintf (dump_file, "Replacing squaring multiplication\n");
711 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
712 fprintf (dump_file, "with assignment\n");
714 gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
715 fold_stmt_inplace (&gsi2);
716 update_stmt (mult_stmt);
717 if (dump_file)
718 print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
722 if (has_other_use)
724 /* Using the two temporaries tmp1, tmp2 from above
725 the original x is now:
726 x = tmp1 * tmp2. */
727 gcc_assert (orig_sqrt_ssa_name);
728 gcc_assert (sqr_ssa_name);
730 gimple *new_stmt
731 = gimple_build_assign (x, MULT_EXPR,
732 orig_sqrt_ssa_name, sqr_ssa_name);
733 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
734 update_stmt (stmt);
736 else if (delete_div)
738 /* Remove the original division. */
739 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
740 gsi_remove (&gsi2, true);
741 release_defs (stmt);
743 else
744 release_ssa_name (x);
747 /* Look for floating-point divisions among DEF's uses, and try to
748 replace them by multiplications with the reciprocal. Add
749 as many statements computing the reciprocal as needed.
751 DEF must be a GIMPLE register of a floating-point type. */
753 static void
754 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
756 use_operand_p use_p, square_use_p;
757 imm_use_iterator use_iter, square_use_iter;
758 tree square_def;
759 struct occurrence *occ;
760 int count = 0;
761 int threshold;
762 int square_recip_count = 0;
763 int sqrt_recip_count = 0;
765 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
766 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
768 /* If DEF is a square (x * x), count the number of divisions by x.
769 If there are more divisions by x than by (DEF * DEF), prefer to optimize
770 the reciprocal of x instead of DEF. This improves cases like:
771 def = x * x
772 t0 = a / def
773 t1 = b / def
774 t2 = c / x
775 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
776 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
778 if (is_gimple_assign (def_stmt)
779 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
780 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
781 && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
783 tree op0 = gimple_assign_rhs1 (def_stmt);
785 FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
787 gimple *use_stmt = USE_STMT (use_p);
788 if (is_division_by (use_stmt, op0))
789 sqrt_recip_count++;
793 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
795 gimple *use_stmt = USE_STMT (use_p);
796 if (is_division_by (use_stmt, def))
798 register_division_in (gimple_bb (use_stmt), 2);
799 count++;
802 if (is_square_of (use_stmt, def))
804 square_def = gimple_assign_lhs (use_stmt);
805 FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
807 gimple *square_use_stmt = USE_STMT (square_use_p);
808 if (is_division_by (square_use_stmt, square_def))
810 /* This is executed twice for each division by a square. */
811 register_division_in (gimple_bb (square_use_stmt), 1);
812 square_recip_count++;
818 /* Square reciprocals were counted twice above. */
819 square_recip_count /= 2;
821 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
822 if (sqrt_recip_count > square_recip_count)
823 goto out;
825 /* Do the expensive part only if we can hope to optimize something. */
826 if (count + square_recip_count >= threshold && count >= 1)
828 gimple *use_stmt;
829 for (occ = occ_head; occ; occ = occ->next)
831 compute_merit (occ);
832 insert_reciprocals (def_gsi, occ, def, NULL, NULL,
833 square_recip_count, threshold);
836 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
838 if (is_division_by (use_stmt, def))
840 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
841 replace_reciprocal (use_p);
843 else if (square_recip_count > 0 && is_square_of (use_stmt, def))
845 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
847 /* Find all uses of the square that are divisions and
848 * replace them by multiplications with the inverse. */
849 imm_use_iterator square_iterator;
850 gimple *powmult_use_stmt = USE_STMT (use_p);
851 tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
853 FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
854 square_iterator, powmult_def_name)
855 FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
857 gimple *powmult_use_stmt = USE_STMT (square_use_p);
858 if (is_division_by (powmult_use_stmt, powmult_def_name))
859 replace_reciprocal_squares (square_use_p);
866 out:
867 for (occ = occ_head; occ; )
868 occ = free_bb (occ);
870 occ_head = NULL;
873 /* Return an internal function that implements the reciprocal of CALL,
874 or IFN_LAST if there is no such function that the target supports. */
876 internal_fn
877 internal_fn_reciprocal (gcall *call)
879 internal_fn ifn;
881 switch (gimple_call_combined_fn (call))
883 CASE_CFN_SQRT:
884 CASE_CFN_SQRT_FN:
885 ifn = IFN_RSQRT;
886 break;
888 default:
889 return IFN_LAST;
892 tree_pair types = direct_internal_fn_types (ifn, call);
893 if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
894 return IFN_LAST;
896 return ifn;
899 /* Go through all the floating-point SSA_NAMEs, and call
900 execute_cse_reciprocals_1 on each of them. */
901 namespace {
903 const pass_data pass_data_cse_reciprocals =
905 GIMPLE_PASS, /* type */
906 "recip", /* name */
907 OPTGROUP_NONE, /* optinfo_flags */
908 TV_TREE_RECIP, /* tv_id */
909 PROP_ssa, /* properties_required */
910 0, /* properties_provided */
911 0, /* properties_destroyed */
912 0, /* todo_flags_start */
913 TODO_update_ssa, /* todo_flags_finish */
916 class pass_cse_reciprocals : public gimple_opt_pass
918 public:
919 pass_cse_reciprocals (gcc::context *ctxt)
920 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
923 /* opt_pass methods: */
924 bool gate (function *) final override
926 return optimize && flag_reciprocal_math;
928 unsigned int execute (function *) final override;
930 }; // class pass_cse_reciprocals
932 unsigned int
933 pass_cse_reciprocals::execute (function *fun)
935 basic_block bb;
936 tree arg;
938 occ_pool = new object_allocator<occurrence> ("dominators for recip");
940 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
941 calculate_dominance_info (CDI_DOMINATORS);
942 calculate_dominance_info (CDI_POST_DOMINATORS);
944 if (flag_checking)
945 FOR_EACH_BB_FN (bb, fun)
946 gcc_assert (!bb->aux);
948 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
949 if (FLOAT_TYPE_P (TREE_TYPE (arg))
950 && is_gimple_reg (arg))
952 tree name = ssa_default_def (fun, arg);
953 if (name)
954 execute_cse_reciprocals_1 (NULL, name);
957 FOR_EACH_BB_FN (bb, fun)
959 tree def;
961 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
962 gsi_next (&gsi))
964 gphi *phi = gsi.phi ();
965 def = PHI_RESULT (phi);
966 if (! virtual_operand_p (def)
967 && FLOAT_TYPE_P (TREE_TYPE (def)))
968 execute_cse_reciprocals_1 (NULL, def);
971 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
972 gsi_next (&gsi))
974 gimple *stmt = gsi_stmt (gsi);
976 if (gimple_has_lhs (stmt)
977 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
978 && FLOAT_TYPE_P (TREE_TYPE (def))
979 && TREE_CODE (def) == SSA_NAME)
981 execute_cse_reciprocals_1 (&gsi, def);
982 stmt = gsi_stmt (gsi);
983 if (flag_unsafe_math_optimizations
984 && is_gimple_assign (stmt)
985 && gimple_assign_lhs (stmt) == def
986 && !stmt_can_throw_internal (cfun, stmt)
987 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
988 optimize_recip_sqrt (&gsi, def);
992 if (optimize_bb_for_size_p (bb))
993 continue;
995 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
996 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
997 gsi_next (&gsi))
999 gimple *stmt = gsi_stmt (gsi);
1001 if (is_gimple_assign (stmt)
1002 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1004 tree arg1 = gimple_assign_rhs2 (stmt);
1005 gimple *stmt1;
1007 if (TREE_CODE (arg1) != SSA_NAME)
1008 continue;
1010 stmt1 = SSA_NAME_DEF_STMT (arg1);
1012 if (is_gimple_call (stmt1)
1013 && gimple_call_lhs (stmt1))
1015 bool fail;
1016 imm_use_iterator ui;
1017 use_operand_p use_p;
1018 tree fndecl = NULL_TREE;
1020 gcall *call = as_a <gcall *> (stmt1);
1021 internal_fn ifn = internal_fn_reciprocal (call);
1022 if (ifn == IFN_LAST)
1024 fndecl = gimple_call_fndecl (call);
1025 if (!fndecl
1026 || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1027 continue;
1028 fndecl = targetm.builtin_reciprocal (fndecl);
1029 if (!fndecl)
1030 continue;
1033 /* Check that all uses of the SSA name are divisions,
1034 otherwise replacing the defining statement will do
1035 the wrong thing. */
1036 fail = false;
1037 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1039 gimple *stmt2 = USE_STMT (use_p);
1040 if (is_gimple_debug (stmt2))
1041 continue;
1042 if (!is_gimple_assign (stmt2)
1043 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1044 || gimple_assign_rhs1 (stmt2) == arg1
1045 || gimple_assign_rhs2 (stmt2) != arg1)
1047 fail = true;
1048 break;
1051 if (fail)
1052 continue;
1054 gimple_replace_ssa_lhs (call, arg1);
1055 if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1057 auto_vec<tree, 4> args;
1058 for (unsigned int i = 0;
1059 i < gimple_call_num_args (call); i++)
1060 args.safe_push (gimple_call_arg (call, i));
1061 gcall *stmt2;
1062 if (ifn == IFN_LAST)
1063 stmt2 = gimple_build_call_vec (fndecl, args);
1064 else
1065 stmt2 = gimple_build_call_internal_vec (ifn, args);
1066 gimple_call_set_lhs (stmt2, arg1);
1067 gimple_move_vops (stmt2, call);
1068 gimple_call_set_nothrow (stmt2,
1069 gimple_call_nothrow_p (call));
1070 gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1071 gsi_replace (&gsi2, stmt2, true);
1073 else
1075 if (ifn == IFN_LAST)
1076 gimple_call_set_fndecl (call, fndecl);
1077 else
1078 gimple_call_set_internal_fn (call, ifn);
1079 update_stmt (call);
1081 reciprocal_stats.rfuncs_inserted++;
1083 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1085 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1086 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1087 fold_stmt_inplace (&gsi);
1088 update_stmt (stmt);
1095 statistics_counter_event (fun, "reciprocal divs inserted",
1096 reciprocal_stats.rdivs_inserted);
1097 statistics_counter_event (fun, "reciprocal functions inserted",
1098 reciprocal_stats.rfuncs_inserted);
1100 free_dominance_info (CDI_DOMINATORS);
1101 free_dominance_info (CDI_POST_DOMINATORS);
1102 delete occ_pool;
1103 return 0;
1106 } // anon namespace
1108 gimple_opt_pass *
1109 make_pass_cse_reciprocals (gcc::context *ctxt)
1111 return new pass_cse_reciprocals (ctxt);
1114 /* If NAME is the result of a type conversion, look for other
1115 equivalent dominating or dominated conversions, and replace all
1116 uses with the earliest dominating name, removing the redundant
1117 conversions. Return the prevailing name. */
1119 static tree
1120 execute_cse_conv_1 (tree name, bool *cfg_changed)
1122 if (SSA_NAME_IS_DEFAULT_DEF (name)
1123 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1124 return name;
1126 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1128 if (!gimple_assign_cast_p (def_stmt))
1129 return name;
1131 tree src = gimple_assign_rhs1 (def_stmt);
1133 if (TREE_CODE (src) != SSA_NAME)
1134 return name;
1136 imm_use_iterator use_iter;
1137 gimple *use_stmt;
1139 /* Find the earliest dominating def. */
1140 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1142 if (use_stmt == def_stmt
1143 || !gimple_assign_cast_p (use_stmt))
1144 continue;
1146 tree lhs = gimple_assign_lhs (use_stmt);
1148 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1149 || (gimple_assign_rhs1 (use_stmt)
1150 != gimple_assign_rhs1 (def_stmt))
1151 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1152 continue;
1154 bool use_dominates;
1155 if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1157 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1158 while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1159 gsi_next (&gsi);
1160 use_dominates = !gsi_end_p (gsi);
1162 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1163 gimple_bb (def_stmt)))
1164 use_dominates = false;
1165 else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1166 gimple_bb (use_stmt)))
1167 use_dominates = true;
1168 else
1169 continue;
1171 if (use_dominates)
1173 std::swap (name, lhs);
1174 std::swap (def_stmt, use_stmt);
1178 /* Now go through all uses of SRC again, replacing the equivalent
1179 dominated conversions. We may replace defs that were not
1180 dominated by the then-prevailing defs when we first visited
1181 them. */
1182 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1184 if (use_stmt == def_stmt
1185 || !gimple_assign_cast_p (use_stmt))
1186 continue;
1188 tree lhs = gimple_assign_lhs (use_stmt);
1190 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1191 || (gimple_assign_rhs1 (use_stmt)
1192 != gimple_assign_rhs1 (def_stmt))
1193 || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1194 continue;
1196 basic_block use_bb = gimple_bb (use_stmt);
1197 if (gimple_bb (def_stmt) == use_bb
1198 || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1200 sincos_stats.conv_removed++;
1202 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1203 replace_uses_by (lhs, name);
1204 if (gsi_remove (&gsi, true)
1205 && gimple_purge_dead_eh_edges (use_bb))
1206 *cfg_changed = true;
1207 release_defs (use_stmt);
1211 return name;
1214 /* Records an occurrence at statement USE_STMT in the vector of trees
1215 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1216 is not yet initialized. Returns true if the occurrence was pushed on
1217 the vector. Adjusts *TOP_BB to be the basic block dominating all
1218 statements in the vector. */
1220 static bool
1221 maybe_record_sincos (vec<gimple *> *stmts,
1222 basic_block *top_bb, gimple *use_stmt)
1224 basic_block use_bb = gimple_bb (use_stmt);
1225 if (*top_bb
1226 && (*top_bb == use_bb
1227 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1228 stmts->safe_push (use_stmt);
1229 else if (!*top_bb
1230 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1232 stmts->safe_push (use_stmt);
1233 *top_bb = use_bb;
1235 else
1236 return false;
1238 return true;
1241 /* Look for sin, cos and cexpi calls with the same argument NAME and
1242 create a single call to cexpi CSEing the result in this case.
1243 We first walk over all immediate uses of the argument collecting
1244 statements that we can CSE in a vector and in a second pass replace
1245 the statement rhs with a REALPART or IMAGPART expression on the
1246 result of the cexpi call we insert before the use statement that
1247 dominates all other candidates. */
1249 static bool
1250 execute_cse_sincos_1 (tree name)
1252 gimple_stmt_iterator gsi;
1253 imm_use_iterator use_iter;
1254 tree fndecl, res, type = NULL_TREE;
1255 gimple *def_stmt, *use_stmt, *stmt;
1256 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1257 auto_vec<gimple *> stmts;
1258 basic_block top_bb = NULL;
1259 int i;
1260 bool cfg_changed = false;
1262 name = execute_cse_conv_1 (name, &cfg_changed);
1264 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1266 if (gimple_code (use_stmt) != GIMPLE_CALL
1267 || !gimple_call_lhs (use_stmt))
1268 continue;
1270 switch (gimple_call_combined_fn (use_stmt))
1272 CASE_CFN_COS:
1273 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1274 break;
1276 CASE_CFN_SIN:
1277 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1278 break;
1280 CASE_CFN_CEXPI:
1281 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1282 break;
1284 default:;
1285 continue;
1288 tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1289 if (!type)
1291 type = t;
1292 t = TREE_TYPE (name);
1294 /* This checks that NAME has the right type in the first round,
1295 and, in subsequent rounds, that the built_in type is the same
1296 type, or a compatible type. */
1297 if (type != t && !types_compatible_p (type, t))
1298 return false;
1300 if (seen_cos + seen_sin + seen_cexpi <= 1)
1301 return false;
1303 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1304 the name def statement. */
1305 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1306 if (!fndecl)
1307 return false;
1308 stmt = gimple_build_call (fndecl, 1, name);
1309 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1310 gimple_call_set_lhs (stmt, res);
1312 def_stmt = SSA_NAME_DEF_STMT (name);
1313 if (!SSA_NAME_IS_DEFAULT_DEF (name)
1314 && gimple_code (def_stmt) != GIMPLE_PHI
1315 && gimple_bb (def_stmt) == top_bb)
1317 gsi = gsi_for_stmt (def_stmt);
1318 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1320 else
1322 gsi = gsi_after_labels (top_bb);
1323 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1325 sincos_stats.inserted++;
1327 /* And adjust the recorded old call sites. */
1328 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1330 tree rhs = NULL;
1332 switch (gimple_call_combined_fn (use_stmt))
1334 CASE_CFN_COS:
1335 rhs = fold_build1 (REALPART_EXPR, type, res);
1336 break;
1338 CASE_CFN_SIN:
1339 rhs = fold_build1 (IMAGPART_EXPR, type, res);
1340 break;
1342 CASE_CFN_CEXPI:
1343 rhs = res;
1344 break;
1346 default:;
1347 gcc_unreachable ();
1350 /* Replace call with a copy. */
1351 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1353 gsi = gsi_for_stmt (use_stmt);
1354 gsi_replace (&gsi, stmt, true);
1355 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1356 cfg_changed = true;
1359 return cfg_changed;
1362 /* To evaluate powi(x,n), the floating point value x raised to the
1363 constant integer exponent n, we use a hybrid algorithm that
1364 combines the "window method" with look-up tables. For an
1365 introduction to exponentiation algorithms and "addition chains",
1366 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1367 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1368 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1369 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1371 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1372 multiplications to inline before calling the system library's pow
1373 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1374 so this default never requires calling pow, powf or powl. */
1376 #ifndef POWI_MAX_MULTS
1377 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1378 #endif
1380 /* The size of the "optimal power tree" lookup table. All
1381 exponents less than this value are simply looked up in the
1382 powi_table below. This threshold is also used to size the
1383 cache of pseudo registers that hold intermediate results. */
1384 #define POWI_TABLE_SIZE 256
1386 /* The size, in bits of the window, used in the "window method"
1387 exponentiation algorithm. This is equivalent to a radix of
1388 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1389 #define POWI_WINDOW_SIZE 3
1391 /* The following table is an efficient representation of an
1392 "optimal power tree". For each value, i, the corresponding
1393 value, j, in the table states than an optimal evaluation
1394 sequence for calculating pow(x,i) can be found by evaluating
1395 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1396 100 integers is given in Knuth's "Seminumerical algorithms". */
1398 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1400 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1401 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1402 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1403 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1404 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1405 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1406 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1407 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1408 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1409 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1410 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1411 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1412 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1413 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1414 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1415 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1416 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1417 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1418 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1419 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1420 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1421 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1422 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1423 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1424 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1425 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1426 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1427 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1428 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1429 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1430 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1431 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1435 /* Return the number of multiplications required to calculate
1436 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1437 subroutine of powi_cost. CACHE is an array indicating
1438 which exponents have already been calculated. */
1440 static int
1441 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1443 /* If we've already calculated this exponent, then this evaluation
1444 doesn't require any additional multiplications. */
1445 if (cache[n])
1446 return 0;
1448 cache[n] = true;
1449 return powi_lookup_cost (n - powi_table[n], cache)
1450 + powi_lookup_cost (powi_table[n], cache) + 1;
1453 /* Return the number of multiplications required to calculate
1454 powi(x,n) for an arbitrary x, given the exponent N. This
1455 function needs to be kept in sync with powi_as_mults below. */
1457 static int
1458 powi_cost (HOST_WIDE_INT n)
1460 bool cache[POWI_TABLE_SIZE];
1461 unsigned HOST_WIDE_INT digit;
1462 unsigned HOST_WIDE_INT val;
1463 int result;
1465 if (n == 0)
1466 return 0;
1468 /* Ignore the reciprocal when calculating the cost. */
1469 val = absu_hwi (n);
1471 /* Initialize the exponent cache. */
1472 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1473 cache[1] = true;
1475 result = 0;
1477 while (val >= POWI_TABLE_SIZE)
1479 if (val & 1)
1481 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1482 result += powi_lookup_cost (digit, cache)
1483 + POWI_WINDOW_SIZE + 1;
1484 val >>= POWI_WINDOW_SIZE;
1486 else
1488 val >>= 1;
1489 result++;
1493 return result + powi_lookup_cost (val, cache);
1496 /* Recursive subroutine of powi_as_mults. This function takes the
1497 array, CACHE, of already calculated exponents and an exponent N and
1498 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1500 static tree
1501 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1502 unsigned HOST_WIDE_INT n, tree *cache)
1504 tree op0, op1, ssa_target;
1505 unsigned HOST_WIDE_INT digit;
1506 gassign *mult_stmt;
1508 if (n < POWI_TABLE_SIZE && cache[n])
1509 return cache[n];
1511 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1513 if (n < POWI_TABLE_SIZE)
1515 cache[n] = ssa_target;
1516 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1517 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1519 else if (n & 1)
1521 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1522 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1523 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1525 else
1527 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1528 op1 = op0;
1531 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1532 gimple_set_location (mult_stmt, loc);
1533 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1535 return ssa_target;
1538 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1539 This function needs to be kept in sync with powi_cost above. */
1541 tree
1542 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1543 tree arg0, HOST_WIDE_INT n)
1545 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1546 gassign *div_stmt;
1547 tree target;
1549 if (n == 0)
1550 return build_one_cst (type);
1552 memset (cache, 0, sizeof (cache));
1553 cache[1] = arg0;
1555 result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1556 if (n >= 0)
1557 return result;
1559 /* If the original exponent was negative, reciprocate the result. */
1560 target = make_temp_ssa_name (type, NULL, "powmult");
1561 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1562 build_real (type, dconst1), result);
1563 gimple_set_location (div_stmt, loc);
1564 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1566 return target;
1569 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1570 location info LOC. If the arguments are appropriate, create an
1571 equivalent sequence of statements prior to GSI using an optimal
1572 number of multiplications, and return an expession holding the
1573 result. */
1575 static tree
1576 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1577 tree arg0, HOST_WIDE_INT n)
1579 if ((n >= -1 && n <= 2)
1580 || (optimize_function_for_speed_p (cfun)
1581 && powi_cost (n) <= POWI_MAX_MULTS))
1582 return powi_as_mults (gsi, loc, arg0, n);
1584 return NULL_TREE;
1587 /* Build a gimple call statement that calls FN with argument ARG.
1588 Set the lhs of the call statement to a fresh SSA name. Insert the
1589 statement prior to GSI's current position, and return the fresh
1590 SSA name. */
1592 static tree
1593 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1594 tree fn, tree arg)
1596 gcall *call_stmt;
1597 tree ssa_target;
1599 call_stmt = gimple_build_call (fn, 1, arg);
1600 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1601 gimple_set_lhs (call_stmt, ssa_target);
1602 gimple_set_location (call_stmt, loc);
1603 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1605 return ssa_target;
1608 /* Build a gimple binary operation with the given CODE and arguments
1609 ARG0, ARG1, assigning the result to a new SSA name for variable
1610 TARGET. Insert the statement prior to GSI's current position, and
1611 return the fresh SSA name.*/
1613 static tree
1614 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1615 const char *name, enum tree_code code,
1616 tree arg0, tree arg1)
1618 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1619 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1620 gimple_set_location (stmt, loc);
1621 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1622 return result;
1625 /* Build a gimple reference operation with the given CODE and argument
1626 ARG, assigning the result to a new SSA name of TYPE with NAME.
1627 Insert the statement prior to GSI's current position, and return
1628 the fresh SSA name. */
1630 static inline tree
1631 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1632 const char *name, enum tree_code code, tree arg0)
1634 tree result = make_temp_ssa_name (type, NULL, name);
1635 gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1636 gimple_set_location (stmt, loc);
1637 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1638 return result;
1641 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1642 prior to GSI's current position, and return the fresh SSA name. */
1644 static tree
1645 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1646 tree type, tree val)
1648 tree result = make_ssa_name (type);
1649 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1650 gimple_set_location (stmt, loc);
1651 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1652 return result;
1655 struct pow_synth_sqrt_info
1657 bool *factors;
1658 unsigned int deepest;
1659 unsigned int num_mults;
1662 /* Return true iff the real value C can be represented as a
1663 sum of powers of 0.5 up to N. That is:
1664 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1665 Record in INFO the various parameters of the synthesis algorithm such
1666 as the factors a[i], the maximum 0.5 power and the number of
1667 multiplications that will be required. */
1669 bool
1670 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1671 struct pow_synth_sqrt_info *info)
1673 REAL_VALUE_TYPE factor = dconsthalf;
1674 REAL_VALUE_TYPE remainder = c;
1676 info->deepest = 0;
1677 info->num_mults = 0;
1678 memset (info->factors, 0, n * sizeof (bool));
1680 for (unsigned i = 0; i < n; i++)
1682 REAL_VALUE_TYPE res;
1684 /* If something inexact happened bail out now. */
1685 if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1686 return false;
1688 /* We have hit zero. The number is representable as a sum
1689 of powers of 0.5. */
1690 if (real_equal (&res, &dconst0))
1692 info->factors[i] = true;
1693 info->deepest = i + 1;
1694 return true;
1696 else if (!REAL_VALUE_NEGATIVE (res))
1698 remainder = res;
1699 info->factors[i] = true;
1700 info->num_mults++;
1702 else
1703 info->factors[i] = false;
1705 real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1707 return false;
1710 /* Return the tree corresponding to FN being applied
1711 to ARG N times at GSI and LOC.
1712 Look up previous results from CACHE if need be.
1713 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1715 static tree
1716 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1717 tree fn, location_t loc, tree *cache)
1719 tree res = cache[n];
1720 if (!res)
1722 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1723 res = build_and_insert_call (gsi, loc, fn, prev);
1724 cache[n] = res;
1727 return res;
1730 /* Print to STREAM the repeated application of function FNAME to ARG
1731 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1732 "foo (foo (x))". */
1734 static void
1735 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1736 unsigned int n)
1738 if (n == 0)
1739 fprintf (stream, "%s", arg);
1740 else
1742 fprintf (stream, "%s (", fname);
1743 print_nested_fn (stream, fname, arg, n - 1);
1744 fprintf (stream, ")");
1748 /* Print to STREAM the fractional sequence of sqrt chains
1749 applied to ARG, described by INFO. Used for the dump file. */
1751 static void
1752 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1753 struct pow_synth_sqrt_info *info)
1755 for (unsigned int i = 0; i < info->deepest; i++)
1757 bool is_set = info->factors[i];
1758 if (is_set)
1760 print_nested_fn (stream, "sqrt", arg, i + 1);
1761 if (i != info->deepest - 1)
1762 fprintf (stream, " * ");
1767 /* Print to STREAM a representation of raising ARG to an integer
1768 power N. Used for the dump file. */
1770 static void
1771 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1773 if (n > 1)
1774 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1775 else if (n == 1)
1776 fprintf (stream, "%s", arg);
1779 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1780 square roots. Place at GSI and LOC. Limit the maximum depth
1781 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1782 result of the expanded sequence or NULL_TREE if the expansion failed.
1784 This routine assumes that ARG1 is a real number with a fractional part
1785 (the integer exponent case will have been handled earlier in
1786 gimple_expand_builtin_pow).
1788 For ARG1 > 0.0:
1789 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1790 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1791 FRAC_PART == ARG1 - WHOLE_PART:
1792 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1793 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1794 if it can be expressed as such, that is if FRAC_PART satisfies:
1795 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1796 where integer a[i] is either 0 or 1.
1798 Example:
1799 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1800 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1802 For ARG1 < 0.0 there are two approaches:
1803 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1804 is calculated as above.
1806 Example:
1807 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1808 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1810 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1811 FRAC_PART := ARG1 - WHOLE_PART
1812 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1813 Example:
1814 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1815 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1817 For ARG1 < 0.0 we choose between (A) and (B) depending on
1818 how many multiplications we'd have to do.
1819 So, for the example in (B): POW (x, -5.875), if we were to
1820 follow algorithm (A) we would produce:
1821 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1822 which contains more multiplications than approach (B).
1824 Hopefully, this approach will eliminate potentially expensive POW library
1825 calls when unsafe floating point math is enabled and allow the compiler to
1826 further optimise the multiplies, square roots and divides produced by this
1827 function. */
1829 static tree
1830 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1831 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1833 tree type = TREE_TYPE (arg0);
1834 machine_mode mode = TYPE_MODE (type);
1835 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1836 bool one_over = true;
1838 if (!sqrtfn)
1839 return NULL_TREE;
1841 if (TREE_CODE (arg1) != REAL_CST)
1842 return NULL_TREE;
1844 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1846 gcc_assert (max_depth > 0);
1847 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1849 struct pow_synth_sqrt_info synth_info;
1850 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1851 synth_info.deepest = 0;
1852 synth_info.num_mults = 0;
1854 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1855 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1857 /* The whole and fractional parts of exp. */
1858 REAL_VALUE_TYPE whole_part;
1859 REAL_VALUE_TYPE frac_part;
1861 real_floor (&whole_part, mode, &exp);
1862 real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1865 REAL_VALUE_TYPE ceil_whole = dconst0;
1866 REAL_VALUE_TYPE ceil_fract = dconst0;
1868 if (neg_exp)
1870 real_ceil (&ceil_whole, mode, &exp);
1871 real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1874 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1875 return NULL_TREE;
1877 /* Check whether it's more profitable to not use 1.0 / ... */
1878 if (neg_exp)
1880 struct pow_synth_sqrt_info alt_synth_info;
1881 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1882 alt_synth_info.deepest = 0;
1883 alt_synth_info.num_mults = 0;
1885 if (representable_as_half_series_p (ceil_fract, max_depth,
1886 &alt_synth_info)
1887 && alt_synth_info.deepest <= synth_info.deepest
1888 && alt_synth_info.num_mults < synth_info.num_mults)
1890 whole_part = ceil_whole;
1891 frac_part = ceil_fract;
1892 synth_info.deepest = alt_synth_info.deepest;
1893 synth_info.num_mults = alt_synth_info.num_mults;
1894 memcpy (synth_info.factors, alt_synth_info.factors,
1895 (max_depth + 1) * sizeof (bool));
1896 one_over = false;
1900 HOST_WIDE_INT n = real_to_integer (&whole_part);
1901 REAL_VALUE_TYPE cint;
1902 real_from_integer (&cint, VOIDmode, n, SIGNED);
1904 if (!real_identical (&whole_part, &cint))
1905 return NULL_TREE;
1907 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1908 return NULL_TREE;
1910 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1912 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1914 /* Calculate the integer part of the exponent. */
1915 if (n > 1)
1917 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1918 if (!integer_res)
1919 return NULL_TREE;
1922 if (dump_file)
1924 char string[64];
1926 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1927 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1929 if (neg_exp)
1931 if (one_over)
1933 fprintf (dump_file, "1.0 / (");
1934 dump_integer_part (dump_file, "x", n);
1935 if (n > 0)
1936 fprintf (dump_file, " * ");
1937 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1938 fprintf (dump_file, ")");
1940 else
1942 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1943 fprintf (dump_file, " / (");
1944 dump_integer_part (dump_file, "x", n);
1945 fprintf (dump_file, ")");
1948 else
1950 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1951 if (n > 0)
1952 fprintf (dump_file, " * ");
1953 dump_integer_part (dump_file, "x", n);
1956 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1960 tree fract_res = NULL_TREE;
1961 cache[0] = arg0;
1963 /* Calculate the fractional part of the exponent. */
1964 for (unsigned i = 0; i < synth_info.deepest; i++)
1966 if (synth_info.factors[i])
1968 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1970 if (!fract_res)
1971 fract_res = sqrt_chain;
1973 else
1974 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1975 fract_res, sqrt_chain);
1979 tree res = NULL_TREE;
1981 if (neg_exp)
1983 if (one_over)
1985 if (n > 0)
1986 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1987 fract_res, integer_res);
1988 else
1989 res = fract_res;
1991 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1992 build_real (type, dconst1), res);
1994 else
1996 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1997 fract_res, integer_res);
2000 else
2001 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2002 fract_res, integer_res);
2003 return res;
2006 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2007 with location info LOC. If possible, create an equivalent and
2008 less expensive sequence of statements prior to GSI, and return an
2009 expession holding the result. */
2011 static tree
2012 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2013 tree arg0, tree arg1)
2015 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2016 REAL_VALUE_TYPE c2, dconst3;
2017 HOST_WIDE_INT n;
2018 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2019 machine_mode mode;
2020 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2021 bool hw_sqrt_exists, c_is_int, c2_is_int;
2023 dconst1_4 = dconst1;
2024 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2026 /* If the exponent isn't a constant, there's nothing of interest
2027 to be done. */
2028 if (TREE_CODE (arg1) != REAL_CST)
2029 return NULL_TREE;
2031 /* Don't perform the operation if flag_signaling_nans is on
2032 and the operand is a signaling NaN. */
2033 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2034 && ((TREE_CODE (arg0) == REAL_CST
2035 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2036 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2037 return NULL_TREE;
2039 /* If the exponent is equivalent to an integer, expand to an optimal
2040 multiplication sequence when profitable. */
2041 c = TREE_REAL_CST (arg1);
2042 n = real_to_integer (&c);
2043 real_from_integer (&cint, VOIDmode, n, SIGNED);
2044 c_is_int = real_identical (&c, &cint);
2046 if (c_is_int
2047 && ((n >= -1 && n <= 2)
2048 || (flag_unsafe_math_optimizations
2049 && speed_p
2050 && powi_cost (n) <= POWI_MAX_MULTS)))
2051 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2053 /* Attempt various optimizations using sqrt and cbrt. */
2054 type = TREE_TYPE (arg0);
2055 mode = TYPE_MODE (type);
2056 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2058 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2059 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2060 sqrt(-0) = -0. */
2061 if (sqrtfn
2062 && real_equal (&c, &dconsthalf)
2063 && !HONOR_SIGNED_ZEROS (mode))
2064 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2066 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2068 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2069 optimizations since 1./3. is not exactly representable. If x
2070 is negative and finite, the correct value of pow(x,1./3.) is
2071 a NaN with the "invalid" exception raised, because the value
2072 of 1./3. actually has an even denominator. The correct value
2073 of cbrt(x) is a negative real value. */
2074 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2075 dconst1_3 = real_value_truncate (mode, dconst_third ());
2077 if (flag_unsafe_math_optimizations
2078 && cbrtfn
2079 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2080 && real_equal (&c, &dconst1_3))
2081 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2083 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2084 if we don't have a hardware sqrt insn. */
2085 dconst1_6 = dconst1_3;
2086 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2088 if (flag_unsafe_math_optimizations
2089 && sqrtfn
2090 && cbrtfn
2091 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2092 && speed_p
2093 && hw_sqrt_exists
2094 && real_equal (&c, &dconst1_6))
2096 /* sqrt(x) */
2097 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2099 /* cbrt(sqrt(x)) */
2100 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2104 /* Attempt to expand the POW as a product of square root chains.
2105 Expand the 0.25 case even when otpimising for size. */
2106 if (flag_unsafe_math_optimizations
2107 && sqrtfn
2108 && hw_sqrt_exists
2109 && (speed_p || real_equal (&c, &dconst1_4))
2110 && !HONOR_SIGNED_ZEROS (mode))
2112 unsigned int max_depth = speed_p
2113 ? param_max_pow_sqrt_depth
2114 : 2;
2116 tree expand_with_sqrts
2117 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2119 if (expand_with_sqrts)
2120 return expand_with_sqrts;
2123 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2124 n = real_to_integer (&c2);
2125 real_from_integer (&cint, VOIDmode, n, SIGNED);
2126 c2_is_int = real_identical (&c2, &cint);
2128 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2130 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2131 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2133 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2134 different from pow(x, 1./3.) due to rounding and behavior with
2135 negative x, we need to constrain this transformation to unsafe
2136 math and positive x or finite math. */
2137 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2138 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2139 real_round (&c2, mode, &c2);
2140 n = real_to_integer (&c2);
2141 real_from_integer (&cint, VOIDmode, n, SIGNED);
2142 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2143 real_convert (&c2, mode, &c2);
2145 if (flag_unsafe_math_optimizations
2146 && cbrtfn
2147 && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2148 && real_identical (&c2, &c)
2149 && !c2_is_int
2150 && optimize_function_for_speed_p (cfun)
2151 && powi_cost (n / 3) <= POWI_MAX_MULTS)
2153 tree powi_x_ndiv3 = NULL_TREE;
2155 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2156 possible or profitable, give up. Skip the degenerate case when
2157 abs(n) < 3, where the result is always 1. */
2158 if (absu_hwi (n) >= 3)
2160 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2161 abs_hwi (n / 3));
2162 if (!powi_x_ndiv3)
2163 return NULL_TREE;
2166 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2167 as that creates an unnecessary variable. Instead, just produce
2168 either cbrt(x) or cbrt(x) * cbrt(x). */
2169 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2171 if (absu_hwi (n) % 3 == 1)
2172 powi_cbrt_x = cbrt_x;
2173 else
2174 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2175 cbrt_x, cbrt_x);
2177 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2178 if (absu_hwi (n) < 3)
2179 result = powi_cbrt_x;
2180 else
2181 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2182 powi_x_ndiv3, powi_cbrt_x);
2184 /* If n is negative, reciprocate the result. */
2185 if (n < 0)
2186 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2187 build_real (type, dconst1), result);
2189 return result;
2192 /* No optimizations succeeded. */
2193 return NULL_TREE;
2196 /* ARG is the argument to a cabs builtin call in GSI with location info
2197 LOC. Create a sequence of statements prior to GSI that calculates
2198 sqrt(R*R + I*I), where R and I are the real and imaginary components
2199 of ARG, respectively. Return an expression holding the result. */
2201 static tree
2202 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2204 tree real_part, imag_part, addend1, addend2, sum, result;
2205 tree type = TREE_TYPE (TREE_TYPE (arg));
2206 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2207 machine_mode mode = TYPE_MODE (type);
2209 if (!flag_unsafe_math_optimizations
2210 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2211 || !sqrtfn
2212 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2213 return NULL_TREE;
2215 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2216 REALPART_EXPR, arg);
2217 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2218 real_part, real_part);
2219 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2220 IMAGPART_EXPR, arg);
2221 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2222 imag_part, imag_part);
2223 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2224 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2226 return result;
2229 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2230 on the SSA_NAME argument of each of them. */
2232 namespace {
/* Pass descriptor for the sin/cos/cexpi CSE pass; runs on SSA form and
   keeps SSA up to date after its insertions.  */

const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
/* Pass wrapper that CSEs calls to sin, cos and cexpi on the same
   argument; the real work is done in pass_cse_sincos::execute.  */

class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  /* Run at any optimization level above -O0.  */
  bool gate (function *) final override
  {
    return optimize;
  }

  unsigned int execute (function *) final override;

}; // class pass_cse_sincos
/* Walk every statement of FUN and hand the SSA_NAME argument of each
   sin, cos or cexpi call to execute_cse_sincos_1, which combines
   multiple such calls on the same operand into a single cexpi.
   Returns TODO_cleanup_cfg if any of the replacements changed the
   CFG.  */

unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  /* execute_cse_sincos_1 inserts at the dominator of the uses.  */
  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only calls whose result is actually used are interesting.  */
	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg;
	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_COS:
		CASE_CFN_SIN:
		CASE_CFN_CEXPI:
		  arg = gimple_call_arg (stmt, 0);
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex,
						  TREE_TYPE (arg))
		      && !targetm.libc_has_function (function_sincos,
						     TREE_TYPE (arg)))
		    break;

		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;
		default:
		  break;
		}
	    }
	}
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);
  statistics_counter_event (fun, "conv statements removed",
			    sincos_stats.conv_removed);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
2316 } // anon namespace
/* Factory used by the pass manager to instantiate the sincos pass.  */

gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
2324 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2325 Also expand CABS. */
2326 namespace {
/* Pass descriptor for the pow/powi/cabs expansion pass.  It provides
   PROP_gimple_opt_math: after it runs, the math expansions it performs
   are no longer expected from later passes.  */

const pass_data pass_data_expand_powcabs =
{
  GIMPLE_PASS, /* type */
  "powcabs", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_POWCABS, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
/* Pass wrapper that expands pow/powi into multiplies/square-root chains
   and cabs into sqrt (r*r + i*i); see pass_expand_powcabs::execute.  */

class pass_expand_powcabs : public gimple_opt_pass
{
public:
  pass_expand_powcabs (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_expand_powcabs, ctxt)
  {}

  /* opt_pass methods: */
  /* Run at any optimization level above -O0.  */
  bool gate (function *) final override
  {
    return optimize;
  }

  unsigned int execute (function *) final override;

}; // class pass_expand_powcabs
/* Expand calls to pow, powi and cabs in FUN into cheaper sequences
   (multiplies, square-root chains, xor of the sign bit, ...) where
   profitable.  Returns TODO_cleanup_cfg if any replacement changed
   the CFG.  */

unsigned int
pass_expand_powcabs::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  /* The expanders insert code at dominators of the uses.  */
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt))
	    {
	      tree arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (gimple_call_combined_fn (stmt))
		{
		CASE_CFN_POW:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      /* Replace the call with an assignment of the
			 expanded expression, dropping its virtual
			 operand.  */
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_POWI:
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);

		  if (real_minus_onep (arg0))
		    {
		      /* powi (-1, n) is -1 for odd n and 1 for even n;
			 compute it as n & 1 ? -1.0 : 1.0.  */
		      tree t0, t1, cond, one, minus_one;
		      /* NB: deliberately shadows the outer STMT while
			 building the two feeding assignments.  */
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      /* General case: expand into an optimal sequence of
			 multiplies for a constant exponent.  */
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_CFN_CABS:
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      /* A replaced call at the end of BB may have had EH edges.  */
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
2487 } // anon namespace
/* Factory used by the pass manager to instantiate the powcabs pass.  */

gimple_opt_pass *
make_pass_expand_powcabs (gcc::context *ctxt)
{
  return new pass_expand_powcabs (ctxt);
}
2495 /* Return true if stmt is a type conversion operation that can be stripped
2496 when used in a widening multiply operation. */
2497 static bool
2498 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2500 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2502 if (TREE_CODE (result_type) == INTEGER_TYPE)
2504 tree op_type;
2505 tree inner_op_type;
2507 if (!CONVERT_EXPR_CODE_P (rhs_code))
2508 return false;
2510 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2512 /* If the type of OP has the same precision as the result, then
2513 we can strip this conversion. The multiply operation will be
2514 selected to create the correct extension as a by-product. */
2515 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2516 return true;
2518 /* We can also strip a conversion if it preserves the signed-ness of
2519 the operation and doesn't narrow the range. */
2520 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2522 /* If the inner-most type is unsigned, then we can strip any
2523 intermediate widening operation. If it's signed, then the
2524 intermediate widening operation must also be signed. */
2525 if ((TYPE_UNSIGNED (inner_op_type)
2526 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2527 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2528 return true;
2530 return false;
2533 return rhs_code == FIXED_CONVERT_EXPR;
/* Return true if RHS is a suitable operand for a widening multiplication,
   assuming a target type of TYPE.
   There are two cases:

     - RHS makes some value at least twice as wide.  Store that value
       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.

     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
       but leave *TYPE_OUT untouched (set to NULL).  */

static bool
is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
			tree *new_rhs_out)
{
  gimple *stmt;
  tree type1, rhs1;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      /* Look through a strippable conversion to the value it widens.  */
      stmt = SSA_NAME_DEF_STMT (rhs);
      if (is_gimple_assign (stmt))
	{
	  if (! widening_mult_conversion_strippable_p (type, stmt))
	    rhs1 = rhs;
	  else
	    {
	      rhs1 = gimple_assign_rhs1 (stmt);

	      /* A converted constant is handled like a bare constant.  */
	      if (TREE_CODE (rhs1) == INTEGER_CST)
		{
		  *new_rhs_out = rhs1;
		  *type_out = NULL;
		  return true;
		}
	    }
	}
      else
	rhs1 = rhs;

      /* The (possibly unwrapped) value must be of the same kind as TYPE
	 and at most half its precision to qualify as widened.  */
      type1 = TREE_TYPE (rhs1);

      if (TREE_CODE (type1) != TREE_CODE (type)
	  || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
	return false;

      *new_rhs_out = rhs1;
      *type_out = type1;
      return true;
    }

  /* A bare integer constant: type is decided by the other operand.  */
  if (TREE_CODE (rhs) == INTEGER_CST)
    {
      *new_rhs_out = rhs;
      *type_out = NULL;
      return true;
    }

  return false;
}
/* Return true if STMT performs a widening multiplication, assuming the
   output type is TYPE.  If so, store the unwidened types of the operands
   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
   and *TYPE2_OUT would give the operands of the multiplication.  */

static bool
is_widening_mult_p (gimple *stmt,
		    tree *type1_out, tree *rhs1_out,
		    tree *type2_out, tree *rhs2_out)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));

  /* Only integer (without trapping overflow) and fixed-point results
     can use a widening multiply.  */
  if (TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_OVERFLOW_TRAPS (type))
	return false;
    }
  else if (TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
			       rhs1_out))
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
			       rhs2_out))
    return false;

  /* A NULL type means the operand was an integer constant; give it the
     other operand's type, provided the constant fits.  At least one
     operand must supply a type.  */
  if (*type1_out == NULL)
    {
      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
	return false;
      *type1_out = *type2_out;
    }

  if (*type2_out == NULL)
    {
      if (!int_fits_type_p (*rhs2_out, *type1_out))
	return false;
      *type2_out = *type1_out;
    }

  /* Ensure that the larger of the two operands comes first.  */
  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
    {
      std::swap (*type1_out, *type2_out);
      std::swap (*rhs1_out, *rhs2_out);
    }

  return true;
}
2649 /* Check to see if the CALL statement is an invocation of copysign
2650 with 1. being the first argument. */
2651 static bool
2652 is_copysign_call_with_1 (gimple *call)
2654 gcall *c = dyn_cast <gcall *> (call);
2655 if (! c)
2656 return false;
2658 enum combined_fn code = gimple_call_combined_fn (c);
2660 if (code == CFN_LAST)
2661 return false;
2663 if (builtin_fn_p (code))
2665 switch (as_builtin_fn (code))
2667 CASE_FLT_FN (BUILT_IN_COPYSIGN):
2668 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2669 return real_onep (gimple_call_arg (c, 0));
2670 default:
2671 return false;
2675 if (internal_fn_p (code))
2677 switch (as_internal_fn (code))
2679 case IFN_COPYSIGN:
2680 return real_onep (gimple_call_arg (c, 0));
2681 default:
2682 return false;
2686 return false;
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined, if the
   pattern is not a xorsign pattern or if expansion fails FALSE is
   returned, otherwise TRUE is returned.  */

static bool
convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree treeop0, treeop1, lhs, type;
  location_t loc = gimple_location (stmt);
  lhs = gimple_assign_lhs (stmt);
  treeop0 = gimple_assign_rhs1 (stmt);
  treeop1 = gimple_assign_rhs2 (stmt);
  type = TREE_TYPE (lhs);
  machine_mode mode = TYPE_MODE (type);

  /* xorsign flips only the sign bit, which is not equivalent to the
     multiply when signaling NaNs must be honored.  */
  if (HONOR_SNANS (type))
    return false;

  if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
    {
      /* Find which operand is the single-use copysign (1, y) call;
	 after this block TREEOP1 is the other multiplicand (x) and
	 CALL0 is the copysign call.  */
      gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
      if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
	{
	  call0 = SSA_NAME_DEF_STMT (treeop1);
	  if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
	    return false;

	  treeop1 = treeop0;
	}
      if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
	return false;

      /* Take y, the sign donor, from the copysign call.  */
      gcall *c = as_a<gcall*> (call0);
      treeop0 = gimple_call_arg (c, 1);

      gcall *call_stmt
	= gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
      gimple_set_lhs (call_stmt, lhs);
      gimple_set_location (call_stmt, loc);
      gsi_replace (gsi, call_stmt, true);
      return true;
    }

  return false;
}
/* Process a single gimple statement STMT, which has a MULT_EXPR as
   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2, type, type1, type2;
  enum insn_code handler;
  scalar_int_mode to_mode, from_mode, actual_mode;
  optab op;
  int actual_precision;
  location_t loc = gimple_location (stmt);
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE)
    return false;

  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
    return false;

  /* A same-mode "widening" multiply is just a multiply.  */
  to_mode = SCALAR_INT_TYPE_MODE (type);
  from_mode = SCALAR_INT_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);

  /* Pick the optab matching the signedness of the two operands.  */
  if (from_unsigned1 && from_unsigned2)
    op = umul_widen_optab;
  else if (!from_unsigned1 && !from_unsigned2)
    op = smul_widen_optab;
  else
    op = usmul_widen_optab;

  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
						  &actual_mode);

  if (handler == CODE_FOR_nothing)
    {
      if (op != smul_widen_optab)
	{
	  /* We can use a signed multiply with unsigned types as long as
	     there is a wider mode to use, or it is the smaller of the two
	     types that is unsigned.  Note that type1 >= type2, always.  */
	  if ((TYPE_UNSIGNED (type1)
	       && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	      || (TYPE_UNSIGNED (type2)
		  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	    {
	      if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
		  || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
		return false;
	    }

	  op = smul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode,
							  &actual_mode);

	  if (handler == CODE_FOR_nothing)
	    return false;

	  from_unsigned1 = from_unsigned2 = false;
	}
      else
	{
	  /* Expand can synthesize smul_widen_optab if the target
	     supports umul_widen_optab.  */
	  op = umul_widen_optab;
	  handler = find_widening_optab_handler_and_mode (op, to_mode,
							  from_mode,
							  &actual_mode);
	  if (handler == CODE_FOR_nothing)
	    return false;
	}
    }

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (2 * actual_precision > TYPE_PRECISION (type))
    return false;
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    rhs1 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				    (actual_precision, from_unsigned1), rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    rhs2 = build_and_insert_cast (gsi, loc,
				  build_nonstandard_integer_type
				    (actual_precision, from_unsigned2), rhs2);

  /* Handle constants.  */
  if (TREE_CODE (rhs1) == INTEGER_CST)
    rhs1 = fold_convert (type1, rhs1);
  if (TREE_CODE (rhs2) == INTEGER_CST)
    rhs2 = fold_convert (type2, rhs2);

  /* Rewrite STMT in place into the widening multiply.  */
  gimple_assign_set_rhs1 (stmt, rhs1);
  gimple_assign_set_rhs2 (stmt, rhs2);
  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
  update_stmt (stmt);
  widen_mul_stats.widen_mults_inserted++;
  return true;
}
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
			    enum tree_code code)
{
  gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
  gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  scalar_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  /* Fetch the defining statements (and codes) of both addends.  */
  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there are more than
     one conversions then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
	  if (is_gimple_assign (rhs1_stmt))
	    rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
	}
      else
	return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
	{
	  rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
	  if (is_gimple_assign (rhs2_stmt))
	    rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
	}
      else
	return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  Also restrict this operation
     to single basic block to avoid moving the multiply to a different block
     with a higher execution frequency.  */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
	  || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
	  || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      if (!has_single_use (rhs2)
	  || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
	  || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
				  &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  to_mode = SCALAR_TYPE_MODE (type);
  from_mode = SCALAR_TYPE_MODE (type1);
  if (to_mode == from_mode)
    return false;

  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
	return false;
      /* We can use a signed multiply with unsigned types as long as
	 there is a wider mode to use, or it is the smaller of the two
	 types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
	   && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
	  || (from_unsigned2
	      && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
	{
	  if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
	      || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
	    return false;
	}

      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
					       false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
	{
	  /* Conversion is a truncate.  */
	  if (TYPE_PRECISION (to_type) < data_size)
	    return false;
	}
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
	{
	  /* Conversion is an extend.  Check it's the right sort.  */
	  if (TYPE_UNSIGNED (from_type) != is_unsigned
	      && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
	    return false;
	}
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
						  from_mode, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
					 (actual_precision, from_unsigned1),
				       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
				       build_nonstandard_integer_type
					 (actual_precision, from_unsigned2),
				       mult_rhs2);

  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  /* Rewrite STMT in place into the multiply-and-accumulate.  */
  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
				  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}
/* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
   OP2 and which we know is used in statements that can be, together with the
   multiplication, converted to FMAs, perform the transformation.  */

static void
convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
{
  tree type = TREE_TYPE (mul_result);
  gimple *use_stmt;
  imm_use_iterator imm_iter;
  gcall *fma_stmt;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      tree addop, mulop1 = op1, result = mul_result;
      bool negate_p = false;
      gimple_seq seq = NULL;

      if (is_gimple_debug (use_stmt))
	continue;

      /* Look through a negation of the product: remember to negate one
	 of the multiplicands and continue with the negation's single
	 use instead.  */
      if (is_gimple_assign (use_stmt)
	  && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
	{
	  result = gimple_assign_lhs (use_stmt);
	  use_operand_p use_p;
	  gimple *neguse_stmt;
	  single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
	  gsi_remove (&gsi, true);
	  release_defs (use_stmt);

	  use_stmt = neguse_stmt;
	  gsi = gsi_for_stmt (use_stmt);
	  negate_p = true;
	}

      /* The caller guaranteed every use is a (possibly conditional)
	 add/subtract, so this must succeed.  */
      tree cond, else_value, ops[3], len, bias;
      tree_code code;
      if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
					      ops, &else_value,
					      &len, &bias))
	gcc_unreachable ();
      addop = ops[0] == result ? ops[1] : ops[0];

      if (code == MINUS_EXPR)
	{
	  if (ops[0] == result)
	    /* a * b - c -> a * b + (-c)  */
	    addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
	  else
	    /* a - b * c -> (-b) * c + a */
	    negate_p = !negate_p;
	}

      if (negate_p)
	mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);

      if (seq)
	gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);

      /* Build the matching FMA flavor: length-masked, conditional or
	 plain.  */
      if (len)
	fma_stmt
	  = gimple_build_call_internal (IFN_COND_LEN_FMA, 7, cond, mulop1, op2,
					addop, else_value, len, bias);
      else if (cond)
	fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
					       op2, addop, else_value);
      else
	fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
      gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
      gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
								   use_stmt));
      gsi_replace (&gsi, fma_stmt, true);
      /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
	 regardless of where the negation occurs.  */
      gimple *orig_stmt = gsi_stmt (gsi);
      if (fold_stmt (&gsi, follow_all_ssa_edges))
	{
	  if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
	    gcc_unreachable ();
	  update_stmt (gsi_stmt (gsi));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Generated FMA ");
	  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
	  fprintf (dump_file, "\n");
	}

      /* If the FMA result is negated in a single use, fold the negation
	 too.  */
      orig_stmt = gsi_stmt (gsi);
      use_operand_p use_p;
      gimple *neg_stmt;
      if (is_gimple_call (orig_stmt)
	  && gimple_call_internal_p (orig_stmt)
	  && gimple_call_lhs (orig_stmt)
	  && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
	  && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
	  && is_gimple_assign (neg_stmt)
	  && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
	  && !stmt_could_throw_p (cfun, neg_stmt))
	{
	  gsi = gsi_for_stmt (neg_stmt);
	  if (fold_stmt (&gsi, follow_all_ssa_edges))
	    {
	      if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
		gcc_unreachable ();
	      update_stmt (gsi_stmt (gsi));
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, "Folded FMA negation ");
		  print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
		  fprintf (dump_file, "\n");
		}
	    }
	}

      widen_mul_stats.fmas_inserted++;
    }
}
/* Data necessary to perform the actual transformation from a multiplication
   and an addition to an FMA after decision is taken it should be done and to
   then delete the multiplication statement from the function IL.  */

struct fma_transformation_info
{
  /* The multiplication statement to be removed once folded into FMAs.  */
  gimple *mul_stmt;
  /* The SSA name holding the result of MUL_STMT.  */
  tree mul_result;
  /* The two multiplicands.  */
  tree op1;
  tree op2;
};
/* Structure containing the current state of FMA deferring, i.e. whether we are
   deferring, whether to continue deferring, and all data necessary to come
   back and perform all deferred transformations.  */

class fma_deferring_state
{
public:
  /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
     do any deferring.  */

  fma_deferring_state (bool perform_deferring)
    : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
      m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}

  /* List of FMA candidates for which the transformation has been determined
     possible but which at this point in BB analysis we do not consider
     beneficial.  */
  auto_vec<fma_transformation_info, 8> m_candidates;

  /* Set of results of multiplication that are part of an already deferred FMA
     candidates.  */
  hash_set<tree> m_mul_result_set;

  /* The PHI that supposedly feeds back result of a FMA to another over loop
     boundary.  */
  gphi *m_initial_phi;

  /* Result of the last produced FMA candidate or NULL if there has not been
     one.  */
  tree m_last_result;

  /* If true, deferring might still be profitable.  If false, transform all
     candidates and no longer defer.  */
  bool m_deferring_p;
};
3238 /* Transform all deferred FMA candidates and mark STATE as no longer
3239 deferring. */
3241 static void
3242 cancel_fma_deferring (fma_deferring_state *state)
3244 if (!state->m_deferring_p)
3245 return;
3247 for (unsigned i = 0; i < state->m_candidates.length (); i++)
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "Generating deferred FMA\n");
3252 const fma_transformation_info &fti = state->m_candidates[i];
3253 convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3255 gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3256 gsi_remove (&gsi, true);
3257 release_defs (fti.mul_stmt);
3259 state->m_deferring_p = false;
3262 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3263 Otherwise return NULL. */
3265 static gphi *
3266 result_of_phi (tree op)
3268 if (TREE_CODE (op) != SSA_NAME)
3269 return NULL;
3271 return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3274 /* After processing statements of a BB and recording STATE, return true if the
3275 initial phi is fed by the last FMA candidate result ore one such result from
3276 previously processed BBs marked in LAST_RESULT_SET. */
3278 static bool
3279 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3280 hash_set<tree> *last_result_set)
3282 ssa_op_iter iter;
3283 use_operand_p use;
3284 FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3286 tree t = USE_FROM_PTR (use);
3287 if (t == state->m_last_result
3288 || last_result_set->contains (t))
3289 return true;
3292 return false;
3295 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3296 with uses in additions and subtractions to form fused multiply-add
3297 operations. Returns true if successful and MUL_STMT should be removed.
3298 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3299 on MUL_COND, otherwise it is unconditional.
3301 If STATE indicates that we are deferring FMA transformation, that means
3302 that we do not produce FMAs for basic blocks which look like:
3304 <bb 6>
3305 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3306 _65 = _14 * _16;
3307 accumulator_66 = _65 + accumulator_111;
3309 or its unrolled version, i.e. with several FMA candidates that feed result
3310 of one into the addend of another. Instead, we add them to a list in STATE
3311 and if we later discover an FMA candidate that is not part of such a chain,
3312 we go back and perform all deferred past candidates. */
3314 static bool
3315 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3316 fma_deferring_state *state, tree mul_cond = NULL_TREE,
3317 tree mul_len = NULL_TREE, tree mul_bias = NULL_TREE)
3319 tree mul_result = gimple_get_lhs (mul_stmt);
3320 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3321 if the statement was left just for the side-effects. */
3322 if (!mul_result)
3323 return false;
3324 tree type = TREE_TYPE (mul_result);
3325 gimple *use_stmt, *neguse_stmt;
3326 use_operand_p use_p;
3327 imm_use_iterator imm_iter;
3329 if (FLOAT_TYPE_P (type)
3330 && flag_fp_contract_mode != FP_CONTRACT_FAST)
3331 return false;
3333 /* We don't want to do bitfield reduction ops. */
3334 if (INTEGRAL_TYPE_P (type)
3335 && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3336 return false;
3338 /* If the target doesn't support it, don't generate it. We assume that
3339 if fma isn't available then fms, fnma or fnms are not either. */
3340 optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3341 if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3342 return false;
3344 /* If the multiplication has zero uses, it is kept around probably because
3345 of -fnon-call-exceptions. Don't optimize it away in that case,
3346 it is DCE job. */
3347 if (has_zero_uses (mul_result))
3348 return false;
3350 bool check_defer
3351 = (state->m_deferring_p
3352 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3353 param_avoid_fma_max_bits));
3354 bool defer = check_defer;
3355 bool seen_negate_p = false;
3357 /* There is no numerical difference between fused and unfused integer FMAs,
3358 and the assumption below that FMA is as cheap as addition is unlikely
3359 to be true, especially if the multiplication occurs multiple times on
3360 the same chain. E.g., for something like:
3362 (((a * b) + c) >> 1) + (a * b)
3364 we do not want to duplicate the a * b into two additions, not least
3365 because the result is not a natural FMA chain. */
3366 if (ANY_INTEGRAL_TYPE_P (type)
3367 && !has_single_use (mul_result))
3368 return false;
3370 if (!dbg_cnt (form_fma))
3371 return false;
3373 /* Make sure that the multiplication statement becomes dead after
3374 the transformation, thus that all uses are transformed to FMAs.
3375 This means we assume that an FMA operation has the same cost
3376 as an addition. */
3377 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3379 tree result = mul_result;
3380 bool negate_p = false;
3382 use_stmt = USE_STMT (use_p);
3384 if (is_gimple_debug (use_stmt))
3385 continue;
3387 /* For now restrict this operations to single basic blocks. In theory
3388 we would want to support sinking the multiplication in
3389 m = a*b;
3390 if ()
3391 ma = m + c;
3392 else
3393 d = m;
3394 to form a fma in the then block and sink the multiplication to the
3395 else block. */
3396 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3397 return false;
3399 /* A negate on the multiplication leads to FNMA. */
3400 if (is_gimple_assign (use_stmt)
3401 && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3403 ssa_op_iter iter;
3404 use_operand_p usep;
3406 /* If (due to earlier missed optimizations) we have two
3407 negates of the same value, treat them as equivalent
3408 to a single negate with multiple uses. */
3409 if (seen_negate_p)
3410 return false;
3412 result = gimple_assign_lhs (use_stmt);
3414 /* Make sure the negate statement becomes dead with this
3415 single transformation. */
3416 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3417 &use_p, &neguse_stmt))
3418 return false;
3420 /* Make sure the multiplication isn't also used on that stmt. */
3421 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3422 if (USE_FROM_PTR (usep) == mul_result)
3423 return false;
3425 /* Re-validate. */
3426 use_stmt = neguse_stmt;
3427 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3428 return false;
3430 negate_p = seen_negate_p = true;
3433 tree cond, else_value, ops[3], len, bias;
3434 tree_code code;
3435 if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3436 &else_value, &len, &bias))
3437 return false;
3439 switch (code)
3441 case MINUS_EXPR:
3442 if (ops[1] == result)
3443 negate_p = !negate_p;
3444 break;
3445 case PLUS_EXPR:
3446 break;
3447 default:
3448 /* FMA can only be formed from PLUS and MINUS. */
3449 return false;
3452 if (len)
3454 /* For COND_LEN_* operations, we may have dummpy mask which is
3455 the all true mask. Such TREE type may be mul_cond != cond
3456 but we still consider they are equal. */
3457 if (mul_cond && cond != mul_cond
3458 && !(integer_truep (mul_cond) && integer_truep (cond)))
3459 return false;
3461 if (else_value == result)
3462 return false;
3464 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA, type,
3465 opt_type))
3466 return false;
3468 if (mul_len)
3470 poly_int64 mul_value, value;
3471 if (poly_int_tree_p (mul_len, &mul_value)
3472 && poly_int_tree_p (len, &value)
3473 && maybe_ne (mul_value, value))
3474 return false;
3475 else if (mul_len != len)
3476 return false;
3478 if (wi::to_widest (mul_bias) != wi::to_widest (bias))
3479 return false;
3482 else
3484 if (mul_cond && cond != mul_cond)
3485 return false;
3487 if (cond)
3489 if (cond == result || else_value == result)
3490 return false;
3491 if (!direct_internal_fn_supported_p (IFN_COND_FMA, type,
3492 opt_type))
3493 return false;
3497 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3498 we'll visit later, we might be able to get a more profitable
3499 match with fnma.
3500 OTOH, if we don't, a negate / fma pair has likely lower latency
3501 that a mult / subtract pair. */
3502 if (code == MINUS_EXPR
3503 && !negate_p
3504 && ops[0] == result
3505 && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3506 && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3507 && TREE_CODE (ops[1]) == SSA_NAME
3508 && has_single_use (ops[1]))
3510 gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3511 if (is_gimple_assign (stmt2)
3512 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3513 return false;
3516 /* We can't handle a * b + a * b. */
3517 if (ops[0] == ops[1])
3518 return false;
3519 /* If deferring, make sure we are not looking at an instruction that
3520 wouldn't have existed if we were not. */
3521 if (state->m_deferring_p
3522 && (state->m_mul_result_set.contains (ops[0])
3523 || state->m_mul_result_set.contains (ops[1])))
3524 return false;
3526 if (check_defer)
3528 tree use_lhs = gimple_get_lhs (use_stmt);
3529 if (state->m_last_result)
3531 if (ops[1] == state->m_last_result
3532 || ops[0] == state->m_last_result)
3533 defer = true;
3534 else
3535 defer = false;
3537 else
3539 gcc_checking_assert (!state->m_initial_phi);
3540 gphi *phi;
3541 if (ops[0] == result)
3542 phi = result_of_phi (ops[1]);
3543 else
3545 gcc_assert (ops[1] == result);
3546 phi = result_of_phi (ops[0]);
3549 if (phi)
3551 state->m_initial_phi = phi;
3552 defer = true;
3554 else
3555 defer = false;
3558 state->m_last_result = use_lhs;
3559 check_defer = false;
3561 else
3562 defer = false;
3564 /* While it is possible to validate whether or not the exact form that
3565 we've recognized is available in the backend, the assumption is that
3566 if the deferring logic above did not trigger, the transformation is
3567 never a loss. For instance, suppose the target only has the plain FMA
3568 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3569 MUL+SUB for FMA+NEG, which is still two operations. Consider
3570 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3571 form the two NEGs are independent and could be run in parallel. */
3574 if (defer)
3576 fma_transformation_info fti;
3577 fti.mul_stmt = mul_stmt;
3578 fti.mul_result = mul_result;
3579 fti.op1 = op1;
3580 fti.op2 = op2;
3581 state->m_candidates.safe_push (fti);
3582 state->m_mul_result_set.add (mul_result);
3584 if (dump_file && (dump_flags & TDF_DETAILS))
3586 fprintf (dump_file, "Deferred generating FMA for multiplication ");
3587 print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3588 fprintf (dump_file, "\n");
3591 return false;
3593 else
3595 if (state->m_deferring_p)
3596 cancel_fma_deferring (state);
3597 convert_mult_to_fma_1 (mul_result, op1, op2);
3598 return true;
3603 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3604 a check for non-zero like:
3605 _1 = x_4(D) * y_5(D);
3606 *res_7(D) = _1;
3607 if (x_4(D) != 0)
3608 goto <bb 3>; [50.00%]
3609 else
3610 goto <bb 4>; [50.00%]
3612 <bb 3> [local count: 536870913]:
3613 _2 = _1 / x_4(D);
3614 _9 = _2 != y_5(D);
3615 _10 = (int) _9;
3617 <bb 4> [local count: 1073741824]:
3618 # iftmp.0_3 = PHI <_10(3), 0(2)>
3619 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3620 optimize the x_4(D) != 0 condition to 1. */
3622 static void
3623 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3624 gimple *div_stmt, bool *cfg_changed)
3626 basic_block bb = gimple_bb (cond_stmt);
3627 if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3628 return;
3629 edge pred_edge = single_pred_edge (bb);
3630 basic_block pred_bb = pred_edge->src;
3631 if (EDGE_COUNT (pred_bb->succs) != 2)
3632 return;
3633 edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3634 edge other_succ_edge = NULL;
3635 if (gimple_code (cond_stmt) == GIMPLE_COND)
3637 if (EDGE_COUNT (bb->succs) != 2)
3638 return;
3639 other_succ_edge = EDGE_SUCC (bb, 0);
3640 if (gimple_cond_code (cond_stmt) == NE_EXPR)
3642 if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3643 other_succ_edge = EDGE_SUCC (bb, 1);
3645 else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3646 other_succ_edge = EDGE_SUCC (bb, 0);
3647 if (other_edge->dest != other_succ_edge->dest)
3648 return;
3650 else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3651 return;
3652 gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3653 if (zero_cond == NULL
3654 || (gimple_cond_code (zero_cond)
3655 != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3656 || !integer_zerop (gimple_cond_rhs (zero_cond)))
3657 return;
3658 tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3659 if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3660 return;
3661 if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3663 /* Allow the divisor to be result of a same precision cast
3664 from zero_cond_lhs. */
3665 tree rhs2 = gimple_assign_rhs2 (div_stmt);
3666 if (TREE_CODE (rhs2) != SSA_NAME)
3667 return;
3668 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3669 if (!gimple_assign_cast_p (g)
3670 || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3671 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3672 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3673 != TYPE_PRECISION (TREE_TYPE (rhs2))))
3674 return;
3676 gimple_stmt_iterator gsi = gsi_after_labels (bb);
3677 mul_stmts.quick_push (div_stmt);
3678 if (is_gimple_debug (gsi_stmt (gsi)))
3679 gsi_next_nondebug (&gsi);
3680 unsigned cast_count = 0;
3681 while (gsi_stmt (gsi) != cond_stmt)
3683 /* If original mul_stmt has a single use, allow it in the same bb,
3684 we are looking then just at __builtin_mul_overflow_p.
3685 Though, in that case the original mul_stmt will be replaced
3686 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3687 gimple *mul_stmt;
3688 unsigned int i;
3689 bool ok = false;
3690 FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3692 if (gsi_stmt (gsi) == mul_stmt)
3694 ok = true;
3695 break;
3698 if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3699 ok = true;
3700 if (!ok)
3701 return;
3702 gsi_next_nondebug (&gsi);
3704 if (gimple_code (cond_stmt) == GIMPLE_COND)
3706 basic_block succ_bb = other_edge->dest;
3707 for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3708 gsi_next (&gpi))
3710 gphi *phi = gpi.phi ();
3711 tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3712 tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3713 if (!operand_equal_p (v1, v2, 0))
3714 return;
3717 else
3719 tree lhs = gimple_assign_lhs (cond_stmt);
3720 if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3721 return;
3722 gsi_next_nondebug (&gsi);
3723 if (!gsi_end_p (gsi))
3725 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3726 return;
3727 gimple *cast_stmt = gsi_stmt (gsi);
3728 if (!gimple_assign_cast_p (cast_stmt))
3729 return;
3730 tree new_lhs = gimple_assign_lhs (cast_stmt);
3731 gsi_next_nondebug (&gsi);
3732 if (!gsi_end_p (gsi)
3733 || !new_lhs
3734 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3735 || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3736 return;
3737 lhs = new_lhs;
3739 edge succ_edge = single_succ_edge (bb);
3740 basic_block succ_bb = succ_edge->dest;
3741 gsi = gsi_start_phis (succ_bb);
3742 if (gsi_end_p (gsi))
3743 return;
3744 gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3745 gsi_next (&gsi);
3746 if (!gsi_end_p (gsi))
3747 return;
3748 if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3749 return;
3750 tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3751 if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3753 tree cond = gimple_assign_rhs1 (cond_stmt);
3754 if (TREE_CODE (cond) == NE_EXPR)
3756 if (!operand_equal_p (other_val,
3757 gimple_assign_rhs3 (cond_stmt), 0))
3758 return;
3760 else if (!operand_equal_p (other_val,
3761 gimple_assign_rhs2 (cond_stmt), 0))
3762 return;
3764 else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3766 if (!integer_zerop (other_val))
3767 return;
3769 else if (!integer_onep (other_val))
3770 return;
3772 if (pred_edge->flags & EDGE_TRUE_VALUE)
3773 gimple_cond_make_true (zero_cond);
3774 else
3775 gimple_cond_make_false (zero_cond);
3776 update_stmt (zero_cond);
3777 *cfg_changed = true;
3780 /* Helper function for arith_overflow_check_p. Return true
3781 if VAL1 is equal to VAL2 cast to corresponding integral type
3782 with other signedness or vice versa. */
3784 static bool
3785 arith_cast_equal_p (tree val1, tree val2)
3787 if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3788 return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3789 else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3790 return false;
3791 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3792 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3793 return true;
3794 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3795 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3796 return true;
3797 return false;
3800 /* Helper function of match_arith_overflow. Return 1
3801 if USE_STMT is unsigned overflow check ovf != 0 for
3802 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3803 and 0 otherwise. */
3805 static int
3806 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3807 tree maxval, tree *other)
3809 enum tree_code ccode = ERROR_MARK;
3810 tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3811 enum tree_code code = gimple_assign_rhs_code (stmt);
3812 tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3813 tree rhs1 = gimple_assign_rhs1 (stmt);
3814 tree rhs2 = gimple_assign_rhs2 (stmt);
3815 tree multop = NULL_TREE, divlhs = NULL_TREE;
3816 gimple *cur_use_stmt = use_stmt;
3818 if (code == MULT_EXPR)
3820 if (!is_gimple_assign (use_stmt))
3821 return 0;
3822 if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3823 return 0;
3824 if (gimple_assign_rhs1 (use_stmt) != lhs)
3825 return 0;
3826 if (cast_stmt)
3828 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3829 multop = rhs2;
3830 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3831 multop = rhs1;
3832 else
3833 return 0;
3835 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3836 multop = rhs2;
3837 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3838 multop = rhs1;
3839 else
3840 return 0;
3841 if (stmt_ends_bb_p (use_stmt))
3842 return 0;
3843 divlhs = gimple_assign_lhs (use_stmt);
3844 if (!divlhs)
3845 return 0;
3846 use_operand_p use;
3847 if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3848 return 0;
3849 if (cast_stmt && gimple_assign_cast_p (cur_use_stmt))
3851 tree cast_lhs = gimple_assign_lhs (cur_use_stmt);
3852 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
3853 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
3854 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
3855 == TYPE_PRECISION (TREE_TYPE (divlhs)))
3856 && single_imm_use (cast_lhs, &use, &cur_use_stmt))
3858 cast_stmt = NULL;
3859 divlhs = cast_lhs;
3861 else
3862 return 0;
3865 if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3867 ccode = gimple_cond_code (cur_use_stmt);
3868 crhs1 = gimple_cond_lhs (cur_use_stmt);
3869 crhs2 = gimple_cond_rhs (cur_use_stmt);
3871 else if (is_gimple_assign (cur_use_stmt))
3873 if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3875 ccode = gimple_assign_rhs_code (cur_use_stmt);
3876 crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3877 crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3879 else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3881 tree cond = gimple_assign_rhs1 (cur_use_stmt);
3882 if (COMPARISON_CLASS_P (cond))
3884 ccode = TREE_CODE (cond);
3885 crhs1 = TREE_OPERAND (cond, 0);
3886 crhs2 = TREE_OPERAND (cond, 1);
3888 else
3889 return 0;
3891 else
3892 return 0;
3894 else
3895 return 0;
3897 if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3898 return 0;
3900 switch (ccode)
3902 case GT_EXPR:
3903 case LE_EXPR:
3904 if (maxval)
3906 /* r = a + b; r > maxval or r <= maxval */
3907 if (crhs1 == lhs
3908 && TREE_CODE (crhs2) == INTEGER_CST
3909 && tree_int_cst_equal (crhs2, maxval))
3910 return ccode == GT_EXPR ? 1 : -1;
3911 break;
3913 /* r = a - b; r > a or r <= a
3914 r = a + b; a > r or a <= r or b > r or b <= r. */
3915 if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3916 || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3917 && crhs2 == lhs))
3918 return ccode == GT_EXPR ? 1 : -1;
3919 /* r = ~a; b > r or b <= r. */
3920 if (code == BIT_NOT_EXPR && crhs2 == lhs)
3922 if (other)
3923 *other = crhs1;
3924 return ccode == GT_EXPR ? 1 : -1;
3926 break;
3927 case LT_EXPR:
3928 case GE_EXPR:
3929 if (maxval)
3930 break;
3931 /* r = a - b; a < r or a >= r
3932 r = a + b; r < a or r >= a or r < b or r >= b. */
3933 if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3934 || (code == PLUS_EXPR && crhs1 == lhs
3935 && (crhs2 == rhs1 || crhs2 == rhs2)))
3936 return ccode == LT_EXPR ? 1 : -1;
3937 /* r = ~a; r < b or r >= b. */
3938 if (code == BIT_NOT_EXPR && crhs1 == lhs)
3940 if (other)
3941 *other = crhs2;
3942 return ccode == LT_EXPR ? 1 : -1;
3944 break;
3945 case EQ_EXPR:
3946 case NE_EXPR:
3947 /* r = a * b; _1 = r / a; _1 == b
3948 r = a * b; _1 = r / b; _1 == a
3949 r = a * b; _1 = r / a; _1 != b
3950 r = a * b; _1 = r / b; _1 != a. */
3951 if (code == MULT_EXPR)
3953 if (cast_stmt)
3955 if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3956 || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3958 use_stmt = cur_use_stmt;
3959 return ccode == NE_EXPR ? 1 : -1;
3962 else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3963 || (crhs2 == divlhs && crhs1 == multop))
3965 use_stmt = cur_use_stmt;
3966 return ccode == NE_EXPR ? 1 : -1;
3969 break;
3970 default:
3971 break;
3973 return 0;
3976 /* Recognize for unsigned x
3977 x = y - z;
3978 if (x > y)
3979 where there are other uses of x and replace it with
3980 _7 = .SUB_OVERFLOW (y, z);
3981 x = REALPART_EXPR <_7>;
3982 _8 = IMAGPART_EXPR <_7>;
3983 if (_8)
3984 and similarly for addition.
3986 Also recognize:
3987 yc = (type) y;
3988 zc = (type) z;
3989 x = yc + zc;
3990 if (x > max)
3991 where y and z have unsigned types with maximum max
3992 and there are other uses of x and all of those cast x
3993 back to that unsigned type and again replace it with
3994 _7 = .ADD_OVERFLOW (y, z);
3995 _9 = REALPART_EXPR <_7>;
3996 _8 = IMAGPART_EXPR <_7>;
3997 if (_8)
3998 and replace (utype) x with _9.
4000 Also recognize:
4001 x = ~z;
4002 if (y > x)
4003 and replace it with
4004 _7 = .ADD_OVERFLOW (y, z);
4005 _8 = IMAGPART_EXPR <_7>;
4006 if (_8)
4008 And also recognize:
4009 z = x * y;
4010 if (x != 0)
4011 goto <bb 3>; [50.00%]
4012 else
4013 goto <bb 4>; [50.00%]
4015 <bb 3> [local count: 536870913]:
4016 _2 = z / x;
4017 _9 = _2 != y;
4018 _10 = (int) _9;
4020 <bb 4> [local count: 1073741824]:
4021 # iftmp.0_3 = PHI <_10(3), 0(2)>
4022 and replace it with
4023 _7 = .MUL_OVERFLOW (x, y);
4024 z = IMAGPART_EXPR <_7>;
4025 _8 = IMAGPART_EXPR <_7>;
4026 _9 = _8 != 0;
4027 iftmp.0_3 = (int) _9; */
4029 static bool
4030 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
4031 enum tree_code code, bool *cfg_changed)
4033 tree lhs = gimple_assign_lhs (stmt);
4034 tree type = TREE_TYPE (lhs);
4035 use_operand_p use_p;
4036 imm_use_iterator iter;
4037 bool use_seen = false;
4038 bool ovf_use_seen = false;
4039 gimple *use_stmt;
4040 gimple *add_stmt = NULL;
4041 bool add_first = false;
4042 gimple *cond_stmt = NULL;
4043 gimple *cast_stmt = NULL;
4044 tree cast_lhs = NULL_TREE;
4046 gcc_checking_assert (code == PLUS_EXPR
4047 || code == MINUS_EXPR
4048 || code == MULT_EXPR
4049 || code == BIT_NOT_EXPR);
4050 if (!INTEGRAL_TYPE_P (type)
4051 || !TYPE_UNSIGNED (type)
4052 || has_zero_uses (lhs)
4053 || (code != PLUS_EXPR
4054 && code != MULT_EXPR
4055 && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
4056 TYPE_MODE (type)) == CODE_FOR_nothing))
4057 return false;
4059 tree rhs1 = gimple_assign_rhs1 (stmt);
4060 tree rhs2 = gimple_assign_rhs2 (stmt);
4061 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4063 use_stmt = USE_STMT (use_p);
4064 if (is_gimple_debug (use_stmt))
4065 continue;
4067 tree other = NULL_TREE;
4068 if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4070 if (code == BIT_NOT_EXPR)
4072 gcc_assert (other);
4073 if (TREE_CODE (other) != SSA_NAME)
4074 return false;
4075 if (rhs2 == NULL)
4076 rhs2 = other;
4077 else
4078 return false;
4079 cond_stmt = use_stmt;
4081 ovf_use_seen = true;
4083 else
4085 use_seen = true;
4086 if (code == MULT_EXPR
4087 && cast_stmt == NULL
4088 && gimple_assign_cast_p (use_stmt))
4090 cast_lhs = gimple_assign_lhs (use_stmt);
4091 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4092 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4093 && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4094 == TYPE_PRECISION (TREE_TYPE (lhs))))
4095 cast_stmt = use_stmt;
4096 else
4097 cast_lhs = NULL_TREE;
4100 if (ovf_use_seen && use_seen)
4101 break;
4104 if (!ovf_use_seen
4105 && code == MULT_EXPR
4106 && cast_stmt)
4108 if (TREE_CODE (rhs1) != SSA_NAME
4109 || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4110 return false;
4111 FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4113 use_stmt = USE_STMT (use_p);
4114 if (is_gimple_debug (use_stmt))
4115 continue;
4117 if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4118 NULL_TREE, NULL))
4119 ovf_use_seen = true;
4122 else
4124 cast_stmt = NULL;
4125 cast_lhs = NULL_TREE;
4128 tree maxval = NULL_TREE;
4129 if (!ovf_use_seen
4130 || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4131 || (code == PLUS_EXPR
4132 && optab_handler (uaddv4_optab,
4133 TYPE_MODE (type)) == CODE_FOR_nothing)
4134 || (code == MULT_EXPR
4135 && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4136 TYPE_MODE (type)) == CODE_FOR_nothing
4137 && (use_seen
4138 || cast_stmt
4139 || !can_mult_highpart_p (TYPE_MODE (type), true))))
4141 if (code != PLUS_EXPR)
4142 return false;
4143 if (TREE_CODE (rhs1) != SSA_NAME
4144 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4145 return false;
4146 rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4147 tree type1 = TREE_TYPE (rhs1);
4148 if (!INTEGRAL_TYPE_P (type1)
4149 || !TYPE_UNSIGNED (type1)
4150 || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4151 || (TYPE_PRECISION (type1)
4152 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4153 return false;
4154 if (TREE_CODE (rhs2) == INTEGER_CST)
4156 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4157 TYPE_PRECISION (type1),
4158 UNSIGNED), 0))
4159 return false;
4160 rhs2 = fold_convert (type1, rhs2);
4162 else
4164 if (TREE_CODE (rhs2) != SSA_NAME
4165 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4166 return false;
4167 rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4168 tree type2 = TREE_TYPE (rhs2);
4169 if (!INTEGRAL_TYPE_P (type2)
4170 || !TYPE_UNSIGNED (type2)
4171 || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4172 || (TYPE_PRECISION (type2)
4173 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4174 return false;
4176 if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4177 type = type1;
4178 else
4179 type = TREE_TYPE (rhs2);
4181 if (TREE_CODE (type) != INTEGER_TYPE
4182 || optab_handler (uaddv4_optab,
4183 TYPE_MODE (type)) == CODE_FOR_nothing)
4184 return false;
4186 maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4187 UNSIGNED));
4188 ovf_use_seen = false;
4189 use_seen = false;
4190 basic_block use_bb = NULL;
4191 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4193 use_stmt = USE_STMT (use_p);
4194 if (is_gimple_debug (use_stmt))
4195 continue;
4197 if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4199 ovf_use_seen = true;
4200 use_bb = gimple_bb (use_stmt);
4202 else
4204 if (!gimple_assign_cast_p (use_stmt)
4205 || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4206 return false;
4207 tree use_lhs = gimple_assign_lhs (use_stmt);
4208 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4209 || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4210 > TYPE_PRECISION (type)))
4211 return false;
4212 use_seen = true;
4215 if (!ovf_use_seen)
4216 return false;
4217 if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4219 if (!use_seen)
4220 return false;
4221 tree new_rhs1 = make_ssa_name (type);
4222 gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4223 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4224 rhs1 = new_rhs1;
4226 else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4228 if (!use_seen)
4229 return false;
4230 tree new_rhs2 = make_ssa_name (type);
4231 gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4232 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4233 rhs2 = new_rhs2;
4235 else if (!use_seen)
4237 /* If there are no uses of the wider addition, check if
4238 forwprop has not created a narrower addition.
4239 Require it to be in the same bb as the overflow check. */
4240 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4242 use_stmt = USE_STMT (use_p);
4243 if (is_gimple_debug (use_stmt))
4244 continue;
4246 if (use_stmt == stmt)
4247 continue;
4249 if (!is_gimple_assign (use_stmt)
4250 || gimple_bb (use_stmt) != use_bb
4251 || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4252 continue;
4254 if (gimple_assign_rhs1 (use_stmt) == rhs1)
4256 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4257 rhs2, 0))
4258 continue;
4260 else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4262 if (gimple_assign_rhs1 (use_stmt) != rhs2)
4263 continue;
4265 else
4266 continue;
4268 add_stmt = use_stmt;
4269 break;
4271 if (add_stmt == NULL)
4272 return false;
4274 /* If stmt and add_stmt are in the same bb, we need to find out
4275 which one is earlier. If they are in different bbs, we've
4276 checked add_stmt is in the same bb as one of the uses of the
4277 stmt lhs, so stmt needs to dominate add_stmt too. */
4278 if (gimple_bb (stmt) == gimple_bb (add_stmt))
4280 gimple_stmt_iterator gsif = *gsi;
4281 gimple_stmt_iterator gsib = *gsi;
4282 int i;
4283 /* Search both forward and backward from stmt and have a small
4284 upper bound. */
4285 for (i = 0; i < 128; i++)
4287 if (!gsi_end_p (gsib))
4289 gsi_prev_nondebug (&gsib);
4290 if (gsi_stmt (gsib) == add_stmt)
4292 add_first = true;
4293 break;
4296 else if (gsi_end_p (gsif))
4297 break;
4298 if (!gsi_end_p (gsif))
4300 gsi_next_nondebug (&gsif);
4301 if (gsi_stmt (gsif) == add_stmt)
4302 break;
4305 if (i == 128)
4306 return false;
4307 if (add_first)
4308 *gsi = gsi_for_stmt (add_stmt);
4313 if (code == BIT_NOT_EXPR)
4314 *gsi = gsi_for_stmt (cond_stmt);
4316 auto_vec<gimple *, 8> mul_stmts;
4317 if (code == MULT_EXPR && cast_stmt)
4319 type = TREE_TYPE (cast_lhs);
4320 gimple *g = SSA_NAME_DEF_STMT (rhs1);
4321 if (gimple_assign_cast_p (g)
4322 && useless_type_conversion_p (type,
4323 TREE_TYPE (gimple_assign_rhs1 (g)))
4324 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4325 rhs1 = gimple_assign_rhs1 (g);
4326 else
4328 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4329 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4330 rhs1 = gimple_assign_lhs (g);
4331 mul_stmts.quick_push (g);
4333 if (TREE_CODE (rhs2) == INTEGER_CST)
4334 rhs2 = fold_convert (type, rhs2);
4335 else
4337 g = SSA_NAME_DEF_STMT (rhs2);
4338 if (gimple_assign_cast_p (g)
4339 && useless_type_conversion_p (type,
4340 TREE_TYPE (gimple_assign_rhs1 (g)))
4341 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4342 rhs2 = gimple_assign_rhs1 (g);
4343 else
4345 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4346 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4347 rhs2 = gimple_assign_lhs (g);
4348 mul_stmts.quick_push (g);
4352 tree ctype = build_complex_type (type);
4353 gcall *g = gimple_build_call_internal (code == MULT_EXPR
4354 ? IFN_MUL_OVERFLOW
4355 : code != MINUS_EXPR
4356 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4357 2, rhs1, rhs2);
4358 tree ctmp = make_ssa_name (ctype);
4359 gimple_call_set_lhs (g, ctmp);
4360 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4361 tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4362 gassign *g2;
4363 if (code != BIT_NOT_EXPR)
4365 g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4366 build1 (REALPART_EXPR, type, ctmp));
4367 if (maxval || cast_stmt)
4369 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4370 if (add_first)
4371 *gsi = gsi_for_stmt (stmt);
4373 else
4374 gsi_replace (gsi, g2, true);
4375 if (code == MULT_EXPR)
4377 mul_stmts.quick_push (g);
4378 mul_stmts.quick_push (g2);
4379 if (cast_stmt)
4381 g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4382 gsi_replace (gsi, g2, true);
4383 mul_stmts.quick_push (g2);
4387 tree ovf = make_ssa_name (type);
4388 g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4389 build1 (IMAGPART_EXPR, type, ctmp));
4390 if (code != BIT_NOT_EXPR)
4391 gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4392 else
4393 gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4394 if (code == MULT_EXPR)
4395 mul_stmts.quick_push (g2);
4397 FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4399 if (is_gimple_debug (use_stmt))
4400 continue;
4402 gimple *orig_use_stmt = use_stmt;
4403 int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4404 maxval, NULL);
4405 if (ovf_use == 0)
4407 gcc_assert (code != BIT_NOT_EXPR);
4408 if (maxval)
4410 tree use_lhs = gimple_assign_lhs (use_stmt);
4411 gimple_assign_set_rhs1 (use_stmt, new_lhs);
4412 if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4413 TREE_TYPE (new_lhs)))
4414 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4415 update_stmt (use_stmt);
4417 continue;
4419 if (gimple_code (use_stmt) == GIMPLE_COND)
4421 gcond *cond_stmt = as_a <gcond *> (use_stmt);
4422 gimple_cond_set_lhs (cond_stmt, ovf);
4423 gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4424 gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4426 else
4428 gcc_checking_assert (is_gimple_assign (use_stmt));
4429 if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4431 gimple_assign_set_rhs1 (use_stmt, ovf);
4432 gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4433 gimple_assign_set_rhs_code (use_stmt,
4434 ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4436 else
4438 gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4439 == COND_EXPR);
4440 tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4441 boolean_type_node, ovf,
4442 build_int_cst (type, 0));
4443 gimple_assign_set_rhs1 (use_stmt, cond);
4446 update_stmt (use_stmt);
4447 if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4449 gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4450 maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4451 cfg_changed);
4452 use_operand_p use;
4453 gimple *cast_stmt;
4454 if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
4455 &cast_stmt)
4456 && gimple_assign_cast_p (cast_stmt))
4458 gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
4459 gsi_remove (&gsi3, true);
4460 release_ssa_name (gimple_assign_lhs (cast_stmt));
4462 gsi_remove (&gsi2, true);
4463 release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4466 if (maxval)
4468 gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4469 gsi_remove (&gsi2, true);
4470 if (add_stmt)
4472 gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4473 new_lhs);
4474 gsi2 = gsi_for_stmt (add_stmt);
4475 gsi_replace (&gsi2, g, true);
4478 else if (code == BIT_NOT_EXPR)
4480 *gsi = gsi_for_stmt (stmt);
4481 gsi_remove (gsi, true);
4482 release_ssa_name (lhs);
4483 return true;
4485 return false;
4488 /* Helper of match_uaddc_usubc.  Look through an integral cast
4489 which should preserve [0, 1] range value (unless source has
4490 1-bit signed type) and the cast has single use.  */
4492 static gimple *
4493 uaddc_cast (gimple *g)
4495 if (!gimple_assign_cast_p (g))
4496 return g;
4497 tree op = gimple_assign_rhs1 (g);
/* Look through the cast only when the source is an SSA_NAME with an
   integral type that is not a 1-bit signed type (so a [0, 1] value
   cannot be sign-extended to -1) and the cast result has a single
   use; otherwise return G itself unchanged.  */
4498 if (TREE_CODE (op) == SSA_NAME
4499 && INTEGRAL_TYPE_P (TREE_TYPE (op))
4500 && (TYPE_PRECISION (TREE_TYPE (op)) > 1
4501 || TYPE_UNSIGNED (TREE_TYPE (op)))
4502 && has_single_use (gimple_assign_lhs (g)))
4503 return SSA_NAME_DEF_STMT (op);
4504 return g;
4507 /* Helper of match_uaddc_usubc.  Look through a NE_EXPR
4508 comparison with 0 which also preserves [0, 1] value range.  */
4510 static gimple *
4511 uaddc_ne0 (gimple *g)
/* x != 0 maps 0 to 0 and 1 to 1, so for an operand already known to be
   in [0, 1] the comparison is an identity; step to the definition of
   the compared SSA_NAME.  Single use is required on the comparison
   result.  Otherwise return G itself unchanged.  */
4513 if (is_gimple_assign (g)
4514 && gimple_assign_rhs_code (g) == NE_EXPR
4515 && integer_zerop (gimple_assign_rhs2 (g))
4516 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
4517 && has_single_use (gimple_assign_lhs (g)))
4518 return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
4519 return g;
4522 /* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
4523 operand.  */
4525 static bool
4526 uaddc_is_cplxpart (gimple *g, tree_code part)
/* PART is REALPART_EXPR or IMAGPART_EXPR; for such an assignment rhs1
   is the {REAL,IMAG}PART_EXPR tree whose operand 0 is the complex
   value being decomposed, which must be an SSA_NAME.  */
4528 return (is_gimple_assign (g)
4529 && gimple_assign_rhs_code (g) == part
4530 && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
4533 /* Try to match e.g.
4534 _29 = .ADD_OVERFLOW (_3, _4);
4535 _30 = REALPART_EXPR <_29>;
4536 _31 = IMAGPART_EXPR <_29>;
4537 _32 = .ADD_OVERFLOW (_30, _38);
4538 _33 = REALPART_EXPR <_32>;
4539 _34 = IMAGPART_EXPR <_32>;
4540 _35 = _31 + _34;
4542 _36 = .UADDC (_3, _4, _38);
4543 _33 = REALPART_EXPR <_36>;
4544 _35 = IMAGPART_EXPR <_36>;
4546 _22 = .SUB_OVERFLOW (_6, _5);
4547 _23 = REALPART_EXPR <_22>;
4548 _24 = IMAGPART_EXPR <_22>;
4549 _25 = .SUB_OVERFLOW (_23, _37);
4550 _26 = REALPART_EXPR <_25>;
4551 _27 = IMAGPART_EXPR <_25>;
4552 _28 = _24 | _27;
4554 _29 = .USUBC (_6, _5, _37);
4555 _26 = REALPART_EXPR <_29>;
4556 _288 = IMAGPART_EXPR <_29>;
4557 provided _38 or _37 above have [0, 1] range
4558 and _3, _4 and _30 or _6, _5 and _23 are unsigned
4559 integral types with the same precision.  Whether + or | or ^ is
4560 used on the IMAGPART_EXPR results doesn't matter, with one of
4561 added or subtracted operands in [0, 1] range at most one
4562 .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow.  */
4564 static bool
4565 match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
4567 tree rhs[4];
4568 rhs[0] = gimple_assign_rhs1 (stmt);
4569 rhs[1] = gimple_assign_rhs2 (stmt);
4570 rhs[2] = NULL_TREE;
4571 rhs[3] = NULL_TREE;
4572 tree type = TREE_TYPE (rhs[0]);
/* Only unsigned integral limbs are handled.  */
4573 if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
4574 return false;
4576 if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
4578 /* If overflow flag is ignored on the MSB limb, we can end up with
4579 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
4580 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
4581 thereof.  Handle those like the ovf = ovf1 + ovf2; case to recognize
4582 the limb below the MSB, but also create another .UADDC/.USUBC call
4583 for the last limb.
4585 First look through assignments with the same rhs code as CODE,
4586 with the exception that subtraction of a constant is canonicalized
4587 into addition of its negation.  rhs[0] will be minuend for
4588 subtractions and one of addends for addition, all other assigned
4589 rhs[i] operands will be subtrahends or other addends.  */
4590 while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
4592 gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
4593 if (has_single_use (rhs[0])
4594 && is_gimple_assign (g)
4595 && (gimple_assign_rhs_code (g) == code
4596 || (code == MINUS_EXPR
4597 && gimple_assign_rhs_code (g) == PLUS_EXPR
4598 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
4600 tree r2 = gimple_assign_rhs2 (g);
4601 if (gimple_assign_rhs_code (g) != code)
/* Canonicalized x + CST for x - (-CST); undo the negation so R2
   is the subtrahend.  Punt if the negation does not fold.  */
4603 r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
4604 if (!r2)
4605 break;
4607 rhs[0] = gimple_assign_rhs1 (g);
4608 tree &r = rhs[2] ? rhs[3] : rhs[2];
4609 r = r2;
4611 else
4612 break;
4614 while (TREE_CODE (rhs[1]) == SSA_NAME && !rhs[3])
4616 gimple *g = SSA_NAME_DEF_STMT (rhs[1]);
4617 if (has_single_use (rhs[1])
4618 && is_gimple_assign (g)
4619 && gimple_assign_rhs_code (g) == PLUS_EXPR)
4621 rhs[1] = gimple_assign_rhs1 (g);
4622 if (rhs[2])
4623 rhs[3] = gimple_assign_rhs2 (g);
4624 else
4625 rhs[2] = gimple_assign_rhs2 (g);
4627 else
4628 break;
4630 /* If there are just 3 addends or one minuend and two subtrahends,
4631 check for UADDC or USUBC being pattern recognized earlier.
4632 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
4633 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
4634 etc.  */
4635 if (rhs[2] && !rhs[3])
4637 for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
4638 if (TREE_CODE (rhs[i]) == SSA_NAME)
4640 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4641 im = uaddc_ne0 (im);
4642 if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4644 /* We found one of the 3 addends or 2 subtrahends to be
4645 __imag__ of something, verify it is .UADDC/.USUBC.  */
4646 tree rhs1 = gimple_assign_rhs1 (im);
4647 gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
4648 tree ovf_lhs = NULL_TREE;
4649 tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
4650 if (gimple_call_internal_p (ovf, code == PLUS_EXPR
4651 ? IFN_ADD_OVERFLOW
4652 : IFN_SUB_OVERFLOW))
4654 /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
4655 This is for the case of 2 chained .UADDC/.USUBC,
4656 where the first one uses 0 carry-in and the second
4657 one ignores the carry-out.
4658 So, something like:
4659 _16 = .ADD_OVERFLOW (_1, _2);
4660 _17 = REALPART_EXPR <_16>;
4661 _18 = IMAGPART_EXPR <_16>;
4662 _15 = _3 + _4;
4663 _12 = _15 + _18;
4664 where the first 3 statements come from the lower
4665 limb addition and the last 2 from the higher limb
4666 which ignores carry-out.  */
4667 ovf_lhs = gimple_call_lhs (ovf);
4668 tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
4669 ovf_arg1 = gimple_call_arg (ovf, 0);
4670 ovf_arg2 = gimple_call_arg (ovf, 1);
4671 /* In that case we need to punt if the types don't
4672 match.  */
4673 if (!types_compatible_p (type, ovf_lhs_type)
4674 || !types_compatible_p (type, TREE_TYPE (ovf_arg1))
4675 || !types_compatible_p (type,
4676 TREE_TYPE (ovf_arg2)))
4677 ovf_lhs = NULL_TREE;
4678 else
4680 for (int i = (code == PLUS_EXPR ? 1 : 0);
4681 i >= 0; --i)
4683 tree r = gimple_call_arg (ovf, i);
4684 if (TREE_CODE (r) != SSA_NAME)
4685 continue;
4686 if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
4687 REALPART_EXPR))
4689 /* Punt if one of the args which isn't
4690 subtracted isn't __real__; that could
4691 then prevent better match later.
4692 Consider:
4693 _3 = .ADD_OVERFLOW (_1, _2);
4694 _4 = REALPART_EXPR <_3>;
4695 _5 = IMAGPART_EXPR <_3>;
4696 _7 = .ADD_OVERFLOW (_4, _6);
4697 _8 = REALPART_EXPR <_7>;
4698 _9 = IMAGPART_EXPR <_7>;
4699 _12 = _10 + _11;
4700 _13 = _12 + _9;
4701 _14 = _13 + _5;
4702 We want to match this when called on
4703 the last stmt as a pair of .UADDC calls,
4704 but without this check we could turn
4705 that prematurely on _13 = _12 + _9;
4706 stmt into .UADDC with 0 carry-in just
4707 on the second .ADD_OVERFLOW call and
4708 another replacing the _12 and _13
4709 additions.  */
4710 ovf_lhs = NULL_TREE;
4711 break;
4715 if (ovf_lhs)
4717 use_operand_p use_p;
4718 imm_use_iterator iter;
4719 tree re_lhs = NULL_TREE;
4720 FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
4722 gimple *use_stmt = USE_STMT (use_p);
4723 if (is_gimple_debug (use_stmt))
4724 continue;
4725 if (use_stmt == im)
4726 continue;
4727 if (!uaddc_is_cplxpart (use_stmt,
4728 REALPART_EXPR))
4730 ovf_lhs = NULL_TREE;
4731 break;
4733 re_lhs = gimple_assign_lhs (use_stmt);
4735 if (ovf_lhs && re_lhs)
4737 FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
4739 gimple *use_stmt = USE_STMT (use_p);
4740 if (is_gimple_debug (use_stmt))
4741 continue;
4742 internal_fn ifn
4743 = gimple_call_internal_fn (ovf);
4744 /* Punt if the __real__ of lhs is used
4745 in the same .*_OVERFLOW call.
4746 Consider:
4747 _3 = .ADD_OVERFLOW (_1, _2);
4748 _4 = REALPART_EXPR <_3>;
4749 _5 = IMAGPART_EXPR <_3>;
4750 _7 = .ADD_OVERFLOW (_4, _6);
4751 _8 = REALPART_EXPR <_7>;
4752 _9 = IMAGPART_EXPR <_7>;
4753 _12 = _10 + _11;
4754 _13 = _12 + _5;
4755 _14 = _13 + _9;
4756 We want to match this when called on
4757 the last stmt as a pair of .UADDC calls,
4758 but without this check we could turn
4759 that prematurely on _13 = _12 + _5;
4760 stmt into .UADDC with 0 carry-in just
4761 on the first .ADD_OVERFLOW call and
4762 another replacing the _12 and _13
4763 additions.  */
4764 if (gimple_call_internal_p (use_stmt, ifn))
4766 ovf_lhs = NULL_TREE;
4767 break;
4773 if ((ovf_lhs
4774 || gimple_call_internal_p (ovf,
4775 code == PLUS_EXPR
4776 ? IFN_UADDC : IFN_USUBC))
4777 && (optab_handler (code == PLUS_EXPR
4778 ? uaddc5_optab : usubc5_optab,
4779 TYPE_MODE (type))
4780 != CODE_FOR_nothing))
4782 /* And in that case build another .UADDC/.USUBC
4783 call for the most significant limb addition.
4784 Overflow bit is ignored here.  */
4785 if (i != 2)
4786 std::swap (rhs[i], rhs[2]);
4787 gimple *g
4788 = gimple_build_call_internal (code == PLUS_EXPR
4789 ? IFN_UADDC
4790 : IFN_USUBC,
4791 3, rhs[0], rhs[1],
4792 rhs[2]);
4793 tree nlhs = make_ssa_name (build_complex_type (type));
4794 gimple_call_set_lhs (g, nlhs);
4795 gsi_insert_before (gsi, g, GSI_SAME_STMT);
4796 tree ilhs = gimple_assign_lhs (stmt);
4797 g = gimple_build_assign (ilhs, REALPART_EXPR,
4798 build1 (REALPART_EXPR,
4799 TREE_TYPE (ilhs),
4800 nlhs));
4801 gsi_replace (gsi, g, true);
4802 /* And if it is initialized from result of __imag__
4803 of .{ADD,SUB}_OVERFLOW call, replace that
4804 call with .U{ADD,SUB}C call with the same arguments,
4805 just 0 added as third argument.  This isn't strictly
4806 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
4807 produce the same result, but may result in better
4808 generated code on some targets where the backend can
4809 better prepare in how the result will be used.  */
4810 if (ovf_lhs)
4812 tree zero = build_zero_cst (type);
4813 g = gimple_build_call_internal (code == PLUS_EXPR
4814 ? IFN_UADDC
4815 : IFN_USUBC,
4816 3, ovf_arg1,
4817 ovf_arg2, zero);
4818 gimple_call_set_lhs (g, ovf_lhs);
4819 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
4820 gsi_replace (&gsi2, g, true);
4822 return true;
4826 return false;
4828 if (code == MINUS_EXPR && !rhs[2])
4829 return false;
4830 if (code == MINUS_EXPR)
4831 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
4832 So, for MINUS_EXPR swap the single added rhs operand (others are
4833 subtracted) to rhs[3].  */
4834 std::swap (rhs[0], rhs[3]);
4836 /* Walk from both operands of STMT (for +/- even sometimes from
4837 all the 4 addends or 3 subtrahends), see through casts and != 0
4838 statements which would preserve [0, 1] range of values and
4839 check which is initialized from __imag__.  */
4840 gimple *im1 = NULL, *im2 = NULL;
4841 for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
4842 if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
4844 gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
4845 im = uaddc_ne0 (im);
4846 if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
4848 if (im1 == NULL)
4850 im1 = im;
4851 if (i != 0)
4852 std::swap (rhs[0], rhs[i]);
4854 else
4856 im2 = im;
4857 if (i != 1)
4858 std::swap (rhs[1], rhs[i]);
4859 break;
4863 /* If we don't find at least two, punt.  */
4864 if (!im2)
4865 return false;
4866 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
4867 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
4868 uaddc5/usubc5 named pattern for the corresponding mode.  */
4869 gimple *ovf1
4870 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
4871 gimple *ovf2
4872 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
4873 internal_fn ifn;
4874 if (!is_gimple_call (ovf1)
4875 || !gimple_call_internal_p (ovf1)
4876 || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
4877 && ifn != IFN_SUB_OVERFLOW)
4878 || !gimple_call_internal_p (ovf2, ifn)
4879 || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
4880 TYPE_MODE (type)) == CODE_FOR_nothing
4881 || (rhs[2]
4882 && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
4883 TYPE_MODE (type)) == CODE_FOR_nothing))
4884 return false;
4885 tree arg1, arg2, arg3 = NULL_TREE;
4886 gimple *re1 = NULL, *re2 = NULL;
4887 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
4888 should be initialized from __real__ of the other of the two calls.
4889 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
4890 second one.  */
4891 for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
4892 for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
4894 tree arg = gimple_call_arg (ovf, i);
4895 if (TREE_CODE (arg) != SSA_NAME)
4896 continue;
4897 re1 = SSA_NAME_DEF_STMT (arg);
4898 if (uaddc_is_cplxpart (re1, REALPART_EXPR)
4899 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
4900 == (ovf == ovf1 ? ovf2 : ovf1)))
4902 if (ovf == ovf1)
4904 /* Make sure ovf2 is the .*_OVERFLOW call with argument
4905 initialized from __real__ of ovf1.  */
4906 std::swap (rhs[0], rhs[1]);
4907 std::swap (im1, im2);
4908 std::swap (ovf1, ovf2);
4910 arg3 = gimple_call_arg (ovf, 1 - i);
4911 i = -1;
4912 break;
4915 if (!arg3)
4916 return false;
4917 arg1 = gimple_call_arg (ovf1, 0);
4918 arg2 = gimple_call_arg (ovf1, 1);
4919 if (!types_compatible_p (type, TREE_TYPE (arg1)))
4920 return false;
/* kind[i] is a bitmask describing arg2 (i == 0) and arg3 (i == 1):
   bit 0 - type compatible with TYPE, bit 1 - known [0, 1] value,
   bit 2 - initialized from __imag__ of an overflow/carry internal
   call (see the loop body below).  */
4921 int kind[2] = { 0, 0 };
4922 tree arg_im[2] = { NULL_TREE, NULL_TREE };
4923 /* At least one of arg2 and arg3 should have type compatible
4924 with arg1/rhs[0], and the other one should have value in [0, 1]
4925 range.  If both are in [0, 1] range and type compatible with
4926 arg1/rhs[0], try harder to find after looking through casts,
4927 != 0 comparisons which one is initialized to __imag__ of
4928 .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results.  */
4929 for (int i = 0; i < 2; ++i)
4931 tree arg = i == 0 ? arg2 : arg3;
4932 if (types_compatible_p (type, TREE_TYPE (arg)))
4933 kind[i] = 1;
4934 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
4935 || (TYPE_PRECISION (TREE_TYPE (arg)) == 1
4936 && !TYPE_UNSIGNED (TREE_TYPE (arg))))
4937 continue;
4938 if (tree_zero_one_valued_p (arg))
4939 kind[i] |= 2;
4940 if (TREE_CODE (arg) == SSA_NAME)
4942 gimple *g = SSA_NAME_DEF_STMT (arg);
4943 if (gimple_assign_cast_p (g))
4945 tree op = gimple_assign_rhs1 (g);
4946 if (TREE_CODE (op) == SSA_NAME
4947 && INTEGRAL_TYPE_P (TREE_TYPE (op)))
4948 g = SSA_NAME_DEF_STMT (op);
4950 g = uaddc_ne0 (g);
4951 if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
4952 continue;
4953 arg_im[i] = gimple_assign_lhs (g);
4954 g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
4955 if (!is_gimple_call (g) || !gimple_call_internal_p (g))
4956 continue;
4957 switch (gimple_call_internal_fn (g))
4959 case IFN_ADD_OVERFLOW:
4960 case IFN_SUB_OVERFLOW:
4961 case IFN_UADDC:
4962 case IFN_USUBC:
4963 break;
4964 default:
4965 continue;
4967 kind[i] |= 4;
4970 /* Make arg2 the one with compatible type and arg3 the one
4971 with [0, 1] range.  If both is true for both operands,
4972 prefer as arg3 result of __imag__ of some ifn.  */
4973 if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
4975 std::swap (arg2, arg3);
4976 std::swap (kind[0], kind[1]);
4977 std::swap (arg_im[0], arg_im[1]);
4979 if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
4980 return false;
4981 if (!has_single_use (gimple_assign_lhs (im1))
4982 || !has_single_use (gimple_assign_lhs (im2))
4983 || !has_single_use (gimple_assign_lhs (re1))
4984 || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
4985 return false;
4986 /* Check that ovf2's result is used in __real__ and set re2
4987 to that statement.  */
4988 use_operand_p use_p;
4989 imm_use_iterator iter;
4990 tree lhs = gimple_call_lhs (ovf2);
4991 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4993 gimple *use_stmt = USE_STMT (use_p);
4994 if (is_gimple_debug (use_stmt))
4995 continue;
4996 if (use_stmt == im2)
4997 continue;
4998 if (re2)
4999 return false;
5000 if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
5001 return false;
5002 re2 = use_stmt;
5004 /* Build .UADDC/.USUBC call which will be placed before the stmt.  */
5005 gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
5006 gimple *g;
5007 if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
5008 arg3 = arg_im[1];
5009 if ((kind[1] & 1) == 0)
5011 if (TREE_CODE (arg3) == INTEGER_CST)
5012 arg3 = fold_convert (type, arg3);
5013 else
5015 g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
5016 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5017 arg3 = gimple_assign_lhs (g);
5020 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5021 ? IFN_UADDC : IFN_USUBC,
5022 3, arg1, arg2, arg3);
5023 tree nlhs = make_ssa_name (TREE_TYPE (lhs));
5024 gimple_call_set_lhs (g, nlhs);
5025 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5026 /* In the case where stmt is | or ^ of two overflow flags
5027 or addition of those, replace stmt with __imag__ of the above
5028 added call.  In case of arg1 + arg2 + (ovf1 + ovf2) or
5029 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt.  */
5030 tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
5031 g = gimple_build_assign (ilhs, IMAGPART_EXPR,
5032 build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
5033 if (rhs[2])
5034 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5035 else
5036 gsi_replace (gsi, g, true);
5037 /* Remove some statements which can't be kept in the IL because they
5038 use SSA_NAME whose setter is going to be removed too.  */
5039 tree rhs1 = rhs[1];
5040 for (int i = 0; i < 2; i++)
5041 if (rhs1 == gimple_assign_lhs (im2))
5042 break;
5043 else
5045 g = SSA_NAME_DEF_STMT (rhs1);
5046 rhs1 = gimple_assign_rhs1 (g);
5047 gsi2 = gsi_for_stmt (g);
5048 gsi_remove (&gsi2, true);
5050 gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
5051 gsi2 = gsi_for_stmt (im2);
5052 gsi_remove (&gsi2, true);
5053 /* Replace the re2 statement with __real__ of the newly added
5054 .UADDC/.USUBC call.  */
5055 if (re2)
5057 gsi2 = gsi_for_stmt (re2);
5058 tree rlhs = gimple_assign_lhs (re2);
5059 g = gimple_build_assign (rlhs, REALPART_EXPR,
5060 build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
5061 gsi_replace (&gsi2, g, true);
5063 if (rhs[2])
5065 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5066 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5067 replace stmt with __real__ of another .UADDC/.USUBC call which
5068 handles the most significant limb.  Overflow flag from this is
5069 ignored.  */
5070 g = gimple_build_call_internal (code == PLUS_EXPR
5071 ? IFN_UADDC : IFN_USUBC,
5072 3, rhs[3], rhs[2], ilhs);
5073 nlhs = make_ssa_name (TREE_TYPE (lhs));
5074 gimple_call_set_lhs (g, nlhs);
5075 gsi_insert_before (gsi, g, GSI_SAME_STMT);
5076 ilhs = gimple_assign_lhs (stmt);
5077 g = gimple_build_assign (ilhs, REALPART_EXPR,
5078 build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
5079 gsi_replace (gsi, g, true);
5081 if (TREE_CODE (arg3) == SSA_NAME)
5083 /* When pattern recognizing the second least significant limb
5084 above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5085 check if the [0, 1] range argument (i.e. carry in) isn't the
5086 result of another .{ADD,SUB}_OVERFLOW call (one handling the
5087 least significant limb).  Again look through casts and != 0.  */
5088 gimple *im3 = SSA_NAME_DEF_STMT (arg3);
5089 for (int i = 0; i < 2; ++i)
5091 gimple *im4 = uaddc_cast (im3);
5092 if (im4 == im3)
5093 break;
5094 else
5095 im3 = im4;
5097 im3 = uaddc_ne0 (im3);
5098 if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
5100 gimple *ovf3
5101 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
5102 if (gimple_call_internal_p (ovf3, ifn))
5104 lhs = gimple_call_lhs (ovf3);
5105 arg1 = gimple_call_arg (ovf3, 0);
5106 arg2 = gimple_call_arg (ovf3, 1);
5107 if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
5108 && types_compatible_p (type, TREE_TYPE (arg1))
5109 && types_compatible_p (type, TREE_TYPE (arg2)))
5111 /* And if it is initialized from result of __imag__
5112 of .{ADD,SUB}_OVERFLOW call, replace that
5113 call with .U{ADD,SUB}C call with the same arguments,
5114 just 0 added as third argument.  This isn't strictly
5115 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5116 produce the same result, but may result in better
5117 generated code on some targets where the backend can
5118 better prepare in how the result will be used.  */
5119 g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
5120 ? IFN_UADDC : IFN_USUBC,
5121 3, arg1, arg2,
5122 build_zero_cst (type));
5123 gimple_call_set_lhs (g, lhs);
5124 gsi2 = gsi_for_stmt (ovf3);
5125 gsi_replace (&gsi2, g, true);
5130 return true;
5133 /* Return true if target has support for divmod.  */
5135 static bool
5136 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
5138 /* If target supports hardware divmod insn, use it for divmod.  */
5139 if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
5140 return true;
5142 /* Check if libfunc for divmod is available.  */
5143 rtx libfunc = optab_libfunc (divmod_optab, mode);
5144 if (libfunc != NULL_RTX)
5146 /* If optab_handler exists for div_optab, perhaps in a wider mode,
5147 we don't want to use the libfunc even if it exists for given mode.  */
5148 machine_mode div_mode;
5149 FOR_EACH_MODE_FROM (div_mode, mode)
5150 if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
5151 return false;
/* A divmod libfunc is only usable if the target also provides the
   expand_divmod_libfunc hook to expand the call.  */
5153 return targetm.expand_divmod_libfunc != NULL;
5156 return false;
5159 /* Check if stmt is candidate for divmod transform.  */
5161 static bool
5162 divmod_candidate_p (gassign *stmt)
5164 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
5165 machine_mode mode = TYPE_MODE (type);
5166 optab divmod_optab, div_optab;
/* Pick the signed or unsigned variants of the divmod/div optabs to
   match the type of the division.  */
5168 if (TYPE_UNSIGNED (type))
5170 divmod_optab = udivmod_optab;
5171 div_optab = udiv_optab;
5173 else
5175 divmod_optab = sdivmod_optab;
5176 div_optab = sdiv_optab;
5179 tree op1 = gimple_assign_rhs1 (stmt);
5180 tree op2 = gimple_assign_rhs2 (stmt);
5182 /* Disable the transform if either is a constant, since division-by-constant
5183 may have specialized expansion.  */
5184 if (CONSTANT_CLASS_P (op1))
5185 return false;
5187 if (CONSTANT_CLASS_P (op2))
5189 if (integer_pow2p (op2))
5190 return false;
5192 if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
5193 && element_precision (type) <= BITS_PER_WORD)
5194 return false;
5196 /* If the divisor is not power of 2 and the precision wider than
5197 HWI, expand_divmod punts on that, so in that case it is better
5198 to use divmod optab or libfunc.  Similarly if choose_multiplier
5199 might need pre/post shifts of BITS_PER_WORD or more.  */
5202 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
5203 expand using the [su]divv optabs.  */
5204 if (TYPE_OVERFLOW_TRAPS (type))
5205 return false;
5207 if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
5208 return false;
5210 return true;
5213 /* This function looks for:
5214 t1 = a TRUNC_DIV_EXPR b;
5215 t2 = a TRUNC_MOD_EXPR b;
5216 and transforms it to the following sequence:
5217 complex_tmp = DIVMOD (a, b);
5218 t1 = REALPART_EXPR(complex_tmp);
5219 t2 = IMAGPART_EXPR(complex_tmp);
5220 For conditions enabling the transform see divmod_candidate_p().
5222 The pass has three parts:
5223 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
5224 other trunc_div_expr and trunc_mod_expr stmts.
5225 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
5226 to stmts vector.
5227 3) Insert DIVMOD call just before top_stmt and update entries in
5228 stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
5229 IMAGPART_EXPR for mod).  */
5231 static bool
5232 convert_to_divmod (gassign *stmt)
5234 if (stmt_can_throw_internal (cfun, stmt)
5235 || !divmod_candidate_p (stmt))
5236 return false;
5238 tree op1 = gimple_assign_rhs1 (stmt);
5239 tree op2 = gimple_assign_rhs2 (stmt);
5241 imm_use_iterator use_iter;
5242 gimple *use_stmt;
5243 auto_vec<gimple *> stmts;
5245 gimple *top_stmt = stmt;
5246 basic_block top_bb = gimple_bb (stmt);
5248 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
5249 at-least stmt and possibly other trunc_div/trunc_mod stmts
5250 having same operands as stmt.  */
5252 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
5254 if (is_gimple_assign (use_stmt)
5255 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5256 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5257 && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
5258 && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
5260 if (stmt_can_throw_internal (cfun, use_stmt))
5261 continue;
5263 basic_block bb = gimple_bb (use_stmt);
/* Within the same bb, gimple_uid orders statements; across bbs use
   the dominator tree to hoist top_stmt as far up as possible.  */
5265 if (bb == top_bb)
5267 if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
5268 top_stmt = use_stmt;
5270 else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
5272 top_bb = bb;
5273 top_stmt = use_stmt;
5278 tree top_op1 = gimple_assign_rhs1 (top_stmt);
5279 tree top_op2 = gimple_assign_rhs2 (top_stmt);
5281 stmts.safe_push (top_stmt);
5282 bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
5284 /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
5285 to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
5286 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5287 2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
5289 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
5291 if (is_gimple_assign (use_stmt)
5292 && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
5293 || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
5294 && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
5295 && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
5297 if (use_stmt == top_stmt
5298 || stmt_can_throw_internal (cfun, use_stmt)
5299 || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
5300 continue;
5302 stmts.safe_push (use_stmt);
5303 if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
5304 div_seen = true;
/* Punt unless at least one division was collected; a mod-only set
   gains nothing from a DIVMOD call.  */
5308 if (!div_seen)
5309 return false;
5311 /* Part 3: Create libcall to internal fn DIVMOD:
5312 divmod_tmp = DIVMOD (op1, op2).  */
5314 gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
5315 tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
5316 call_stmt, "divmod_tmp");
5317 gimple_call_set_lhs (call_stmt, res);
5318 /* We rejected throwing statements above.  */
5319 gimple_call_set_nothrow (call_stmt, true);
5321 /* Insert the call before top_stmt.  */
5322 gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
5323 gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
5325 widen_mul_stats.divmod_calls_inserted++;
5327 /* Update all statements in stmts vector:
5328 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5329 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
5331 for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
5333 tree new_rhs;
5335 switch (gimple_assign_rhs_code (use_stmt))
5337 case TRUNC_DIV_EXPR:
5338 new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
5339 break;
5341 case TRUNC_MOD_EXPR:
5342 new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
5343 break;
5345 default:
5346 gcc_unreachable ();
5349 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
5350 gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
5351 update_stmt (use_stmt);
5354 return true;
5357 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
5358 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
5359 value is true iff we converted the statement. */
5361 static bool
5362 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
5364 tree lhs = gimple_assign_lhs (stmt);
5365 tree stype = TREE_TYPE (lhs);
5366 tree sarg0 = gimple_assign_rhs1 (stmt);
5367 tree sarg1 = gimple_assign_rhs2 (stmt);
5369 if (TREE_CODE (stype) != INTEGER_TYPE
5370 || TREE_CODE (sarg1) != INTEGER_CST
5371 || TREE_CODE (sarg0) != SSA_NAME
5372 || !tree_fits_uhwi_p (sarg1)
5373 || !has_single_use (sarg0))
5374 return false;
5376 gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
5377 if (!def)
5378 return false;
5380 enum tree_code mcode = gimple_assign_rhs_code (def);
5381 if (mcode == NOP_EXPR)
5383 tree tmp = gimple_assign_rhs1 (def);
5384 if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
5385 return false;
5386 def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
5387 if (!def)
5388 return false;
5389 mcode = gimple_assign_rhs_code (def);
5392 if (mcode != WIDEN_MULT_EXPR
5393 || gimple_bb (def) != gimple_bb (stmt))
5394 return false;
5395 tree mtype = TREE_TYPE (gimple_assign_lhs (def));
5396 if (TREE_CODE (mtype) != INTEGER_TYPE
5397 || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
5398 return false;
5400 tree mop1 = gimple_assign_rhs1 (def);
5401 tree mop2 = gimple_assign_rhs2 (def);
5402 tree optype = TREE_TYPE (mop1);
5403 bool unsignedp = TYPE_UNSIGNED (optype);
5404 unsigned int prec = TYPE_PRECISION (optype);
5406 if (unsignedp != TYPE_UNSIGNED (mtype)
5407 || TYPE_PRECISION (mtype) != 2 * prec)
5408 return false;
5410 unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
5411 if (bits < prec || bits >= 2 * prec)
5412 return false;
5414 /* For the time being, require operands to have the same sign. */
5415 if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
5416 return false;
5418 machine_mode mode = TYPE_MODE (optype);
5419 optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
5420 if (optab_handler (tab, mode) == CODE_FOR_nothing)
5421 return false;
5423 location_t loc = gimple_location (stmt);
5424 tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
5425 MULT_HIGHPART_EXPR, mop1, mop2);
5426 tree highpart2 = highpart1;
5427 tree ntype = optype;
5429 if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
5431 ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
5432 : signed_type_for (optype);
5433 highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
5435 if (bits > prec)
5436 highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
5437 RSHIFT_EXPR, highpart2,
5438 build_int_cst (ntype, bits - prec));
5440 gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
5441 gsi_replace (gsi, new_stmt, true);
5443 widen_mul_stats.highpart_mults_inserted++;
5444 return true;
/* If target has spaceship<MODE>3 expander, pattern recognize
   <bb 2> [local count: 1073741824]:
   if (a_2(D) == b_3(D))
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (a_2(D) < b_3(D))
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (a_2(D) > b_3(D))
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   and turn it into:
   <bb 2> [local count: 1073741824]:
   _1 = .SPACESHIP (a_2(D), b_3(D));
   if (_1 == 0)
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (_1 == -1)
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (_1 == 1)
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   so that the backend can emit optimal comparison and
   conditional jump sequence.  */

static void
optimize_spaceship (gcond *stmt)
{
  /* STMT must be the first, equality, comparison of the chain.  */
  enum tree_code code = gimple_cond_code (stmt);
  if (code != EQ_EXPR && code != NE_EXPR)
    return;
  tree arg1 = gimple_cond_lhs (stmt);
  tree arg2 = gimple_cond_rhs (stmt);
  /* Only scalar floating point comparisons of distinct operands, and
     only when the target provides a spaceship expander for the mode.  */
  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
      || optab_handler (spaceship_optab,
			TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
      || operand_equal_p (arg1, arg2, 0))
    return;

  basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
  edge em1 = NULL, e1 = NULL, e2 = NULL;
  /* BB1 is the successor taken when ARG1 and ARG2 compare unequal.  */
  bb1 = EDGE_SUCC (bb0, 1)->dest;
  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
    bb1 = EDGE_SUCC (bb0, 0)->dest;

  /* The second block must contain nothing but a relational comparison
     of the same two operands, in either order.  */
  gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
  if (g == NULL
      || !single_pred_p (bb1)
      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
      || !cond_only_block_p (bb1))
    return;

  /* Canonicalize the second comparison to LT/GT of ARG1 against ARG2,
     flipping the code when the operands appear swapped.  */
  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
			  ? LT_EXPR : GT_EXPR);
  switch (gimple_cond_code (g))
    {
    case LT_EXPR:
    case LE_EXPR:
      break;
    case GT_EXPR:
    case GE_EXPR:
      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
      break;
    default:
      return;
    }

  /* Look for an optional third comparison block BB2 on either successor
     edge of BB1.  */
  for (int i = 0; i < 2; ++i)
    {
      /* With NaNs, </<=/>/>= are false, so we need to look for the
	 third comparison on the false edge from whatever non-equality
	 comparison the second comparison is.  */
      if (HONOR_NANS (TREE_TYPE (arg1))
	  && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
	continue;

      bb2 = EDGE_SUCC (bb1, i)->dest;
      g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
      if (g == NULL
	  || !single_pred_p (bb2)
	  || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
	      ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
	      : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
		 || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
	  || !cond_only_block_p (bb2)
	  || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
	continue;

      /* Canonicalize the third comparison the same way as the second.  */
      enum tree_code ccode2
	= (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
      switch (gimple_cond_code (g))
	{
	case LT_EXPR:
	case LE_EXPR:
	  break;
	case GT_EXPR:
	case GE_EXPR:
	  ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
	  break;
	default:
	  continue;
	}
      if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
	continue;

      /* Classify the outgoing edges: EM1 is taken when the result is -1,
	 E1 when it is 1, E2 otherwise.  */
      if ((ccode == LT_EXPR)
	  ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
	{
	  em1 = EDGE_SUCC (bb1, 1 - i);
	  e1 = EDGE_SUCC (bb2, 0);
	  e2 = EDGE_SUCC (bb2, 1);
	  if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
	    std::swap (e1, e2);
	}
      else
	{
	  e1 = EDGE_SUCC (bb1, 1 - i);
	  em1 = EDGE_SUCC (bb2, 0);
	  e2 = EDGE_SUCC (bb2, 1);
	  if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
	    std::swap (em1, e2);
	}
      break;
    }

  /* No third comparison block was found; derive the edge roles from the
     second comparison alone.  */
  if (em1 == NULL)
    {
      if ((ccode == LT_EXPR)
	  ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
	{
	  em1 = EDGE_SUCC (bb1, 1);
	  e1 = EDGE_SUCC (bb1, 0);
	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
	}
      else
	{
	  em1 = EDGE_SUCC (bb1, 0);
	  e1 = EDGE_SUCC (bb1, 1);
	  e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
	}
    }

  /* Emit _1 = .SPACESHIP (ARG1, ARG2) before the equality comparison
     and rewrite that comparison to test _1 against zero.  */
  gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
  tree lhs = make_ssa_name (integer_type_node);
  gimple_call_set_lhs (gc, lhs);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_insert_before (&gsi, gc, GSI_SAME_STMT);

  gimple_cond_set_lhs (stmt, lhs);
  gimple_cond_set_rhs (stmt, integer_zero_node);
  update_stmt (stmt);

  /* Rewrite the second comparison into a test of _1 against -1 or 1,
     depending on which role edge leaves BB1.  */
  gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
  gimple_cond_set_lhs (cond, lhs);
  if (em1->src == bb1 && e2 != em1)
    {
      gimple_cond_set_rhs (cond, integer_minus_one_node);
      gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
				  ? EQ_EXPR : NE_EXPR);
    }
  else
    {
      gcc_assert (e1->src == bb1 && e2 != e1);
      gimple_cond_set_rhs (cond, integer_one_node);
      gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
				  ? EQ_EXPR : NE_EXPR);
    }
  update_stmt (cond);

  /* Likewise for the third comparison, when one exists.  */
  if (e2 != e1 && e2 != em1)
    {
      cond = as_a <gcond *> (*gsi_last_bb (bb2));
      gimple_cond_set_lhs (cond, lhs);
      if (em1->src == bb2)
	gimple_cond_set_rhs (cond, integer_minus_one_node);
      else
	{
	  gcc_assert (e1->src == bb2);
	  gimple_cond_set_rhs (cond, integer_one_node);
	}
      gimple_cond_set_code (cond,
			    (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
      update_stmt (cond);
    }

  /* Record that the .SPACESHIP result is always in [-1, 2].  */
  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
  wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
  value_range vr (TREE_TYPE (lhs), wm1, w2);
  set_range_info (lhs, vr);
}
5665 /* Find integer multiplications where the operands are extended from
5666 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
5667 or MULT_HIGHPART_EXPR where appropriate. */
5669 namespace {
/* Pass descriptor for the widening_mul GIMPLE pass.  */

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
/* The widening_mul pass: convert multiplications into widening
   multiplies, FMAs, highpart multiplies, divmod calls etc. (see the
   per-transformation helpers above).  */

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when optimizing and -fexpensive-optimizations is on.  */
  bool gate (function *) final override
    {
      return flag_expensive_optimizations && optimize;
    }

  unsigned int execute (function *) final override;

}; // class pass_optimize_widening_mul
/* Walker class to perform the transformation in reverse dominance order. */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */
  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  void after_dom_children (basic_block) final override;

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
/* Process all statements of BB, trying each of this file's pattern
   matchers on the assignments, calls and final condition.  Called by
   the dom_walker framework after BB's dominated children have been
   visited (see the class comment above: reverse dominance order).  */

void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* FMA deferring is only active when a maximum FMA width is imposed
     via --param avoid-fma-max-bits.  */
  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      /* Try widening multiply, copysign expansion, then FMA;
		 a successful FMA conversion consumes STMT, so skip the
		 gsi_next below via continue.  */
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      break;

	    case PLUS_EXPR:
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		{
		  match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
		  /* Only try uaddc/usubc if match_arith_overflow left
		     STMT in place.  */
		  if (gsi_stmt (gsi) == stmt)
		    match_uaddc_usubc (&gsi, stmt, code);
		}
	      break;

	    case BIT_NOT_EXPR:
	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
		continue;
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a<gassign *> (stmt));
	      break;

	    case RSHIFT_EXPR:
	      convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
	      break;

	    case BIT_IOR_EXPR:
	    case BIT_XOR_EXPR:
	      match_uaddc_usubc (&gsi, stmt, code);
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  switch (gimple_call_combined_fn (stmt))
	    {
	    CASE_CFN_POW:
	      /* pow (x, 2.0) is a squaring and hence an FMA candidate
		 with both multiplicands equal to x.  */
	      if (gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				 &dconst2)
		  && convert_mult_to_fma (stmt,
					  gimple_call_arg (stmt, 0),
					  gimple_call_arg (stmt, 0),
					  &fma_state))
		{
		  unlink_stmt_vdef (stmt);
		  if (gsi_remove (&gsi, true)
		      && gimple_purge_dead_eh_edges (bb))
		    *m_cfg_changed_p = true;
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_MUL:
	      /* Conditional multiply: arg 0 is the mask, args 1 and 2
		 the multiplicands.  */
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_LEN_MUL:
	      /* Length-masked conditional multiply: args 4 and 5 carry
		 the len/bias operands.  */
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0),
				       gimple_call_arg (stmt, 4),
				       gimple_call_arg (stmt, 5)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_LAST:
	      /* A non-combined call may clobber state the deferred FMA
		 decision depends on; give up on deferring.  */
	      cancel_fma_deferring (&fma_state);
	      break;

	    default:
	      break;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_COND)
	optimize_spaceship (as_a <gcond *> (stmt));
      gsi_next (&gsi);
    }
  /* Resolve any FMA conversion still deferred at the end of the block.  */
  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}
5867 unsigned int
5868 pass_optimize_widening_mul::execute (function *fun)
5870 bool cfg_changed = false;
5872 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5873 calculate_dominance_info (CDI_DOMINATORS);
5874 renumber_gimple_stmt_uids (cfun);
5876 math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5878 statistics_counter_event (fun, "widening multiplications inserted",
5879 widen_mul_stats.widen_mults_inserted);
5880 statistics_counter_event (fun, "widening maccs inserted",
5881 widen_mul_stats.maccs_inserted);
5882 statistics_counter_event (fun, "fused multiply-adds inserted",
5883 widen_mul_stats.fmas_inserted);
5884 statistics_counter_event (fun, "divmod calls inserted",
5885 widen_mul_stats.divmod_calls_inserted);
5886 statistics_counter_event (fun, "highpart multiplications inserted",
5887 widen_mul_stats.highpart_mults_inserted);
5889 return cfg_changed ? TODO_cleanup_cfg : 0;
5892 } // anon namespace
/* Factory function for the widening_mul pass, called by the pass
   manager.  */

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}